/* This file is part of the Joshua Machine Translation System.
 *
 * Joshua is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA
 */
/*
 * This file uses code from the edu.umd.clip.mt.subsample.Subsampler
 * class from the University of Maryland's jmtTools project (in
 * conjunction with the umd-hadoop-mt-0.01 project). That project
 * is released under the terms of the Apache License 2.0, but with
 * special permission for the Joshua Machine Translation System to
 * release modifications under the LGPL version 2.1. LGPL version
 * 3 requires no special permission since it is compatible with
 * Apache License 2.0
 */
package joshua.subsample;

import java.io.IOException;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

/**
 * This class defines a callback closure to allow "overriding" the
 * main function in subclasses of {@link Subsampler}, without
 * duplicating code. For all subclasses, CLI <code>Options</code>
 * should be members of the class (so they're visible to
 * <code>runSubsampler</code> as well as <code>getCliOptions</code>),
 * the <code>getCliOptions</code> method should be overridden to
 * add the additional options (via <code>super</code> to keep the
 * old options), and the <code>runSubsampler</code> method should
 * be overridden to do the primary work for main. The <code>runMain</code>
 * method ties everything together and should not need modification.
 * Due to the one-use nature of subclasses of <code>SubsamplerCLI</code>,
 * they generally should be implemented as anonymous local classes.
 *
 * <p>Option values should be read through {@link #getOptionValue(Option)}
 * and {@link #getOptionValues(Option)}: these consult the command
 * line produced by the parser first and only then fall back to the
 * state stored on the <code>Option</code> object itself.
 *
 * @author wren ng thornton <wren@users.sourceforge.net>
 * @version $LastChangedDate: 2009-06-03 22:58:12 -0500 (Wed, 03 Jun 2009) $
 */
public class SubsamplerCLI {

    /** Ratio handed to <code>runSubsampler</code> when <code>-ratio</code> is absent. */
    private static final float DEFAULT_RATIO = 0.8f;

    /** The <code>maxN</code> argument that <code>runMain</code> passes to <code>runSubsampler</code>. */
    private static final int MAX_N = 12;

    /** The <code>targetCount</code> argument that <code>runMain</code> passes to <code>runSubsampler</code>. */
    private static final int TARGET_COUNT = 20;

    /** Required <code>-training</code> option. */
    protected final Option ot = buildOption(
        "training", "listfile",
        "A file containing a list of training file basenames (what to sample from)",
        true, false);

    /** Required <code>-test</code> option; accepts several values. */
    protected final Option otest = buildOption(
        "test", "file",
        "The test file (what to sample for)",
        true, true);

    /**
     * Required <code>-output</code> option. It is declared as accepting
     * several values, but <code>runSubsampler</code> only uses the first.
     */
    protected final Option ooutput = buildOption(
        "output", "basename",
        "File basename for output training corpus",
        true, true);

    /** Required <code>-f</code> option. */
    protected final Option of = buildOption(
        "f", "lang",
        "Foreign language extension",
        true, false);

    /** Required <code>-e</code> option. */
    protected final Option oe = buildOption(
        "e", "lang",
        "Native language extension",
        true, false);

    /** Optional <code>-fpath</code> option. */
    protected final Option ofpath = buildOption(
        "fpath", "path",
        "Directory containing foreign language files",
        false, false);

    /** Optional <code>-epath</code> option. */
    protected final Option oepath = buildOption(
        "epath", "path",
        "Directory containing native language files",
        false, false);

    /** Optional <code>-ratio</code> option. */
    protected final Option oratio = buildOption(
        "ratio", "ratio",
        "Target F/E ratio",
        false, false);

    /**
     * Result of the most recent successful parse performed by
     * <code>runMain</code>, or <code>null</code> if none has happened yet.
     */
    private CommandLine commandLine;

    /**
     * Builds one option that takes an argument. The static
     * <code>OptionBuilder</code> methods are invoked statically, one
     * statement at a time, instead of being chained through instance
     * references; this avoids the "static-access" warning.
     *
     * @param name           option name as typed on the command line
     * @param argName        placeholder shown for the argument in help output
     * @param description    help text
     * @param required       whether the option is marked as required
     * @param multipleValues <code>true</code> to use <code>hasArgs()</code>,
     *                       <code>false</code> to use <code>hasArg()</code>
     * @return the newly created option
     */
    private static Option buildOption(
        String name,
        String argName,
        String description,
        boolean required,
        boolean multipleValues
    ) {
        OptionBuilder.withArgName(argName);
        if (multipleValues) {
            OptionBuilder.hasArgs();
        } else {
            OptionBuilder.hasArg();
        }
        OptionBuilder.withDescription(description);
        if (required) {
            OptionBuilder.isRequired();
        }
        return OptionBuilder.create(name);
    }

    /**
     * Return all Options. The HelpFormatter will print them
     * in sorted order, so it doesn't matter when we add them.
     * Subclasses should override this method by adding more
     * options.
     *
     * @return a fresh <code>Options</code> holding every option field of this class
     */
    public Options getCliOptions() {
        Options options = new Options();
        options.addOption(ot);
        options.addOption(otest);
        options.addOption(of);
        options.addOption(oe);
        options.addOption(ofpath);
        options.addOption(oepath);
        options.addOption(oratio);
        options.addOption(ooutput);
        return options;
    }

    /**
     * This method should be overridden to return the class
     * used in runSubsampler.
     *
     * @return the name printed as the command in the usage message
     */
    public String getClassName() {
        return Subsampler.class.getName();
    }

    /**
     * Looks up the first value given for an option. The parsed
     * command line is consulted first; if it holds no value (or
     * nothing has been parsed yet) the value stored on the
     * <code>Option</code> object is returned instead.
     *
     * <p>NOTE(review): the fallback exists because some Commons CLI
     * releases appear to record values on copies of the
     * <code>Option</code> objects rather than on the instances held
     * here; confirm against the version on the classpath.
     *
     * @param option the option to look up
     * @return the first value, or <code>null</code> if neither source has one
     */
    protected String getOptionValue(Option option) {
        String value = null;
        if (this.commandLine != null) {
            value = this.commandLine.getOptionValue(option.getOpt());
        }
        return (value != null) ? value : option.getValue();
    }

    /**
     * Looks up every value given for an option, using the same
     * lookup order as {@link #getOptionValue(Option)}.
     *
     * @param option the option to look up
     * @return the values, or whatever <code>option.getValues()</code>
     *         yields when the parsed command line has none
     */
    protected String[] getOptionValues(Option option) {
        String[] values = null;
        if (this.commandLine != null) {
            values = this.commandLine.getOptionValues(option.getOpt());
        }
        return (values != null) ? values : option.getValues();
    }

    /**
     * Callback to run the subsampler. This function needs
     * access to the variables holding each Option, thus all
     * this closure nonsense.
     *
     * @param testFiles   passed to the <code>Subsampler</code> constructor;
     *                    <code>runMain</code> supplies the values of <code>-test</code>
     * @param maxN        passed to the <code>Subsampler</code> constructor
     * @param targetCount passed to the <code>Subsampler</code> constructor
     * @param ratio       passed to <code>subsample</code>; <code>runMain</code>
     *                    supplies the value of <code>-ratio</code> or a default
     * @throws IOException declared because the subsampling call may throw it
     */
    public void runSubsampler(String[] testFiles, int maxN, int targetCount, float ratio)
    throws IOException {
        Subsampler subsampler = new Subsampler(testFiles, maxN, targetCount);
        subsampler.subsample(
            getOptionValue(ot),
            ratio,
            getOptionValue(of),
            getOptionValue(oe),
            getOptionValue(ofpath),
            getOptionValue(oepath),
            getOptionValue(ooutput)
        );
    }

    /**
     * Non-static version of main so that we can define anonymous
     * local classes to override or extend the above.
     *
     * <p>Parses <code>args</code>, keeps the resulting command line for
     * the option look-ups, and calls <code>runSubsampler</code>. On a
     * parse error a short message and the usage text are printed; on
     * any other exception the stack trace is printed. Both cases end
     * with <code>System.exit(1)</code>.
     *
     * @param args the raw command-line arguments
     */
    public void runMain(String[] args) {
        Options options = this.getCliOptions();
        try {
            // Keep the parser's result instead of discarding it, so that
            // values can be read from it rather than only from Option state.
            this.commandLine = new GnuParser().parse(options, args);
        } catch (ParseException pe) {
            // The message from pe is ugly, so we omit it.
            System.err.println("Error parsing command line");
            new HelpFormatter().printHelp(this.getClassName(), options);
            System.exit(1);
        }

        try {
            float ratio = DEFAULT_RATIO;
            String ratioText = this.getOptionValue(this.oratio);
            if (ratioText != null) {
                ratio = Float.parseFloat(ratioText);
            }
            this.runSubsampler(
                this.getOptionValues(this.otest), MAX_N, TARGET_COUNT, ratio);
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
}