package edu.stanford.nlp.international.arabic.parsesegment;

import edu.stanford.nlp.util.logging.Redwood;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.util.Date;
import java.util.Map;
import java.util.Properties;
import java.util.zip.GZIPInputStream;

import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;

/**
 * Command-line entry point that configures a {@link JointParsingModel} from
 * program arguments and runs it on a training treebank, an optional test
 * treebank, and lattice input read from standard input.
 *
 * @author Spence Green
 */
public final class JointParser {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(JointParser.class);

  /** Not instantiable: this class only hosts {@link #main(String[])}. */
  private JointParser() {}

  /** Number of positional (non-option) arguments required: the training file. */
  private final static int MIN_ARGS = 1;

  /**
   * Builds the usage message shown when the command line is invalid.
   *
   * @return the command synopsis followed by one line per supported option
   */
  private static String usage() {
    String cmdLineUsage = String.format("Usage: java %s [OPTS] trainFile < lattice_file > trees%n",
        JointParser.class.getName());
    StringBuilder classUsage = new StringBuilder(cmdLineUsage);
    String nl = System.getProperty("line.separator");
    classUsage.append(" -v : Verbose output").append(nl);
    classUsage.append(" -t file : Test on input trees").append(nl);
    classUsage.append(" -l num : Max (gold) sentence length to evaluate (in interstices)").append(nl);
    classUsage.append(" -o : Input is a serialized list of lattices").append(nl);
    return classUsage.toString();
  }

  /**
   * Describes the supported option flags for the argument parser.
   *
   * @return a map from option name to the number of values that option takes
   */
  private static Map<String, Integer> optionArgDefs() {
    Map<String, Integer> optionArgDefs = Generics.newHashMap();
    optionArgDefs.put("v", 0);
    optionArgDefs.put("t", 1);
    optionArgDefs.put("l", 1);
    optionArgDefs.put("o", 0);
    return optionArgDefs;
  }

  /**
   * Selects the stream the parsing model reads its input from.
   * <p>
   * The source is {@code System.in}, unless the system property
   * {@code eclipse} is set, in which case the file {@code debug.2.xml} is read
   * instead (a debugging aid). When {@code serInput} is true the source is
   * wrapped in a {@link GZIPInputStream} and an {@link ObjectInputStream}.
   *
   * @param serInput whether the input is a gzipped, serialized object stream
   * @return the stream to hand to the parsing model
   * @throws IOException if the debug file cannot be opened (a
   *         {@link FileNotFoundException}) or the gzip / object stream header
   *         cannot be read
   */
  private static InputStream openInput(boolean serInput) throws IOException {
    //WSGDEBUG -- Some stuff for eclipse debugging
    InputStream source = (System.getProperty("eclipse") == null)
        ? System.in
        : new FileInputStream(new File("debug.2.xml"));
    return serInput ? new ObjectInputStream(new GZIPInputStream(source)) : source;
  }

  /**
   * Parses the command line, runs the joint segmentation / parsing model and
   * reports the start time, completion time and elapsed time on stderr.
   * <p>
   * Prints the usage message and exits with status -1 when the arguments are
   * invalid, including when the required {@code trainFile} argument is absent.
   * Also exits with status -1 when the input stream cannot be opened.
   *
   * @param args option flags (see {@link #usage()}) followed by the training treebank path
   */
  public static void main(String[] args) {
    if (args.length < MIN_ARGS) {
      log.info(usage());
      System.exit(-1);
    }

    Properties options = StringUtils.argsToProperties(args, optionArgDefs());
    boolean verbose = PropertiesUtils.getBool(options, "v", false);
    File testTreebank = options.containsKey("t") ? new File(options.getProperty("t")) : null;
    int maxGoldSentLen = PropertiesUtils.getInt(options, "l", Integer.MAX_VALUE);
    boolean serInput = PropertiesUtils.getBool(options, "o", false);

    // Positional arguments are read from the property stored under the empty key.
    String[] parsedArgs = options.getProperty("", "").split("\\s+");
    // Splitting an empty string yields a single empty token, so the length check
    // alone cannot tell "no trainFile given" apart from "one trainFile given".
    if (parsedArgs.length != MIN_ARGS || parsedArgs[0].isEmpty()) {
      log.info(usage());
      System.exit(-1);
    }

    File trainTreebank = new File(parsedArgs[0]);

    Date startTime = new Date();
    log.info("###################################");
    log.info("### Joint Segmentation / Parser ###");
    log.info("###################################");
    System.err.printf("Start time: %s\n", startTime);

    JointParsingModel parsingModel = new JointParsingModel();
    parsingModel.setVerbose(verbose);
    parsingModel.setMaxEvalSentLen(maxGoldSentLen);
    parsingModel.setSerInput(serInput);

    InputStream inputStream = null;
    try {
      inputStream = openInput(serInput);
    } catch (IOException e) {
      // Covers FileNotFoundException as well; both cases are fatal here.
      e.printStackTrace();
      System.exit(-1);
    }

    if (!trainTreebank.exists()) {
      log.info("Training treebank does not exist!\n " + trainTreebank.getPath());
    } else if (testTreebank != null && !testTreebank.exists()) {
      log.info("Test treebank does not exist!\n " + testTreebank.getPath());
    } else if (parsingModel.run(trainTreebank, testTreebank, inputStream)) {
      log.info("Successful shutdown!");
    } else {
      log.error("Parsing model failure.");
    }

    Date stopTime = new Date();
    long elapsedTime = stopTime.getTime() - startTime.getTime();
    log.info();
    log.info();
    System.err.printf("Completed processing at %s\n", stopTime);
    System.err.printf("Elapsed time: %d seconds\n", (int) (elapsedTime / 1000F));
  }
}