/////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2007 University of Texas at Austin and (C) 2005 // University of Pennsylvania and Copyright (C) 2002, 2003 University // of Massachusetts Amherst, Department of Computer Science. // // This software is licensed under the terms of the Common Public // License, Version 1.0 or (at your option) any subsequent version. // // The license is approved by the Open Source Initiative, and is // available from their website at http://www.opensource.org. /////////////////////////////////////////////////////////////////////////////// package mstparser; import java.io.File; /** * Hold all the options for the parser so they can be passed around easily. * * <p> Created: Sat Nov 10 15:25:10 2001 </p> * * @author Jason Baldridge * @version $Id: CONLLReader.java 103 2007-01-21 20:26:39Z jasonbaldridge $ * @see mstparser.io.DependencyReader */ public final class ParserOptions { public String trainfile = null; public String testfile = null; public File trainforest = null; public File testforest = null; public boolean train = false; public boolean eval = false; public boolean test = false; public boolean rankEdgesByConfidence = false; public String modelName = "dep.model"; public String lossType = "punc"; public boolean createForest = true; public String decodeType = "proj"; public String format = "CONLL"; public int numIters = 10; public String outfile = "out.txt"; public String goldfile = null; public int trainK = 1; public int testK = 1; public boolean secondOrder = false; public boolean useRelationalFeatures = false; public boolean discourseMode = false; public String confidenceEstimator = null; // Agreement Options // none, agr, agr+orig //public String agreementFeature = "none"; // Stacked Options public int augmentNumParts = 2; // afm 03-03-08 --- Number of partitions public boolean stackedLevel0 = false; // afm 03-10-08 --- true for training/testing the level-0 classifier (if train, augment training data with output predictions) public boolean stackedLevel1 = false; // afm 03-10-08 --- true for training/testing the level-1 classifier public boolean separateLab = false; // afm 06-03-08 --- Perform labeling in a separate stage (using MALLET) public StackedFeaturesOptions stackedFeats = null; public int separateLabCutOff = 0; public boolean composeFeaturesWithPOS = false; // afm 09-25-08 --- If true, compose features just with POS tags instead of composing them with lemmas and words. public boolean useStemmingIfLemmasAbsent = false; // afm 09-25-08 --- If lemmas are not available, use the first three characters of the words instead public ParserOptions(String[] args) { stackedFeats = new StackedFeaturesOptions(); for (int i = 0; i < args.length; i++) { String[] pair = args[i].split(":"); if (pair[0].equals("train")) { train = true; } if (pair[0].equals("eval")) { eval = true; } if (pair[0].equals("test")) { test = true; } if (pair[0].equals("iters")) { numIters = Integer.parseInt(pair[1]); } if (pair[0].equals("output-file")) { outfile = args[i].substring(args[i].indexOf(":") + 1); } if (pair[0].equals("gold-file")) { goldfile = args[i].substring(args[i].indexOf(":") + 1); } if (pair[0].equals("train-file")) { trainfile = args[i].substring(args[i].indexOf(":") + 1); } if (pair[0].equals("test-file")) { testfile = args[i].substring(args[i].indexOf(":") + 1); } if (pair[0].equals("model-name")) { modelName = args[i].substring(args[i].indexOf(":") + 1); } if (pair[0].equals("training-k")) { trainK = Integer.parseInt(pair[1]); } if (pair[0].equals("loss-type")) { lossType = pair[1]; } if (pair[0].equals("order") && pair[1].equals("2")) { secondOrder = true; } if (pair[0].equals("create-forest")) { createForest = pair[1].equals("true") ? true : false; } if (pair[0].equals("decode-type")) { decodeType = pair[1]; } if (pair[0].equals("format")) { format = pair[1]; } if (pair[0].equals("relational-features")) { useRelationalFeatures = pair[1].equals("true") ? true : false; } if (pair[0].equals("discourse-mode")) { discourseMode = pair[1].equals("true") ? true : false; } if (pair[0].equals("augment-nparts")) { augmentNumParts = Integer.parseInt(pair[1]); } if (pair[0].equals("stacked-level0")) { stackedLevel0 = true; } if (pair[0].equals("stacked-level1")) { stackedLevel1 = true; } if (pair[0].equals("separate-lab")) { separateLab = true; } if (pair[0].equals("separate-lab-cutoff")) { separateLabCutOff = Integer.parseInt(pair[1]); } if (pair[0].equals("compose-features-with-pos")) { composeFeaturesWithPOS = true; } if (pair[0].equals("use-stemming-if-lemmas-absent")) { useStemmingIfLemmasAbsent = true; } if (pair[0].equals("stackedfeat-pred-edge")) { stackedFeats.usePredEdge = pair[1].equals("1") ? true : false; } if (pair[0].equals("stackedfeat-prev-sibl")) { stackedFeats.usePrevSibl = pair[1].equals("1") ? true : false; } if (pair[0].equals("stackedfeat-next-sibl")) { stackedFeats.useNextSibl = pair[1].equals("1") ? true : false; } if (pair[0].equals("stackedfeat-labels")) { stackedFeats.useLabels = pair[1].equals("1") ? true : false; } if (pair[0].equals("stackedfeat-grandparents")) { stackedFeats.useGrandparents = pair[1].equals("1") ? true : false; } if (pair[0].equals("stackedfeat-valency")) { stackedFeats.useValency = pair[1].equals("1") ? true : false; } if (pair[0].equals("stackedfeat-allchildren")) { stackedFeats.useAllChildren = pair[1].equals("1") ? true : false; } if (pair[0].equals("stackedfeat-pred-head")) { stackedFeats.usePredHead = pair[1].equals("1") ? true : false; } if (pair[0].equals("confidence-estimation")) { confidenceEstimator = pair[1]; } if (pair[0].equals("rankEdgesByConfidence")) { rankEdgesByConfidence = true; } if (createForest == false && pair[0].equals("forest-file")) { trainforest = new File(pair[1]); testforest = new File(pair[1]); } } if (stackedLevel1) { stackedFeats.display(); } if(createForest == true) { try { File tmpDir = new File("/tmp"); if (null != trainfile) { trainforest = File.createTempFile("train", ".forest"); trainforest.deleteOnExit(); } if (null != testfile) { testforest = File.createTempFile("test", ".forest"); testforest.deleteOnExit(); } } catch (java.io.IOException e) { DependencyParser.out.println("Unable to create tmp files for feature forests!"); DependencyParser.out.println(e); System.exit(0); } } } @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("FLAGS ["); sb.append("train-file: ").append(trainfile); sb.append(" | "); sb.append("test-file: ").append(testfile); sb.append(" | "); sb.append("gold-file: ").append(goldfile); sb.append(" | "); sb.append("output-file: ").append(outfile); sb.append(" | "); sb.append("model-name: ").append(modelName); sb.append(" | "); sb.append("train: ").append(train); sb.append(" | "); sb.append("test: ").append(test); sb.append(" | "); sb.append("eval: ").append(eval); sb.append(" | "); sb.append("loss-type: ").append(lossType); sb.append(" | "); sb.append("second-order: ").append(secondOrder); sb.append(" | "); sb.append("training-iterations: ").append(numIters); sb.append(" | "); sb.append("training-k: ").append(trainK); sb.append(" | "); sb.append("decode-type: ").append(decodeType); sb.append(" | "); sb.append("create-forest: ").append(createForest); sb.append(" | "); sb.append("format: ").append(format); sb.append(" | "); sb.append("relational-features: ").append(useRelationalFeatures); sb.append(" | "); sb.append("discourse-mode: ").append(discourseMode); sb.append("]\n"); return sb.toString(); } }