package edu.berkeley.nlp.PCFGLA;

import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.zip.GZIPInputStream;

import edu.berkeley.nlp.PCFGLA.Corpus.TreeBankType;
import edu.berkeley.nlp.PCFGLA.smoothing.SmoothAcrossParentSubstate;
import edu.berkeley.nlp.PCFGLA.smoothing.Smoother;
import edu.berkeley.nlp.parser.EnglishPennTreebankParseEvaluator;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.syntax.Trees;
import edu.berkeley.nlp.util.Numberer;
import edu.berkeley.nlp.util.Pair;

/**
 * Reads in the Penn Treebank and tests one or more grammars against it,
 * printing the best parse for each sentence and a labeled-constituent
 * F1/LP/LR summary.
 *
 * <p>Each {@code GrammarTester} instance is also a {@link Callable} that
 * evaluates a single grammar file against a fixed test set (used by the
 * {@code -testAll} mode to score many grammars in parallel).
 *
 * @author Slav Petrov
 */
public class GrammarTester
        implements Callable<Pair<EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String>, String>> {

    /** Optional hook allowing an external caller to supply the parser implementation. */
    public static ParserFactory externalParserFactory = null;

    /** Factory for plugging in an alternative parser implementation. */
    public static interface ParserFactory {
        public ConstrainedArrayParser newParser(Grammar gr, Lexicon lex, SpanPredictor sp);
    }

    /** Command-line options (parsed by {@link OptionParser}). */
    public static class Options {

        @Option(name = "-in", required = true, usage = "Input File for Grammar (Required)\n")
        public String inFileName;

        @Option(name = "-path", usage = "Path to Corpus (Default: null)\n")
        public String path = null;

        @Option(name = "-treebank", usage = "Language: WSJ, CHINESE, GERMAN, CONLL, SINGLEFILE (Default: ENGLISH)")
        public TreeBankType treebank = TreeBankType.WSJ;

        @Option(name = "-maxL", usage = "Maximum sentence length (Default <=40)")
        public int maxSentenceLength = 40;

        @Option(name = "-section", usage = "On which part of the WSJ to test: train/dev/test (Default: dev)")
        public String section = "dev";

        @Option(name = "-maxS", usage = "Maximum number of sentences (Default all)")
        public int maxSentences = 1000000;

        @Option(name = "-parser", usage = "Parser type: c-to-f, plain, kbest, basic, maxderivation")
        public String parser = "c-to-f";

        @Option(name = "-k", usage = "k for k-best parsing")
        public int k = 1;

        @Option(name = "-cons", usage = "Constraints for plain parser")
        public String cons = null;

        @Option(name = "-viterbi", usage = "Compute viterbi derivation instead of max-rule parse (Default: max-rule)")
        public boolean viterbi = false;

        @Option(name = "-allowAllSubstates", usage = "Don't prune at the substate level")
        public boolean allowAllSubstates = false;

        @Option(name = "-unaryPenalty", usage = "Unary penalty (Default: 1.0)")
        public double unaryPenalty = 1.0;

        @Option(name = "-finalLevel", usage = "Parse with projected grammar from this level (Default: -1 = input grammar)")
        public int finalLevel = -1;

        @Option(name = "-verbose", usage = "Verbose/Quiet (Default: Quiet)\n")
        public boolean verbose = false;

        @Option(name = "-accurate", usage = "Set thresholds for accuracy. (Default: set thresholds for efficiency)")
        public boolean accurate = false;

        @Option(name = "-useGoldPOS", usage = "Use gold part of speech tags (Default: false)")
        public boolean useGoldPOS = false;

        // Static so that the worker threads created in -testAll mode (which only
        // get a grammar file name) can see the flag via GrammarTester.Options.smooth.
        @Option(name = "-smooth", usage = "Smooth the parameters before parsing")
        public static boolean smooth = false;

        @Option(name = "-doNOTprojectConstraints", usage = "Do NOT project constraints")
        public boolean doNOTprojectConstraints = false;

        @Option(name = "-nThreads", usage = "Parse in parallel using this many threads (Default: 1).")
        public int nThreads = 1;

        // NOTE(review): usage text was a copy-paste of -nThreads; corrected.
        @Option(name = "-filterTrees", usage = "Filter the test trees as done for conditional training (Default: false).")
        public boolean filterTrees = false;

        @Option(name = "-filterAllUnaries", usage = "Mark any unary parent with a ^u")
        public boolean filterAllUnaries = false;

        @Option(name = "-filterStupidFrickinWHNP", usage = "Temp hack!")
        public boolean filterStupidFrickinWHNP = false;

        @Option(name = "-printGoldTree", usage = "Print (flat) gold tree")
        public boolean printGoldTree = false;

        @Option(name = "-computeConstraints", usage = "Compute constraints from the given grammar (rather than loading with -cons)")
        public boolean computeConstraints = false;

        @Option(name = "-evaluateConstraints", usage = "Evaluate search errors from constraints")
        public boolean evaluateConstraints = false;

        @Option(name = "-logT", usage = "Threshold for constraints")
        public double logT = -10;

        @Option(name = "-printAllKBest", usage = "Print every kBest parse")
        public boolean printAllKBest = false;

        @Option(name = "-testAll", usage = "Test all grammar files starting with this name")
        public boolean testAll = false;

        @Option(name = "-filePath", usage = "Path for grammars to be tested")
        public String filePath = null;

        @Option(name = "-nProcess", usage = "In -testAll mode, test this many grammars in parallel (Default: 1).")
        public int nProcess = 1;

        @Option(name = "-lowercase", usage = "Lowercase all words in the treebank")
        public boolean lowercase = false;

        @Option(name = "-allSubstatesAllowed", usage = "When using constraints whether to prune on the substate level")
        public boolean allSubstatesAllowed = false;

        @Option(name = "-printAllF1", usage = "Print all F1 scores (when using testAll)")
        public boolean printAllF1 = false;
    }

    // State for a per-grammar worker (see call()): the shared test set, the
    // (optional) pruning constraints, the grammar file to load, and the
    // sentence-length cutoff.
    List<Tree<String>> testTrees;
    boolean[][][][][] cons;
    String fileName;
    int maxSentenceLength;

    /**
     * Entry point: loads the test trees and the grammar(s), parses every test
     * sentence, and reports labeled-constituent evaluation scores.
     */
    public static void main(String[] args) {
        OptionParser optParser = new OptionParser(Options.class);
        Options opts = (Options) optParser.parse(args, true);
        // Provide feedback on command-line arguments.
        System.out.println("Calling with " + optParser.getPassedInOptions());

        String path = opts.path;
        System.out.println("Loading trees from " + path + " and using treebank type " + opts.treebank);

        int maxSentenceLength = opts.maxSentenceLength;
        System.out.println("Will remove sentences with more than " + maxSentenceLength + " words.");

        String testSetString = opts.section;
        boolean devTestSet = testSetString.equals("dev");
        boolean finalTestSet = testSetString.equals("final");
        boolean trainTestSet = testSetString.equals("train");
        if (!(devTestSet || finalTestSet || trainTestSet)) {
            System.out.println("I didn't understand dev/final test set argument " + testSetString);
            System.exit(1);
        }
        System.out.println(" using " + testSetString + " test set");

        // Pruning constraints, indexed [sentence][start][end][state][substate].
        boolean[][][][][] cons = null;
        if (opts.computeConstraints) {
            // Run the ParserConstrainer to dump constraints to disk, then point
            // opts.cons at the file it produced so they get loaded below.
            String[] args1 = new String[0];
            String dirName = ".";
            String baseName = "tmp";
            String[] consArgsTrain = addOptions(args1, new String[] { "-logT", "" + opts.logT,
                    "-maxL", "" + opts.maxSentenceLength, "-path", opts.path,
                    "-filterStupidFrickinWHNP", opts.filterStupidFrickinWHNP ? "true" : "false",
                    "-markUnaryParents", "true",
                    "-out", dirName + "/" + baseName + "0_" + opts.section,
                    "-in", opts.inFileName, "-section", opts.section, "-nChunks", "1",
                    "-outputLog", dirName + "/" + baseName + ".cons.log" });
            ParserConstrainer.main(consArgsTrain);
            opts.cons = dirName + "/" + baseName + "0_" + opts.section + "-0.data";
        }
        if (opts.cons != null) cons = ParserConstrainer.loadData(opts.cons);

        Corpus corpus = new Corpus(path, opts.treebank, 1.0, !trainTestSet);
        List<Tree<String>> testTrees = null;
        if (devTestSet) testTrees = corpus.getDevTestingTrees();
        if (finalTestSet) testTrees = corpus.getFinalTestingTrees();
        if (trainTestSet) testTrees = corpus.getTrainTrees();

        if (opts.lowercase) {
            System.out.println("Lowercasing the treebank.");
            Corpus.lowercaseWords(testTrees);
        }

        String inFileName = (opts.testAll) ? opts.filePath + "/" + opts.inFileName : opts.inFileName;
        if (inFileName == null) {
            throw new Error("Did not provide a grammar.");
        }
        System.out.println("Loading grammar from " + inFileName + ".");
        ParserData pData = ParserData.Load(inFileName);
        if (pData == null) {
            System.out.println("Failed to load grammar from file " + inFileName + ".");
            System.exit(1);
        }

        Grammar grammar = pData.getGrammar();
        grammar.splitRules();
        Lexicon lexicon = pData.getLexicon();
        SpanPredictor spanPredictor = pData.getSpanPredictor();
        if (Options.smooth) {
            System.out.println("Smoothing only lexicon.");
            Smoother lexSmoother = new SmoothAcrossParentSubstate(0.01);
            lexicon.setSmoother(lexSmoother);
        }
        Numberer.setNumberers(pData.getNumbs());

        int finalLevel = opts.finalLevel;
        if (finalLevel != -1)
            System.out.println("Parsing with projected grammar from level " + finalLevel + ".");
        boolean viterbiParse = opts.viterbi;
        if (viterbiParse)
            System.out.println("Computing viterbi derivation instead of max-rule parse.");

        boolean doVariational = false;
        boolean useGoldPOS = opts.useGoldPOS;
        ConstrainedArrayParser parser = null;

        // Main scorer: ignores ROOT/PSEUDO labels and punctuation.
        EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String> eval =
                new EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String>(
                        new HashSet<String>(Arrays.asList(new String[] { "ROOT", "PSEUDO" })),
                        new HashSet<String>(Arrays.asList(new String[] { "''", "``", ".", ":", "," })));
        // Oracle scorer used to pick the best of k parses (k-best mode only).
        EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String> tmpEval = null;
        System.out.println("The computed F1,LP,LR scores are just a rough guide. They are typically 0.1-0.2 lower than the official EVALB scores.");

        // NOTE(review): a stray "System.exit(0);" sat here (debug leftover from
        // a commented-out tree-dumping loop), making every line below
        // unreachable: the program printed the headers above and quit without
        // parsing anything. Removed.

        if (externalParserFactory != null) {
            parser = externalParserFactory.newParser(grammar, lexicon, spanPredictor);
        } else {
            if ("plain".equals(opts.parser)) {
                testTrees = Corpus.filterTreesForConditional(testTrees, opts.filterAllUnaries,
                        opts.filterStupidFrickinWHNP, false);
                grammar.clearUnaryIntermediates();
                if (grammar instanceof HierarchicalAdaptiveGrammar) {
                    lexicon.explicitlyComputeScores(grammar.finalLevel);
                    parser = new ConstrainedHierarchicalTwoChartParser(grammar, lexicon, spanPredictor,
                            grammar.finalLevel);
                } else
                    parser = new ConstrainedTwoChartsParser(grammar, lexicon, spanPredictor);
                if (opts.viterbi) parser.viterbi = true;
            } else if ("basic".equals(opts.parser)) {
                parser = new ConstrainedArrayParser(grammar, lexicon, grammar.numSubStates);
            } else if ("kbest".equals(opts.parser)) {
                parser = new CoarseToFineNBestParser(grammar, lexicon, opts.k, opts.unaryPenalty,
                        finalLevel, viterbiParse, false, false, opts.accurate, doVariational,
                        useGoldPOS, true);
                tmpEval = new EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String>(
                        Collections.singleton("ROOT"),
                        new HashSet<String>(Arrays.asList(new String[] { "''", "``", ".", ":", "," })));
            } else if ("maxderivation".equals(opts.parser)) {
                parser = new CoarseToFineMaxRuleDerivationParser(grammar, lexicon, opts.unaryPenalty,
                        finalLevel, viterbiParse, false, false, opts.accurate, doVariational,
                        useGoldPOS, true);
            } else
                parser = new CoarseToFineMaxRuleParser(grammar, lexicon, opts.unaryPenalty,
                        finalLevel, viterbiParse, false, false, opts.accurate, doVariational,
                        useGoldPOS, true);
        }
        parser.binarization = pData.getBinarization();
        boolean kBestParsing = "kbest".equals(opts.parser);

        if (opts.allSubstatesAllowed) System.out.println("All substates are allowed.");
        if (opts.filterTrees)
            testTrees = Corpus.filterTreesForConditional(testTrees, opts.filterAllUnaries,
                    opts.filterStupidFrickinWHNP, false);

        if (opts.nThreads > 1) {
            // Multi-threaded parsing path. NOTE(review): -useGoldPOS and -cons are
            // ignored on this path (the wrapper is fed only the raw sentence);
            // confirm against MultiThreadedParserWrapper if that matters.
            System.out.println("Parsing with " + opts.nThreads + " threads in parallel.");
            MultiThreadedParserWrapper m_parser = new MultiThreadedParserWrapper(parser, opts.nThreads);
            int treeNumber = 0;

            // Drop over-long sentences up front so that treeNumber (which indexes
            // into testTrees as results come back) stays aligned with the input.
            ArrayList<Tree<String>> newList = new ArrayList<Tree<String>>();
            for (Tree<String> testTree : testTrees) {
                if (testTree.getYield().size() > maxSentenceLength) continue;
                newList.add(testTree);
            }
            testTrees = newList;

            for (Tree<String> testTree : testTrees) {
                List<String> testSentence = testTree.getYield();
                // Defensive recheck; cannot trigger after the filtering above.
                if (testSentence.size() > maxSentenceLength) {
                    System.out.println("()\n");
                    continue;
                }
                m_parser.parseThisSentence(testSentence);
                // Consume any results that are already available.
                while (m_parser.hasNext()) {
                    emitAndScore(m_parser.getNext(), testTrees.get(treeNumber++), kBestParsing,
                            opts, eval, tmpEval);
                }
            }
            // Drain the remaining in-flight sentences.
            while (!m_parser.isDone()) {
                while (m_parser.hasNext()) {
                    emitAndScore(m_parser.getNext(), testTrees.get(treeNumber++), kBestParsing,
                            opts, eval, tmpEval);
                }
            }
            System.out.println("Parsed " + treeNumber + " sentences.");
            eval.display(true);
            System.out.println("The computed F1,LP,LR scores are just a rough guide. They are typically 0.1-0.2 lower than the official EVALB scores.");
            System.exit(0);
        }

        if (!opts.testAll) {
            // Single-grammar, single-threaded evaluation.
            int i = 0; // counts parsed sentences and indexes into cons
            int totalGoldPruned = 0;
            int totalPruned = 0;
            for (Tree<String> testTree : testTrees) {
                List<String> testSentence = testTree.getYield();
                int sentenceLength = testSentence.size();
                if (sentenceLength > maxSentenceLength) {
                    System.out.println("()\n");
                    continue;
                }
                List<String> posTags = null;
                if (useGoldPOS) posTags = testTree.getPreTerminalYield();

                boolean[][][][] allowedStates = null;
                if (cons != null) {
                    if (cons[i] == null) {
                        i++;
                        continue;
                    }
                    if (!opts.doNOTprojectConstraints)
                        parser.projectConstraints(cons[i], opts.allSubstatesAllowed);
                    allowedStates = cons[i];
                }

                Tree<String> parsedTree = null;
                if (kBestParsing) {
                    // Oracle selection: keep the k-best candidate with the best F1
                    // against the gold tree.
                    List<Tree<String>> list = parser.getKBestConstrainedParses(testSentence, posTags, opts.k);
                    double bestFscore = 0;
                    for (Tree<String> tree : list) {
                        Tree<String> tmp = TreeAnnotations.unAnnotateTree(tree);
                        if (opts.printAllKBest) System.out.println("\t" + tmp);
                        double f1 = tmpEval.evaluate(tmp, testTree, false);
                        if (f1 > bestFscore) {
                            parsedTree = tmp;
                            bestFscore = f1;
                        }
                    }
                    if (parsedTree == null) parsedTree = new Tree<String>("ROOT");
                } else {
                    parsedTree = parser.getBestConstrainedParse(testSentence, posTags, allowedStates);
                    if (opts.verbose)
                        System.out.println("Annotated result:\n" + Trees.PennTreeRenderer.render(parsedTree));
                    parsedTree = TreeAnnotations.unAnnotateTree(parsedTree);
                    if (useGoldPOS && parsedTree.getChildren().isEmpty()) {
                        // Parse failure with gold POS tags; retry without them.
                        parsedTree = parser.getBestConstrainedParse(testSentence, null, allowedStates);
                        parsedTree = TreeAnnotations.unAnnotateTree(parsedTree);
                    }
                }

                if (!parsedTree.getChildren().isEmpty()) {
                    System.out.println(parsedTree.getChildren().get(0));
                } else
                    System.out.println("()\nLength: " + sentenceLength);

                if (opts.evaluateConstraints && cons != null) {
                    int numGoldPruned = countPrunedNodes(testTree, allowedStates,
                            Numberer.getGlobalNumberer("tags"), false, 0, testTree.getYield().size());
                    int numPruned = countPrunedNodes(allowedStates,
                            Numberer.getGlobalNumberer("tags"), false, 0, testTree.getYield().size());
                    System.out.println("Pruned " + numGoldPruned + " constituents.");
                    totalGoldPruned += numGoldPruned;
                    totalPruned += numPruned;
                }
                if (opts.printGoldTree) System.out.println("Gold: " + testTree.getChildren().get(0));
                eval.evaluate(parsedTree, testTree);
                if (++i > opts.maxSentences) break;
            }
            if (opts.evaluateConstraints)
                System.out.println("Pruned total of " + totalGoldPruned
                        + " gold constituents out of a total of " + totalPruned + " constituents pruned.");
            eval.display(true);
            System.out.println("The computed F1,LP,LR scores are just a rough guide. They are typically 0.1-0.2 lower than the official EVALB scores.");
        } else {
            // -testAll mode: score every matching grammar file, in parallel,
            // and keep the best one.

            // Project the constraints once up front (side effect on the parser).
            int k = 0;
            for (Tree<String> testTree : testTrees) {
                if (testTree.getYield().size() > maxSentenceLength) {
                    System.out.println("()\n");
                    continue;
                }
                if (cons != null) {
                    if (cons[k] == null) {
                        k++;
                        continue;
                    }
                    if (!opts.doNOTprojectConstraints)
                        parser.projectConstraints(cons[k], opts.allSubstatesAllowed);
                }
                k++;
            }

            final String fileName = opts.inFileName;
            FilenameFilter filter = new FilenameFilter() {
                public boolean accept(File dir, String name) {
                    return name.startsWith(fileName);
                }
            };
            File[] fileList = new File(opts.filePath).listFiles(filter);
            if (fileList == null) {
                // listFiles() returns null for a bad/unreadable directory; the
                // original code would NPE in Arrays.sort here.
                throw new Error("Could not list files in " + opts.filePath);
            }
            // Oldest grammar first. The original comparator funneled both
            // timestamps through two shared mutable Date fields; comparing the
            // raw lastModified() values is equivalent and side-effect free.
            Comparator<File> dateCompare = new Comparator<File>() {
                public int compare(File file1, File file2) {
                    long t1 = file1.lastModified();
                    long t2 = file2.lastModified();
                    return t1 < t2 ? -1 : (t1 == t2 ? 0 : 1);
                }
            };
            Arrays.sort(fileList, dateCompare);

            int nProcess = opts.nProcess;
            double bestF1 = -1;
            String bestGrammar = null;
            ExecutorService pool = Executors.newFixedThreadPool(nProcess);
            List<Future<Pair<EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String>, String>>> submits =
                    new ArrayList<Future<Pair<EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String>, String>>>(nProcess);
            for (int f = 0; f < fileList.length; f += nProcess) {
                submits.clear();
                for (int i = 0; i < nProcess; i++) {
                    // When the last batch is short, the original re-tested
                    // fileList[f] as filler; preserved for identical output.
                    String fName = (f + i < fileList.length) ? fileList[f + i].getName()
                            : fileList[f].getName();
                    String thisGrammar = opts.filePath + "/" + fName;
                    GrammarTester tester = new GrammarTester(thisGrammar, testTrees, maxSentenceLength, cons);
                    submits.add(pool.submit(tester));
                }
                // Future.get() blocks until each task finishes, so the original
                // busy-wait spin over isDone() was pure wasted CPU; removed.
                try {
                    for (Future<Pair<EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String>, String>> task : submits) {
                        Pair<EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String>, String> res = task.get();
                        System.out.print(res.getSecond() + "\t");
                        double thisF1 = res.getFirst().display(true);
                        if (opts.printAllF1)
                            System.out.println(res.getSecond() + " had F1 " + thisF1);
                        if (thisF1 > bestF1) {
                            bestF1 = thisF1;
                            bestGrammar = res.getSecond();
                        }
                    }
                } catch (ExecutionException e) {
                    e.printStackTrace();
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    Thread.currentThread().interrupt(); // restore the interrupt flag
                }
            }
            System.out.println("The best F1 was: " + bestF1);
            System.out.println("The best grammar was: " + bestGrammar);
            if (bestGrammar != null) {
                // Guard against "no grammar scored" (e.g. all tasks failed);
                // the original would NPE on new File(null).
                File finalGrammar = new File(bestGrammar);
                finalGrammar.renameTo(new File(opts.filePath + "/" + opts.inFileName));
            }
            pool.shutdown();
        }
        if (!opts.testAll) System.exit(0);
    }

    /**
     * Prints and scores one sentence's result from the multi-threaded parser:
     * picks the best tree (oracle-by-F1 when k-best parsing), prints it (or
     * "()" on failure), optionally prints the gold tree, and updates eval.
     * Shared by both drain loops; the original duplicated this code and the
     * first copy silently ignored -printAllKBest and -printGoldTree.
     */
    private static void emitAndScore(List<Tree<String>> parsedTrees, Tree<String> goldTree,
            boolean kBestParsing, Options opts,
            EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String> eval,
            EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String> tmpEval) {
        Tree<String> bestTree = null;
        if (kBestParsing) {
            double bestFscore = -1;
            for (Tree<String> pTree : parsedTrees) {
                pTree = TreeAnnotations.unAnnotateTree(pTree);
                if (opts.printAllKBest) System.out.println("\t" + pTree);
                double f1 = tmpEval.evaluate(pTree, goldTree, false);
                if (f1 > bestFscore) {
                    bestTree = pTree;
                    bestFscore = f1;
                }
            }
        } else {
            bestTree = parsedTrees.get(0);
            bestTree = TreeAnnotations.unAnnotateTree(bestTree);
        }
        if (!bestTree.getChildren().isEmpty()) {
            System.out.println(bestTree.getChildren().get(0));
        } else
            System.out.println("()\n");
        if (opts.printGoldTree) System.out.println(goldTree.getChildren().get(0));
        eval.evaluate(bestTree, goldTree);
    }

    /**
     * Worker constructor for -testAll mode.
     *
     * @param fName grammar file to load in {@link #call()}
     * @param tT shared test trees (read-only here)
     * @param maxL sentence-length cutoff
     * @param c pruning constraints, indexed by parsed-sentence number (may be null)
     */
    GrammarTester(String fName, List<Tree<String>> tT, int maxL, boolean[][][][][] c) {
        testTrees = tT;
        cons = c;
        fileName = fName;
        maxSentenceLength = maxL;
    }

    /**
     * Counts chart cells whose state has no allowed substate at all, i.e. how
     * many possible constituents the constraints prune outright.
     *
     * @param allowedStates constraint chart, indexed [start][end][state][substate]
     * @param globalNumberer unused (kept for signature compatibility with callers)
     * @param b unused
     * @param start first word index (inclusive)
     * @param end last word index (exclusive for span starts)
     * @return number of fully-pruned (span, state) pairs
     */
    private static int countPrunedNodes(boolean[][][][] allowedStates, Numberer globalNumberer,
            boolean b, int start, int end) {
        int total = 0;
        for (int i = start; i < end; ++i) {
            for (int j = i + 1; j <= end; ++j) {
                for (int state = 0; state < allowedStates[i][j].length; ++state) {
                    if (!hasTrue(allowedStates[i][j][state])) total++;
                }
            }
        }
        return total;
    }

    /**
     * Loads a serialized, gzipped {@code List<Integer>[][][]} from disk.
     *
     * @param fileName path to the gzipped object file
     * @return the deserialized data, or null on any I/O or class-lookup failure
     */
    public static List<Integer>[][][] loadData(String fileName) {
        List<Integer>[][][] data = null;
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(fileName);
            ObjectInputStream in = new ObjectInputStream(new GZIPInputStream(fis));
            data = (List<Integer>[][][]) in.readObject();
        } catch (IOException e) {
            System.out.println("IOException\n" + e);
            return null;
        } catch (ClassNotFoundException e) {
            System.out.println("Class not found!");
            return null;
        } finally {
            // The original leaked the stream on any exception; always release
            // the file descriptor (closing the FileInputStream suffices).
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException ignored) {
                    // best-effort close; nothing sensible to do here
                }
            }
        }
        return data;
    }

    /** Returns a new array containing the elements of {@code a} followed by those of {@code b}. */
    private static String[] addOptions(String[] a, String[] b) {
        String[] res = new String[a.length + b.length];
        System.arraycopy(a, 0, res, 0, a.length);
        System.arraycopy(b, 0, res, a.length, b.length);
        return res;
    }

    /**
     * Checks whether any annotated state whose unannotated label equals
     * {@code label} is allowed (has at least one permitted substate) in the
     * given span's constraints.
     *
     * @param label unannotated node label from the gold tree
     * @param tagNumberer maps state indices to annotated label strings
     * @param cons per-state substate-allowed flags for one span
     * @param isPreTerminal preterminal labels are compared without unannotation
     */
    private static boolean isAllowed(String label, Numberer tagNumberer, boolean[][] cons,
            boolean isPreTerminal) {
        for (int state = 0; state < cons.length; ++state) {
            boolean[] allowed = cons[state];
            assert tagNumberer.total() > state;
            String asString = (String) tagNumberer.object(state);
            String unannotatedLabel = asString;
            if (!isPreTerminal)
                // Wrap the label in a dummy tree so unAnnotateTree can strip
                // the annotation suffixes.
                unannotatedLabel = TreeAnnotations.unAnnotateTree(
                        new Tree<String>(asString,
                                Collections.singletonList(new Tree<String>("FakeLabel")))).getLabel();
            if (unannotatedLabel.equals(label)) {
                if (hasTrue(allowed)) return true;
            }
        }
        return false;
    }

    /**
     * Counts gold-tree constituents that the constraints would prune, i.e.
     * search errors introduced by the constraints.
     *
     * @param tree gold (sub)tree rooted at span [from, to)
     * @param cons constraint chart, indexed [start][end][state][substate]
     * @param tagNumberer maps state indices to label strings
     * @param splitRoot unused (kept for signature compatibility)
     * @param from start word index of this subtree's span
     * @param to end word index of this subtree's span
     * @return number of gold constituents in this subtree with no allowed state
     */
    private static int countPrunedNodes(Tree<String> tree, boolean[][][][] cons,
            Numberer tagNumberer, boolean splitRoot, int from, int to) {
        int total = 0;
        if (!isAllowed(tree.getLabel(), tagNumberer, cons[from][to], tree.isPreTerminal())) {
            total += 1;
        }
        if (tree.isPreTerminal()) {
            return total;
        }
        for (Tree<String> child : tree.getChildren()) {
            short length = (short) child.getYield().size();
            total += countPrunedNodes(child, cons, tagNumberer, true, from, from + length);
            from += length;
        }
        return total;
    }

    /** Returns true iff {@code a} is non-null and contains at least one true entry. */
    public static boolean hasTrue(boolean[] a) {
        boolean hasTrue = false;
        if (a == null) return hasTrue;
        for (boolean b : a) hasTrue |= b;
        return hasTrue;
    }

    /**
     * Worker body for -testAll mode: loads this instance's grammar file,
     * parses the shared test set, and returns (evaluator, grammar file name)
     * so the caller can compare F1 across grammars.
     *
     * @return pair of the filled-in evaluator and this worker's grammar file name
     */
    public Pair<EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String>, String> call()
            throws Exception {
        EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String> eval =
                new EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String>(
                        Collections.singleton("ROOT"),
                        new HashSet<String>(Arrays.asList(new String[] { "''", "``", ".", ":", "," })));
        ParserData pData = ParserData.Load(fileName);
        if (pData == null) {
            System.out.println("Failed to load grammar from file " + fileName + ".");
            System.exit(1);
        }
        Grammar grammar = pData.getGrammar();
        grammar.splitRules();
        Lexicon lexicon = pData.getLexicon();
        grammar.clearUnaryIntermediates();
        lexicon.explicitlyComputeScores(grammar.finalLevel);
        if (GrammarTester.Options.smooth) {
            System.out.println("Smoothing only the lexicon.");
            Smoother lexSmoother = new SmoothAcrossParentSubstate(0.01);
            lexicon.setSmoother(lexSmoother);
        }
        SpanPredictor spanPredictor = pData.getSpanPredictor();
        ConstrainedArrayParser parser = null;
        if (grammar instanceof HierarchicalAdaptiveGrammar) {
            lexicon.explicitlyComputeScores(grammar.finalLevel);
            parser = new ConstrainedHierarchicalTwoChartParser(grammar, lexicon, spanPredictor,
                    grammar.finalLevel);
        } else
            parser = new ConstrainedTwoChartsParser(grammar, lexicon, spanPredictor);

        int i = 0; // indexes cons by parsed-sentence count (skipped sentences do not advance it)
        for (Tree<String> testTree : testTrees) {
            List<String> testSentence = testTree.getYield();
            if (testSentence.size() > maxSentenceLength) continue;
            boolean[][][][] con = (cons == null) ? null : cons[i];
            Tree<String> parsedTree = parser.getBestConstrainedParse(testSentence, null, con);
            parsedTree = TreeAnnotations.unAnnotateTree(parsedTree);
            eval.evaluate(parsedTree, testTree, false);
            i++;
        }
        return new Pair<EnglishPennTreebankParseEvaluator.LabeledConstituentEval<String>, String>(
                eval, fileName);
    }
}