package edu.stanford.nlp.parser.lexparser;

import java.util.*;

import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.Tree;

/**
 * Gathers statistics on tree dependencies and then passes them to an
 * MLEDependencyGrammar for dependency grammar construction.
 *
 * @author Dan Klein
 */
public class MLEDependencyGrammarExtractor extends AbstractTreeExtractor<DependencyGrammar> {

  /** Word index handed to the dependency extraction and to the resulting grammar. */
  protected final Index<String> wordIndex;

  /** Tag index handed to the dependency extraction and to the resulting grammar. */
  protected final Index<String> tagIndex;

  /** This is where all dependencies are stored (using full tag space). */
  protected ClassicCounter<IntDependency> dependencyCounter = new ClassicCounter<>();

  /** Language-specific parameters, copied from the options at construction time. */
  protected TreebankLangParserParams tlpParams;

  /** Whether left and right is distinguished. */
  protected boolean directional;

  /** Whether dependent distance from head is distinguished. */
  protected boolean useDistance;

  /** Whether dependent distance is distinguished more coarsely. */
  protected boolean useCoarseDistance;

  /** Whether basic category tags are in the dependency grammar. */
  protected final boolean basicCategoryTagsInDependencyGrammar;

  /**
   * Creates an extractor whose settings are read from the given options.
   *
   * @param op options supplying {@code tlpParams}, {@code directional},
   *     {@code distance}, {@code coarseDistance} and
   *     {@code trainOptions.basicCategoryTagsInDependencyGrammar}; also passed
   *     to the superclass constructor
   * @param wordIndex word index shared with the grammar that is built
   * @param tagIndex tag index shared with the grammar that is built
   */
  public MLEDependencyGrammarExtractor(Options op, Index<String> wordIndex, Index<String> tagIndex) {
    super(op);
    this.wordIndex = wordIndex;
    this.tagIndex = tagIndex;
    tlpParams = op.tlpParams;
    directional = op.directional;
    useDistance = op.distance;
    useCoarseDistance = op.coarseDistance;
    basicCategoryTagsInDependencyGrammar = op.trainOptions.basicCategoryTagsInDependencyGrammar;
  }

  /**
   * Adds every dependency found in the given tree to the dependency counter,
   * each one incremented by {@code weight}.
   *
   * @param lt the tree whose dependencies are tallied
   * @param weight the amount added to the count of each dependency in the tree
   */
  @Override
  protected void tallyRoot(Tree lt, double weight) {
    // this list is in full (not reduced) tag space
    List<IntDependency> deps = MLEDependencyGrammar.treeToDependencyList(lt, wordIndex, tagIndex);
    for (IntDependency dependency : deps) {
      dependencyCounter.incrementCount(dependency, weight);
    }
  }

  /**
   * Builds an {@link MLEDependencyGrammar} from the dependencies tallied so far.
   *
   * <p>Side effect: {@code Lexicon.UNKNOWN_WORD} is added to the word index
   * before the grammar is constructed.
   *
   * @return a new grammar holding one rule per counted dependency, each added
   *     with its accumulated count
   */
  @Override
  public DependencyGrammar formResult() {
    wordIndex.addToIndex(Lexicon.UNKNOWN_WORD);
    MLEDependencyGrammar dg = new MLEDependencyGrammar(tlpParams, directional, useDistance,
        useCoarseDistance, basicCategoryTagsInDependencyGrammar, op, wordIndex, tagIndex);
    // Walk the entries directly so each count is read without a second lookup by key.
    for (Map.Entry<IntDependency, Double> entry : dependencyCounter.entrySet()) {
      dg.addRule(entry.getKey(), entry.getValue());
    }
    return dg;
  }

} // end class MLEDependencyGrammarExtractor