// FastFactoredParser.java example
//
// Explorer
// CoreNLP-master
package edu.stanford.nlp.parser.lexparser; 
import edu.stanford.nlp.util.logging.Redwood;

import java.util.*;

import edu.stanford.nlp.ling.CategoryWordTagFactory;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.parser.KBestViterbiParser;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.util.*;


/** Provides a much faster way to realize the factored
 *  parsing idea, including easily returning "k good" results
 *  at the expense of optimality.  Exploiting the k best functionality
 *  of the ExhaustivePCFGParser, this model simply gets more than
 *  k best PCFG parsers, scores them according to the dependency
 *  grammar, and returns them in terms of their product score.
 *  No actual parsing is done.
 *
 *  @author Christopher Manning
 */
public class FastFactoredParser implements KBestViterbiParser  {

  /** A logger for this class */
  private static Redwood.RedwoodChannels log = Redwood.channels(FastFactoredParser.class);

  // TODO Regression tests
  // TODO Set dependency tuning and test whether useful
  // TODO Validate and up the Arabic numbers
  // TODO Make the printing options for k good/best sane
  // TODO Check parsing of a List<String>.  Change defaultSentence() to be List<HasWord>

  protected static final boolean VERBOSE = false;

  protected ExhaustivePCFGParser pparser;
  protected GrammarProjection projection;

  protected MLEDependencyGrammar dg;
  protected Options op;

  private int numToFind;

  private final Index<String> wordIndex;
  private final Index<String> tagIndex;

  protected int project(int state) {
    return projection.project(state);
  }

  /**
   * Return the best parse of the sentence most recently parsed.
   *
   * @return The best (highest score) tree
   */
  public Tree getBestParse() {
    return nGoodTrees.get(0).object();
  }

  public double getBestScore() {
    return nGoodTrees.get(0).score();
  }


  public boolean hasParse() {
    return ! nGoodTrees.isEmpty();
  }


  private List<ScoredObject<Tree>> nGoodTrees = new ArrayList<>();



  /**
   * Return the list of N "good" parses of the sentence most recently parsed.
   * (The first is guaranteed to be the best, but later ones are only
   * guaranteed the best subject to the possibilities that disappear because
   * the PCFG/Dep charts only store the best over each span.)
   *
   * @return The list of N best trees
   */
  public List<ScoredObject<Tree>> getKGoodParses(int k) {
    if (k <= nGoodTrees.size()) {
      return nGoodTrees.subList(0, k);
    } else {
      throw new UnsupportedOperationException("FastFactoredParser: cannot provide " + k + " good parses.");
    }
  }


  /** Use the DependencyGrammar to score the tree.
   *
   * @param tr A binarized tree (as returned by the PCFG parser
   * @return The score for the tree according to the grammar
   */
  private double depScoreTree(Tree tr) {
    // log.info("Here's our tree:");
    // tr.pennPrint();
    // log.info(Trees.toDebugStructureString(tr));
    Tree cwtTree = tr.deepCopy(new LabeledScoredTreeFactory(), new CategoryWordTagFactory());
    cwtTree.percolateHeads(binHeadFinder);
    // log.info("Here's what it went to:");
    // cwtTree.pennPrint();
    List<IntDependency> deps = MLEDependencyGrammar.treeToDependencyList(cwtTree, wordIndex, tagIndex);
    // log.info("Here's the deps:\n" + deps);
    return dg.scoreAll(deps);
  }

  private final HeadFinder binHeadFinder = new BinaryHeadFinder();

   /**
   * Parse a Sentence.  It is assumed that when this is called, the pparser
   * has already been called to parse the sentence.
   *
   * @param words The list of words to parse.
   * @return true iff it could be parsed
   */
  public boolean parse(List<? extends HasWord> words) {
    nGoodTrees.clear();

    int numParsesToConsider = numToFind * op.testOptions.fastFactoredCandidateMultiplier + op.testOptions.fastFactoredCandidateAddend;
    if (pparser.hasParse()) {
      List<ScoredObject<Tree>> pcfgBest = pparser.getKBestParses(numParsesToConsider);
      Beam<ScoredObject<Tree>> goodParses = new Beam<>(numToFind);

      for (ScoredObject<Tree> candidate : pcfgBest) {
        if (Thread.interrupted()) {
          throw new RuntimeInterruptedException();
        }
        double depScore = depScoreTree(candidate.object());
        ScoredObject<Tree> x = new ScoredObject<>(candidate.object(), candidate.score() + depScore);
        goodParses.add(x);
      }
      nGoodTrees = goodParses.asSortedList();
    }
    return ! nGoodTrees.isEmpty();
  }

  /** Get the exact k best parses for the sentence.
   *
   *  @param k The number of best parses to return
   *  @return The exact k best parses for the sentence, with
   *         each accompanied by its score (typically a
   *         negative log probability).
   */
  public List<ScoredObject<Tree>> getKBestParses(int k) {
    throw new UnsupportedOperationException();
  }


  /** Get a complete set of the maximally scoring parses for a sentence,
   *  rather than one chosen at random.  This set may be of size 1 or larger.
   *
   *  @return All the equal best parses for a sentence, with each
   *         accompanied by its score
   */
  public List<ScoredObject<Tree>> getBestParses() {
    throw new UnsupportedOperationException();
  }

  /** Get k parse samples for the sentence.  It is expected that the
   *  parses are sampled based on their relative probability.
   *
   *  @param k The number of sampled parses to return
   *  @return A list of k parse samples for the sentence, with
   *         each accompanied by its score
   */
  public List<ScoredObject<Tree>> getKSampledParses(int k) {
    throw new UnsupportedOperationException();
  }


  FastFactoredParser(ExhaustivePCFGParser pparser, MLEDependencyGrammar dg, Options op, int numToFind, Index<String> wordIndex, Index<String> tagIndex) {
    this(pparser, dg, op, numToFind, new NullGrammarProjection(null, null), wordIndex, tagIndex);
  }

  FastFactoredParser(ExhaustivePCFGParser pparser, MLEDependencyGrammar dg, Options op, int numToFind, GrammarProjection projection, Index<String> wordIndex, Index<String> tagIndex) {
    this.pparser = pparser;
    this.projection = projection;
    this.dg = dg;
    this.op = op;
    this.numToFind = numToFind;
    this.wordIndex = wordIndex;
    this.tagIndex = tagIndex;
  }

} // end class FastFactoredParser