package edu.stanford.nlp.pipeline; import java.util.List; import java.util.Set; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.parser.common.ParserUtils; import edu.stanford.nlp.parser.charniak.CharniakParser; import edu.stanford.nlp.trees.EnglishGrammaticalStructureFactory; import edu.stanford.nlp.trees.GrammaticalStructureFactory; import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.util.CoreMap; /** * This class will add parse information to an Annotation from the BLLIP parser. * It allows you to use the Charniak parser or Charniak and Johnson reranking parser * along with any existing parser and reranking model. * * It assumes that the Annotation already contains the tokenized words * as a {@code List<List<CoreLabel>>} under * {@code CoreAnnotations.SentencesAnnotation.class}. * If the words have POS tags, they will not be used. * * @author David McClosky */ public class CharniakParserAnnotator implements Annotator { // TODO: make this an option? private static final boolean BUILD_GRAPHS = true; private final GrammaticalStructureFactory gsf = new EnglishGrammaticalStructureFactory(); private final boolean VERBOSE; private final CharniakParser parser; public CharniakParserAnnotator(String parserModel, String parserExecutable, boolean verbose, int maxSentenceLength) { VERBOSE = verbose; parser = new CharniakParser(parserExecutable, parserModel); parser.setMaxSentenceLength(maxSentenceLength); } public CharniakParserAnnotator() { VERBOSE = false; parser = new CharniakParser(); } @Override public void annotate(Annotation annotation) { if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) { // parse a tree for each sentence for (CoreMap sentence: annotation.get(CoreAnnotations.SentencesAnnotation.class)) { List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class); if (VERBOSE) { System.err.println("Parsing: " + words); } int maxSentenceLength = parser.getMaxSentenceLength(); // generate the constituent tree Tree tree; // initialized below if (maxSentenceLength <= 0 || words.size() < maxSentenceLength) { tree = parser.getBestParse(words); } else { tree = ParserUtils.xTree(words); } ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, tree); } } else { throw new RuntimeException("unable to find sentences in: " + annotation); } } @Override public Set<Requirement> requires() { return TOKENIZE_AND_SSPLIT; } @Override public Set<Requirement> requirementsSatisfied() { return PARSE_AND_TAG; } }