package edu.stanford.nlp.quoteattribution.Sieves.QMSieves; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.IndexedWord; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.QuoteAttributionAnnotator; import edu.stanford.nlp.quoteattribution.*; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations; import edu.stanford.nlp.semgraph.SemanticGraphEdge; import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher; import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.Generics; import edu.stanford.nlp.util.Pair; import java.util.*; /** * @author Grace Muzny */ public class DependencyParseSieve extends QMSieve { public DependencyParseSieve(Annotation doc, Map<String, List<Person>> characterMap, Map<Integer, String> pronounCorefMap, Set<String> animacySet) { super(doc, characterMap, pronounCorefMap, animacySet, "Deterministic depparse"); } public void doQuoteToMention(Annotation doc) { // Trigram patterns // p/r 1/.304 dependencyParses(doc); oneSpeakerSentence(doc); } private boolean inRange(Pair<Integer, Integer> range, int val) { return range.first <= val && val <= range.second; } //using quote-removed depparses public void dependencyParses(Annotation doc) { List<CoreMap> quotes = doc.get(CoreAnnotations.QuotationsAnnotation.class); List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class); List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class); for (CoreMap quote : quotes) { if (quote.get(QuoteAttributionAnnotator.MentionAnnotation.class) != null) { continue; } Pair<Integer, Integer> range = QuoteAttributionUtils.getRemainderInSentence(doc, quote); if(range == null) { continue; } //search for mentions in the first run Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> namesAndNameIndices = scanForNames(range); ArrayList<String> names = namesAndNameIndices.first; ArrayList<Pair<Integer, Integer>> nameIndices = namesAndNameIndices.second; SemanticGraph graph = quote.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class); SemgrexMatcher matcher = subjVerbPattern.matcher(graph); List<Pair<IndexedWord, IndexedWord>> subjVerbPairs = new ArrayList<>(); //TODO: check and see if this is necessary while (matcher.find()) { IndexedWord subj = matcher.getNode("SUBJ"); IndexedWord verb = matcher.getNode("VERB"); subjVerbPairs.add(new Pair<>(subj, verb)); } List<IndexedWord> vbs = graph.getAllNodesByPartOfSpeechPattern("VB.*"); for (IndexedWord iw : vbs) { // does it have an nsubj child? Set<IndexedWord> children = graph.getChildren(iw); List<IndexedWord> deps = Generics.newArrayList(); IndexedWord nsubj = null; for (IndexedWord child : children) { SemanticGraphEdge sge = graph.getEdge(iw, child); if (sge.getRelation().getShortName().equals("dep") && child.tag().startsWith("VB")) { deps.add(child); } else if (sge.getRelation().getShortName().equals("nsubj")) { nsubj = child; } } if (nsubj != null) { for (IndexedWord dep : deps) { subjVerbPairs.add(new Pair(nsubj, dep)); } } } //look for a speech verb for (Pair<IndexedWord, IndexedWord> SVPair : subjVerbPairs) { IndexedWord verb = SVPair.second; IndexedWord subj = SVPair.first; //check if subj and verb outside of quote int verbTokPos = tokenToLocation(verb.backingLabel()); int subjTokPos = tokenToLocation(verb.backingLabel()); if (inRange(range, verbTokPos) && inRange(range, subjTokPos) && commonSpeechWords.contains(verb.lemma())) { if (subj.tag().equals("NNP")) { int startChar = subj.beginPosition(); for (int i = 0; i < names.size(); i++) { Pair<Integer, Integer> nameIndex = nameIndices.get(i); //avoid names that don't actually exist in if (rangeContainsCharIndex(nameIndex, startChar)) { fillInMention(quote, tokenRangeToString(nameIndex), nameIndex.first, nameIndex.second, sieveName, NAME); break; } } } else if (subj.tag().equals("PRP")) { int loc = tokenToLocation(subj.backingLabel()); fillInMention(quote, subj.word(), loc, loc, sieveName, PRONOUN); break; } else if(subj.tag().equals("NN") && animacySet.contains(subj.word())) { int loc = tokenToLocation(subj.backingLabel()); fillInMention(quote, subj.word(), loc, loc, sieveName, ANIMATE_NOUN); break; } } } } } }