package edu.stanford.nlp.quoteattribution.Sieves.QMSieves;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.QuoteAttributionAnnotator;
import edu.stanford.nlp.quoteattribution.*;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Quote-to-mention sieve that links a quote to a name or pronoun mention when the tokens
 * directly adjacent to the quote form a three-element pattern made of the quote (Q), a
 * token whose POS tag starts with "V" (V), and either a name (C) or a pronoun (P).
 *
 * <p>Patterns tried, in order, for each quote that has no mention yet:
 * CVQ, VCQ, PVQ, VPQ (tokens before the quote), then QVC, QCV, QVP, QPV (tokens after it).
 * The first pattern that matches wins.
 *
 * @author Grace Muzny
 */
public class TrigramSieve extends QMSieve {

  /**
   * Creates the sieve; all arguments are forwarded unchanged to the {@link QMSieve}
   * constructor, together with an empty string as its last argument.
   */
  public TrigramSieve(Annotation doc,
                      Map<String, List<Person>> characterMap,
                      Map<Integer, String> pronounCorefMap,
                      Set<String> animacySet) {
    super(doc, characterMap, pronounCorefMap, animacySet, "");
  }

  /**
   * Runs this sieve on {@code doc}: first the trigram patterns, then
   * {@code oneSpeakerSentence(doc)}.
   *
   * @param doc the document whose quotes should be linked to mentions
   */
  public void doQuoteToMention(Annotation doc) {
    trigramPatterns(doc);
    oneSpeakerSentence(doc);
  }

  /**
   * Tries the trigram patterns on every quote of {@code doc} that does not yet carry a
   * {@code MentionAnnotation}, and fills in the mention for the first pattern that matches.
   * Patterns over the tokens preceding the quote are tried before those following it.
   *
   * <p>Does nothing when the document has no tokens annotation or no quotations annotation.
   *
   * @param doc the document whose quotes should be linked to mentions
   */
  public void trigramPatterns(Annotation doc) {
    List<CoreLabel> docTokens = doc.get(CoreAnnotations.TokensAnnotation.class);
    List<CoreMap> docQuotes = doc.get(CoreAnnotations.QuotationsAnnotation.class);
    if (docTokens == null || docQuotes == null) {
      return;
    }

    for (CoreMap quote : docQuotes) {
      // A mention was already assigned to this quote; leave it untouched.
      if (quote.get(QuoteAttributionAnnotator.MentionAnnotation.class) != null) {
        continue;
      }

      int quoteBeginTokenIndex = quote.get(CoreAnnotations.TokenBeginAnnotation.class);
      int quoteEndTokenIndex = quote.get(CoreAnnotations.TokenEndAnnotation.class);

      Pair<Integer, Integer> precedingTokenRange =
          QuoteAttributionUtils.getTokenRangePrecedingQuote(doc, quote);
      if (precedingTokenRange != null
          && matchPrecedingPatterns(quote, docTokens, precedingTokenRange, quoteBeginTokenIndex)) {
        continue;
      }

      Pair<Integer, Integer> followingTokenRange =
          QuoteAttributionUtils.getTokenRangeFollowingQuote(doc, quote);
      if (followingTokenRange != null) {
        matchFollowingPatterns(quote, docTokens, followingTokenRange, quoteEndTokenIndex);
      }
    }
  }

  /**
   * Tries CVQ, VCQ, PVQ and VPQ on the tokens before the quote.
   *
   * @return {@code true} if a pattern matched and the quote's mention was filled in
   */
  private boolean matchPrecedingPatterns(CoreMap quote,
                                         List<CoreLabel> docTokens,
                                         Pair<Integer, Integer> precedingTokenRange,
                                         int quoteBeginTokenIndex) {
    // Skip one token directly before the quote if it is listed in beforeQuotePunctuation,
    // so that the pattern is anchored on the token before that punctuation.
    int offset = 0;
    if (quoteBeginTokenIndex >= 1
        && quoteBeginTokenIndex - 1 < docTokens.size()
        && beforeQuotePunctuation.contains(docTokens.get(quoteBeginTokenIndex - 1).word())) {
      offset = 1;
    }
    // Index of the token that sits immediately before the quote (after skipping punctuation).
    int adjacentIndex = quoteBeginTokenIndex - 1 - offset;

    Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> namesAndNameIndices =
        scanForNames(precedingTokenRange);
    List<String> names = namesAndNameIndices.first;
    List<Pair<Integer, Integer>> nameIndices = namesAndNameIndices.second;
    if (!names.isEmpty()) {
      // Only the name closest to the quote is considered.
      String lastName = names.get(names.size() - 1);
      Pair<Integer, Integer> lastNameIndex = nameIndices.get(nameIndices.size() - 1);
      int nameBegin = lastNameIndex.first;
      int nameEnd = lastNameIndex.second;

      //CVQ
      if (isVerbAt(docTokens, adjacentIndex) // verb!
          && nameEnd == adjacentIndex - 1) {
        fillInMention(quote, lastName, nameBegin, nameEnd, "trigram CVQ", NAME);
        return true;
      }
      //VCQ
      if (nameEnd == adjacentIndex && isVerbAt(docTokens, nameBegin - 1)) {
        fillInMention(quote, lastName, nameBegin, nameEnd, "trigram VCQ", NAME);
        return true;
      }
    }

    List<Integer> pronounsIndices = scanForPronouns(precedingTokenRange);
    if (!pronounsIndices.isEmpty()) {
      // Only the pronoun closest to the quote is considered.
      int lastPronounIndex = pronounsIndices.get(pronounsIndices.size() - 1);

      //PVQ
      if (isVerbAt(docTokens, adjacentIndex) // verb!
          && lastPronounIndex == adjacentIndex - 1) {
        fillInMention(quote, tokenRangeToString(lastPronounIndex),
            lastPronounIndex, lastPronounIndex, "trigram PVQ", PRONOUN);
        return true;
      }
      //VPQ
      if (lastPronounIndex == adjacentIndex && isVerbAt(docTokens, adjacentIndex - 1)) {
        fillInMention(quote, tokenRangeToString(lastPronounIndex),
            lastPronounIndex, lastPronounIndex, "trigram VPQ", PRONOUN);
        return true;
      }
    }
    return false;
  }

  /**
   * Tries QVC, QCV, QVP and QPV on the tokens after the quote.
   *
   * @return {@code true} if a pattern matched and the quote's mention was filled in
   */
  private boolean matchFollowingPatterns(CoreMap quote,
                                         List<CoreLabel> docTokens,
                                         Pair<Integer, Integer> followingTokenRange,
                                         int quoteEndTokenIndex) {
    // Index of the token that sits immediately after the quote.
    int adjacentIndex = quoteEndTokenIndex + 1;

    Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> namesAndNameIndices =
        scanForNames(followingTokenRange);
    List<String> names = namesAndNameIndices.first;
    List<Pair<Integer, Integer>> nameIndices = namesAndNameIndices.second;
    if (!names.isEmpty()) {
      // Only the name closest to the quote is considered.
      String firstName = names.get(0);
      Pair<Integer, Integer> firstNameIndex = nameIndices.get(0);
      int nameBegin = firstNameIndex.first;
      int nameEnd = firstNameIndex.second;

      //QVC
      if (isVerbAt(docTokens, adjacentIndex) // verb!
          && nameBegin == adjacentIndex + 1) {
        fillInMention(quote, firstName, nameBegin, nameEnd, "trigram QVC", NAME);
        return true;
      }
      //QCV
      if (nameBegin == adjacentIndex && isVerbAt(docTokens, nameEnd + 1)) {
        fillInMention(quote, firstName, nameBegin, nameEnd, "trigram QCV", NAME);
        return true;
      }
    }

    List<Integer> pronounsIndices = scanForPronouns(followingTokenRange);
    if (!pronounsIndices.isEmpty()) {
      // Only the pronoun closest to the quote is considered.
      int firstPronounIndex = pronounsIndices.get(0);

      //QVP
      if (isVerbAt(docTokens, adjacentIndex) // verb!
          && firstPronounIndex == adjacentIndex + 1) {
        fillInMention(quote, tokenRangeToString(firstPronounIndex),
            firstPronounIndex, firstPronounIndex, "trigram QVP", PRONOUN);
        return true;
      }
      //QPV
      if (firstPronounIndex == adjacentIndex && isVerbAt(docTokens, adjacentIndex + 1)) {
        // The mention text is taken from the same pronoun whose indices are recorded,
        // so text and token span always agree.
        fillInMention(quote, tokenRangeToString(firstPronounIndex),
            firstPronounIndex, firstPronounIndex, "trigram QPV", PRONOUN);
        return true;
      }
    }
    return false;
  }

  /**
   * Returns whether {@code tokens} has a token at {@code index} whose POS tag starts with
   * "V". An out-of-range index or a token without a tag yields {@code false} rather than
   * an exception, so patterns at the very start or end of the document simply do not match.
   */
  private static boolean isVerbAt(List<CoreLabel> tokens, int index) {
    if (index < 0 || index >= tokens.size()) {
      return false;
    }
    String tag = tokens.get(index).tag();
    return tag != null && tag.startsWith("V");
  }
}