package edu.stanford.nlp.quoteattribution.Sieves.QMSieves;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.QuoteAttributionAnnotator;
import edu.stanford.nlp.quoteattribution.*;
import edu.stanford.nlp.quoteattribution.Sieves.Sieve;
import edu.stanford.nlp.util.CoreMap;

import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Quote-to-mention sieve for back-and-forth dialogue: a quote that stands alone in its
 * paragraph inherits the mention of the quote two paragraphs earlier (speakers alternating).
 *
 * Created by mjfang on 7/7/16.
 */
public class ConversationalSieve extends QMSieve {

  /**
   * Builds the sieve and registers it with the superclass under the sieve name {@code "conv"}.
   * All arguments are passed through to the {@code QMSieve} constructor unchanged.
   */
  public ConversationalSieve(Annotation doc,
                             Map<String, List<Person>> characterMap,
                             Map<Integer, String> pronounCorefMap,
                             Set<String> animacySet) {
    super(doc, characterMap, pronounCorefMap, animacySet, "conv");
  }

  /**
   * Attributes conversational mentions.
   *
   * <p>For every quote X (starting with the third quote of the document), X is given the
   * same mention data as the reference quote when all of the following hold:
   * <ul>
   *   <li>X has no mention yet;</li>
   *   <li>the reference quote, i.e. the first quote of the paragraph that contains quote
   *       X-2, already has a mention and its mention type is not {@code Sieve.PRONOUN};</li>
   *   <li>X has no additional text in its paragraph (neither the token before nor the token
   *       after the quote lies in the paragraph where the quote begins);</li>
   *   <li>the paragraphs of the reference quote, quote X-1 and quote X are consecutive.</li>
   * </ul>
   *
   * <p>A document without a quotation list is left untouched.
   *
   * @param doc the annotated document whose quotes are updated in place
   */
  public void doQuoteToMention(Annotation doc) {
    List<CoreMap> quotes = doc.get(CoreAnnotations.QuotationsAnnotation.class);
    if (quotes == null) {
      // Nothing to attribute; previously this dereferenced null in quotes.size().
      return;
    }
    List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
    List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);

    for (int index = 2; index < quotes.size(); index++) {
      CoreMap currQuote = quotes.get(index);
      CoreMap prevQuote = quotes.get(index - 1);
      CoreMap twoPrevQuote = firstQuoteOfParagraph(quotes, index - 2);

      // Cheap annotation checks first, so token/sentence lookups only run for real candidates.
      if (currQuote.get(QuoteAttributionAnnotator.MentionAnnotation.class) != null
          || twoPrevQuote.get(QuoteAttributionAnnotator.MentionAnnotation.class) == null) {
        continue;
      }
      // Constant-first equals: a quote with a mention but no recorded mention type must not
      // throw; it is simply not a pronoun mention.
      if (Sieve.PRONOUN.equals(twoPrevQuote.get(QuoteAttributionAnnotator.MentionTypeAnnotation.class))) {
        continue;
      }
      if (!isQuoteAloneInParagraph(currQuote, tokens, sentences)) {
        continue;
      }

      int prevParagraph = getQuoteParagraph(prevQuote);
      if (getQuoteParagraph(currQuote) == prevParagraph + 1
          && prevParagraph == getQuoteParagraph(twoPrevQuote) + 1) {
        fillInMention(currQuote, getMentionData(twoPrevQuote), sieveName);
      }
    }
  }

  /**
   * Returns the earliest quote that lies in the same paragraph as {@code quotes.get(position)},
   * considering only the unbroken run of quotes directly preceding it.
   */
  private CoreMap firstQuoteOfParagraph(List<CoreMap> quotes, int position) {
    CoreMap first = quotes.get(position);
    int paragraph = getQuoteParagraph(first);
    for (int i = position - 1; i >= 0; i--) {
      CoreMap candidate = quotes.get(i);
      if (getQuoteParagraph(candidate) != paragraph) {
        break;
      }
      first = candidate;
    }
    return first;
  }

  /**
   * Returns true when neither the text before nor the text after the quote belongs to the
   * paragraph of the sentence in which the quote begins.
   */
  private static boolean isQuoteAloneInParagraph(CoreMap quote, List<CoreLabel> tokens, List<CoreMap> sentences) {
    int tokenBeginIdx = quote.get(CoreAnnotations.TokenBeginAnnotation.class);
    int tokenEndIdx = quote.get(CoreAnnotations.TokenEndAnnotation.class);
    CoreMap beginSentence = sentences.get(quote.get(CoreAnnotations.SentenceBeginAnnotation.class));
    Integer quoteParagraph = beginSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);

    return !neighborSharesParagraph(tokens, sentences, tokenBeginIdx - 1, -1, quoteParagraph)
        && !neighborSharesParagraph(tokens, sentences, tokenEndIdx + 1, 1, quoteParagraph);
  }

  /**
   * Walks from {@code start} in direction {@code step} (+1 or -1) to the nearest token that
   * carries a sentence index and reports whether that token's sentence is in {@code paragraph}.
   *
   * <p>Tokens without a {@code SentenceIndexAnnotation} are skipped instead of being unboxed
   * (which threw a NullPointerException); presumably these are tokens such as tokenized
   * newlines that belong to no sentence -- confirm against the tokenizer configuration.
   * Returns false when {@code start} is outside the token list or no such token exists.
   */
  private static boolean neighborSharesParagraph(List<CoreLabel> tokens, List<CoreMap> sentences,
                                                 int start, int step, Integer paragraph) {
    for (int i = start; i >= 0 && i < tokens.size(); i += step) {
      Integer sentenceIndex = tokens.get(i).get(CoreAnnotations.SentenceIndexAnnotation.class);
      if (sentenceIndex == null) {
        continue;
      }
      CoreMap neighborSentence = sentences.get(sentenceIndex);
      return neighborSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class).equals(paragraph);
    }
    return false;
  }
}