// ConversationalSieve.java example
//
// Explorer
// CoreNLP-master
package edu.stanford.nlp.quoteattribution.Sieves.QMSieves;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.QuoteAttributionAnnotator;
import edu.stanford.nlp.quoteattribution.*;
import edu.stanford.nlp.quoteattribution.Sieves.Sieve;
import edu.stanford.nlp.util.CoreMap;

import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Created by mjfang on 7/7/16.
 */
public class ConversationalSieve extends QMSieve {

  public ConversationalSieve(Annotation doc, Map<String, List<Person>> characterMap, Map<Integer, String> pronounCorefMap, Set<String> animacySet) {
    super(doc, characterMap, pronounCorefMap, animacySet, "conv");
  }

  //attribute conversational mentions: assign the mention to the same quote as the
  //if quote X has not been labelled, has no add'l text, and quote X-2 has been labelled, and quotes X-2, X-1, and X are consecutive in paragraph,
  //and X-1's quote does not refer to a name:
  //give quote X the same mention as X-2.
  public void doQuoteToMention(Annotation doc) {
    List<CoreMap> quotes = doc.get(CoreAnnotations.QuotationsAnnotation.class);
    List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
    List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
    for(int index = 2; index < quotes.size(); index++) {
      CoreMap currQuote = quotes.get(index);
      CoreMap prevQuote = quotes.get(index - 1);
      CoreMap twoPrevQuote = quotes.get(index - 2);

      int twoPrevPara = getQuoteParagraph(twoPrevQuote);
      //default to first in quote that begins n-2
      for(int i = index-3; i >= 0; i--)
      {
        if(getQuoteParagraph(quotes.get(i)) == twoPrevPara) {
          twoPrevQuote = quotes.get(i);
        }
        else
          break;
      }
      int tokenBeginIdx = currQuote.get(CoreAnnotations.TokenBeginAnnotation.class);
      int tokenEndIdx = currQuote.get(CoreAnnotations.TokenEndAnnotation.class);
      CoreMap currQuoteBeginSentence = sentences.get(currQuote.get(CoreAnnotations.SentenceBeginAnnotation.class));
      boolean isAloneInParagraph = true;
      if(tokenBeginIdx > 0) {
        CoreLabel prevToken = tokens.get(tokenBeginIdx - 1);
        CoreMap prevSentence = sentences.get(prevToken.get(CoreAnnotations.SentenceIndexAnnotation.class));
        if(prevSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class).equals(currQuoteBeginSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class))) {
          isAloneInParagraph = false;
        }
      }
      if(tokenEndIdx < tokens.size() - 1) {
        CoreLabel nextToken = tokens.get(tokenEndIdx + 1);
        CoreMap nextSentence = sentences.get(nextToken.get(CoreAnnotations.SentenceIndexAnnotation.class));
        if(nextSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class).equals(currQuoteBeginSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class))) {
          isAloneInParagraph = false;
        }
      }
      if(twoPrevQuote.get(QuoteAttributionAnnotator.MentionAnnotation.class) == null
              || !isAloneInParagraph
              || currQuote.get(QuoteAttributionAnnotator.MentionAnnotation.class) != null
              || twoPrevQuote.get(QuoteAttributionAnnotator.MentionTypeAnnotation.class).equals(Sieve.PRONOUN)) {
        continue;
      }
      if(getQuoteParagraph(currQuote) == getQuoteParagraph(prevQuote) + 1 && getQuoteParagraph(prevQuote) == getQuoteParagraph(twoPrevQuote) + 1) {
        fillInMention(currQuote, getMentionData(twoPrevQuote), sieveName);
      }
    }
  }
}