TrigramSieve.java example

Explorer
CoreNLP-master
package edu.stanford.nlp.quoteattribution.Sieves.QMSieves;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.QuoteAttributionAnnotator;
import edu.stanford.nlp.quoteattribution.*;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * @author Grace Muzny
 */
public class TrigramSieve extends QMSieve {

  public TrigramSieve(Annotation doc, Map<String, List<Person>> characterMap,
                      Map<Integer, String> pronounCorefMap, Set<String> animacySet) {
    super(doc, characterMap, pronounCorefMap, animacySet, "");
  }

  public void doQuoteToMention(Annotation doc) {
    trigramPatterns(doc);
    oneSpeakerSentence(doc);
  }

  public void trigramPatterns(Annotation doc) {
    List<CoreLabel> docTokens = doc.get(CoreAnnotations.TokensAnnotation.class);
    List<CoreMap> docQuotes = doc.get(CoreAnnotations.QuotationsAnnotation.class);
    for(CoreMap quote : docQuotes) {
      if(quote.get(QuoteAttributionAnnotator.MentionAnnotation.class) != null)
        continue;
      int quoteBeginTokenIndex = quote.get(CoreAnnotations.TokenBeginAnnotation.class);
      int quoteEndTokenIndex = quote.get(CoreAnnotations.TokenEndAnnotation.class);
      int quoteEndSentenceIndex = quote.get(CoreAnnotations.SentenceEndAnnotation.class);
      Pair<Integer, Integer> precedingTokenRange = QuoteAttributionUtils.getTokenRangePrecedingQuote(doc, quote);
      //get tokens before and after
      if(precedingTokenRange != null) {
        Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> namesAndNameIndices = scanForNames(precedingTokenRange);
        ArrayList<String> names = namesAndNameIndices.first;
        ArrayList<Pair<Integer, Integer>> nameIndices = namesAndNameIndices.second;


        if (names.size() > 0) {
          int offset = 0;
          if(beforeQuotePunctuation.contains(docTokens.get(quoteBeginTokenIndex - 1).word())) {
            offset = 1;
          }
          Pair<Integer, Integer> lastNameIndex = nameIndices.get(nameIndices.size() - 1);
          CoreLabel prevToken = docTokens.get(quoteBeginTokenIndex - 1 - offset);


          //CVQ
          if (prevToken.tag().startsWith("V")  // verb!
                  && lastNameIndex.second.equals(quoteBeginTokenIndex - 2 - offset)) {
            fillInMention(quote, names.get(names.size() - 1), lastNameIndex.first, lastNameIndex.second, "trigram CVQ", NAME);
            continue;
          }
          //VCQ
          if (lastNameIndex.second.equals(quoteBeginTokenIndex - 1 - offset)) {
            CoreLabel secondPrevToken = docTokens.get(lastNameIndex.first - 1);
            if(secondPrevToken.tag().startsWith("V")) {
              fillInMention(quote, names.get(names.size() - 1), lastNameIndex.first, lastNameIndex.second, "trigram VCQ", NAME);
              continue;
            }
          }
        }
        ArrayList<Integer> pronounsIndices = scanForPronouns(precedingTokenRange);
        if (pronounsIndices.size() > 0) {

          int offset = 0;
          if(beforeQuotePunctuation.contains(docTokens.get(quoteBeginTokenIndex - 1).word())) {
            offset = 1;
          }

          CoreLabel prevToken = docTokens.get(quoteBeginTokenIndex - 1 - offset);
          int lastPronounIndex = pronounsIndices.get(pronounsIndices.size() - 1);
          //PVQ
          if (prevToken.tag().startsWith("V")  // verb!
                  && lastPronounIndex == quoteBeginTokenIndex - 2 - offset) {
            fillInMention(quote, tokenRangeToString(lastPronounIndex), lastPronounIndex, lastPronounIndex, "trigram PVQ", PRONOUN);
            continue;
          }
          //VPQ
          if (lastPronounIndex == quoteBeginTokenIndex - 1 - offset
                  && docTokens.get(quoteBeginTokenIndex - 2 - offset).tag().startsWith("V")) {
            fillInMention(quote, tokenRangeToString(lastPronounIndex), lastPronounIndex, lastPronounIndex, "trigram VPQ", PRONOUN);
            continue;
          }
        }
      }

      Pair<Integer, Integer> followingTokenRange = QuoteAttributionUtils.getTokenRangeFollowingQuote(doc, quote);
      if(followingTokenRange != null) {
        Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> namesAndNameIndices = scanForNames(followingTokenRange);
        ArrayList<String> names = namesAndNameIndices.first;
        ArrayList<Pair<Integer, Integer>> nameIndices = namesAndNameIndices.second;

        if (names.size() > 0) {
          Pair<Integer, Integer> firstNameIndex = nameIndices.get(0);
          CoreLabel nextToken = docTokens.get(quoteEndTokenIndex + 1);
          //QVC
          if (nextToken.tag().startsWith("V")  // verb!
                  && firstNameIndex.first.equals(quoteEndTokenIndex + 2)) {
            fillInMention(quote, names.get(0), firstNameIndex.first, firstNameIndex.second, "trigram QVC", NAME);
            continue;
          }
          //QCV
          if (firstNameIndex.first.equals(quoteEndTokenIndex + 1)) {
            CoreLabel secondNextToken = docTokens.get(firstNameIndex.second + 1);
            if(secondNextToken.tag().startsWith("V")) {
              fillInMention(quote, names.get(0), firstNameIndex.first, firstNameIndex.second, "trigram QCV", NAME);
              continue;
            }
          }
        }
        ArrayList<Integer> pronounsIndices = scanForPronouns(followingTokenRange);
        if (pronounsIndices.size() > 0) {
          CoreLabel nextToken = docTokens.get(quoteEndTokenIndex + 1);
          int firstPronounIndex = pronounsIndices.get(0);
          //QVP
          if (nextToken.tag().startsWith("V")  // verb!
                  && firstPronounIndex == quoteEndTokenIndex + 2) {
            fillInMention(quote, tokenRangeToString(pronounsIndices.get(0)), firstPronounIndex, firstPronounIndex, "trigram QVP", PRONOUN);
            continue;
          }
          //QPV
          if (firstPronounIndex == quoteEndTokenIndex + 1
                  && docTokens.get(quoteEndTokenIndex + 2).tag().startsWith("V")) {
            fillInMention(quote, tokenRangeToString(pronounsIndices.get(pronounsIndices.size() - 1)), firstPronounIndex,
                    firstPronounIndex, "trigram QPV", PRONOUN);
            continue;
          }
        }
      }
    }
  }
}