// TimexTreeAnnotator.java example
//
// Explorer
// CoreNLP-master
package edu.stanford.nlp.time;

import java.util.*;

import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Iterables;

/**
 * Annotator that attaches a parse subtree to every timex annotation of a sentence.
 *
 * <p>For each sentence, the sentence's parse tree is span-indexed and each entry of
 * {@code TimeAnnotations.TimexAnnotations} is compared, by character offsets, against the
 * nodes of that tree. The node chosen according to the configured {@link MatchType} is
 * stored on the timex under {@code TreeCoreAnnotations.TreeAnnotation}. A timex for which
 * no node qualifies is left untouched.
 */
public class TimexTreeAnnotator implements Annotator {

  /** Strategy used to pick the tree node that corresponds to a timex. */
  public static enum MatchType {
    /** Pick a node only if its character span is identical to the timex span. */
    ExactMatch,
    /** Pick the narrowest node whose character span covers the timex span. */
    SmallestEnclosing
  }

  /** Matching strategy; fixed at construction time. */
  private final MatchType matchType;

  /**
   * Creates an annotator that uses the given matching strategy.
   *
   * @param matchType how a tree node is selected for each timex
   */
  public TimexTreeAnnotator(MatchType matchType) {
    this.matchType = matchType;
  }

  /**
   * Adds a {@code TreeCoreAnnotations.TreeAnnotation} to each timex of each sentence in
   * {@code document}.
   *
   * @param document annotation providing the sentences; each sentence supplies its tokens,
   *     its parse tree and (optionally) its timex annotations
   * @throws RuntimeException if the configured match type is not handled, or if a tree
   *     node's end index lies beyond the sentence's token list
   */
  @Override
  public void annotate(Annotation document) {
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
      final List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      final Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      // The offset helpers below read the begin/end token indices from each node's label,
      // so the tree is (re-)indexed before any node is inspected.
      tree.indexSpans(0);

      final List<CoreMap> timexes = sentence.get(TimeAnnotations.TimexAnnotations.class);
      if (timexes == null) {
        // No timex list on this sentence: nothing to attach.
        continue;
      }

      // add a tree to each timex annotation
      for (CoreMap timexAnn : timexes) {
        final int timexBegin = beginOffset(timexAnn);
        final int timexEnd = endOffset(timexAnn);

        final Tree subtree;
        switch (this.matchType) {
          // only use trees that match exactly
          case ExactMatch:
            subtree = findExactMatch(tree, tokens, timexBegin, timexEnd);
            break;

          // select the smallest enclosing tree
          case SmallestEnclosing:
            subtree = findSmallestEnclosing(tree, tokens, timexBegin, timexEnd);
            break;

          // more cases could go here if they're added
          default:
            throw new RuntimeException("unexpected match type");
        }

        // add the subtree to the time annotation
        if (subtree != null) {
          timexAnn.set(TreeCoreAnnotations.TreeAnnotation.class, subtree);
        }
      }
    }
  }

  /**
   * Returns the first node, in the tree's iteration order, whose character span equals
   * {@code [timexBegin, timexEnd]}, or {@code null} if there is none.
   */
  private static Tree findExactMatch(Tree root, List<CoreLabel> tokens,
                                     int timexBegin, int timexEnd) {
    for (Tree candidate : root) {
      // Measure the candidate node itself, not the root of the sentence tree.
      int treeBegin = beginOffset(candidate, tokens);
      int treeEnd = endOffset(candidate, tokens);
      if (treeBegin == timexBegin && timexEnd == treeEnd) {
        return candidate;
      }
    }
    return null;
  }

  /**
   * Returns the node with the narrowest character span that still covers
   * {@code [timexBegin, timexEnd]}, or {@code null} if no node covers it. Among nodes of
   * equal width the one encountered first in the tree's iteration order is returned.
   */
  private static Tree findSmallestEnclosing(Tree root, List<CoreLabel> tokens,
                                            int timexBegin, int timexEnd) {
    Tree best = null;
    int bestWidth = 0;
    for (Tree candidate : root) {
      int treeBegin = beginOffset(candidate, tokens);
      int treeEnd = endOffset(candidate, tokens);
      if (treeBegin <= timexBegin && timexEnd <= treeEnd) {
        int width = treeEnd - treeBegin;
        // Strict comparison keeps the earliest candidate on ties.
        if (best == null || width < bestWidth) {
          best = candidate;
          bestWidth = width;
        }
      }
    }
    return best;
  }

  /**
   * Character offset at which the first token covered by {@code tree} begins.
   *
   * @param tree node whose label carries a {@code BeginIndexAnnotation}
   * @param tokens tokens of the sentence the tree belongs to
   */
  private static int beginOffset(Tree tree, List<CoreLabel> tokens) {
    CoreMap label = (CoreMap) tree.label();
    int beginToken = label.get(CoreAnnotations.BeginIndexAnnotation.class);
    return beginOffset(tokens.get(beginToken));
  }

  /**
   * Character offset at which the last token covered by {@code tree} ends. The node's end
   * index is treated as exclusive, i.e. the last covered token is {@code endToken - 1}.
   *
   * @param tree node whose label carries an {@code EndIndexAnnotation}
   * @param tokens tokens of the sentence the tree belongs to
   * @throws RuntimeException if the end index points past the end of {@code tokens}
   */
  private static int endOffset(Tree tree, List<CoreLabel> tokens) {
    CoreMap label = (CoreMap) tree.label();
    int endToken = label.get(CoreAnnotations.EndIndexAnnotation.class);
    if (endToken > tokens.size()) {
      String msg = "no token %d in tree:\n%s\ntokens:\n%s";
      throw new RuntimeException(String.format(msg, endToken - 1, tree, tokens));
    }
    return endOffset(tokens.get(endToken - 1));
  }

  /** Reads the {@code CharacterOffsetBeginAnnotation} of {@code map}. */
  private static int beginOffset(CoreMap map) {
    return map.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  }

  /** Reads the {@code CharacterOffsetEndAnnotation} of {@code map}. */
  private static int endOffset(CoreMap map) {
    return map.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
  }

  /**
   * Annotations this annotator declares as prerequisites.
   *
   * @return an unmodifiable set of the required annotation classes
   */
  @Override
  public Set<Class<? extends CoreAnnotation>> requires() {
    // NOTE(review): annotate() also reads TreeCoreAnnotations.TreeAnnotation and
    // TimeAnnotations.TimexAnnotations, which are not declared here - confirm whether
    // they should be listed.
    return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
        CoreAnnotations.TextAnnotation.class,
        CoreAnnotations.TokensAnnotation.class,
        CoreAnnotations.CharacterOffsetBeginAnnotation.class,
        CoreAnnotations.CharacterOffsetEndAnnotation.class,
        CoreAnnotations.SentencesAnnotation.class
    )));
  }

  /**
   * Annotations this annotator declares as produced; currently none are declared.
   *
   * @return an empty set
   */
  @Override
  public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
    // TODO: not sure what goes here
    return Collections.emptySet();
  }
}