GenderAnnotator.java example

Explorer
CoreNLP-master
package edu.stanford.nlp.pipeline; 
import edu.stanford.nlp.util.logging.Redwood;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;

import edu.stanford.nlp.ie.regexp.RegexNERSequenceClassifier;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.CoreMap;

/**
 * This class adds gender information (e.g. MALE / FEMALE) to tokens as
 * {@link MachineReadingAnnotations.GenderAnnotation}s. It runs a
 * {@link RegexNERSequenceClassifier}, built from a mapping resource of token text to
 * gender labels, over the tokens of every sentence and copies the label the classifier
 * left in {@link CoreAnnotations.AnswerAnnotation} into the gender annotation.
 * Assumes that the Annotation has already been split into sentences, then tokenized
 * into Lists of CoreLabels.
 *
 * @author jtibs
 */
public class GenderAnnotator implements Annotator {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(GenderAnnotator.class);

  /** Classifier that labels tokens according to the gender mapping. */
  private final RegexNERSequenceClassifier classifier;

  /** When true, a progress message is logged on every call to {@link #annotate}. */
  private final boolean verbose;

  /**
   * Creates a non-verbose annotator backed by the default first-name gender mapping
   * ({@code DefaultPaths.DEFAULT_GENDER_FIRST_NAMES}).
   */
  public GenderAnnotator() {
    this(false, DefaultPaths.DEFAULT_GENDER_FIRST_NAMES);
  }

  /**
   * Creates an annotator backed by the given mapping.
   *
   * @param verbose whether to log a message each time {@link #annotate} runs
   * @param mapping location of the mapping handed to the {@link RegexNERSequenceClassifier}
   */
  public GenderAnnotator(boolean verbose, String mapping) {
    // NOTE(review): the two boolean flags are presumably ignoreCase and overwriteMyLabels;
    // confirm against the RegexNERSequenceClassifier constructor.
    classifier = new RegexNERSequenceClassifier(mapping, true, true);
    this.verbose = verbose;
  }

  /**
   * Sets a {@link MachineReadingAnnotations.GenderAnnotation} on every token of every
   * sentence in the given annotation.
   *
   * @param annotation the document to annotate; must contain sentences, each with tokens
   * @throws RuntimeException if the annotation has no sentences, or a sentence has no tokens
   */
  @Override
  public void annotate(Annotation annotation) {
    if (verbose) {
      log.info("Adding gender annotation...");
    }

    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
      throw new RuntimeException("Unable to find sentences in " + annotation);
    }

    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      if (tokens == null) {
        // Fail with a descriptive message instead of a bare NullPointerException further down.
        throw new RuntimeException("Unable to find tokens in " + sentence);
      }

      // The classifier's result is read back from each token's AnswerAnnotation below.
      classifier.classify(tokens);

      for (CoreLabel token : tokens) {
        token.set(MachineReadingAnnotations.GenderAnnotation.class,
            token.get(CoreAnnotations.AnswerAnnotation.class));
      }
    }
  }

  /**
   * Declares the annotations that must already be present before this annotator runs.
   *
   * @return an unmodifiable set holding the text, tokens, sentences and named-entity-tag
   *         annotation classes
   */
  @Override
  public Set<Class<? extends CoreAnnotation>> requires() {
    return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
        CoreAnnotations.TextAnnotation.class,
        CoreAnnotations.TokensAnnotation.class,
        CoreAnnotations.SentencesAnnotation.class,
        CoreAnnotations.NamedEntityTagAnnotation.class
    )));
  }

  /**
   * Declares the annotation this annotator provides.
   *
   * @return a singleton set holding {@link MachineReadingAnnotations.GenderAnnotation}
   */
  @Override
  public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
    return Collections.singleton(MachineReadingAnnotations.GenderAnnotation.class);
  }

}