package edu.stanford.nlp.pipeline;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import edu.stanford.nlp.ie.regexp.RegexNERSequenceClassifier;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.CoreMap;
/**
* This class adds gender information (MALE / FEMALE) to tokens as GenderAnnotations. It uses the
* RegexNERSequenceClassifier and a manual mapping from token text to gender labels. Assumes
* that the Annotation has already been split into sentences, then tokenized into Lists of CoreLabels.
*
* @author jtibs
*/
public class GenderAnnotator implements Annotator {
/** A logger for this class */
private static Redwood.RedwoodChannels log = Redwood.channels(GenderAnnotator.class);
private final RegexNERSequenceClassifier classifier;
private final boolean verbose;
public GenderAnnotator() {
this(false, DefaultPaths.DEFAULT_GENDER_FIRST_NAMES);
}
public GenderAnnotator(boolean verbose, String mapping) {
classifier = new RegexNERSequenceClassifier(mapping, true, true);
this.verbose = verbose;
}
public void annotate(Annotation annotation) {
if (verbose) {
log.info("Adding gender annotation...");
}
if (! annotation.containsKey(CoreAnnotations.SentencesAnnotation.class))
throw new RuntimeException("Unable to find sentences in " + annotation);
List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
classifier.classify(tokens);
for (CoreLabel token : tokens) {
token.set(MachineReadingAnnotations.GenderAnnotation.class, token.get(CoreAnnotations.AnswerAnnotation.class));
}
}
}
@Override
public Set<Class<? extends CoreAnnotation>> requires() {
return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
CoreAnnotations.TextAnnotation.class,
CoreAnnotations.TokensAnnotation.class,
CoreAnnotations.SentencesAnnotation.class,
CoreAnnotations.NamedEntityTagAnnotation.class
)));
}
@Override
public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
return Collections.singleton(MachineReadingAnnotations.GenderAnnotation.class);
}
}