package edu.stanford.nlp.pipeline;

import hu.u_szeged.utils.stemmer.PorterStemmer;
import hu.u_szeged.utils.stemmer.Stemmer;

import java.util.Collections;
import java.util.List;
import java.util.Set;

import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Timing;

/**
 * Annotator that stores a stemmed ("normalized") form of every token under
 * {@link NormalizerAnnotation}.
 *
 * <p>For each token of each sentence, the token's lemma is stemmed with a
 * {@link PorterStemmer}; when the token carries no lemma, its text is stemmed
 * instead.
 */
public class NormalizerAnnotator implements Annotator {

  // NOTE(review): one stemmer instance is shared by every annotator instance;
  // confirm that PorterStemmer.stemString is safe for concurrent use.
  private static final Stemmer stemmer = new PorterStemmer();

  private final Timing timer;
  private final boolean verbose;

  /** Creates a non-verbose annotator. */
  public NormalizerAnnotator() {
    this(false);
  }

  /**
   * Creates an annotator.
   *
   * @param verbose when {@code true}, progress and timing are reported on
   *     {@code System.err} for every call to {@link #annotate(Annotation)}
   */
  public NormalizerAnnotator(boolean verbose) {
    this.timer = new Timing();
    this.verbose = verbose;
  }

  /**
   * Sets {@link NormalizerAnnotation} on every token of every sentence in
   * {@code annotation}.
   *
   * @param annotation the document to annotate; must carry a
   *     {@link SentencesAnnotation} whose sentences each carry a
   *     {@link TokensAnnotation}
   * @throws RuntimeException if the sentences, or the tokens of any sentence,
   *     are missing
   */
  @Override
  public void annotate(Annotation annotation) {
    if (verbose) {
      timer.start();
      System.err.print("Adding normalized token annotation...");
    }

    // Checking the fetched value (rather than only key presence) also covers
    // a key that is present but mapped to null.
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences == null) {
      throw new RuntimeException("unable to find words/tokens in: " + annotation);
    }

    for (CoreMap sentence : sentences) {
      List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
      if (tokens == null) {
        // Fail with a descriptive message instead of a bare NullPointerException.
        throw new RuntimeException("unable to find words/tokens in: " + sentence);
      }
      for (CoreLabel token : tokens) {
        String text = token.get(TextAnnotation.class);
        String lemma = token.get(LemmaAnnotation.class);
        addLemma(NormalizerAnnotation.class, token, lemma, text);
      }
    }

    if (verbose) {
      timer.stop("done.");
    }
  }

  /**
   * Stems the lemma (or, when the lemma is {@code null}, the text) and stores
   * the result on {@code map} under {@code ann}. Does nothing when both the
   * lemma and the text are {@code null}.
   */
  private void addLemma(Class<? extends CoreAnnotation<String>> ann, CoreMap map,
      String lemma, String text) {
    String source = (lemma != null) ? lemma : text;
    if (source == null) {
      // Nothing to stem; never hand null to the stemmer.
      return;
    }
    map.set(ann, stemmer.stemString(source));
  }

  /** Key under which the stemmed form of a token is stored. */
  public static class NormalizerAnnotation implements CoreAnnotation<String> {
    @Override
    public Class<String> getType() {
      return String.class;
    }
  }

  /**
   * Returns an empty, unmodifiable requirement set.
   *
   * <p>NOTE(review): {@link #annotate(Annotation)} reads sentence and token
   * annotations, yet no requirement is declared here; confirm this is
   * intentional.
   */
  @Override
  public Set<Requirement> requires() {
    return Collections.unmodifiableSet(new ArraySet<Requirement>());
  }

  /**
   * Returns a singleton set containing {@code TOKENIZE_REQUIREMENT}.
   *
   * <p>NOTE(review): this annotator only sets {@link NormalizerAnnotation};
   * confirm that advertising the tokenize requirement is intended.
   */
  @Override
  public Set<Requirement> requirementsSatisfied() {
    return Collections.singleton(TOKENIZE_REQUIREMENT);
  }
}