package edu.stanford.nlp.pipeline;
import hu.u_szeged.utils.Stopword;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Timing;
/**
 * Annotator that flags every token of every sentence with a Boolean
 * {@link StopWordAnnotation}, as decided by a shared {@link Stopword} instance.
 *
 * <p>The input {@link Annotation} must already carry a
 * {@code SentencesAnnotation}; otherwise {@link #annotate(Annotation)} throws.
 */
public class StopWordAnnotator implements Annotator {

  /** Used only to time a run when {@link #verbose} is enabled. */
  private final Timing timer;

  /** When true, progress and timing information is written to {@code System.err}. */
  private final boolean verbose;

  /**
   * Stop word checker shared by all instances. It stays {@code null} until the
   * first annotator is constructed, unless a caller assigns it beforehand.
   */
  public static Stopword stopWord;

  /** Creates a non-verbose annotator. */
  public StopWordAnnotator() {
    this(false);
  }

  /**
   * Creates an annotator.
   *
   * @param verbose whether {@link #annotate(Annotation)} reports progress and timing
   */
  public StopWordAnnotator(boolean verbose) {
    // The field is static: only create the checker when none exists, so that
    // constructing another annotator does not replace the shared instance.
    if (stopWord == null) {
      stopWord = new Stopword();
    }
    this.timer = new Timing();
    this.verbose = verbose;
  }

  /**
   * Sets {@link StopWordAnnotation} on each token of each sentence in the given
   * annotation. Sentences that carry no token list are skipped.
   *
   * @param annotation the document to annotate; must contain a {@code SentencesAnnotation}
   * @throws RuntimeException if the annotation has no {@code SentencesAnnotation}
   */
  @Override
  public void annotate(Annotation annotation) {
    if (verbose) {
      timer.start();
      System.err.print("Adding stop word annotation...");
    }

    if (!annotation.has(CoreAnnotations.SentencesAnnotation.class)) {
      throw new RuntimeException("unable to find words/tokens in: " + annotation);
    }

    for (CoreMap sentence : annotation.get(SentencesAnnotation.class)) {
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      if (tokens == null) {
        // Nothing to flag for a sentence without tokens.
        continue;
      }
      for (CoreLabel token : tokens) {
        token.set(StopWordAnnotation.class, stopWord.isStopword(token));
      }
    }

    if (verbose) {
      timer.stop("done.");
    }
  }

  /** Annotation key under which the Boolean stop word flag of a token is stored. */
  public static class StopWordAnnotation implements CoreAnnotation<Boolean> {
    @Override
    public Class<Boolean> getType() {
      return Boolean.class;
    }
  }

  /**
   * Declares the tokenize requirement as the only prerequisite.
   *
   * <p>NOTE(review): {@link #annotate(Annotation)} also needs a
   * {@code SentencesAnnotation}, which is not declared here; confirm whether a
   * sentence-splitting requirement should be added.
   */
  @Override
  public Set<Requirement> requires() {
    return Collections.singleton(TOKENIZE_REQUIREMENT);
  }

  /**
   * Reports the tokenize requirement as satisfied by this annotator.
   *
   * <p>NOTE(review): this class only sets {@link StopWordAnnotation} on existing
   * tokens; confirm that advertising the tokenize requirement is intended.
   */
  @Override
  public Set<Requirement> requirementsSatisfied() {
    return Collections.singleton(TOKENIZE_REQUIREMENT);
  }
}