StopWordAnnotator.java example

Explorer

kpe-master
- src
  - edu
    - stanford
      - nlp
        - pipeline
          HunTokenizerAnnotator.java
          MweDictAnnotator.java
          MyCleanXmlAnnotator.java
          NormalizerAnnotator.java
          OwnMorphaAnnotator.java
          OwnPOSTaggerAnnotator.java
          StopWordAnnotator.java
          SzTEAnnotationPipeline.java
          SzTECoreNLP.java
        - process
          HunPTBLexer.java
          HunTokenizer.java
        - tagger
          - maxent
            OwnMaxentTagger.java
            OwnTestSentence.java
  - hu
    - u_szeged

package edu.stanford.nlp.pipeline;

import hu.u_szeged.utils.Stopword;

import java.util.Collections;
import java.util.List;
import java.util.Set;

import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Timing;

/**
 * Annotator that marks every token of every sentence with a
 * {@link StopWordAnnotation} flag.
 *
 * <p>The flag value is whatever {@code Stopword.isStopword(CoreLabel)} returns
 * for the token. The annotation must already contain a
 * {@code SentencesAnnotation}; otherwise {@link #annotate(Annotation)} throws.
 */
public class StopWordAnnotator implements Annotator {

  /** Measures the duration of a single {@link #annotate(Annotation)} call in verbose mode. */
  private final Timing timer;

  /** When {@code true}, progress and timing information is written to {@code System.err}. */
  private final boolean verbose;

  /**
   * Stop-word checker used by all instances. It is replaced with a fresh
   * {@code Stopword} every time an annotator is constructed.
   */
  public static Stopword stopWord;

  /** Creates a non-verbose annotator. */
  public StopWordAnnotator() {
    this(false);
  }

  /**
   * Creates an annotator and (re)initialises the shared {@link #stopWord} checker.
   *
   * @param verbose whether to report progress and timing on {@code System.err}
   */
  public StopWordAnnotator(boolean verbose) {
    stopWord = new Stopword();
    timer = new Timing();
    this.verbose = verbose;
  }

  /**
   * Sets {@link StopWordAnnotation} on each token of each sentence in the
   * given annotation.
   *
   * @param annotation the document annotation to process
   * @throws RuntimeException if {@code annotation} has no {@code SentencesAnnotation}
   */
  @Override
  public void annotate(Annotation annotation) {
    if (verbose) {
      timer.start();
      // Previously this printed the normalizer's message, which misreported the running step.
      System.err.print("Adding stop word annotation...");
    }

    if (!annotation.has(CoreAnnotations.SentencesAnnotation.class)) {
      throw new RuntimeException("unable to find words/tokens in: " + annotation);
    }

    List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      for (CoreLabel token : tokens) {
        token.set(StopWordAnnotation.class, stopWord.isStopword(token));
      }
    }

    if (verbose) {
      timer.stop("done.");
    }
  }

  /** Annotation key under which the per-token stop-word flag is stored. */
  public static class StopWordAnnotation implements CoreAnnotation<Boolean> {
    @Override
    public Class<Boolean> getType() {
      return Boolean.class;
    }
  }

  /** Declares that tokenization must have run before this annotator. */
  @Override
  public Set<Requirement> requires() {
    return Collections.singleton(TOKENIZE_REQUIREMENT);
  }

  /**
   * Reports the requirements this annotator claims to satisfy.
   *
   * <p>NOTE(review): this returns the tokenize requirement although the
   * annotator only adds stop-word flags; confirm whether a dedicated
   * requirement (or an empty set) was intended.
   */
  @Override
  public Set<Requirement> requirementsSatisfied() {
    return Collections.singleton(TOKENIZE_REQUIREMENT);
  }
}