// SpanishTokenizerAnnotatorITest.java example
//
// Explorer
// CoreNLP-master
package edu.stanford.nlp.international.spanish;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import junit.framework.TestCase;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;

/**
 * Spanish counterpart of the English tokenizer annotator test. This is an
 * itest because it relies on a model.
 *
 * <p>Runs a pipeline containing only the {@code tokenize} annotator with
 * {@code tokenize.language} set to {@code es} and checks the text of every
 * token it produces.
 *
 * @see edu.stanford.nlp.pipeline.TokenizerAnnotatorTest
 *
 * @author Gabor Angeli
 */
public class SpanishTokenizerAnnotatorITest extends TestCase {

  /** Token texts expected from tokenizing {@code "Damelo"}, in order. */
  private static final List<String> spanishTokens = Arrays.asList(
      "Da",
      "me",
      "lo");

  /**
   * Tokenizes {@code "Damelo"} and verifies that the resulting token texts
   * match {@link #spanishTokens} exactly: same texts, same order, and same
   * count.
   */
  public void testSpanish() {
    Annotation ann = new Annotation("Damelo");
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize");
    props.setProperty("tokenize.language", "es");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(ann);

    List<CoreLabel> tokens = ann.get(CoreAnnotations.TokensAnnotation.class);
    Iterator<String> expected = spanishTokens.iterator();
    Iterator<CoreLabel> actual = tokens.iterator();

    // Walk both sequences in lockstep so that a surplus on either side is
    // reported as an assertion failure below rather than surfacing as a
    // NoSuchElementException from an unguarded next() call.
    while (expected.hasNext() && actual.hasNext()) {
      assertEquals("Bung token in new CoreLabel usage",
          expected.next(), actual.next().get(CoreAnnotations.TextAnnotation.class));
    }
    assertFalse("Too few tokens in new CoreLabel usage", expected.hasNext());
    assertFalse("Too many tokens in new CoreLabel usage", actual.hasNext());
  }
}