package edu.stanford.nlp.international.spanish;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import junit.framework.TestCase;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
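/**
 * Tests the tokenizer annotator on Spanish text; the Spanish counterpart of
 * {@link edu.stanford.nlp.pipeline.TokenizerAnnotatorTest}.
 * This is an itest because it relies on a model.
 *
 * @author Gabor Angeli
 * @see edu.stanford.nlp.pipeline.TokenizerAnnotatorTest
 */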
public class SpanishTokenizerAnnotatorITest extends TestCase {

  /** Expected token texts, in order, for the input {@code "Damelo"}. */
  private static final List<String> spanishTokens = Arrays.asList(
      "Da",
      "me",
      "lo");

  /**
   * Runs a pipeline whose only annotator is {@code tokenize}, with
   * {@code tokenize.language} set to {@code es}, over the text {@code "Damelo"} and checks
   * that the text of each produced token matches {@link #spanishTokens}, in order and with
   * no tokens missing or left over.
   */
  public void testSpanish() {
    Annotation ann = new Annotation("Damelo");
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize");
    props.setProperty("tokenize.language", "es");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(ann);

    // Fail with a readable message (rather than an NPE in the loop header) if no tokens were set.
    List<CoreLabel> tokens = ann.get(CoreAnnotations.TokensAnnotation.class);
    assertNotNull("No tokens produced in new CoreLabel usage", tokens);

    Iterator<String> it = spanishTokens.iterator();
    for (CoreLabel word : tokens) {
      // Check before calling next(): without this, an extra token would surface as a
      // NoSuchElementException (a test error) instead of an assertion failure.
      assertTrue("Too many tokens in new CoreLabel usage", it.hasNext());
      assertEquals("Bung token in new CoreLabel usage", it.next(), word.get(CoreAnnotations.TextAnnotation.class));
    }
    assertFalse("Too few tokens in new CoreLabel usage", it.hasNext());
  }
}