package edu.stanford.nlp.pipeline;
import junit.framework.TestCase;
import java.util.List;
import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
/**
 * Integration test for {@code ChineseSegmenterAnnotator}, loaded into a
 * {@code StanfordCoreNLP} pipeline as the custom annotator {@code "cseg"}.
 * <p>
 * Requires segmenter model/dictionary files at fixed NLP-cluster paths
 * (see {@code setUp()}), so this runs as an ITest rather than a unit test.
 */
public class ChineseSegmenterAnnotatorITest extends TestCase {

  // Static, not an instance field: JUnit 3 creates a fresh TestCase instance
  // for every test method, so an instance-level null check would never cache
  // and the (expensive) segmenter model would be reloaded per test.
  // Lazily initialized by setUp() on first use.
  static StanfordCoreNLP pipeline = null;

  @Override
  public void setUp()
    throws Exception
  {
    // Build the pipeline only once per JVM; subsequent setUp() calls reuse it.
    if (pipeline != null) {
      return;
    }
    Properties props = new Properties();
    // Register the Chinese segmenter as a custom annotator named "cseg".
    props.setProperty("annotators", "cseg");
    props.setProperty("customAnnotatorClass.cseg", "edu.stanford.nlp.pipeline.ChineseSegmenterAnnotator");
    // Model and dictionary paths on the Stanford NLP cluster.
    props.setProperty("cseg.model", "/u/nlp/data/gale/segtool/stanford-seg/classifiers-2010/05202008-ctb6.processed-chris6.lex.gz");
    props.setProperty("cseg.sighanCorporaDict", "/u/nlp/data/gale/segtool/stanford-seg/releasedata");
    props.setProperty("cseg.serDictionary", "/u/nlp/data/gale/segtool/stanford-seg/classifiers/dict-chris6.ser.gz");
    props.setProperty("cseg.sighanPostProcessing", "true");
    pipeline = new StanfordCoreNLP(props);
  }

  /**
   * Segments a short Chinese query and checks both the token strings and
   * their character offsets into the original text.
   */
  public void testPipeline() {
    String query = "你马上回来北京吗?";
    String[] expectedWords = {"你", "马上", "回来", "北京", "吗", "?"};
    // Fencepost array of length expectedWords.length + 1: entry i is the
    // begin offset of token i, and entry i + 1 is its end offset.
    int[] expectedPositions = {0, 1, 3, 5, 7, 8, 9};

    Annotation annotation = new Annotation(query);
    pipeline.annotate(annotation);
    List<CoreLabel> tokens = annotation.get(TokensAnnotation.class);

    assertEquals(expectedWords.length, tokens.size());
    for (int i = 0; i < expectedWords.length; ++i) {
      assertEquals(expectedWords[i], tokens.get(i).word());
      assertEquals(expectedPositions[i], tokens.get(i).beginPosition());
      assertEquals(expectedPositions[i + 1], tokens.get(i).endPosition());
    }
  }
}