package edu.stanford.nlp.pipeline;
import junit.framework.TestCase;
import java.util.List;
import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
public class ArabicSegmenterAnnotatorITest extends TestCase {
StanfordCoreNLP pipeline = null;
@Override
public void setUp()
throws Exception
{
if (pipeline != null) {
return;
}
Properties props = new Properties();
props.setProperty("annotators", "segment");
props.setProperty("customAnnotatorClass.segment", "edu.stanford.nlp.pipeline.ArabicSegmenterAnnotator");
props.setProperty("segment.model", "/u/nlp/data/arabic-segmenter/arabic-segmenter-atb+bn+arztrain.ser.gz");
pipeline = new StanfordCoreNLP(props);
}
public void testPipeline() {
String query = "وما هي كلمتُك المفضلة للدراسة؟";
String[] expectedWords = {"و", "ما", "هي", "كلمة", "ك", "المفضلة", "ل", "الدراسة", "?"};
int[] expectedStartPositions = {0, 1, 4, 7, 12, 14, 22, 23, 29};
int[] expectedEndPositions = {1, 3, 6, 11, 13, 21, 23, 29, 30};
Annotation annotation = new Annotation(query);
pipeline.annotate(annotation);
List<CoreLabel> tokens = annotation.get(TokensAnnotation.class);
assertEquals(expectedWords.length, tokens.size());
for (int i = 0; i < expectedWords.length; ++i) {
assertEquals(expectedWords[i], tokens.get(i).word());
assertEquals(expectedStartPositions[i], tokens.get(i).beginPosition());
assertEquals(expectedEndPositions[i], tokens.get(i).endPosition());
}
}
}