//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.stats; import static org.junit.Assert.assertEquals; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.resource.ResourceInitializationException; import org.junit.Test; import uk.gov.dstl.baleen.annotators.stats.DocumentLanguage; import uk.gov.dstl.baleen.annotators.testing.AbstractAnnotatorTest; /** * */ public class DocumentLanguageTest extends AbstractAnnotatorTest{ public DocumentLanguageTest() { super(DocumentLanguage.class); } @Test public void testEN() throws Exception{ jCas.setDocumentText("Hello, my name is Andrew. I come from London, but I live in Salisbury. I studied Physics at university, and I play the trumpet."); int i = 0; while(i < 5 && !"en".equals(jCas.getDocumentLanguage())){ //Loop because on short pieces of text the DocumentLanguage sampling can give x-unspecified. processJCas(); } assertEquals("en", jCas.getDocumentLanguage()); } @Test public void testDE() throws AnalysisEngineProcessException, ResourceInitializationException { jCas.setDocumentText("Hallo, mein Name ist Andrew. Ich komme aus London, aber ich lebe in Salisbury. Ich studierte Physik an der Universität, und ich spiele die Trompete."); int i = 0; while(i < 5 && !"de".equals(jCas.getDocumentLanguage())){ //Loop because on short pieces of text the DocumentLanguage sampling can give x-unspecified. processJCas(); } assertEquals("de", jCas.getDocumentLanguage()); } @Test public void testNoneSuch() throws AnalysisEngineProcessException, ResourceInitializationException { // not any language... jCas.setDocumentText("sdrwkcb s't dn, slwv ylrtn s nctns sht."); processJCas(); assertEquals("x-unspecified", jCas.getDocumentLanguage()); } }