//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.language; import static org.junit.Assert.assertEquals; import java.util.ArrayList; import java.util.List; import org.apache.uima.UIMAException; import org.apache.uima.fit.factory.ExternalResourceFactory; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.cas.FSArray; import org.apache.uima.resource.ExternalResourceDescription; import org.apache.uima.resource.ResourceInitializationException; import org.junit.Test; import uk.gov.dstl.baleen.annotators.language.WordNetLemmatizer; import uk.gov.dstl.baleen.annotators.testing.AbstractAnnotatorTest; import uk.gov.dstl.baleen.resources.SharedWordNetResource; import uk.gov.dstl.baleen.types.language.WordLemma; import uk.gov.dstl.baleen.types.language.WordToken; public class WordNetLemmatizerTest extends AbstractAnnotatorTest { private final ExternalResourceDescription wordnetErd; public WordNetLemmatizerTest() { super(WordNetLemmatizer.class); wordnetErd = ExternalResourceFactory.createExternalResourceDescription("wordnet", SharedWordNetResource.class); } @Test public void test() throws UIMAException, ResourceInitializationException { jCas.setDocumentText("Is this working?"); final WordToken t = new WordToken(jCas); t.setBegin(jCas.getDocumentText().indexOf("working")); t.setEnd(t.getBegin() + "working".length()); t.setPartOfSpeech("VERB"); t.addToIndexes(); // Add an another with an lemma already final WordToken s = new WordToken(jCas); s.setBegin(jCas.getDocumentText().indexOf("working")); s.setEnd(t.getBegin() + "working".length()); s.setPartOfSpeech("VERB"); s.setLemmas(new FSArray(jCas, 1)); final WordLemma existingLemma = new WordLemma(jCas); existingLemma.setPartOfSpeech("existing"); existingLemma.setLemmaForm("existing"); s.setLemmas(0, existingLemma); s.addToIndexes(); processJCas("wordnet", wordnetErd); final List<WordToken> out = new ArrayList<>(JCasUtil.select(jCas, WordToken.class)); assertEquals("work", out.get(0).getLemmas(0).getLemmaForm()); assertEquals(existingLemma, out.get(1).getLemmas(0)); } }