//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.helpers;
import static org.junit.Assert.assertEquals;
import java.util.Collections;
import java.util.regex.Matcher;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ExternalResourceDescription;
import org.junit.Test;
import com.google.common.collect.ImmutableSet;
import uk.gov.dstl.baleen.annotators.language.OpenNLP;
import uk.gov.dstl.baleen.annotators.regex.helpers.AbstractRegexNPAnnotator;
import uk.gov.dstl.baleen.annotators.testing.AnnotatorTestBase;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.resources.SharedOpenNLPModel;
import uk.gov.dstl.baleen.types.common.Person;
public class AbstractRegexNPAnnotatorTest extends AnnotatorTestBase{
@Test
public void testAllCapitals() throws Exception{
AnalysisEngine ae = AnalysisEngineFactory.createEngine(TestAnnotator.class);
jCas.setDocumentText("PERSON JOHN SMITH WAS SEEN ENTERING THE WAREHOUSE");
ae.process(jCas);
assertEquals(0, JCasUtil.select(jCas, Person.class).size());
}
@Test
public void testNoChunks() throws Exception{
AnalysisEngine ae = AnalysisEngineFactory.createEngine(TestAnnotator.class);
jCas.setDocumentText("PERSON JOHN SMITH was seen entering the warehouse");
ae.process(jCas);
assertEquals(1, JCasUtil.select(jCas, Person.class).size());
assertEquals("JOHN SMITH", JCasUtil.selectByIndex(jCas, Person.class, 0).getValue());
}
@Test
public void testChunks() throws Exception{
ExternalResourceDescription tokensDesc = ExternalResourceFactory.createExternalResourceDescription("tokens", SharedOpenNLPModel.class);
ExternalResourceDescription sentencesDesc = ExternalResourceFactory.createExternalResourceDescription("sentences", SharedOpenNLPModel.class);
ExternalResourceDescription posDesc = ExternalResourceFactory.createExternalResourceDescription("posTags", SharedOpenNLPModel.class);
ExternalResourceDescription chunksDesc = ExternalResourceFactory.createExternalResourceDescription("phraseChunks", SharedOpenNLPModel.class);
AnalysisEngineDescription descNLP = AnalysisEngineFactory.createEngineDescription(OpenNLP.class, "tokens", tokensDesc, "sentences", sentencesDesc, "posTags", posDesc, "phraseChunks", chunksDesc);
AnalysisEngine aeNLP = AnalysisEngineFactory.createEngine(descNLP);
AnalysisEngine ae = AnalysisEngineFactory.createEngine(TestAnnotator.class);
jCas.setDocumentText("PERSON JOHN SMITH WAS SEEN ENTERING THE WAREHOUSE");
aeNLP.process(jCas);
ae.process(jCas);
assertEquals(1, JCasUtil.select(jCas, Person.class).size());
assertEquals("JOHN SMITH", JCasUtil.selectByIndex(jCas, Person.class, 0).getValue());
}
public static class TestAnnotator extends AbstractRegexNPAnnotator<Person>{
public TestAnnotator(){
super("PERSON ([ A-Z]*[A-Z])", 1, true, 1.0);
}
@Override
protected Person create(JCas jCas, Matcher matcher) {
return new Person(jCas);
}
@Override
public AnalysisEngineAction getAction() {
return new AnalysisEngineAction(Collections.emptySet(), ImmutableSet.of(Person.class));
}
}
}