//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.regex; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.fail; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.fit.factory.AnalysisEngineFactory; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.resource.ResourceInitializationException; import org.junit.Test; import uk.gov.dstl.baleen.annotators.regex.Custom; import uk.gov.dstl.baleen.annotators.testing.AnnotatorTestBase; import uk.gov.dstl.baleen.types.common.Person; import uk.gov.dstl.baleen.types.semantic.Entity; import uk.gov.dstl.baleen.types.semantic.Relation; public class CustomTest extends AnnotatorTestBase { private static final String UK_GOV_DSTL_BALEEN_TYPES_COMMON_PERSON = "uk.gov.dstl.baleen.types.common.Person"; private static final String DIGIT_REGEX = "P[0-9]+"; private static final String P456 = "p456"; private static final String P123 = "P123"; private static final String TEXT = "P123 was seen speaking to p456"; @Test public void testMissingType() throws Exception{ AnalysisEngine regexAE = AnalysisEngineFactory.createEngine(Custom.class, Custom.PARAM_PATTERN, DIGIT_REGEX, Custom.PARAM_CASE_SENSITIVE, true); jCas.setDocumentText(TEXT); regexAE.process(jCas); assertEquals(1, JCasUtil.select(jCas, Entity.class).size()); Entity e1 = JCasUtil.selectByIndex(jCas, Entity.class, 0); assertNotNull(e1); assertEquals(P123, e1.getCoveredText()); assertEquals(P123, e1.getValue()); regexAE.destroy(); } @Test public void testBadTypes() throws Exception{ try{ AnalysisEngineFactory.createEngine(Custom.class, Custom.PARAM_TYPE, "this.is.not.a.type", Custom.PARAM_PATTERN, DIGIT_REGEX, Custom.PARAM_CASE_SENSITIVE, true); fail("Expected exception not thrown"); }catch(ResourceInitializationException e){ // Expected exception } try{ AnalysisEngineFactory.createEngine(Custom.class, Custom.PARAM_TYPE, Custom.class.getName(), Custom.PARAM_PATTERN, DIGIT_REGEX, Custom.PARAM_CASE_SENSITIVE, true); fail("Expected exception not thrown"); }catch(ResourceInitializationException e){ // Expected exception } try{ AnalysisEngineFactory.createEngine(Custom.class, Custom.PARAM_TYPE, Relation.class.getName(), Custom.PARAM_PATTERN, DIGIT_REGEX, Custom.PARAM_CASE_SENSITIVE, true); fail("Expected exception not thrown"); }catch(ResourceInitializationException e){ // Expected exception } } @Test public void testCaseInsensitive() throws Exception{ AnalysisEngine regexAE = AnalysisEngineFactory.createEngine(Custom.class, Custom.PARAM_TYPE, UK_GOV_DSTL_BALEEN_TYPES_COMMON_PERSON, Custom.PARAM_PATTERN, DIGIT_REGEX, Custom.PARAM_CASE_SENSITIVE, false); jCas.setDocumentText(TEXT); regexAE.process(jCas); assertEquals(2, JCasUtil.select(jCas, Person.class).size()); Person p1 = JCasUtil.selectByIndex(jCas, Person.class, 0); assertNotNull(p1); assertEquals(P123, p1.getCoveredText()); assertEquals(P123, p1.getValue()); Person p2 = JCasUtil.selectByIndex(jCas, Person.class, 1); assertNotNull(p2); assertEquals(P456, p2.getCoveredText()); assertEquals(P456, p2.getValue()); regexAE.destroy(); } @Test public void testCaseSensitive() throws Exception{ AnalysisEngine regexAE = AnalysisEngineFactory.createEngine(Custom.class, Custom.PARAM_TYPE, UK_GOV_DSTL_BALEEN_TYPES_COMMON_PERSON, Custom.PARAM_PATTERN, DIGIT_REGEX, Custom.PARAM_CASE_SENSITIVE, true); jCas.setDocumentText(TEXT); regexAE.process(jCas); assertEquals(1, JCasUtil.select(jCas, Person.class).size()); Person p1 = JCasUtil.selectByIndex(jCas, Person.class, 0); assertNotNull(p1); assertEquals(P123, p1.getCoveredText()); assertEquals(P123, p1.getValue()); regexAE.destroy(); } @Test public void testPatternGroup() throws Exception{ AnalysisEngine regexAE = AnalysisEngineFactory.createEngine(Custom.class, Custom.PARAM_TYPE, UK_GOV_DSTL_BALEEN_TYPES_COMMON_PERSON, Custom.PARAM_PATTERN, "\\b[A-Z][a-z]+\\s+([A-Z]+)\\b", Custom.PARAM_CASE_SENSITIVE, true, Custom.PARAM_GROUP, "1"); jCas.setDocumentText("John SMITH was seen speaking to p456"); regexAE.process(jCas); assertEquals(1, JCasUtil.select(jCas, Person.class).size()); Person p1 = JCasUtil.selectByIndex(jCas, Person.class, 0); assertNotNull(p1); assertEquals("John SMITH", p1.getCoveredText()); assertEquals("SMITH", p1.getValue()); regexAE.destroy(); } }