//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.gazetteer;

import static org.junit.Assert.assertEquals;

import java.util.List;

import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.resource.ExternalResourceDescription;
import org.bson.Document;
import org.junit.Test;

import com.google.common.collect.Lists;
import com.mongodb.util.JSON;

import uk.gov.dstl.baleen.annotators.gazetteer.MongoRegex;
import uk.gov.dstl.baleen.annotators.testing.AnnotatorTestBase;
import uk.gov.dstl.baleen.resources.SharedFongoResource;
import uk.gov.dstl.baleen.types.semantic.Location;
import uk.gov.dstl.baleen.types.semantic.ReferenceTarget;

/**
 * Tests for {@link MongoRegex}, run against an in-memory {@link SharedFongoResource}
 * that is pre-loaded with the small gazetteer in {@link #GAZ_DATA}.
 *
 * <p>Every test builds its own analysis engine through {@link #createEngine(Object...)}
 * and destroys it in a {@code finally} block, so the engine is released even when
 * processing throws or an assertion fails.
 */
public class MongoRegexTest extends AnnotatorTestBase{
    private static final String COLLECTION = "collection";
    private static final String LONDON_REGEX = "\\blon\\w*\\b";
    private static final String REGEX = "regex";
    private static final String LOCATION = "Location";
    private static final String TYPE = "type";
    private static final String CASE_SENSITIVE = "caseSensitive";
    private static final String FONGO_COLLECTION = "fongo.collection";
    private static final String FONGO_DATA = "fongo.data";
    private static final String MONGO = "mongo";
    private static final String TEXT = "Hello world, this is a test. Hello London, this is a test.";
    private static final String VALUE = "value";

    private static final String MONGO_COLL = "baleen_testing_MongoRadixTreeGazetteerTest";

    /** Gazetteer entries: one document per entity, each listing its alternative values. */
    private static final List<Document> GAZ_DATA = Lists.newArrayList(
            new Document(VALUE, new String[]{"world", "earth", "planet"}),
            new Document(VALUE, new String[]{"london", "londres"}).append("geoJson","Property_Test"),
            new Document(VALUE, new String[]{"madrid"}));

    /**
     * Creates a {@link MongoRegex} engine wired to a Fongo resource holding {@link #GAZ_DATA}.
     *
     * <p>The resource, collection and type parameters shared by every test are supplied
     * first; {@code extraConfig} is appended after them unchanged.
     *
     * @param extraConfig additional name/value configuration pairs for the annotator
     * @return the initialised engine; the caller is responsible for calling {@code destroy()}
     * @throws Exception if the resource description or the engine cannot be created
     */
    private static AnalysisEngine createEngine(Object... extraConfig) throws Exception{
        ExternalResourceDescription erd = ExternalResourceFactory.createExternalResourceDescription(
                MONGO, SharedFongoResource.class,
                FONGO_COLLECTION, MONGO_COLL,
                FONGO_DATA, JSON.serialize(GAZ_DATA));

        Object[] common = {MONGO, erd, COLLECTION, MONGO_COLL, TYPE, LOCATION};
        Object[] config = new Object[common.length + extraConfig.length];
        System.arraycopy(common, 0, config, 0, common.length);
        System.arraycopy(extraConfig, 0, config, common.length, extraConfig.length);

        AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription(MongoRegex.class, config);
        return AnalysisEngineFactory.createEngine(aed);
    }

    /**
     * With {@code caseSensitive} set to {@code true}, the lower-case London pattern
     * is expected to produce no {@link Location} annotations for the sample text.
     */
    @Test
    public void test() throws Exception{
        AnalysisEngine ae = createEngine(CASE_SENSITIVE, true, REGEX, LONDON_REGEX);
        try{
            jCas.setDocumentText(TEXT);
            ae.process(jCas);

            assertEquals(0, JCasUtil.select(jCas, Location.class).size());
        }finally{
            ae.destroy();
        }
    }

    /**
     * Without the {@code caseSensitive} parameter, "London" is expected to be annotated
     * once, and the {@code geoJson} property of its gazetteer entry copied onto the
     * resulting {@link Location}.
     */
    @Test
    public void testProperty() throws Exception{
        AnalysisEngine ae = createEngine(REGEX, LONDON_REGEX);
        try{
            jCas.setDocumentText(TEXT);
            ae.process(jCas);

            assertEquals(1, JCasUtil.select(jCas, Location.class).size());

            Location l = JCasUtil.selectByIndex(jCas, Location.class, 0);
            assertEquals("London", l.getValue());
            assertEquals("London", l.getCoveredText());
            assertEquals("Property_Test", l.getGeoJson());
        }finally{
            ae.destroy();
        }
    }

    /**
     * "World" and "Earth" are values of the same gazetteer entry, so both resulting
     * {@link Location} annotations are expected to share a single {@link ReferenceTarget}.
     */
    @Test
    public void testCoref() throws Exception{
        AnalysisEngine ae = createEngine(REGEX, "\\b[A-Z][a-z]*\\b");
        try{
            jCas.setDocumentText("Hello World, Hello Earth");
            ae.process(jCas);

            assertEquals(2, JCasUtil.select(jCas, Location.class).size());
            assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size());

            ReferenceTarget rt = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0);

            Location l1 = JCasUtil.selectByIndex(jCas, Location.class, 0);
            assertEquals("World", l1.getValue());
            assertEquals("World", l1.getCoveredText());
            assertEquals(rt, l1.getReferent());

            Location l2 = JCasUtil.selectByIndex(jCas, Location.class, 1);
            assertEquals("Earth", l2.getValue());
            assertEquals("Earth", l2.getCoveredText());
            assertEquals(rt, l2.getReferent());
        }finally{
            ae.destroy();
        }
    }
}