//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.gazetteer; import static org.junit.Assert.assertEquals; import java.util.Arrays; import java.util.List; import org.apache.uima.fit.factory.ExternalResourceFactory; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.cas.StringArray; import org.apache.uima.resource.ExternalResourceDescription; import org.bson.Document; import org.junit.Test; import com.google.common.collect.Lists; import com.mongodb.util.JSON; import uk.gov.dstl.baleen.annotators.gazetteer.Mongo; import uk.gov.dstl.baleen.annotators.testing.AbstractAnnotatorTest; import uk.gov.dstl.baleen.resources.SharedFongoResource; import uk.gov.dstl.baleen.types.common.Buzzword; import uk.gov.dstl.baleen.types.semantic.Location; import uk.gov.dstl.baleen.types.semantic.ReferenceTarget; public class MongoTest extends AbstractAnnotatorTest{ private static final String WORLD = "world"; private static final String LOCATION = "Location"; private static final String TYPE = "type"; private static final String COLLECTION = "collection"; private static final String FONGO_DATA = "fongo.data"; private static final String FONGO_COLLECTION = "fongo.collection"; private static final String MONGO = "mongo"; private static final String VALUE = "value"; private static final String MONGO_COLL = "baleen_testing_MongoGazetteerTest"; private static final Document LONDON_GEOJSON = new Document(TYPE, "Point").append("coordinates", Arrays.asList(-0.1275, 51.5072)); private static final List<Document> GAZ_DATA = Lists.newArrayList( new Document(VALUE, new String[]{WORLD, "earth", "planet"}), new Document(VALUE, new String[]{"london", "londres"}).append("geoJson", LONDON_GEOJSON), new Document(VALUE, new String[]{"madrid"}).append("geoJson", "Property Test"), new Document(VALUE, new String[]{"sydney (australia"}).append("tags", Arrays.asList("broken_regex"))); private final ExternalResourceDescription erd = ExternalResourceFactory.createExternalResourceDescription(MONGO, SharedFongoResource.class, FONGO_COLLECTION, MONGO_COLL, FONGO_DATA, JSON.serialize(GAZ_DATA)); public MongoTest() { super(Mongo.class); } @Test public void test() throws Exception{ jCas.setDocumentText("Hello world, this is a test"); processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, LOCATION); assertEquals(1, JCasUtil.select(jCas, Location.class).size()); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); assertEquals(WORLD, l.getValue()); assertEquals(WORLD, l.getCoveredText()); } @Test public void testRegex() throws Exception{ jCas.setDocumentText("Hello Sydney (Australia), this is a test"); processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, LOCATION); assertEquals(1, JCasUtil.select(jCas, Location.class).size()); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); assertEquals("Sydney (Australia", l.getValue()); assertEquals("Sydney (Australia", l.getCoveredText()); } @Test public void testMidword() throws Exception{ jCas.setDocumentText("HelloWorld"); processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, LOCATION); assertEquals(0, JCasUtil.select(jCas, Location.class).size()); } @Test public void testProperty() throws Exception{ jCas.setDocumentText("Hello London, this is a test"); processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, LOCATION); assertEquals(1, JCasUtil.select(jCas, Location.class).size()); Location lLon = JCasUtil.selectByIndex(jCas, Location.class, 0); assertEquals("London", lLon.getValue()); assertEquals("London", lLon.getCoveredText()); assertEquals(LONDON_GEOJSON.toJson(), lLon.getGeoJson()); jCas.reset(); jCas.setDocumentText("Hello Madrid, this is a test"); processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, LOCATION); assertEquals(1, JCasUtil.select(jCas, Location.class).size()); Location lMad = JCasUtil.selectByIndex(jCas, Location.class, 0); assertEquals("Madrid", lMad.getValue()); assertEquals("Madrid", lMad.getCoveredText()); assertEquals("Property Test", lMad.getGeoJson()); } @Test public void testBuzzwordProperty() throws Exception{ jCas.setDocumentText("Hello Sydney (Australia), this is a test"); processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, "Buzzword"); assertEquals(1, JCasUtil.select(jCas, Buzzword.class).size()); Buzzword b = JCasUtil.selectByIndex(jCas, Buzzword.class, 0); assertEquals("Sydney (Australia", b.getValue()); assertEquals("Sydney (Australia", b.getCoveredText()); StringArray tags = b.getTags(); assertEquals(1, tags.size()); assertEquals("broken_regex", tags.get(0)); } @Test public void testCoref() throws Exception{ jCas.setDocumentText("Hello World, Hello Earth"); processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, LOCATION); assertEquals(2, JCasUtil.select(jCas, Location.class).size()); assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size()); ReferenceTarget rt = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0); Location l1 = JCasUtil.selectByIndex(jCas, Location.class, 0); assertEquals("World", l1.getValue()); assertEquals("World", l1.getCoveredText()); assertEquals(rt, l1.getReferent()); Location l2 = JCasUtil.selectByIndex(jCas, Location.class, 1); assertEquals("Earth", l2.getValue()); assertEquals("Earth", l2.getCoveredText()); assertEquals(rt, l2.getReferent()); } }