//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.gazetteer; import static org.junit.Assert.assertEquals; import java.util.Arrays; import java.util.List; import org.apache.uima.fit.factory.ExternalResourceFactory; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.resource.ExternalResourceDescription; import org.bson.Document; import org.junit.Test; import com.google.common.collect.Lists; import com.mongodb.util.JSON; import uk.gov.dstl.baleen.annotators.gazetteer.MongoStemming; import uk.gov.dstl.baleen.annotators.testing.AbstractAnnotatorTest; import uk.gov.dstl.baleen.resources.SharedFongoResource; import uk.gov.dstl.baleen.types.common.Buzzword; import uk.gov.dstl.baleen.types.semantic.Location; import uk.gov.dstl.baleen.types.semantic.ReferenceTarget; public class MongoStemmingTest extends AbstractAnnotatorTest{ public MongoStemmingTest() { super(MongoStemming.class); } private static final String LOCATION = "Location"; private static final String BUZZWORD = "Buzzword"; private static final String TYPE = "type"; private static final String COLLECTION = "collection"; private static final String FONGO_DATA = "fongo.data"; private static final String FONGO_COLLECTION = "fongo.collection"; private static final String MONGO = "mongo"; private static final String VALUE = "value"; private static final String MONGO_COLL = "baleen_testing_MongoStemmingRadixTreeGazetteerTest"; private static final Document LONDON_GEOJSON = new Document(TYPE, "Point").append("coordinates", Arrays.asList(-0.1275, 51.5072)); private static final List<Document> GAZ_DATA = Lists.newArrayList( new Document(VALUE, new String[]{"conspiracy", "conspire", "scheme", "plot"}), new Document(VALUE, new String[]{"london", "londres"}).append("geoJson", LONDON_GEOJSON), new Document(VALUE, new String[]{"knight", "sir", "dame", "lady"}), new Document(VALUE, new String[]{"enter the room"})); private final ExternalResourceDescription erd = ExternalResourceFactory.createExternalResourceDescription(MONGO, SharedFongoResource.class, FONGO_COLLECTION, MONGO_COLL, FONGO_DATA, JSON.serialize(GAZ_DATA)); @Test public void test() throws Exception{ jCas.setDocumentText("Forty seven knights conspired against the crown."); processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, BUZZWORD); assertEquals(2, JCasUtil.select(jCas, Buzzword.class).size()); Buzzword b1 = JCasUtil.selectByIndex(jCas, Buzzword.class, 0); assertEquals("knights", b1.getValue()); assertEquals("knights", b1.getCoveredText()); Buzzword b2 = JCasUtil.selectByIndex(jCas, Buzzword.class, 1); assertEquals("conspired", b2.getValue()); assertEquals("conspired", b2.getCoveredText()); } @Test public void testMultipleWords() throws Exception{ jCas.setDocumentText("Bill and Ben entered the room on a dark and windy night."); processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, BUZZWORD); assertEquals(1, JCasUtil.select(jCas, Buzzword.class).size()); Buzzword b1 = JCasUtil.selectByIndex(jCas, Buzzword.class, 0); assertEquals("entered the room", b1.getValue()); assertEquals("entered the room", b1.getCoveredText()); } @Test public void testMidword() throws Exception{ jCas.setDocumentText("Desiring chocolate is not a sin"); processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, BUZZWORD); assertEquals(0, JCasUtil.select(jCas, Buzzword.class).size()); } @Test public void testProperty() throws Exception{ jCas.setDocumentText("Guy Fawkes was caught in London"); processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, LOCATION); assertEquals(1, JCasUtil.select(jCas, Location.class).size()); Location lLon = JCasUtil.selectByIndex(jCas, Location.class, 0); assertEquals("London", lLon.getValue()); assertEquals("London", lLon.getCoveredText()); assertEquals(LONDON_GEOJSON.toJson(), lLon.getGeoJson()); } @Test public void testCoref() throws Exception{ jCas.setDocumentText("Lords, ladies, sirs, and madames..."); processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, BUZZWORD); assertEquals(2, JCasUtil.select(jCas, Buzzword.class).size()); assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size()); ReferenceTarget rt = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0); Buzzword b1 = JCasUtil.selectByIndex(jCas, Buzzword.class, 0); assertEquals("ladies", b1.getValue()); assertEquals("ladies", b1.getCoveredText()); assertEquals(rt, b1.getReferent()); Buzzword b2 = JCasUtil.selectByIndex(jCas, Buzzword.class, 1); assertEquals("sirs", b2.getValue()); assertEquals("sirs", b2.getCoveredText()); assertEquals(rt, b2.getReferent()); } }