//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.consumers; import java.util.Collections; import java.util.List; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.factory.AnalysisEngineFactory; import org.apache.uima.fit.factory.ExternalResourceFactory; import org.apache.uima.jcas.cas.FSArray; import org.apache.uima.resource.ExternalResourceDescription; import org.apache.uima.resource.ResourceAccessException; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.resource.impl.CustomResourceSpecifier_impl; import org.bson.Document; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import com.mongodb.client.MongoCollection; import uk.gov.dstl.baleen.annotators.testing.AnnotatorTestBase; import uk.gov.dstl.baleen.resources.SharedFongoResource; import uk.gov.dstl.baleen.types.common.Person; import uk.gov.dstl.baleen.types.language.Pattern; import uk.gov.dstl.baleen.types.language.WordLemma; import uk.gov.dstl.baleen.types.language.WordToken; import uk.gov.dstl.baleen.types.semantic.Entity; import uk.gov.dstl.baleen.types.semantic.Location; public class MongoPatternSaverTest extends AnnotatorTestBase { private AnalysisEngine ae; private SharedFongoResource sfr; @Before public void setUp() throws ResourceInitializationException, ResourceAccessException { // Create a description of an external resource - a fongo instance, in the same way we would // have created a shared mongo resource final ExternalResourceDescription erd = ExternalResourceFactory.createExternalResourceDescription( SharedFongoResource.class, "fongo.collection", "test", "fongo.data", "[]"); // Create the analysis engine final AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription(MongoPatternSaver.class, MongoPatternSaver.KEY_MONGO, erd, "collection", "test"); ae = AnalysisEngineFactory.createEngine(aed); ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap()); sfr = (SharedFongoResource) ae.getUimaContext() .getResourceObject(MongoPatternSaver.KEY_MONGO); } @After public void tearDown() { if (ae != null) { ae.destroy(); } } @Test public void test() throws AnalysisEngineProcessException { jCas.setDocumentText("The cow jumps over the moon."); final Entity cow = new Person(jCas); cow.setBegin(4); cow.setEnd(7); cow.addToIndexes(jCas); final Entity moon = new Location(jCas); moon.setBegin(23); moon.setEnd(27); moon.addToIndexes(jCas); final WordToken jumps = new WordToken(jCas); jumps.setBegin(8); jumps.setEnd(8 + "jumps".length()); jumps.setPartOfSpeech("VB"); final WordLemma jumpLemma = new WordLemma(jCas); jumpLemma.setLemmaForm("jump"); jumps.setLemmas(new FSArray(jCas, 1)); jumps.setLemmas(0, jumpLemma); jumps.addToIndexes(); final Pattern pattern = new Pattern(jCas); pattern.setBegin(8); pattern.setBegin(22); pattern.setWords(new FSArray(jCas, 1)); pattern.setWords(0, jumps); pattern.setSource(cow); pattern.setTarget(moon); pattern.addToIndexes(); ae.process(jCas); final MongoCollection<Document> collection = sfr.getDB().getCollection("test"); Assert.assertEquals(1, collection.count()); final Document object = collection.find().first(); final Document source = (Document) object.get("source"); final Document target = (Document) object.get("target"); final List<?> words = (List<?>) object.get("words"); Assert.assertEquals("cow", source.get("text")); Assert.assertEquals("uk.gov.dstl.baleen.types.common.Person", source.get("type")); Assert.assertEquals("moon", target.get("text")); Assert.assertEquals("uk.gov.dstl.baleen.types.semantic.Location", target.get("type")); Assert.assertEquals(1, words.size()); final Document word = (Document) words.get(0); Assert.assertEquals("jumps", word.get("text")); Assert.assertEquals("VB", word.get("pos")); Assert.assertEquals("jump", word.get("lemma")); } @Test public void testNoLemmas() throws AnalysisEngineProcessException { jCas.setDocumentText("The cow jumps over the moon."); final Entity cow = new Person(jCas); cow.setBegin(4); cow.setEnd(7); cow.addToIndexes(jCas); final Entity moon = new Location(jCas); moon.setBegin(23); moon.setEnd(27); moon.addToIndexes(jCas); final WordToken jumps = new WordToken(jCas); jumps.setBegin(8); jumps.setEnd(8 + "jumps".length()); jumps.setPartOfSpeech("VB"); jumps.addToIndexes(); final Pattern pattern = new Pattern(jCas); pattern.setBegin(8); pattern.setBegin(22); pattern.setWords(new FSArray(jCas, 1)); pattern.setWords(0, jumps); pattern.setSource(cow); pattern.setTarget(moon); pattern.addToIndexes(); ae.process(jCas); final MongoCollection<Document> collection = sfr.getDB().getCollection("test"); Assert.assertEquals(1, collection.count()); final Document object = collection.find().first(); final Document source = (Document) object.get("source"); final Document target = (Document) object.get("target"); final List<?> words = (List<?>) object.get("words"); Assert.assertEquals("cow", source.get("text")); Assert.assertEquals("uk.gov.dstl.baleen.types.common.Person", source.get("type")); Assert.assertEquals("moon", target.get("text")); Assert.assertEquals("uk.gov.dstl.baleen.types.semantic.Location", target.get("type")); Assert.assertEquals(1, words.size()); final Document word = (Document) words.get(0); Assert.assertEquals("jumps", word.get("text")); Assert.assertEquals("VB", word.get("pos")); Assert.assertNull(word.get("lemma")); } }