// Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.collectionreaders;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.resource.ExternalResourceDescription;
import org.bson.Document;
import org.junit.Test;

import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;

import uk.gov.dstl.baleen.collectionreaders.testing.AbstractReaderTest;
import uk.gov.dstl.baleen.resources.SharedFongoResource;
import uk.gov.dstl.baleen.types.metadata.Metadata;
import uk.gov.dstl.baleen.uima.BaleenCollectionReader;

/**
 * Tests {@link MongoReader} using a {@link SharedFongoResource} as the Mongo resource.
 *
 * <p>Three documents are inserted into the {@value #COLLECTION} collection and then read back one
 * at a time, checking the document text and the {@link Metadata} annotations created for each.
 */
public class MongoReaderTest extends AbstractReaderTest {

  /** Name under which the Mongo resource is bound to the reader. */
  private static final String MONGO = "mongo";

  /** Name of the document field holding the text content. */
  private static final String CONTENT = "content";

  /** Name of the Mongo collection the reader is pointed at. */
  private static final String COLLECTION = "documents";

  /** Metadata key that is excluded when checking the document's own fields. */
  private static final String CONTENT_EXTRACTOR_KEY = "baleen:content-extractor";

  // Text of the three fixture documents, in insertion order.
  private static final String TEXT_WORLD = "Hello World";
  private static final String TEXT_TEST = "Hello Test";
  private static final String TEXT = "Hello Metadata";

  private ExternalResourceDescription erd =
      ExternalResourceFactory.createExternalResourceDescription(MONGO, SharedFongoResource.class);

  public MongoReaderTest() {
    super(MongoReader.class);
  }

  /**
   * Reads the three fixture documents in turn and verifies their text and metadata, then checks
   * that the reader reports no further documents.
   *
   * @throws Exception if the reader cannot be created, initialised, read from or closed
   */
  @Test
  public void test() throws Exception {
    BaleenCollectionReader bcr =
        getCollectionReader(
            MONGO,
            erd,
            "collection",
            COLLECTION,
            "idField",
            "_id",
            "contentField",
            CONTENT,
            "contentExtractor",
            "UimaContentExtractor");

    // Close the reader even when an assertion fails part-way through the test.
    try {
      bcr.initialize();

      SharedFongoResource sfr =
          (SharedFongoResource) bcr.getUimaContext().getResourceObject(MONGO);
      createContent(sfr);

      assertNextDocument(bcr, TEXT_WORLD, 1);
      jCas.reset();

      assertNextDocument(bcr, TEXT_TEST, 1);
      jCas.reset();

      assertNextDocument(bcr, TEXT, 5);

      // Sort by key and then by value: the two "key3" entries share a key, so without the
      // tie-break their relative order would depend on the order returned by JCasUtil.select.
      List<Metadata> metadata =
          JCasUtil.select(jCas, Metadata.class)
              .stream()
              .filter(m -> !m.getKey().equalsIgnoreCase(CONTENT_EXTRACTOR_KEY))
              .sorted(
                  (a, b) -> {
                    int byKey = a.getKey().compareTo(b.getKey());
                    return byKey != 0 ? byKey : a.getValue().compareTo(b.getValue());
                  })
              .collect(Collectors.toList());

      assertEquals("key1", metadata.get(0).getKey());
      assertEquals("key2", metadata.get(1).getKey());
      assertEquals("key3", metadata.get(2).getKey());
      assertEquals("key3", metadata.get(3).getKey());

      assertEquals("foo", metadata.get(0).getValue());
      assertEquals("bar", metadata.get(1).getValue());
      // The list-valued "key3" field is expected as one Metadata entry per element.
      assertEquals("hey", metadata.get(2).getValue());
      assertEquals("howdy", metadata.get(3).getValue());

      jCas.reset();

      assertFalse(bcr.doHasNext());
    } finally {
      bcr.close();
    }
  }

  /**
   * Asserts that the reader has another document, loads it into {@code jCas}, and checks its
   * trimmed text and the total number of {@link Metadata} annotations. The caller is responsible
   * for resetting {@code jCas} afterwards.
   *
   * @param bcr the reader under test
   * @param expectedText the expected document text, ignoring leading/trailing whitespace
   * @param expectedMetadataCount the expected number of {@link Metadata} annotations
   * @throws Exception if the reader fails while checking for or reading the next document
   */
  private void assertNextDocument(
      BaleenCollectionReader bcr, String expectedText, int expectedMetadataCount)
      throws Exception {
    assertTrue(bcr.doHasNext());
    bcr.getNext(jCas);

    assertEquals(expectedText, jCas.getDocumentText().trim());
    assertEquals(expectedMetadataCount, JCasUtil.select(jCas, Metadata.class).size());
  }

  /**
   * Inserts the three fixture documents into the {@value #COLLECTION} collection: two with content
   * only, and one with two string fields and one list-valued field in addition to its content.
   *
   * @param sfr the resource providing the database to populate
   */
  private void createContent(SharedFongoResource sfr) {
    MongoDatabase db = sfr.getDB();
    MongoCollection<Document> coll = db.getCollection(COLLECTION);

    coll.insertMany(
        Arrays.asList(
            new Document(CONTENT, TEXT_WORLD),
            new Document(CONTENT, TEXT_TEST),
            new Document(CONTENT, TEXT)
                .append("key1", "foo")
                .append("key2", "bar")
                .append("key3", Arrays.asList("howdy", "hey"))));
  }
}