//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.uima; import static org.junit.Assert.assertEquals; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import org.apache.uima.UIMAException; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.factory.JCasFactory; import org.apache.uima.jcas.JCas; import org.junit.Before; import org.junit.Test; import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction; import uk.gov.dstl.baleen.types.language.Text; import uk.gov.dstl.baleen.uima.data.TextBlock; public class BaleenTextAwareAnnotatorTest { private JCas noTextJCas; private JCas textJCas; @Before public void before() throws UIMAException { noTextJCas = JCasFactory.createJCas(); noTextJCas.setDocumentText("This is text and more text"); textJCas = JCasFactory.createJCas(); textJCas.setDocumentText("This is text and more text"); final Text text = new Text(textJCas, 8, 12); text.addToIndexes(); final Text moreText = new Text(textJCas, 17, 26); moreText.addToIndexes(); } @Test public void testProcessTextBlockIsCalledForEntireDoc() throws AnalysisEngineProcessException { final FakeTextAwareAnnotator annotator = createAnnotator(false); annotator.doProcess(noTextJCas); assertEquals(1, annotator.getCount()); } @Test public void testProcessTextBlockIsCalledForTextBlocks() throws AnalysisEngineProcessException { final FakeTextAwareAnnotator annotator = createAnnotator(false); annotator.doProcess(textJCas); assertEquals(2, annotator.getCount()); } @Test public void testGetTextInBlocksForTextBlocks() { final FakeTextAwareAnnotator annotator = createAnnotator(false); final String s = annotator.getTextInTextBlocks(textJCas); assertEquals("text\n\nmore text", s); } @Test public void testGetTextInBlocksForEntireDoc() { final FakeTextAwareAnnotator annotator = createAnnotator(false); final String s = annotator.getTextInTextBlocks(noTextJCas); assertEquals("This is text and more text", s); } @Test public void testGetTextInBlocksWhenHasBlocksButWholeDocParamIsSet() { final FakeTextAwareAnnotator annotator = createAnnotator(true); final String s = annotator.getTextInTextBlocks(textJCas); assertEquals("This is text and more text", s); } @Test public void testGetTextBlockForTextBlocks() { final FakeTextAwareAnnotator annotator = createAnnotator(false); final List<TextBlock> list = annotator.getTextBlocks(textJCas); assertEquals(2, list.size()); assertEquals("text", list.get(0).getCoveredText()); assertEquals("more text", list.get(1).getCoveredText()); } @Test public void testGetTextBlockForEntireDoc() { final FakeTextAwareAnnotator annotator = createAnnotator(false); final List<TextBlock> list = annotator.getTextBlocks(noTextJCas); assertEquals(1, list.size()); assertEquals("This is text and more text", list.get(0).getCoveredText()); } @Test public void testGetTextBlockWhenHasBlocksButWholeDocParamIsSet() { final FakeTextAwareAnnotator annotator = createAnnotator(true); final List<TextBlock> list = annotator.getTextBlocks(textJCas); assertEquals(1, list.size()); assertEquals("This is text and more text", list.get(0).getCoveredText()); } public FakeTextAwareAnnotator createAnnotator(final boolean wholeDoc) { return new FakeTextAwareAnnotator(wholeDoc); } public static class FakeTextAwareAnnotator extends BaleenTextAwareAnnotator { private final Set<TextBlock> blocksSeen = new HashSet<>(); public FakeTextAwareAnnotator(final boolean wholeDoc) { setWholeDocumentAsText(wholeDoc); } @Override public void doProcess(final JCas jCas) throws AnalysisEngineProcessException { // make this public basically so we can avoid all the other process stuff. super.doProcess(jCas); } @Override protected void doProcessTextBlock(final TextBlock block) throws AnalysisEngineProcessException { blocksSeen.add(block); } @Override public List<TextBlock> getTextBlocks(final JCas jCas) { return super.getTextBlocks(jCas); } @Override public String getTextInTextBlocks(final JCas jCas) { return super.getTextInTextBlocks(jCas); } public int getCount() { return blocksSeen.size(); } @Override public AnalysisEngineAction getAction() { return new AnalysisEngineAction(Collections.emptySet(), Collections.emptySet()); } } }