//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.patterns; import java.util.Collection; import org.apache.uima.UIMAException; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.factory.AnalysisEngineFactory; import org.apache.uima.fit.factory.ExternalResourceFactory; import org.apache.uima.fit.pipeline.SimplePipeline; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.resource.ExternalResourceDescription; import org.junit.Assert; import org.junit.Test; import uk.gov.dstl.baleen.annotators.testing.AnnotatorTestBase; import uk.gov.dstl.baleen.resources.SharedStopwordResource; import uk.gov.dstl.baleen.types.language.Pattern; import uk.gov.dstl.baleen.types.language.Sentence; import uk.gov.dstl.baleen.types.language.WordToken; import uk.gov.dstl.baleen.types.semantic.Entity; public class PatternExtractorTest extends AnnotatorTestBase { private AnalysisEngine ae; @Override public void beforeTest() throws UIMAException { super.beforeTest(); ExternalResourceDescription stopwordsDesc = ExternalResourceFactory .createExternalResourceDescription(PatternExtractor.KEY_STOPWORDS, SharedStopwordResource.class); final AnalysisEngineDescription desc = AnalysisEngineFactory.createEngineDescription(PatternExtractor.class, PatternExtractor.KEY_STOPWORDS, stopwordsDesc); ae = AnalysisEngineFactory.createEngine(desc); } @Test public void testNegationProcess() throws AnalysisEngineProcessException { final String text = "The fox did not jump over the dog."; jCas.setDocumentText(text); final Sentence sentence = new Sentence(jCas); sentence.setBegin(0); sentence.setEnd(text.length()); sentence.addToIndexes(jCas); int offset = 0; while (offset < text.length()) { int end = text.indexOf(" ", offset); if (end == -1) { end = text.indexOf(".", offset); } if (end > 0) { final WordToken wordToken = new WordToken(jCas); wordToken.setBegin(offset); wordToken.setEnd(end); // Fake the POS wordToken.setPartOfSpeech("VBZ"); wordToken.addToIndexes(jCas); offset = end + 1; } else { offset = text.length(); } } final Entity fox = new Entity(jCas); fox.setBegin(4); fox.setEnd(7); fox.addToIndexes(jCas); final Entity dog = new Entity(jCas); dog.setBegin(30); dog.setEnd(33); dog.addToIndexes(jCas); SimplePipeline.runPipeline(jCas, ae); final Collection<Pattern> patterns = JCasUtil.select(jCas, Pattern.class); Assert.assertEquals(0, patterns.size()); } @Test public void testProcess() throws AnalysisEngineProcessException { final String text = "The fox jumps over the dog."; jCas.setDocumentText(text); final Sentence sentence = new Sentence(jCas); sentence.setBegin(0); sentence.setEnd(text.length()); sentence.addToIndexes(jCas); int offset = 0; while (offset < text.length()) { int end = text.indexOf(" ", offset); if (end == -1) { end = text.indexOf(".", offset); } if (end > 0) { final WordToken wordToken = new WordToken(jCas); wordToken.setBegin(offset); wordToken.setEnd(end); // Fake the POS wordToken.setPartOfSpeech("VBZ"); wordToken.addToIndexes(jCas); offset = end + 1; } else { offset = text.length(); } } final Entity fox = new Entity(jCas); fox.setBegin(4); fox.setEnd(7); fox.addToIndexes(jCas); final Entity dog = new Entity(jCas); dog.setBegin(23); dog.setEnd(26); dog.addToIndexes(jCas); SimplePipeline.runPipeline(jCas, ae); final Collection<Pattern> patterns = JCasUtil.select(jCas, Pattern.class); Assert.assertEquals(1, patterns.size()); final Pattern p = patterns.iterator().next(); Assert.assertEquals(1, p.getWords().size()); Assert.assertEquals("jumps", p.getWords(0).getCoveredText()); } }