//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.language;
import java.util.Collection;
import java.util.List;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.resource.ExternalResourceDescription;
import org.apache.uima.resource.ResourceInitializationException;
import org.junit.Assert;
import org.junit.Test;
import uk.gov.dstl.baleen.annotators.language.OpenNLP;
import uk.gov.dstl.baleen.annotators.language.OpenNLPParser;
import uk.gov.dstl.baleen.annotators.testing.AbstractMultiAnnotatorTest;
import uk.gov.dstl.baleen.resources.SharedOpenNLPModel;
import uk.gov.dstl.baleen.types.language.PhraseChunk;
import uk.gov.dstl.baleen.types.language.Sentence;
public class OpenNLPParserTest extends AbstractMultiAnnotatorTest {
@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {
final ExternalResourceDescription parserChunkingDesc = ExternalResourceFactory
.createExternalResourceDescription("parserChunking", SharedOpenNLPModel.class);
// Add in the OpenNLP implementation too, as its a prerequisite
// (in theory we should test OpenNLPParser in isolation, but in practise
// it as this as a
// dependency
// so better test they work together)
final ExternalResourceDescription tokensDesc = ExternalResourceFactory.createExternalResourceDescription(
"tokens",
SharedOpenNLPModel.class);
final ExternalResourceDescription sentencesDesc = ExternalResourceFactory
.createExternalResourceDescription("sentences", SharedOpenNLPModel.class);
final ExternalResourceDescription posDesc = ExternalResourceFactory.createExternalResourceDescription("posTags",
SharedOpenNLPModel.class);
final ExternalResourceDescription chunksDesc = ExternalResourceFactory
.createExternalResourceDescription("phraseChunks", SharedOpenNLPModel.class);
AnalysisEngineFactory.createEngineDescription();
return asArray(
createAnalysisEngine(OpenNLP.class, "tokens", tokensDesc, "sentences", sentencesDesc, "posTags",
posDesc, "phraseChunks", chunksDesc),
createAnalysisEngine(OpenNLPParser.class, "parserChunking", parserChunkingDesc));
}
@Test
public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
final String text = "The fox jumps over the dog.";
jCas.setDocumentText(text);
processJCas();
final Collection<Sentence> select = JCasUtil.select(jCas, Sentence.class);
final Sentence s1 = select.iterator().next();
final List<PhraseChunk> phrases = JCasUtil.selectCovered(jCas, PhraseChunk.class, s1);
Assert.assertEquals(4, phrases.size());
Assert.assertEquals("The fox", phrases.get(0).getCoveredText());
Assert.assertEquals("jumps over the dog", phrases.get(1).getCoveredText());
Assert.assertEquals("over the dog", phrases.get(2).getCoveredText());
Assert.assertEquals("the dog", phrases.get(3).getCoveredText());
}
}