//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.structural;
import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.resource.ResourceInitializationException;
import org.junit.Before;
import org.junit.Test;
import uk.gov.dstl.baleen.annotators.testing.AbstractAnnotatorTest;
import uk.gov.dstl.baleen.types.language.Text;
import uk.gov.dstl.baleen.types.structure.Footer;
import uk.gov.dstl.baleen.types.structure.Header;
import uk.gov.dstl.baleen.types.structure.Paragraph;
import uk.gov.dstl.baleen.types.structure.Quotation;
public class TextBlocksTest extends AbstractAnnotatorTest {
public TextBlocksTest() {
super(TextBlocks.class);
}
@Before
public void before() {
jCas.setDocumentText("This is a header. This is a paragraph. This is a footer.");
}
private void addStructure() {
final Header header = new Header(jCas, 0, 17);
header.addToIndexes();
final Paragraph paragraph = new Paragraph(jCas, 18, 38);
paragraph.addToIndexes();
final Footer footer = new Footer(jCas, 40, jCas.getDocumentText().length());
footer.addToIndexes();
}
@Test
public void testWithoutStructuralAnnotations() throws AnalysisEngineProcessException, ResourceInitializationException {
processJCas();
final List<Text> list = new ArrayList<>( JCasUtil.select(jCas, Text.class) );
assertEquals(1, list.size());
assertEquals(jCas.getDocumentText(), list.get(0).getCoveredText());
}
@Test
public void testWithStructuralAnnotations() throws AnalysisEngineProcessException, ResourceInitializationException {
addStructure();
processJCas();
final List<Text> list = new ArrayList<>( JCasUtil.select(jCas, Text.class) );
assertEquals(1, list.size());
assertEquals("This is a paragraph.", list.get(0).getCoveredText());
}
@Test
public void testWithCustomTypes() throws AnalysisEngineProcessException, ResourceInitializationException {
addStructure();
processJCas(TextBlocks.PARAM_TYPE_NAMES, new String[]{ "Header" });
final List<Text> list = new ArrayList<>( JCasUtil.select(jCas, Text.class) );
assertEquals(1, list.size());
assertEquals("This is a header.", list.get(0).getCoveredText());
}
@Test
public void testWithTwoTypes() throws AnalysisEngineProcessException, ResourceInitializationException {
addStructure();
processJCas(TextBlocks.PARAM_TYPE_NAMES, new String[]{ "Header", "Paragraph" });
final List<Text> list = new ArrayList<>( JCasUtil.select(jCas, Text.class) );
assertEquals(2, list.size());
assertEquals("This is a header.", list.get(0).getCoveredText());
assertEquals("This is a paragraph.", list.get(1).getCoveredText());
}
@Test
public void testKeepBiggest() throws AnalysisEngineProcessException, ResourceInitializationException {
addStructure();
final Quotation q = new Quotation(jCas, 29, 38);
q.addToIndexes();
processJCas(TextBlocks.PARAM_KEEP_SMALLEST, false);
final List<Text> list = new ArrayList<>( JCasUtil.select(jCas, Text.class) );
assertEquals(1, list.size());
assertEquals("This is a paragraph.", list.get(0).getCoveredText());
}
@Test
public void testKeepSmallest() throws AnalysisEngineProcessException, ResourceInitializationException {
addStructure();
final Quotation q = new Quotation(jCas, 28, 38);
q.addToIndexes();
processJCas(TextBlocks.PARAM_KEEP_SMALLEST, true);
final List<Text> list = new ArrayList<>( JCasUtil.select(jCas, Text.class) );
assertEquals(1, list.size());
assertEquals("paragraph.", list.get(0).getCoveredText());
}
}