//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.structural; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static uk.gov.dstl.baleen.annotators.structural.StructuralEntity.PARAM_CONFIDENCE; import static uk.gov.dstl.baleen.annotators.structural.StructuralEntity.PARAM_QUERY; import static uk.gov.dstl.baleen.annotators.structural.StructuralEntity.PARAM_SUB_TYPE; import static uk.gov.dstl.baleen.annotators.structural.StructuralEntity.PARAM_TYPE; import java.io.IOException; import java.util.Collection; import java.util.Set; import java.util.stream.Collectors; import org.apache.uima.fit.util.JCasUtil; import org.junit.Before; import org.junit.Test; import uk.gov.dstl.baleen.annotators.testing.AbstractAnnotatorTest; import uk.gov.dstl.baleen.types.common.Person; import uk.gov.dstl.baleen.types.structure.Document; import uk.gov.dstl.baleen.types.structure.Heading; import uk.gov.dstl.baleen.types.structure.ListItem; import uk.gov.dstl.baleen.types.structure.Ordered; public class StructuralEntityTest extends AbstractAnnotatorTest { private static final String H1 = "List of names"; private static final String H2 = "List of names"; private static final String N1 = "Stuart"; private static final String N2 = "James"; private static final String N3 = "Chris"; private static final String N4 = "Jon"; private static final String TEXT = String.join("\n", "", H1, N1, N2, N3, "", H2, N4, " other"); public StructuralEntityTest() { super(StructuralEntity.class); } @Before public void setup() throws IOException { jCas.setDocumentText(TEXT); int cursor = 0; int depth = 0; Document document = new Document(jCas); document.setBegin(cursor); document.setDepth(depth); document.setEnd(TEXT.length()); document.addToIndexes(); Heading h1 = new Heading(jCas); h1.setBegin(cursor); h1.setDepth(++depth); cursor += H1.length(); h1.setEnd(cursor); h1.setLevel(1); h1.addToIndexes(); Ordered ol1 = new Ordered(jCas); ol1.setBegin(++cursor); ol1.setDepth(depth); depth++; cursor = addItem(depth, cursor, N1); cursor = addItem(depth, cursor, N2); cursor = addItem(depth, cursor, N3); cursor = addItem(depth, cursor, ""); depth--; ol1.setEnd(cursor); ol1.addToIndexes(); Heading h2 = new Heading(jCas); h2.setBegin(cursor); h2.setDepth(++depth); cursor += H2.length(); h2.setEnd(cursor); h2.setLevel(2); h2.addToIndexes(); Ordered ol2 = new Ordered(jCas); ol2.setBegin(++cursor); ol2.setDepth(depth); depth++; cursor = addItem(depth, cursor, N4); depth--; ol2.setEnd(cursor); ol2.addToIndexes(); --depth; } private int addItem(int depth, int cursor, String item) { ListItem li = new ListItem(jCas); li.setBegin(++cursor); li.setDepth(depth); cursor += item.length(); li.setEnd(cursor); li.addToIndexes(); return cursor; } @Test public void testProcess() throws Exception { processJCas(PARAM_QUERY, "heading[level=1] + Ordered ListItem", PARAM_CONFIDENCE, "0.5", PARAM_TYPE, Person.class.getSimpleName(), PARAM_SUB_TYPE, "sub"); Collection<Person> people = JCasUtil.select(jCas, Person.class); assertEquals(3, people.size()); Set<String> names = people.stream().map(Person::getCoveredText).collect(Collectors.toSet()); assertTrue(names.contains(N1)); assertTrue(names.contains(N2)); assertTrue(names.contains(N3)); assertEquals(0.5, people.iterator().next().getConfidence(), 0.0); assertEquals("sub", people.iterator().next().getSubType()); } }