//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.structural;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static uk.gov.dstl.baleen.annotators.structural.TableRelation.PARAM_TYPE;
import static uk.gov.dstl.baleen.annotators.structural.TableRelation.SOURCE_PATTERN;
import static uk.gov.dstl.baleen.annotators.structural.TableRelation.SOURCE_TYPE;
import static uk.gov.dstl.baleen.annotators.structural.TableRelation.TARGET_PATTERN;
import static uk.gov.dstl.baleen.annotators.structural.TableRelation.TARGET_TYPE;
import java.io.IOException;
import java.util.Collection;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.tcas.Annotation;
import org.junit.Before;
import org.junit.Test;
import uk.gov.dstl.baleen.annotators.testing.AbstractAnnotatorTest;
import uk.gov.dstl.baleen.types.common.CommsIdentifier;
import uk.gov.dstl.baleen.types.common.Person;
import uk.gov.dstl.baleen.types.semantic.Relation;
import uk.gov.dstl.baleen.types.structure.Document;
import uk.gov.dstl.baleen.types.structure.Paragraph;
import uk.gov.dstl.baleen.types.structure.Table;
import uk.gov.dstl.baleen.types.structure.TableBody;
import uk.gov.dstl.baleen.types.structure.TableCell;
import uk.gov.dstl.baleen.types.structure.TableHeader;
import uk.gov.dstl.baleen.types.structure.TableRow;
public class TableRelationTest extends AbstractAnnotatorTest {
private static final String TH1 = "Name";
private static final String TH2 = "eMail";
private static final String R1C1 = "Stuart";
private static final String R2C1 = "James";
private static final String R3C1 = "Chris";
private static final String R1C2 = "a@b.com";
private static final String R2C2 = "c@d.com";
private static final String R3C2 = "d@e.com";
private static final String HEADING = TH1 + " " + TH2;
private static final String ROW1 = R1C1 + " " + R1C2;
private static final String ROW2 = R2C1 + " " + R2C2;
private static final String ROW3 = R3C1 + " " + R3C2;
private static final String TEXT = String.join("\n", "", HEADING, ROW1, ROW2, ROW3, " other");
public TableRelationTest() {
super(TableRelation.class);
}
@Before
public void setup() throws IOException {
jCas.setDocumentText(TEXT);
int cursor = 0;
int depth = 0;
Document document = new Document(jCas);
document.setBegin(cursor);
document.setDepth(depth);
document.setEnd(TEXT.length());
document.addToIndexes();
Table table = new Table(jCas);
table.setBegin(cursor);
table.setDepth(depth);
TableHeader th = new TableHeader(jCas);
th.setBegin(cursor);
th.setDepth(++depth);
cursor = addRow(depth, cursor, TH1, TH2);
th.setEnd(cursor);
th.addToIndexes();
--depth;
TableBody tableBody = new TableBody(jCas);
tableBody.setBegin(cursor);
tableBody.setDepth(++depth);
cursor = addRow(depth, cursor, R1C1, R1C2);
cursor = addRow(depth, cursor, R2C1, R2C2);
cursor = addRow(depth, cursor, R3C1, R3C2);
tableBody.setEnd(cursor);
tableBody.addToIndexes();
--depth;
table.setEnd(cursor);
table.addToIndexes();
--depth;
Person chris = new Person(jCas);
int begin = (HEADING + ROW1 + ROW2).length() + 4;
chris.setBegin(begin);
chris.setEnd(begin + R3C1.length());
chris.addToIndexes();
}
private int addRow(int depth, int cursor, String cell1, String cell2) {
TableRow tableRow = new TableRow(jCas);
tableRow.setBegin(++cursor);
tableRow.setDepth(++depth);
TableCell c1 = new TableCell(jCas);
c1.setBegin(cursor);
c1.setDepth(++depth);
Paragraph p1 = new Paragraph(jCas);
p1.setBegin(cursor);
p1.setDepth(++depth);
cursor += cell1.length();
p1.setEnd(cursor);
p1.addToIndexes();
--depth;
c1.setEnd(cursor);
c1.addToIndexes();
TableCell c2 = new TableCell(jCas);
c2.setBegin(++cursor);
c2.setDepth(depth);
Paragraph p2 = new Paragraph(jCas);
p2.setBegin(cursor);
p2.setDepth(++depth);
cursor += cell2.length();
p2.setEnd(cursor);
p2.addToIndexes();
--depth;
c2.setEnd(cursor);
c2.addToIndexes();
--depth;
tableRow.setEnd(cursor);
tableRow.addToIndexes();
--depth;
return cursor;
}
@Test
public void testProcess() throws Exception {
processJCas(SOURCE_PATTERN, "Name", SOURCE_TYPE, Person.class.getSimpleName(), TARGET_PATTERN, "email",
TARGET_TYPE, CommsIdentifier.class.getSimpleName(), PARAM_TYPE, "emailAddress");
Collection<Person> people = JCasUtil.select(jCas, Person.class);
assertEquals(3, people.size());
Set<String> names = people.stream().map(Annotation::getCoveredText).collect(Collectors.toSet());
assertTrue(names.contains(R1C1));
assertTrue(names.contains(R2C1));
assertTrue(names.contains(R3C1));
Collection<CommsIdentifier> emails = JCasUtil.select(jCas, CommsIdentifier.class);
assertEquals(3, emails.size());
Set<String> addresses = emails.stream().map(Annotation::getCoveredText).collect(Collectors.toSet());
assertTrue(addresses.contains(R1C2));
assertTrue(addresses.contains(R2C2));
assertTrue(addresses.contains(R3C2));
Collection<Relation> relations = JCasUtil.select(jCas, Relation.class);
assertEquals(3, relations.size());
Relation first = relations.iterator().next();
assertEquals(HEADING.length() + 2, first.getBegin());
assertEquals(HEADING.length() + 2 + ROW1.length(), first.getEnd());
assertEquals(Person.class, first.getSource().getClass());
assertEquals(R1C1, first.getSource().getCoveredText());
assertEquals(CommsIdentifier.class, first.getTarget().getClass());
assertEquals(R1C2, first.getTarget().getCoveredText());
assertEquals("emailAddress", first.getRelationshipType());
}
}