//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.cleaners; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.fit.factory.AnalysisEngineFactory; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; import org.junit.Test; import uk.gov.dstl.baleen.annotators.testing.Annotations; import uk.gov.dstl.baleen.annotators.testing.AnnotatorTestBase; import uk.gov.dstl.baleen.types.common.Person; import uk.gov.dstl.baleen.types.geo.Coordinate; import uk.gov.dstl.baleen.types.semantic.Location; import uk.gov.dstl.baleen.types.semantic.ReferenceTarget; public class CorefBracketsTest extends AnnotatorTestBase { private static final String MRGS = "4QFJ1267"; private static final String SOMEWHERE = "Somewhere"; private static final String LOC_TEXT = "Somewhere (4QFJ1267)"; private static final String PERSON_TEXT = "William Tell (Bill) (Billy) is a famous character"; private static final String WILLIAM = "William Tell"; private static final String BILL = "Bill"; private static final String BILLY = "Billy"; @Test public void testMultipleEntities() throws Exception{ AnalysisEngine ae = AnalysisEngineFactory.createEngine(CorefBrackets.class); jCas.setDocumentText(PERSON_TEXT); Annotations.createPerson(jCas, 0, 12, WILLIAM); Annotations.createPerson(jCas, 14, 18, BILL); Annotations.createPerson(jCas, 21, 26, BILLY); ae.process(jCas); assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size()); ReferenceTarget rt = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0); Person p1 = JCasUtil.selectByIndex(jCas, Person.class, 0); Person p2 = JCasUtil.selectByIndex(jCas, Person.class, 1); Person p3 = JCasUtil.selectByIndex(jCas, Person.class, 2); assertEquals(rt, p1.getReferent()); assertEquals(rt, p2.getReferent()); assertEquals(rt, p3.getReferent()); } @Test public void testIncorrectEntities() throws Exception{ AnalysisEngine ae = AnalysisEngineFactory.createEngine(CorefBrackets.class); jCas.setDocumentText(PERSON_TEXT); Annotations.createPerson(jCas, 0, 12, WILLIAM); Annotations.createLocation(jCas, 14, 18, BILL, null); Annotations.createPerson(jCas, 21, 26, BILLY); ae.process(jCas); assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size()); ReferenceTarget rt = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0); Person p1 = JCasUtil.selectByIndex(jCas, Person.class, 0); Person p2 = JCasUtil.selectByIndex(jCas, Person.class, 1); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); assertEquals(rt, p1.getReferent()); assertEquals(rt, p2.getReferent()); assertNull(l.getReferent()); } @Test public void testSkippedEntities() throws Exception{ AnalysisEngine ae = AnalysisEngineFactory.createEngine(CorefBrackets.class); jCas.setDocumentText(PERSON_TEXT); Annotations.createPerson(jCas, 0, 12, WILLIAM); Annotations.createPerson(jCas, 21, 26, BILLY); ae.process(jCas); assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size()); ReferenceTarget rt = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0); Person p1 = JCasUtil.selectByIndex(jCas, Person.class, 0); Person p2 = JCasUtil.selectByIndex(jCas, Person.class, 1); assertEquals(rt, p1.getReferent()); assertEquals(rt, p2.getReferent()); } @Test public void testNoExistingReferents() throws Exception{ AnalysisEngine ae = AnalysisEngineFactory.createEngine(CorefBrackets.class); jCas.setDocumentText(LOC_TEXT); Annotations.createLocation(jCas, 0, 9, SOMEWHERE, null); Annotations.createCoordinate(jCas, 11, 19, MRGS); ae.process(jCas); assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size()); ReferenceTarget rt = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); Coordinate c = JCasUtil.selectByIndex(jCas, Coordinate.class, 0); assertEquals(rt, l.getReferent()); assertEquals(rt, c.getReferent()); } @Test public void testExistingLocReferent() throws Exception{ AnalysisEngine ae = AnalysisEngineFactory.createEngine(CorefBrackets.class); jCas.setDocumentText(LOC_TEXT); ReferenceTarget rt1 = Annotations.createReferenceTarget(jCas); Location l1 = Annotations.createLocation(jCas, 0, 9, SOMEWHERE, null); l1.setReferent(rt1); Annotations.createCoordinate(jCas, 11, 19, MRGS); ae.process(jCas); assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size()); ReferenceTarget rt = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); Coordinate c = JCasUtil.selectByIndex(jCas, Coordinate.class, 0); assertEquals(rt, l.getReferent()); assertEquals(rt, c.getReferent()); assertEquals(l.getReferent(), c.getReferent()); } @Test public void testExistingCoordReferent() throws Exception{ AnalysisEngine ae = AnalysisEngineFactory.createEngine(CorefBrackets.class); jCas.setDocumentText(LOC_TEXT); ReferenceTarget rt1 = Annotations.createReferenceTarget(jCas); Annotations.createLocation(jCas, 0, 9, SOMEWHERE, null); Coordinate c1 = Annotations.createCoordinate(jCas, 11, 19, MRGS); c1.setReferent(rt1); ae.process(jCas); assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size()); ReferenceTarget rt = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); Coordinate c = JCasUtil.selectByIndex(jCas, Coordinate.class, 0); assertEquals(rt, l.getReferent()); assertEquals(rt, c.getReferent()); assertEquals(c.getReferent(), l.getReferent()); } @Test public void testExistingReferentsNoMerge() throws Exception{ AnalysisEngine ae = AnalysisEngineFactory.createEngine(CorefBrackets.class); populateJCasMergeTest(jCas); ae.process(jCas); assertEquals(2, JCasUtil.select(jCas, ReferenceTarget.class).size()); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); Coordinate c = JCasUtil.selectByIndex(jCas, Coordinate.class, 0); ReferenceTarget lRt = l.getReferent(); ReferenceTarget cRt = c.getReferent(); assertNotEquals(lRt, cRt); } @Test public void testExistingReferentsMerge() throws Exception{ AnalysisEngine ae = AnalysisEngineFactory.createEngine(CorefBrackets.class, "mergeReferents", true); populateJCasMergeTest(jCas); ae.process(jCas); assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size()); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); Coordinate c = JCasUtil.selectByIndex(jCas, Coordinate.class, 0); ReferenceTarget rt = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0); assertEquals(rt, l.getReferent()); assertEquals(rt, c.getReferent()); assertEquals(c.getReferent(), l.getReferent()); } private void populateJCasMergeTest(JCas jCas){ jCas.setDocumentText(LOC_TEXT); ReferenceTarget rt1 = Annotations.createReferenceTarget(jCas); ReferenceTarget rt2 = Annotations.createReferenceTarget(jCas); Location l1 = Annotations.createLocation(jCas, 0, 9, SOMEWHERE, null); l1.setReferent(rt1); Coordinate c1 = Annotations.createCoordinate(jCas, 11, 19, MRGS); c1.setReferent(rt2); } @Test public void testMultipleSpaces() throws Exception{ AnalysisEngine ae = AnalysisEngineFactory.createEngine(CorefBrackets.class); jCas.setDocumentText("Somewhere \t(4QFJ1267)"); Annotations.createLocation(jCas, 0, 9, SOMEWHERE, null); Annotations.createCoordinate(jCas, 14, 22, MRGS); ae.process(jCas); assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size()); ReferenceTarget rt = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); Coordinate c = JCasUtil.selectByIndex(jCas, Coordinate.class, 0); assertEquals(rt, l.getReferent()); assertEquals(rt, c.getReferent()); } @Test public void testNoExistingReferentsMerge() throws Exception{ AnalysisEngine ae = AnalysisEngineFactory.createEngine(CorefBrackets.class, CorefBrackets.PARAM_MERGE_REFERENTS, true); jCas.setDocumentText("James (Jimmy) visited Thomas and Ben"); Person p1 = new Person(jCas, 0, 5); p1.addToIndexes(); Person p2 = new Person(jCas, 7, 12); p2.addToIndexes(); Person p3 = new Person(jCas, 22, 28); p3.addToIndexes(); Person p4 = new Person(jCas, 33, 36); p4.addToIndexes(); ae.process(jCas); assertNotNull(p1.getReferent()); assertEquals(p1.getReferent(), p2.getReferent()); assertNotEquals(p1.getReferent(), p3.getReferent()); assertNotEquals(p1.getReferent(), p4.getReferent()); assertNull(p3.getReferent()); assertNull(p4.getReferent()); } }