//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.templates; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.util.Optional; import org.apache.uima.jcas.JCas; import org.junit.Before; import org.junit.Test; import com.google.common.collect.ImmutableList; import uk.gov.dstl.baleen.exceptions.InvalidParameterException; import uk.gov.dstl.baleen.types.structure.Document; import uk.gov.dstl.baleen.types.structure.Heading; import uk.gov.dstl.baleen.types.structure.Paragraph; import uk.gov.dstl.baleen.types.structure.Structure; import uk.gov.dstl.baleen.uima.testing.JCasSingleton; import uk.gov.dstl.baleen.uima.utils.SelectorPath; import uk.gov.dstl.baleen.uima.utils.StructureHierarchy; import uk.gov.dstl.baleen.uima.utils.StructureUtil; public class RecordStructureManagerTest { protected static final String HEADING = "Test"; protected static final String PARA1 = "The quick brown fox jumped over the lazy dog's back."; protected static final String PARA2 = "The quick brown cat jumped over the lazy dog's back."; protected static final String PARA3 = "The quick brown rat jumped over the lazy dog's back."; protected static final String PARA4 = "The quick brown ant jumped over the lazy dog's back."; protected static final String PARA5 = "The quick brown elk jumped over the lazy dog's back."; protected static final String TEXT = String.join("\n", HEADING, PARA1, PARA2, PARA3, PARA4, HEADING, PARA5); private RecordStructureManager recordStructureManager; private Paragraph paragraph1; private Paragraph paragraph2; private Paragraph paragraph3; private Paragraph paragraph4; private Paragraph paragraph5; private Document document; private Heading heading1; private Heading heading2; protected void addAnnotations(JCas jCas) { document = new Document(jCas); document.setDepth(1); document.setBegin(0); document.setEnd(TEXT.length()); document.addToIndexes(); int cursor = 0; heading1 = new Heading(jCas); heading1.setDepth(2); heading1.setBegin(cursor); cursor += HEADING.length(); heading1.setEnd(cursor); heading1.addToIndexes(); paragraph1 = new Paragraph(jCas); paragraph1.setDepth(2); paragraph1.setBegin(++cursor); cursor += PARA1.length(); paragraph1.setEnd(cursor); paragraph1.addToIndexes(); paragraph2 = new Paragraph(jCas); paragraph2.setDepth(2); paragraph2.setBegin(++cursor); cursor += PARA2.length(); paragraph2.setEnd(cursor); paragraph2.addToIndexes(); paragraph3 = new Paragraph(jCas); paragraph3.setDepth(2); paragraph3.setBegin(++cursor); cursor += PARA3.length(); paragraph3.setEnd(cursor); paragraph3.addToIndexes(); paragraph4 = new Paragraph(jCas); paragraph4.setDepth(2); paragraph4.setBegin(++cursor); cursor += PARA4.length(); paragraph4.setEnd(cursor); paragraph4.addToIndexes(); heading2 = new Heading(jCas); heading2.setDepth(2); heading2.setBegin(cursor); cursor += HEADING.length(); heading2.setEnd(cursor); heading2.addToIndexes(); paragraph5 = new Paragraph(jCas); paragraph5.setDepth(2); paragraph5.setBegin(++cursor); cursor += PARA5.length(); paragraph5.setEnd(cursor); paragraph5.addToIndexes(); } @Before public void setUp() throws Exception { JCas jCas = JCasSingleton.getJCasInstance(); jCas.setDocumentText(TEXT); addAnnotations(jCas); recordStructureManager = new RecordStructureManager( StructureHierarchy.build(jCas, StructureUtil.getStructureClasses())); } @Test public void testGetMissingStructure() throws InvalidParameterException { assertFalse(recordStructureManager.select("Document > Break").isPresent()); } @Test public void testGetFirstParagraph() throws InvalidParameterException { Optional<Structure> structure = recordStructureManager.select("Document > Paragraph:nth-of-type(1)"); assertTrue(structure.isPresent()); assertEquals(paragraph1, structure.get()); } @Test public void testCanNotRepeatMissingStructure() throws InvalidParameterException { SelectorPath minimal = SelectorPath.parse("Table"); RepeatSearch repeatSearch = new RepeatSearch(ImmutableList.of(minimal), minimal); Optional<Structure> structure = recordStructureManager.select("Document > Heading"); assertFalse(recordStructureManager.repeatRecord(structure, repeatSearch, true).isPresent()); } @Test public void testCanRepeatParagraph() throws InvalidParameterException { SelectorPath minimal = SelectorPath.parse("Document > Paragraph:nth-of-type(1)"); RepeatSearch repeatUnit = new RepeatSearch(ImmutableList.of(minimal), minimal); Optional<Structure> structure = recordStructureManager.select("Document > Heading"); Optional<Structure> repeat1 = recordStructureManager.repeatRecord(structure, repeatUnit, true); assertTrue(repeat1.isPresent()); assertEquals(paragraph1, repeat1.get()); Optional<Structure> repeat2 = recordStructureManager.repeatRecord(repeat1, repeatUnit, false); assertTrue(repeat2.isPresent()); assertEquals(paragraph2, repeat2.get()); Optional<Structure> repeat3 = recordStructureManager.repeatRecord(repeat2, repeatUnit, false); assertTrue(repeat3.isPresent()); assertEquals(paragraph3, repeat3.get()); Optional<Structure> repeat4 = recordStructureManager.repeatRecord(repeat3, repeatUnit, false); assertTrue(repeat4.isPresent()); assertEquals(paragraph4, repeat4.get()); Optional<Structure> repeat5 = recordStructureManager.repeatRecord(repeat4, repeatUnit, false); assertFalse(repeat5.isPresent()); } @Test public void testCanGetAfterRepeatParagraph() throws InvalidParameterException { SelectorPath minimal = SelectorPath.parse("Document > Paragraph:nth-of-type(1)"); RepeatSearch repeatUnit = new RepeatSearch(ImmutableList.of(minimal), minimal); Optional<Structure> structure = recordStructureManager.select("Document > Heading"); Optional<Structure> repeat1 = recordStructureManager.repeatRecord(structure, repeatUnit, true); Optional<Structure> repeat2 = recordStructureManager.repeatRecord(repeat1, repeatUnit, false); Optional<Structure> repeat3 = recordStructureManager.repeatRecord(repeat2, repeatUnit, false); recordStructureManager.repeatRecord(repeat3, repeatUnit, false); Optional<Structure> adjusted = recordStructureManager.select("Document > Paragraph:nth-of-type(2)"); assertTrue(adjusted.isPresent()); assertEquals(paragraph5, adjusted.get()); } @Test public void testCanRepeatFromStartOfDocument() throws InvalidParameterException { SelectorPath maximal = SelectorPath.parse("Document > Heading"); RepeatSearch repeatUnit = new RepeatSearch(ImmutableList.of(maximal), maximal); Optional<Structure> repeat1 = recordStructureManager.repeatRecord(Optional.empty(), repeatUnit, true); assertTrue(repeat1.isPresent()); assertEquals(heading1, repeat1.get()); Optional<Structure> repeat2 = recordStructureManager.repeatRecord(repeat1, repeatUnit, false); assertFalse(repeat2.isPresent()); } @Test public void testCanGetAfterMissingRepeat() throws InvalidParameterException { SelectorPath minimal = SelectorPath.parse("Document > Paragraph"); RepeatSearch repeatUnit = new RepeatSearch(ImmutableList.of(minimal), minimal); recordStructureManager.repeatRecord(Optional.empty(), repeatUnit, true); Optional<Structure> adjusted = recordStructureManager.select("Document > Paragraph:nth-of-type(2)"); assertTrue(adjusted.isPresent()); assertEquals(paragraph1, adjusted.get()); } }