//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.uima.utils.select; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import org.apache.uima.UIMAException; import org.apache.uima.jcas.JCas; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import uk.gov.dstl.baleen.types.structure.Aside; import uk.gov.dstl.baleen.types.structure.Break; import uk.gov.dstl.baleen.types.structure.Document; import uk.gov.dstl.baleen.types.structure.Figure; import uk.gov.dstl.baleen.types.structure.Header; import uk.gov.dstl.baleen.types.structure.Paragraph; import uk.gov.dstl.baleen.types.structure.Section; import uk.gov.dstl.baleen.types.structure.Structure; import uk.gov.dstl.baleen.types.structure.Style; import uk.gov.dstl.baleen.uima.testing.JCasSingleton; import uk.gov.dstl.baleen.uima.utils.StructureHierarchy; import uk.gov.dstl.baleen.uima.utils.StructureUtil; public class SelectTest { private Node<Structure> root = null; private static String text; private JCas jCas; @BeforeClass public static void initClass() { StringBuilder sb = new StringBuilder(" "); for (int i = 1; i <= 10; i++) { sb.append(String.format("%d", i)); } for (int i = 1; i <= 10; i++) { sb.append(String.format("%d", i)); sb.append(String.format("%d", i)); sb.append(String.format("%d", i)); sb.append(String.format("%d", i)); } sb.append("\n"); sb.append("\n"); sb.append("\n"); sb.append("Some text before the only child in this div"); text = sb.toString(); } @Before public void init() throws UIMAException { jCas = JCasSingleton.getJCasInstance(); jCas.setDocumentText(text); int cursor = 0; int depth = 0; Document document = new Document(jCas); document.setBegin(cursor); document.setDepth(depth); depth++; Header header = new Header(jCas); header.setBegin(cursor); header.setEnd(cursor); header.setDepth(depth); header.addToIndexes(); Section section1 = new Section(jCas); section1.setBegin(++cursor); section1.setDepth(depth); depth++; for (int i = 1; i <= 10; i++) { Paragraph p = new Paragraph(jCas); p.setBegin(cursor); cursor += Integer.valueOf(i).toString().length(); p.setEnd(cursor); p.setDepth(depth); p.addToIndexes(); } depth--; section1.setEnd(cursor); section1.addToIndexes(); Section section2 = new Section(jCas); section2.setBegin(cursor); section2.setDepth(depth); depth++; for (int i = 1; i <= 10; i++) { Paragraph p = new Paragraph(jCas); p.setBegin(cursor); cursor += Integer.valueOf(i).toString().length(); p.setEnd(cursor); p.setDepth(depth); p.addToIndexes(); Aside a = new Aside(jCas); a.setBegin(cursor); cursor += Integer.valueOf(i).toString().length(); a.setEnd(cursor); a.setDepth(depth); a.addToIndexes(); Style s = new Style(jCas); s.setBegin(cursor); cursor += Integer.valueOf(i).toString().length(); s.setEnd(cursor); s.setDepth(depth); s.addToIndexes(); Figure f = new Figure(jCas); f.setBegin(cursor); cursor += Integer.valueOf(i).toString().length(); f.setEnd(cursor); f.setDepth(depth); f.addToIndexes(); } depth--; section2.setEnd(cursor); section2.addToIndexes(); Section section3 = new Section(jCas); section3.setBegin(cursor); section3.setDepth(depth); depth++; Break break1 = new Break(jCas); break1.setBegin(cursor); break1.setEnd(++cursor); break1.setDepth(depth); break1.addToIndexes(); depth--; section3.setEnd(cursor); section3.addToIndexes(); Paragraph empty = new Paragraph(jCas); empty.setBegin(++cursor); empty.setEnd(cursor); empty.setDepth(depth); empty.addToIndexes(); Section section4 = new Section(jCas); section4.setBegin(++cursor); section4.setDepth(depth); depth++; cursor += "Some text before the ".length(); Style em = new Style(jCas); em.setBegin(cursor); em.setDepth(depth); cursor += "only".length(); em.setEnd(cursor); em.addToIndexes(); cursor += " child in this div".length(); section4.setEnd(cursor); section4.addToIndexes(); document.setEnd(cursor); document.addToIndexes(); root = StructureHierarchy.build(jCas, StructureUtil.getStructureClasses()).getRoot(); } @Test public void firstChild() { check(root.select("Section:nth-of-type(1) :first-child"), "1"); check(root.select("root:first-child")); } @Test public void lastChild() { check(root.select("Section:nth-of-type(1) :last-child"), "10"); check(root.select("root:last-child")); } @Test public void nthChild_simple() { for (int i = 1; i <= 10; i++) { check(root.select(String.format("Section:nth-of-type(1) :nth-child(%d)", i)), String.valueOf(i)); } } @Test public void nthOfType_unknownTag() { for (int i = 1; i <= 10; i++) { check(root.select(String.format("Section:nth-of-type(2) Figure:nth-of-type(%d)", i)), String.valueOf(i)); } } @Test public void nthLastChild_simple() { for (int i = 1; i <= 10; i++) { check(root.select(String.format("Section:nth-of-type(1) :nth-last-child(%d)", i)), String.valueOf(11 - i)); } } @Test public void nthOfType_simple() { for (int i = 1; i <= 10; i++) { check(root.select(String.format("Section:nth-of-type(2) Paragraph:nth-of-type(%d)", i)), String.valueOf(i)); } } @Test public void nthLastOfType_simple() { for (int i = 1; i <= 10; i++) { check(root.select(String.format("Section:nth-of-type(2) :nth-last-of-type(%d)", i)), String.valueOf(11 - i), String.valueOf(11 - i), String.valueOf(11 - i), String.valueOf(11 - i)); } } @Test public void nthChild_advanced() { check(root.select("Section:nth-of-type(1) :nth-child(-5)")); check(root.select("Section:nth-of-type(1) :nth-child(odd)"), "1", "3", "5", "7", "9"); check(root.select("Section:nth-of-type(1) :nth-child(2n-1)"), "1", "3", "5", "7", "9"); check(root.select("Section:nth-of-type(1) :nth-child(2n+1)"), "1", "3", "5", "7", "9"); check(root.select("Section:nth-of-type(1) :nth-child(2n+3)"), "3", "5", "7", "9"); check(root.select("Section:nth-of-type(1) :nth-child(even)"), "2", "4", "6", "8", "10"); check(root.select("Section:nth-of-type(1) :nth-child(2n)"), "2", "4", "6", "8", "10"); check(root.select("Section:nth-of-type(1) :nth-child(3n-1)"), "2", "5", "8"); check(root.select("Section:nth-of-type(1) :nth-child(-2n+5)"), "1", "3", "5"); check(root.select("Section:nth-of-type(1) :nth-child(+5)"), "5"); } @Test public void nthOfType_advanced() { check(root.select("Section:nth-of-type(2) :nth-of-type(-5)")); check(root.select("Section:nth-of-type(2) Paragraph:nth-of-type(odd)"), "1", "3", "5", "7", "9"); check(root.select("Section:nth-of-type(2) Style:nth-of-type(2n-1)"), "1", "3", "5", "7", "9"); check(root.select("Section:nth-of-type(2) Paragraph:nth-of-type(2n+1)"), "1", "3", "5", "7", "9"); check(root.select("Section:nth-of-type(2) Aside:nth-of-type(2n+3)"), "3", "5", "7", "9"); check(root.select("Section:nth-of-type(2) Paragraph:nth-of-type(even)"), "2", "4", "6", "8", "10"); check(root.select("Section:nth-of-type(2) Paragraph:nth-of-type(2n)"), "2", "4", "6", "8", "10"); check(root.select("Section:nth-of-type(2) Paragraph:nth-of-type(3n-1)"), "2", "5", "8"); check(root.select("Section:nth-of-type(2) Paragraph:nth-of-type(-2n+5)"), "1", "3", "5"); check(root.select("Section:nth-of-type(2) :nth-of-type(+5)"), "5", "5", "5", "5"); } @Test public void nthLastChild_advanced() { check(root.select("Section:nth-of-type(1) :nth-last-child(-5)")); check(root.select("Section:nth-of-type(1) :nth-last-child(odd)"), "2", "4", "6", "8", "10"); check(root.select("Section:nth-of-type(1) :nth-last-child(2n-1)"), "2", "4", "6", "8", "10"); check(root.select("Section:nth-of-type(1) :nth-last-child(2n+1)"), "2", "4", "6", "8", "10"); check(root.select("Section:nth-of-type(1) :nth-last-child(2n+3)"), "2", "4", "6", "8"); check(root.select("Section:nth-of-type(1) :nth-last-child(even)"), "1", "3", "5", "7", "9"); check(root.select("Section:nth-of-type(1) :nth-last-child(2n)"), "1", "3", "5", "7", "9"); check(root.select("Section:nth-of-type(1) :nth-last-child(3n-1)"), "3", "6", "9"); check(root.select("Section:nth-of-type(1) :nth-last-child(-2n+5)"), "6", "8", "10"); check(root.select("Section:nth-of-type(1) :nth-last-child(+5)"), "6"); } @Test public void nthLastOfType_advanced() { check(root.select("Section:nth-of-type(2) :nth-last-of-type(-5)")); check(root.select("Section:nth-of-type(2) Paragraph:nth-last-of-type(odd)"), "2", "4", "6", "8", "10"); check(root.select("Section:nth-of-type(2) Style:nth-last-of-type(2n-1)"), "2", "4", "6", "8", "10"); check(root.select("Section:nth-of-type(2) Paragraph:nth-last-of-type(2n+1)"), "2", "4", "6", "8", "10"); check(root.select("Section:nth-of-type(2) Aside:nth-last-of-type(2n+3)"), "2", "4", "6", "8"); check(root.select("Section:nth-of-type(2) Paragraph:nth-last-of-type(even)"), "1", "3", "5", "7", "9"); check(root.select("Section:nth-of-type(2) Paragraph:nth-last-of-type(2n)"), "1", "3", "5", "7", "9"); check(root.select("Section:nth-of-type(2) Paragraph:nth-last-of-type(3n-1)"), "3", "6", "9"); check(root.select("Section:nth-of-type(2) Aside:nth-last-of-type(-2n+5)"), "6", "8", "10"); check(root.select("Section:nth-of-type(2) :nth-last-of-type(+5)"), "6", "6", "6", "6"); } @Test public void firstOfType() { check(root.select("Section:nth-of-type(2) :first-of-type"), "1", "1", "1", "1"); } @Test public void lastOfType() { check(root.select("Section:nth-of-type(2) :last-of-type"), "10", "10", "10", "10"); } @Test public void empty() { final Nodes<Structure> sel = root.select(":empty"); assertEquals(2, sel.size()); assertEquals("Header", sel.get(0).getTypeName()); assertEquals("Paragraph", sel.get(1).getTypeName()); } @Test public void onlyChild() { final Nodes<Structure> sel = root.select("Section :only-child"); assertEquals(2, sel.size()); assertEquals("Break", sel.get(0).getTypeName()); assertEquals("Style", sel.get(1).getTypeName()); check(root.select("Section :only-child"), "\n", "only"); } @Test public void onlyOfType() { final Nodes<Structure> sel = root.select(":only-of-type"); assertEquals(5, sel.size()); assertEquals("Document", sel.get(0).getTypeName()); // TODO: should we have Document? assertEquals("Header", sel.get(1).getTypeName()); assertEquals("Break", sel.get(2).getTypeName()); assertEquals("Paragraph", sel.get(3).getTypeName()); assertEquals("Style", sel.get(4).getTypeName()); } protected void check(Nodes<Structure> result, String... expectedContent) { assertEquals("Number of elements", expectedContent.length, result.size()); for (int i = 0; i < expectedContent.length; i++) { assertNotNull(result.get(i)); assertEquals("Expected element", expectedContent[i], result.get(i).getItem().getCoveredText()); } } @Test public void root() { Nodes<Structure> sel = root.select(":root"); assertEquals(1, sel.size()); assertNotNull(sel.get(0)); } }