package de.unigoettingen.sub.commons.ocr.util.merge;

import static org.custommonkey.xmlunit.XMLAssert.assertXpathEvaluatesTo;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;

import org.custommonkey.xmlunit.exceptions.XpathException;
import org.junit.Test;
import org.xml.sax.SAXException;

/**
 * Test for {@link HocrMerger}: feeds two single-page hOCR documents into
 * {@code mergeBuffered} and checks, via XPath, that the merged output contains
 * both pages with their words in input order.
 */
public class HocrMergerTest {

	/**
	 * Charset name used for every String/byte conversion in this test, so the
	 * result does not depend on the platform default charset. The fixtures
	 * carry no XML declaration.
	 * NOTE(review): assumes the merger emits UTF-8 compatible output - confirm.
	 */
	private static final String ENCODING = "UTF-8";

	/** hOCR fixture for the first input: page id {@code page_1}, word {@code test1}. */
	private static final String FIRST_PAGE_HOCR = singleWordHocr("page_1", "test1");

	/** hOCR fixture for the second input: page id {@code page_2}, word {@code test2}. */
	private static final String SECOND_PAGE_HOCR = singleWordHocr("page_2", "test2");

	/**
	 * Builds a minimal hOCR document with one page that holds a single word
	 * nested in the area / paragraph / line / word elements.
	 *
	 * @param pageId value of the {@code id} attribute of the {@code ocr_page} div
	 * @param word text content of the innermost {@code xocr_word} span
	 * @return the complete document as one string without whitespace between elements
	 */
	private static String singleWordHocr(String pageId, String word) {
		return "<html><head></head>"
				+ "<body>"
				+ "<div class='ocr_page' id='" + pageId + "'>"
				+ "<div class='ocr_carea'>"
				+ "<p class='ocr_par'>"
				+ "<span class='ocr_line'>"
				+ "<span class='ocr_word'>"
				+ "<span class='xocr_word'>" + word + "</span>"
				+ "</span>"
				+ "</span>"
				+ "</p>"
				+ "</div>"
				+ "</div>"
				+ "</body>"
				+ "</html>";
	}

	/**
	 * Merges the two fixtures and asserts that the result has exactly two
	 * {@code ocr_page} divs, the first containing {@code test1} and the second
	 * containing {@code test2}.
	 *
	 * @throws XpathException if an XPath expression cannot be evaluated
	 * @throws SAXException if the merged output is not parseable as XML
	 * @throws IOException if reading or writing the streams fails, or the
	 *             charset name is not supported
	 */
	@Test
	public void test() throws XpathException, SAXException, IOException {
		// Explicit charset instead of the platform default.
		InputStream firstStream = new ByteArrayInputStream(FIRST_PAGE_HOCR.getBytes(ENCODING));
		InputStream secondStream = new ByteArrayInputStream(SECOND_PAGE_HOCR.getBytes(ENCODING));
		List<InputStream> inputs = Arrays.asList(firstStream, secondStream);
		ByteArrayOutputStream output = new ByteArrayOutputStream();

		Merger mergerSut = new HocrMerger();
		mergerSut.mergeBuffered(inputs, output);

		// Decode with the same charset that was used to encode the inputs.
		String result = output.toString(ENCODING);

		// local-name() keeps the expressions independent of any namespace on the output elements.
		assertXpathEvaluatesTo("2", "count(//*[local-name()='div' and @class='ocr_page'])", result);
		assertXpathEvaluatesTo("test1", "//*[local-name()='div' and @class='ocr_page'][1]//*[local-name()='span' and @class='xocr_word']", result);
		assertXpathEvaluatesTo("test2", "//*[local-name()='div' and @class='ocr_page'][2]//*[local-name()='span' and @class='xocr_word']", result);
	}

}