package lux.xml.tinybin;

import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;

import javax.xml.transform.stream.StreamSource;

import lux.SearchTest;
import net.sf.saxon.Configuration;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.expr.sort.CodepointCollator;
import net.sf.saxon.expr.sort.GenericAtomicComparer;
import net.sf.saxon.functions.DeepEqual;
import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.tree.tiny.TinyDocumentImpl;

import org.apache.commons.io.IOUtils;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

/**
 * Round-trip tests for {@link TinyBinary}: each test parses an XML document from the
 * class path, converts the resulting tiny tree into a TinyBinary, rebuilds a document
 * from the TinyBinary's bytes, and checks that the rebuilt document is deep-equal to
 * the one that was parsed.
 */
public class TinyBinaryTest {

    Processor processor;
    DocumentBuilder builder;

    /** Creates a fresh Processor and DocumentBuilder before every test. */
    @Before
    public void init () {
        processor = new Processor(false);
        builder = processor.newDocumentBuilder();
    }

    /**
     * Round-trips several documents, each with a null charset and with utf-8.
     */
    @Test
    public void testRoundTrip() throws SaxonApiException, XPathException, IOException {
        // try building a TinyBinary and recreating a tree from that
        assertRoundTrip("lux/reader-test.xml", null);
        assertRoundTrip("lux/reader-test.xml", "utf-8");

        // reuse the same Processor (and thus namepool)
        assertRoundTrip("lux/reader-test.xml", null);
        assertRoundTrip("lux/reader-test.xml", "utf-8");

        // try a document that includes some namespaces
        assertRoundTrip("lux/reader-test-ns.xml", null);
        assertRoundTrip("lux/reader-test-ns.xml", "utf-8");

        // a large(r) document:
        assertRoundTrip("lux/hamlet.xml", null);
        assertRoundTrip("lux/hamlet.xml", "utf-8");
    }

    /**
     * Round-trips a document written with format version 0 instead of
     * {@code TinyBinary.CURRENT_FORMAT}.
     */
    @Test
    public void testReadVersion0 () throws Exception {
        assertRoundTrip("lux/reader-test.xml", null, (byte) 0);
        assertRoundTrip("lux/reader-test.xml", "utf-8", (byte) 0);
    }

    /** Round-trips conf/solrconfig.xml with a null charset and with utf-8. */
    @Test
    public void testAttributes () throws Exception {
        assertRoundTrip ("conf/solrconfig.xml", null);
        assertRoundTrip ("conf/solrconfig.xml", "utf-8");
    }

    /** Round-trips a document containing an attribute whose value is empty. */
    @Test
    public void testEmptyAttribute() throws SaxonApiException, XPathException, IOException {
        // this document has an attribute with an empty value
        assertRoundTrip("lux/wikipedia-ns-test.xml", "utf-8");
    }

    /** Performs a single utf-8 round trip of the basic test document. */
    @Test
    public void testOnce() throws SaxonApiException, XPathException, IOException {
        assertRoundTrip("lux/reader-test.xml", "utf-8");
    }

    /**
     * Timing comparison of parsing XML versus rebuilding from a TinyBinary; ignored
     * by default so it does not run with the regular test suite.
     */
    @Test
    @Ignore
    public void testBenchmark () throws IOException, SaxonApiException {
        doBenchmark("lux/reader-test.xml", null, 1000);
        doBenchmark("lux/reader-test.xml", "utf-8", 1000);
        doBenchmark("lux/reader-test.xml", null, 1000);
        doBenchmark("lux/reader-test.xml", "utf-8", 1000);
        doBenchmark("lux/hamlet.xml", null, 1000);
        doBenchmark("lux/hamlet.xml", "utf-8", 1000);
    }

    /**
     * Prints the size of a document in its original and TinyBinary forms, then times
     * {@code iterations} builds from the original bytes against {@code iterations}
     * reconstructions from the TinyBinary bytes and prints both durations.
     *
     * @param docpath class path location of the XML document
     * @param charsetName name of the charset handed to TinyBinary, or null for none
     * @param iterations number of times each build strategy is repeated
     * @throws IOException if the resource is missing or cannot be read
     * @throws SaxonApiException if the document cannot be parsed
     */
    private void doBenchmark (String docpath, String charsetName, int iterations) throws IOException, SaxonApiException {
        byte[] inputBytes;
        InputStream in = openResource(docpath);
        try {
            inputBytes = IOUtils.toByteArray(in);
        } finally {
            // release the stream even when reading fails
            in.close();
        }
        Charset charset = toCharset(charsetName);

        XdmNode doc = builder.build(new StreamSource(new ByteArrayInputStream(inputBytes)));
        TinyBinary tinyBin = new TinyBinary(((TinyDocumentImpl) doc.getUnderlyingNode()).getTree(), charset);
        byte[] tinyInput = tinyBin.getBytes();
        System.out.println(String.format("Original size=%d bytes, 'tiny' binary size=%d bytes", inputBytes.length, tinyBin.length()));

        long start = System.nanoTime();
        for (int i = 0; i < iterations; i++) {
            builder.build(new StreamSource(new ByteArrayInputStream(inputBytes)));
        }
        long t1 = System.nanoTime();

        Configuration config = processor.getUnderlyingConfiguration();
        for (int i = 0; i < iterations; i++) {
            new TinyBinary(tinyInput, charset).getTinyDocument(config);
        }
        long t2 = System.nanoTime();

        // nanoseconds -> milliseconds
        System.out.println (String.format("DocBuilder: %dms; TinyBinary: %dms", (t1-start)/1000000, (t2-t1)/1000000));
    }

    /**
     * Parses the document at {@code docpath}, writes it as a TinyBinary using the given
     * format version, reads a document back from the resulting bytes, and asserts that
     * it is deep-equal to the parsed original.
     *
     * @param docpath class path location of the XML document
     * @param charsetName name of the charset handed to TinyBinary, or null for none
     * @param formatVersion format version passed to the TinyBinary constructor
     * @throws IOException if the resource is missing or cannot be closed
     * @throws SaxonApiException if the document cannot be parsed
     * @throws XPathException if the deep-equal comparison fails to evaluate
     */
    private void assertRoundTrip (String docpath, String charsetName, byte formatVersion) throws XPathException, SaxonApiException, IOException {
        // get a file from the class path and build a document from it
        XdmNode doc;
        InputStream in = openResource(docpath);
        try {
            doc = builder.build(new StreamSource(in));
        } finally {
            // release the stream even when parsing fails
            in.close();
        }
        Charset charset = toCharset(charsetName);

        // Make a TinyBinary from the TinyTree
        TinyBinary tinyBin = new TinyBinary(((TinyDocumentImpl) doc.getUnderlyingNode()).getTree(), charset, formatVersion);
        byte[] b = tinyBin.getBytes();

        // Copy the TinyBinary using its byte array
        TinyBinary copy = new TinyBinary (b, charset);
        Configuration config = processor.getUnderlyingConfiguration();

        // get the document node from the copy
        TinyDocumentImpl tinyDoc = copy.getTinyDocument(config);
        // for debugging:
        // processor.newSerializer(System.out).serializeNode(new XdmNode(tinyDoc));

        XPathContext context = config.getConversionContext();
        boolean equals = DeepEqual.deepEquals (
                tinyDoc.iterate(),
                doc.getUnderlyingNode().iterate(),
                new GenericAtomicComparer (CodepointCollator.getInstance(), context),
                context,
                DeepEqual.INCLUDE_PREFIXES
                    | DeepEqual.EXCLUDE_WHITESPACE_TEXT_NODES
                    | DeepEqual.INCLUDE_COMMENTS
                    | DeepEqual.COMPARE_STRING_VALUES
                    | DeepEqual.INCLUDE_PROCESSING_INSTRUCTIONS);
        assertTrue (docpath + " was not preserved by TinyBinary roundtrip", equals);
    }

    /**
     * Round-trips the document using {@code TinyBinary.CURRENT_FORMAT}.
     *
     * @param docpath class path location of the XML document
     * @param charsetName name of the charset handed to TinyBinary, or null for none
     */
    private void assertRoundTrip (String docpath, String charsetName) throws SaxonApiException, XPathException, IOException {
        assertRoundTrip (docpath, charsetName, TinyBinary.CURRENT_FORMAT);
    }

    /**
     * Opens a class path resource through the class loader of {@link SearchTest}.
     * The caller is responsible for closing the returned stream.
     *
     * @param docpath class path location of the resource
     * @return an open stream on the resource, never null
     * @throws IOException if no resource exists at {@code docpath}
     */
    private static InputStream openResource (String docpath) throws IOException {
        InputStream in = SearchTest.class.getClassLoader().getResourceAsStream(docpath);
        if (in == null) {
            // getResourceAsStream signals "not found" with null; fail with the path
            // instead of letting a later call trip over the null stream
            throw new IOException("test resource not found on classpath: " + docpath);
        }
        return in;
    }

    /**
     * Resolves a charset name, passing null through unchanged.
     *
     * @param charsetName a charset name, or null
     * @return the named Charset, or null when {@code charsetName} is null
     */
    private static Charset toCharset (String charsetName) {
        return charsetName == null ? null : Charset.forName(charsetName);
    }
}