package lux.xml; import static org.junit.Assert.*; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.StringReader; import java.net.URL; import java.util.Arrays; import javax.xml.stream.XMLStreamException; import javax.xml.transform.stream.StreamSource; import lux.index.MutableString; import lux.index.QNameTextMapper; import lux.index.XPathValueMapper; import lux.index.XmlPathMapper; import net.sf.saxon.s9api.Axis; import net.sf.saxon.s9api.DocumentBuilder; import net.sf.saxon.s9api.Processor; import net.sf.saxon.s9api.SaxonApiException; import net.sf.saxon.s9api.XdmNode; import net.sf.saxon.s9api.XdmNodeKind; import net.sf.saxon.s9api.XdmSequenceIterator; import org.apache.commons.io.IOUtils; import org.junit.Test; public class XmlReaderTest { private static final String CONTENT = "TEST &>0 This is some markup <that> is escaped ģé 12345678 The end."; @Test public void testSaxonBuilder() throws Exception { SaxonDocBuilder saxonBuilder = new SaxonDocBuilder(new Processor(false)); handleDocument(saxonBuilder, "lux/reader-test.xml"); XdmNode doc = saxonBuilder.getDocument(); assertDocContent(doc); } private void assertDocContent(XdmNode doc) { assertEquals("test", ((XdmNode) doc.axisIterator(Axis.CHILD).next()).getNodeName().toString()); assertEquals(CONTENT, normalize(doc.getStringValue())); } @Test public void testUndefinedEntity() throws Exception { SaxonDocBuilder saxonBuilder = new SaxonDocBuilder(new Processor(false)); handleDocument(saxonBuilder, "lux/external-entity.xml"); XdmNode doc = saxonBuilder.getDocument(); assertEquals ("$100", normalize(doc.getStringValue())); } @Test public void testSaxonBuildFromNodeImpl() throws Exception { SaxonDocBuilder saxonBuilder = new SaxonDocBuilder(new Processor(false)); handleDocument(saxonBuilder, "lux/reader-test.xml"); XdmNode doc = saxonBuilder.getDocument(); // re-process from one node to another XmlReader xmlReader = new XmlReader (); saxonBuilder.reset(); xmlReader.addHandler(saxonBuilder); xmlReader.read(doc.getUnderlyingNode()); XdmNode doc2 = saxonBuilder.getDocument(); assertNotSame(doc, doc2); assertDocContent(doc2); } @Test public void testSaxonBuilderNS() throws Exception { SaxonDocBuilder saxonBuilder = new SaxonDocBuilder(new Processor(false)); handleDocument(saxonBuilder, "lux/reader-test-ns.xml"); XdmNode doc = saxonBuilder.getDocument(); assertDocContent(doc); } @Test public void testStripNamespaces () throws Exception { SaxonDocBuilder saxonBuilder = new SaxonDocBuilder(new Processor(false)); handleDocument (saxonBuilder, "lux/reader-test-ns.xml", true); XdmNode doc = saxonBuilder.getDocument(); assertDocContent(doc); XdmNode title = (XdmNode) (doc.axisIterator(Axis.DESCENDANT, new net.sf.saxon.s9api.QName("title")).next()); assertEquals("title", title.getNodeName().toString()); assertEquals ("TEST", title.getStringValue()); handleDocument (saxonBuilder, "lux/wikipedia-ns-test.xml", true); doc = saxonBuilder.getDocument(); assertEquals ("wikipedia", doc.getStringValue()); assertEquals ("", ((XdmNode) doc.axisIterator(Axis.CHILD).next()).getNodeName().getNamespaceURI()); } @Test public void testPathMapper() throws Exception { XmlPathMapper pathMapper = new XmlPathMapper(); handleDocument(pathMapper, "lux/reader-test.xml"); assertPathMapperKeys(pathMapper); pathMapper.reset(); assertTrue (pathMapper.getPathCounts().isEmpty()); assertTrue (pathMapper.getEltQNameCounts().isEmpty()); assertTrue (pathMapper.getAttQNameCounts().isEmpty()); } private void assertPathMapperKeys(XmlPathMapper pathMapper) { // elements assertEquals (1, pathMapper.getEltQNameCount("title")); assertEquals (2, pathMapper.getEltQNameCount("entities")); assertEquals (1, pathMapper.getEltQNameCount("test")); // attributes assertEquals (2, pathMapper.getAttQNameCount("id")); // paths assertEquals (1, pathMapper.getPathCount("{} test @id")); assertEquals (1, pathMapper.getPathCount("{} test entities @id")); assertEquals (2, pathMapper.getPathCount("{} test entities")); } @Test public void testPathMapperNS() throws Exception { XmlPathMapper pathMapper = new XmlPathMapper(); assertTrue (pathMapper.isNamespaceAware()); handleDocument(pathMapper, "lux/reader-test-ns.xml"); // elements assertEquals (1, pathMapper.getEltQNameCount("title{http://lux.net{test}}")); assertEquals (1, pathMapper.getEltQNameCount("entities{http://lux.net/#test}")); assertEquals (1, pathMapper.getEltQNameCount("entities{#2}")); assertEquals (1, pathMapper.getEltQNameCount("test{http://lux.net/#test}")); // attributes assertEquals (2, pathMapper.getAttQNameCount("id")); // paths assertEquals (1, pathMapper.getPathCount("{} test{http://lux.net/#test} @id")); assertEquals (1, pathMapper.getPathCount("{} test{http://lux.net/#test} entities{#2} @id")); assertEquals (1, pathMapper.getPathCount("{} test{http://lux.net/#test} entities{http://lux.net/#test}")); } @Test public void testPathMapperNSUnaware() throws Exception { XmlPathMapper pathMapper = new XmlPathMapper(); pathMapper.setNamespaceAware(false); assertFalse (pathMapper.isNamespaceAware()); handleDocument(pathMapper, "lux/reader-test-ns.xml"); // elements assertEquals (1, pathMapper.getEltQNameCount("x:title")); assertEquals (2, pathMapper.getEltQNameCount("entities")); assertEquals (1, pathMapper.getEltQNameCount("test")); // attributes assertEquals (2, pathMapper.getAttQNameCount("id")); // paths assertEquals (1, pathMapper.getPathCount("{} test @id")); assertEquals (1, pathMapper.getPathCount("{} test entities @id")); assertEquals (2, pathMapper.getPathCount("{} test entities")); assertEquals (1, pathMapper.getPathCount("{} test x:title")); } @Test public void testSerializer () throws Exception { Serializer serializer = new Serializer(); handleDocument(serializer, "lux/reader-test.xml"); assertSerialize(serializer, "lux/reader-test-norm1.xml"); } @Test public void testSerializerNS () throws Exception { Serializer serializer = new Serializer(); handleDocument(serializer, "lux/reader-test-ns.xml"); assertSerialize(serializer, "lux/reader-test-ns-norm1.xml"); handleDocument (serializer, "lux/wikipedia-ns-test.xml"); assertSerialize(serializer, "lux/wikipedia-ns-test.xml"); } private void assertSerialize(Serializer serializer, String norm) throws IOException { String xml = serializer.getDocument(); InputStream in = getClass().getClassLoader().getResourceAsStream (norm); String original = IOUtils.toString(in, "UTF-8"); assertEquals (original, xml); } @Test public void testXPathValueMapper () throws Exception { XPathValueMapper xpathValueMapper = new XPathValueMapper(); handleDocument (xpathValueMapper, "lux/reader-test.xml"); assertTestPathValues(xpathValueMapper); } private void assertTestPathValues(XPathValueMapper xpathValueMapper) { assertEquals ("{} test @id test\0\0\0\0", String.valueOf(xpathValueMapper.getPathValues().get(0))); assertEquals ("{} test title TEST\0\0\0\0", String.valueOf(xpathValueMapper.getPathValues().get(2))); assertEquals ("{} test entities &>0\0\0\0\0\0", String.valueOf(xpathValueMapper.getPathValues().get(3))); assertEquals ("{} test token ȑȒȓȔȕȖȗȘ", String.valueOf(xpathValueMapper.getPathValues().get(6))); assertEquals ("{} test token \u0211\u0212\u0213\u0214\u0215\u0216\u0217\u0218", String.valueOf(xpathValueMapper.getPathValues().get(6))); } @Test public void testXPathValueHashString () throws Exception { char[] buf = new char[XPathValueMapper.HASH_SIZE]; XPathValueMapper.hashString(" 12345678".toCharArray(), buf); assertEquals ("\u0211\u0212\u0213\u0214\u0215\u0216\u0217\u0218", new String(buf)); Arrays.fill(buf, '\0'); XPathValueMapper.hashString(" !!!!!!!!".toCharArray(), buf); assertEquals ("\u0201\u0201\u0201\u0201\u0201\u0201\u0201\u0201", new String(buf)); Arrays.fill(buf, '\0'); XPathValueMapper.hashString("!!!!!!!! ".toCharArray(), buf); assertEquals ("\u020f\u020f\u020f\u020f\u020f\u020f\u020f\u020f", new String(buf)); } @Test public void testQNameTextMapper () throws Exception { QNameTextMapper mapper = new QNameTextMapper(); handleDocument (mapper, "lux/reader-test.xml"); assertPathMapperKeys(mapper); assertEquals ("@id", mapper.getNames().get(0)); assertEquals ("test", mapper.getValues().get(0)); assertEquals ("@att", mapper.getNames().get(1)); // test attribute value normalization assertEquals ("< \t .>", mapper.getValues().get(1)); assertEquals (new MutableString("title"), mapper.getNames().get(2)); assertEquals ("TEST", mapper.getValues().get(2)); assertEquals (new MutableString("entities"), mapper.getNames().get(3)); assertEquals ("&>0", mapper.getValues().get(3)); assertEquals (new MutableString("token"), mapper.getNames().get(6)); assertEquals (" 12345678", mapper.getValues().get(6)); assertEquals (new MutableString("test"), mapper.getNames().get(7)); assertEquals ("This is some markup <that> is escaped The end.", normalize (mapper.getValues().get(7).toString())); } public final String INPUT = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\r\n" + "<!DOCTYPE test PUBLIC \"test\" \"no.dtd\">\r\n" + "<test xmlns=\"http://lux.net/#test\" id=\"test\">\r\n" + "<!-- this is a comment -->\r\n" + "<x:title xmlns:x=\"http://lux.net{test}\">TEST</x:title>\r\n" + "<entities>&>0</entities>\r\n" + "<![CDATA[This is some markup <that> is escaped]]>\r\n" + "<?process this ?>\r\n" + "<entities xmlns=\"#2\" xmlns:y=\"#y\" y:y=\"y\" id=\"2\">ģé</entities>" + "<y:y xmlns:y=\"#z\" />\r\n" + "<token> 12345678</token>\r\n" + " The end.\r\n" + "</test>\r\n"; /** * This test ensures that we correctly process namespace information when sending events * to the Saxon XmlStreamWriter. At one point this failed due to lack of namespace * declarations for all of the prefixes. * @throws SaxonApiException * @throws XMLStreamException */ @Test public void testSerialize() throws SaxonApiException, XMLStreamException { Processor processor = new Processor (false); DocumentBuilder builder = processor.newDocumentBuilder(); SaxonDocBuilder streamBuilder = new SaxonDocBuilder(processor); XmlReader reader = new XmlReader(); reader.addHandler(streamBuilder); InputStream testInput = getClass().getResourceAsStream("/lux/reader-test-ns.xml"); reader.read (new InputStreamReader (testInput)); XdmNode doc = streamBuilder.getDocument(); net.sf.saxon.s9api.Serializer outputter = new net.sf.saxon.s9api.Serializer(); XdmSequenceIterator iter = doc.axisIterator(Axis.DESCENDANT); iter.next(); // skip the root element while (iter.hasNext()) { XdmNode e = (XdmNode) iter.next(); if (e.getNodeKind() != XdmNodeKind.ELEMENT) { continue; } String speech = outputter.serializeNodeToString(e); System.out.println (speech); builder.build(new StreamSource (new StringReader(speech))); } } private void handleDocument(StAXHandler handler, String path) throws XMLStreamException { handleDocument(handler, path, false); } private void handleDocument(StAXHandler handler, String path, boolean stripNamespaces) throws XMLStreamException { InputStream in = getClass().getClassLoader().getResourceAsStream (path); URL url = getClass().getClassLoader().getResource(path); XmlReader xmlReader = new XmlReader (); xmlReader.setStripNamespaces(stripNamespaces); xmlReader.addHandler(handler); xmlReader.read(in, url.getPath()); } private String normalize (String s) { return s == null ? null : s.replaceAll ("\\s+", " ").trim(); } } /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at http://mozilla.org/MPL/2.0/. */