/** * BSD-style license; for more info see http://pmd.sourceforge.net/license.html */ package net.sourceforge.pmd.lang.xml; import static org.junit.Assert.assertNotNull; import java.io.ByteArrayOutputStream; import java.io.PrintStream; import java.io.StringReader; import java.io.UnsupportedEncodingException; import java.util.Iterator; import java.util.Locale; import org.apache.commons.io.IOUtils; import org.junit.Assert; import org.junit.Test; import net.sourceforge.pmd.lang.LanguageRegistry; import net.sourceforge.pmd.lang.LanguageVersionHandler; import net.sourceforge.pmd.lang.Parser; import net.sourceforge.pmd.lang.ast.Node; import net.sourceforge.pmd.lang.ast.xpath.Attribute; import net.sourceforge.pmd.lang.xml.ast.XmlNode; import net.sourceforge.pmd.lang.xml.ast.XmlParser; import net.sourceforge.pmd.util.StringUtil; /** * Unit test for the {@link XmlParser}. */ public class XmlParserTest { private static final String XML_TEST = "<?xml version=\"1.0\"?>\n" + "<!DOCTYPE rootElement\n" + "[\n" + "<!ELEMENT rootElement (child1,child2)>\n" + "<!ELEMENT child1 (#PCDATA)>\n" + "<!ATTLIST child1 test CDATA #REQUIRED>\n" + "<!ELEMENT child2 (#PCDATA)>\n" + "\n" + "<!ENTITY pmd \"Copyright: PMD\">\n" + "]\n" + ">\n" + "<rootElement>\n" + " <!-- that's a comment -->\n" + " <child1 test=\"1\">entity: &pmd;\n" + " </child1>\n" + " <child2>\n" + " <![CDATA[ cdata section ]]>\n" + " </child2>\n" + "</rootElement>"; private static final String XML_NAMESPACE_TEST = "<?xml version=\"1.0\"?>\n" + "<pmd:rootElement xmlns:pmd=\"http://pmd.sf.net\">\n" + " <!-- that's a comment -->\n" + " <pmd:child1 test=\"1\">entity: &\n" + " </pmd:child1>\n" + " <pmd:child2>\n" + " <![CDATA[ cdata section ]]>\n" + " </pmd:child2>\n" + "</pmd:rootElement>"; private static final String XML_INVALID_WITH_DTD = "<?xml version=\"1.0\"?>\n" + "<!DOCTYPE rootElement\n" + "[\n" + "<!ELEMENT rootElement (child)>\n" + "<!ELEMENT child (#PCDATA)>\n" + "]\n" + ">\n" + "<rootElement>\n" + " <invalidChild></invalidChild>\n" + "</rootElement>"; /** * See bug #1054: XML Rules ever report a line -1 and not the line/column * where the error occurs * * @throws Exception * any error */ @Test public void testLineNumbers() throws Exception { LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME) .getDefaultVersion().getLanguageVersionHandler(); Parser parser = xmlVersionHandler.getParser(xmlVersionHandler.getDefaultParserOptions()); Node document = parser.parse(null, new StringReader(XML_TEST)); assertNode(document, "document", 2); assertLineNumbers(document, 1, 1, 19, 14); Node dtdElement = document.jjtGetChild(0); assertNode(dtdElement, "rootElement", 0); assertLineNumbers(dtdElement, 2, 1, 11, 1); Node rootElement = document.jjtGetChild(1); assertNode(rootElement, "rootElement", 7); assertLineNumbers(rootElement, 12, 1, 19, 14); assertTextNode(rootElement.jjtGetChild(0), "\\n "); assertLineNumbers(rootElement.jjtGetChild(0), 12, 14, 13, 4); assertNode(rootElement.jjtGetChild(1), "comment", 0); assertLineNumbers(rootElement.jjtGetChild(1), 13, 5, 13, 29); assertTextNode(rootElement.jjtGetChild(2), "\\n "); assertLineNumbers(rootElement.jjtGetChild(2), 13, 30, 14, 4); Node child1 = rootElement.jjtGetChild(3); assertNode(child1, "child1", 1, "test", "1"); assertLineNumbers(child1, 14, 5, 15, 13); assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n "); assertLineNumbers(child1.jjtGetChild(0), 14, 22, 15, 4); assertTextNode(rootElement.jjtGetChild(4), "\\n "); assertLineNumbers(rootElement.jjtGetChild(4), 15, 14, 16, 4); Node child2 = rootElement.jjtGetChild(5); assertNode(child2, "child2", 3); assertLineNumbers(child2, 16, 5, 18, 13); assertTextNode(child2.jjtGetChild(0), "\\n "); assertLineNumbers(child2.jjtGetChild(0), 16, 13, 17, 6); assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section"); assertLineNumbers(child2.jjtGetChild(1), 17, 7, 17, 33); assertTextNode(child2.jjtGetChild(2), "\\n "); assertLineNumbers(child2.jjtGetChild(2), 17, 34, 18, 4); assertTextNode(rootElement.jjtGetChild(6), "\\n"); assertLineNumbers(rootElement.jjtGetChild(6), 18, 14, 18, 14); } /** * Verifies the default parsing behavior of the XML parser. */ @Test public void testDefaultParsing() { LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME) .getDefaultVersion().getLanguageVersionHandler(); Parser parser = xmlVersionHandler.getParser(xmlVersionHandler.getDefaultParserOptions()); Node document = parser.parse(null, new StringReader(XML_TEST)); assertNode(document, "document", 2); Node dtdElement = document.jjtGetChild(0); assertNode(dtdElement, "rootElement", 0); Node rootElement = document.jjtGetChild(1); assertNode(rootElement, "rootElement", 7); assertTextNode(rootElement.jjtGetChild(0), "\\n "); assertNode(rootElement.jjtGetChild(1), "comment", 0); assertTextNode(rootElement.jjtGetChild(2), "\\n "); Node child1 = rootElement.jjtGetChild(3); assertNode(child1, "child1", 1, "test", "1"); assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n "); assertTextNode(rootElement.jjtGetChild(4), "\\n "); Node child2 = rootElement.jjtGetChild(5); assertNode(child2, "child2", 3); assertTextNode(child2.jjtGetChild(0), "\\n "); assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section"); assertTextNode(child2.jjtGetChild(2), "\\n "); assertTextNode(rootElement.jjtGetChild(6), "\\n"); } /** * Verifies the parsing behavior of the XML parser with coalescing enabled. */ @Test public void testParsingCoalescingEnabled() { LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME) .getDefaultVersion().getLanguageVersionHandler(); XmlParserOptions parserOptions = new XmlParserOptions(); parserOptions.setCoalescing(true); Parser parser = xmlVersionHandler.getParser(parserOptions); Node document = parser.parse(null, new StringReader(XML_TEST)); assertNode(document, "document", 2); Node dtdElement = document.jjtGetChild(0); assertNode(dtdElement, "rootElement", 0); Node rootElement = document.jjtGetChild(1); assertNode(rootElement, "rootElement", 7); assertTextNode(rootElement.jjtGetChild(0), "\\n "); assertNode(rootElement.jjtGetChild(1), "comment", 0); assertTextNode(rootElement.jjtGetChild(2), "\\n "); Node child1 = rootElement.jjtGetChild(3); assertNode(child1, "child1", 1, "test", "1"); assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n "); assertTextNode(rootElement.jjtGetChild(4), "\\n "); Node child2 = rootElement.jjtGetChild(5); assertNode(child2, "child2", 1); assertTextNode(child2.jjtGetChild(0), "\\n cdata section \\n "); assertTextNode(rootElement.jjtGetChild(6), "\\n"); } /** * Verifies the parsing behavior of the XML parser if entities are not * expanded. */ @Test public void testParsingDoNotExpandEntities() { LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME) .getDefaultVersion().getLanguageVersionHandler(); XmlParserOptions parserOptions = new XmlParserOptions(); parserOptions.setExpandEntityReferences(false); Parser parser = xmlVersionHandler.getParser(parserOptions); Node document = parser.parse(null, new StringReader(XML_TEST)); assertNode(document, "document", 2); Node dtdElement = document.jjtGetChild(0); assertNode(dtdElement, "rootElement", 0); Node rootElement = document.jjtGetChild(1); assertNode(rootElement, "rootElement", 7); assertTextNode(rootElement.jjtGetChild(0), "\\n "); assertNode(rootElement.jjtGetChild(1), "comment", 0); assertTextNode(rootElement.jjtGetChild(2), "\\n "); Node child1 = rootElement.jjtGetChild(3); assertNode(child1, "child1", 3, "test", "1"); assertTextNode(child1.jjtGetChild(0), "entity: "); assertNode(child1.jjtGetChild(1), "pmd", 0); assertTextNode(child1.jjtGetChild(2), "Copyright: PMD\\n "); assertTextNode(rootElement.jjtGetChild(4), "\\n "); Node child2 = rootElement.jjtGetChild(5); assertNode(child2, "child2", 3); assertTextNode(child2.jjtGetChild(0), "\\n "); assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section"); assertTextNode(child2.jjtGetChild(2), "\\n "); assertTextNode(rootElement.jjtGetChild(6), "\\n"); } /** * Verifies the parsing behavior of the XML parser if ignoring comments. */ @Test public void testParsingIgnoreComments() { LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME) .getDefaultVersion().getLanguageVersionHandler(); XmlParserOptions parserOptions = new XmlParserOptions(); parserOptions.setIgnoringComments(true); Parser parser = xmlVersionHandler.getParser(parserOptions); Node document = parser.parse(null, new StringReader(XML_TEST)); assertNode(document, "document", 2); Node dtdElement = document.jjtGetChild(0); assertNode(dtdElement, "rootElement", 0); Node rootElement = document.jjtGetChild(1); assertNode(rootElement, "rootElement", 5); assertTextNode(rootElement.jjtGetChild(0), "\\n \\n "); Node child1 = rootElement.jjtGetChild(1); assertNode(child1, "child1", 1, "test", "1"); assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n "); assertTextNode(rootElement.jjtGetChild(2), "\\n "); Node child2 = rootElement.jjtGetChild(3); assertNode(child2, "child2", 3); assertTextNode(child2.jjtGetChild(0), "\\n "); assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section"); assertTextNode(child2.jjtGetChild(2), "\\n "); assertTextNode(rootElement.jjtGetChild(4), "\\n"); } /** * Verifies the parsing behavior of the XML parser if ignoring whitespaces * in elements. */ @Test public void testParsingIgnoreElementContentWhitespace() { LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME) .getDefaultVersion().getLanguageVersionHandler(); XmlParserOptions parserOptions = new XmlParserOptions(); parserOptions.setIgnoringElementContentWhitespace(true); Parser parser = xmlVersionHandler.getParser(parserOptions); Node document = parser.parse(null, new StringReader(XML_TEST)); assertNode(document, "document", 2); Node dtdElement = document.jjtGetChild(0); assertNode(dtdElement, "rootElement", 0); Node rootElement = document.jjtGetChild(1); assertNode(rootElement, "rootElement", 3); assertNode(rootElement.jjtGetChild(0), "comment", 0); Node child1 = rootElement.jjtGetChild(1); assertNode(child1, "child1", 1, "test", "1"); assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n "); Node child2 = rootElement.jjtGetChild(2); assertNode(child2, "child2", 3); assertTextNode(child2.jjtGetChild(0), "\\n "); assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section"); assertTextNode(child2.jjtGetChild(2), "\\n "); } /** * Verifies the default parsing behavior of the XML parser with namespaces. */ @Test public void testDefaultParsingNamespaces() { LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME) .getDefaultVersion().getLanguageVersionHandler(); Parser parser = xmlVersionHandler.getParser(xmlVersionHandler.getDefaultParserOptions()); Node document = parser.parse(null, new StringReader(XML_NAMESPACE_TEST)); assertNode(document, "document", 1); Node rootElement = document.jjtGetChild(0); assertNode(rootElement, "pmd:rootElement", 7, "xmlns:pmd", "http://pmd.sf.net"); Assert.assertEquals("http://pmd.sf.net", ((XmlNode) rootElement).getNode().getNamespaceURI()); Assert.assertEquals("pmd", ((XmlNode) rootElement).getNode().getPrefix()); Assert.assertEquals("rootElement", ((XmlNode) rootElement).getNode().getLocalName()); Assert.assertEquals("pmd:rootElement", ((XmlNode) rootElement).getNode().getNodeName()); assertTextNode(rootElement.jjtGetChild(0), "\\n "); assertNode(rootElement.jjtGetChild(1), "comment", 0); assertTextNode(rootElement.jjtGetChild(2), "\\n "); Node child1 = rootElement.jjtGetChild(3); assertNode(child1, "pmd:child1", 1, "test", "1"); assertTextNode(child1.jjtGetChild(0), "entity: &\\n "); assertTextNode(rootElement.jjtGetChild(4), "\\n "); Node child2 = rootElement.jjtGetChild(5); assertNode(child2, "pmd:child2", 3); assertTextNode(child2.jjtGetChild(0), "\\n "); assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section"); assertTextNode(child2.jjtGetChild(2), "\\n "); assertTextNode(rootElement.jjtGetChild(6), "\\n"); } /** * Verifies the default parsing behavior of the XML parser with namespaces * but not namespace aware. */ @Test public void testParsingNotNamespaceAware() { LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME) .getDefaultVersion().getLanguageVersionHandler(); XmlParserOptions parserOptions = new XmlParserOptions(); parserOptions.setNamespaceAware(false); Parser parser = xmlVersionHandler.getParser(parserOptions); Node document = parser.parse(null, new StringReader(XML_NAMESPACE_TEST)); assertNode(document, "document", 1); Node rootElement = document.jjtGetChild(0); assertNode(rootElement, "pmd:rootElement", 7, "xmlns:pmd", "http://pmd.sf.net"); Assert.assertNull(((XmlNode) rootElement).getNode().getNamespaceURI()); Assert.assertNull(((XmlNode) rootElement).getNode().getPrefix()); Assert.assertNull(((XmlNode) rootElement).getNode().getLocalName()); Assert.assertEquals("pmd:rootElement", ((XmlNode) rootElement).getNode().getNodeName()); assertTextNode(rootElement.jjtGetChild(0), "\\n "); assertNode(rootElement.jjtGetChild(1), "comment", 0); assertTextNode(rootElement.jjtGetChild(2), "\\n "); Node child1 = rootElement.jjtGetChild(3); assertNode(child1, "pmd:child1", 1, "test", "1"); assertTextNode(child1.jjtGetChild(0), "entity: &\\n "); assertTextNode(rootElement.jjtGetChild(4), "\\n "); Node child2 = rootElement.jjtGetChild(5); assertNode(child2, "pmd:child2", 3); assertTextNode(child2.jjtGetChild(0), "\\n "); assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section"); assertTextNode(child2.jjtGetChild(2), "\\n "); assertTextNode(rootElement.jjtGetChild(6), "\\n"); } /** * Verifies the parsing behavior of the XML parser with validation on. * * @throws UnsupportedEncodingException * error */ @Test public void testParsingWithValidation() throws UnsupportedEncodingException { LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME) .getDefaultVersion().getLanguageVersionHandler(); XmlParserOptions parserOptions = new XmlParserOptions(); parserOptions.setValidating(true); Parser parser = xmlVersionHandler.getParser(parserOptions); PrintStream oldErr = System.err; Locale oldLocale = Locale.getDefault(); try { ByteArrayOutputStream bos = new ByteArrayOutputStream(); System.setErr(new PrintStream(bos)); Locale.setDefault(Locale.ENGLISH); Node document = parser.parse(null, new StringReader(XML_INVALID_WITH_DTD)); Assert.assertNotNull(document); String output = bos.toString("UTF-8"); Assert.assertTrue(output.contains("Element type \"invalidChild\" must be declared.")); Assert.assertTrue(output.contains("The content of element type \"rootElement\" must match \"(child)\".")); Assert.assertEquals(2, document.jjtGetNumChildren()); Assert.assertEquals("invalidChild", String.valueOf(document.jjtGetChild(1).jjtGetChild(1))); } finally { System.setErr(oldErr); Locale.setDefault(oldLocale); } } @Test public void testWithProcessingInstructions() { String xml = "<?xml version=\"1.0\"?><?mypi?><!DOCTYPE testDoc [<!ENTITY myentity \"e\">]><!--Comment--><foo abc=\"abc\"><bar>TEXT</bar><![CDATA[cdata!]]>>&myentity;<</foo>"; LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME) .getDefaultVersion().getLanguageVersionHandler(); XmlParserOptions options = (XmlParserOptions) xmlVersionHandler.getDefaultParserOptions(); options.setExpandEntityReferences(false); Parser parser = xmlVersionHandler.getParser(options); Node document = parser.parse(null, new StringReader(xml)); Assert.assertNotNull(document); assertNode(document.jjtGetChild(0), "mypi", 0); assertLineNumbers(document.jjtGetChild(0), 1, 22, 1, 29); } private Node parseXml(String xml) { LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler(); XmlParserOptions options = (XmlParserOptions) xmlVersionHandler.getDefaultParserOptions(); Parser parser = xmlVersionHandler.getParser(options); Node document = parser.parse(null, new StringReader(xml)); return document; } @Test public void testBug1518() throws Exception { String xml = IOUtils.toString(XmlParserTest.class.getResourceAsStream("parsertests/bug1518.xml")); Node document = parseXml(xml); assertNotNull(document); } /** * Asserts a single node inclusive attributes. * * @param node * the node * @param toString * the to String representation to expect * @param childs * number of childs * @param atts * attributes - each object pair forms one attribute: first name, * then value. */ private void assertNode(Node node, String toString, int childs, Object... atts) { Assert.assertEquals(toString, String.valueOf(node)); Assert.assertEquals(childs, node.jjtGetNumChildren()); Iterator<Attribute> attributeIterator = ((XmlNode) node).getAttributeIterator(); if (atts != null) { for (int i = 0; i < atts.length; i += 2) { Assert.assertTrue(attributeIterator.hasNext()); String name = String.valueOf(atts[i]); Object value = atts[i + 1]; Attribute attribute = attributeIterator.next(); Assert.assertEquals(name, attribute.getName()); Assert.assertEquals(value, attribute.getValue()); } } Assert.assertFalse(attributeIterator.hasNext()); } /** * Assert a single text node. * * @param node * the node to check * @param text * the text to expect */ private void assertTextNode(Node node, String text) { assertTextNode(node, text, "text"); } /** * Assert a single text node. * * @param node * the node to check * @param text * the text to expect * @param toString * the to string representation */ private void assertTextNode(Node node, String text, String toString) { Assert.assertEquals(toString, String.valueOf(node)); Assert.assertEquals(0, node.jjtGetNumChildren()); Assert.assertEquals(text, StringUtil.escapeWhitespace(node.getImage())); Iterator<Attribute> attributeIterator = ((XmlNode) node).getAttributeIterator(); Assert.assertTrue(attributeIterator.hasNext()); Attribute attribute = attributeIterator.next(); Assert.assertEquals("Image", attribute.getName()); Assert.assertEquals(text, StringUtil.escapeWhitespace(attribute.getValue())); Assert.assertFalse(attributeIterator.hasNext()); } /** * Assert the line numbers of a node. * * @param node * the node * @param beginLine * the begin line * @param beginColumn * the begin column * @param endLine * the end line * @param endColumn * the end column */ private void assertLineNumbers(Node node, int beginLine, int beginColumn, int endLine, int endColumn) { Assert.assertEquals("begin line wrong", beginLine, node.getBeginLine()); Assert.assertEquals("begin column wrong", beginColumn, node.getBeginColumn()); Assert.assertEquals("end line wrong", endLine, node.getEndLine()); Assert.assertEquals("end column wrong", endColumn, node.getEndColumn()); } }