/*
 * Copyright 2016 Christoph Böhme
 *
 * Licensed under the Apache License, Version 2.0 the "License";
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.culturegraph.mf.metamorph.xml;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.FileNotFoundException;
import java.net.MalformedURLException;
import java.net.URL;

import org.culturegraph.mf.commons.ResourceUtil;
import org.culturegraph.mf.framework.MetafactureException;
import org.junit.Ignore;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/**
 * Tests for class {@link DomLoader}.
 *
 * <p>Every test parses one fixture document located below
 * {@code BASE_PATH} against the schema named by {@code SCHEMA_FILE} and
 * then inspects the resulting DOM.
 *
 * @author Christoph Böhme
 *
 */
public final class DomLoaderTest {

	private static final String BASE_PATH =
			"org/culturegraph/mf/metamorph/xml/dom-loader/";
	private static final String SCHEMA_FILE = BASE_PATH + "test-schema.xsd";

	/**
	 * Parsing a valid document must yield a DOM. DOM creation is considered
	 * successful when the document's root element shows up in the result.
	 */
	@Test
	public void shouldCreateDOM()
			throws FileNotFoundException, MalformedURLException {
		final Document dom = parseResource("should-create-dom.xml");

		final Node root = dom.getDocumentElement();
		assertEquals(Node.ELEMENT_NODE, root.getNodeType());
		assertEquals("test-schema", root.getNodeName());
	}

	/**
	 * The fixture contains an element which the test schema does not allow.
	 * With validation in place the parser is expected to fail with a
	 * {@link MetafactureException} while parsing the document.
	 */
	@Test(expected=MetafactureException.class)
	public void shouldValidateInputAgainstSchema()
			throws FileNotFoundException, MalformedURLException {
		parseResource("should-validate-input-against-schema.xml");
	}

	/**
	 * Text nodes consisting of whitespace only must not end up in the DOM.
	 */
	@Test
	public void domShouldNotContainWhitespaceOnlyTextNodes()
			throws FileNotFoundException, MalformedURLException {
		final Document dom = parseResource(
				"dom-should-not-contain-whitespace-only-text-nodes.xml");

		// Below the root element only the element node of the
		// <string-element> child may remain; no whitespace text nodes:
		final NodeList rootChildren = dom.getDocumentElement().getChildNodes();
		assertEquals(1, rootChildren.getLength());
		assertEquals(Node.ELEMENT_NODE, rootChildren.item(0).getNodeType());

		// With the whitespace removed the <string-element> itself has
		// no child nodes left:
		final NodeList elementChildren = rootChildren.item(0).getChildNodes();
		assertEquals(0, elementChildren.getLength());
	}

	/**
	 * Comments must be dropped. The fixture's root element holds nothing
	 * but a comment, hence the root node must be childless in the DOM.
	 */
	@Test
	public void domShouldNotContainComments()
			throws FileNotFoundException, MalformedURLException {
		final Document dom = parseResource("dom-should-not-contain-comments.xml");

		final NodeList rootChildren = dom.getDocumentElement().getChildNodes();
		assertEquals(0, rootChildren.getLength());
	}

	/**
	 * A CDATA section and the PCDATA text surrounding it must be coalesced
	 * into one single text node.
	 */
	@Test
	public void shouldConvertAndAttachCDataNodesToTextNodes()
			throws FileNotFoundException, MalformedURLException {
		final Document dom = parseResource(
				"should-convert-and-attach-cdata-nodes-to-text-nodes.xml");

		final Node firstElement = dom.getDocumentElement().getFirstChild();
		final NodeList textNodes = firstElement.getChildNodes();
		assertEquals(1, textNodes.getLength());

		final Node mergedText = textNodes.item(0);
		assertEquals(Node.TEXT_NODE, mergedText.getNodeType());
		assertEquals("pcdata-cdata-pcdata", mergedText.getNodeValue());
	}

	/**
	 * The fixture consists of an xinclude statement only; the included file
	 * provides an element of type string-element. If inclusion worked, a
	 * node for that {@code <string-element>} is found in the DOM.
	 */
	@Test
	public void shouldBeXIncludeAware()
			throws FileNotFoundException, MalformedURLException {
		final Document dom = parseResource("should-be-xinclude-aware1.xml");

		final Node included = dom.getDocumentElement().getFirstChild();
		assertEquals(Node.ELEMENT_NODE, included.getNodeType());
		assertEquals("string-element", included.getNodeName());
	}

	/**
	 * Element nodes must carry a {@link Location} as user data which
	 * names the source file and the start and end positions of the element.
	 */
	@Test
	public void shouldAnnotateDomWithLocationInformation()
			throws FileNotFoundException, MalformedURLException {
		final String fixture = "should-annotate-dom-with-location-information.xml";
		final Document dom = parseResource(fixture);

		assertLocation(dom.getDocumentElement(), fixture, 3, 57, 5, 15);
		assertLocation(dom.getDocumentElement().getFirstChild(), fixture,
				4, 18, 4, 39);
	}

	// This test case does not currently succeed on openJDK (version 1.7.0.60)
	// due to a bug in the Xerces implementation used by the JDK. This bug was
	// fixed in release 2.9.1 of Xerces. A bug report for updating openJDK
	// exists: https://bugs.openjdk.java.net/browse/JDK-8038043
	@Ignore
	@Test
	public void shouldAnnotateIncludedFilesCorrectly()
			throws FileNotFoundException, MalformedURLException {
		final String prefix = "should-annotate-included-files-correctly";
		final String includingFile = prefix + "1.xml";
		final Document dom = parseResource(includingFile);

		// The root element stems from the including file ...
		assertLocation(dom.getDocumentElement(), includingFile, 4, 46, 7, 15);
		// ... whereas its first child is expected to be located in the
		// second file:
		assertLocation(dom.getDocumentElement().getFirstChild(),
				prefix + "2.xml", 3, 62, 3, 62);
	}

	/**
	 * Parses the fixture {@code resource} (a file name relative to
	 * {@code BASE_PATH}) with {@link DomLoader} against the test schema.
	 */
	private static Document parseResource(final String resource)
			throws FileNotFoundException, MalformedURLException {
		return DomLoader.parse(SCHEMA_FILE, openStream(resource));
	}

	/**
	 * Checks the {@link Location} stored as user data on {@code node}: its
	 * system id must end with {@code expectedFile} and its element start and
	 * end positions must match the given line and column numbers.
	 */
	private static void assertLocation(final Node node,
			final String expectedFile, final int startLine,
			final int startColumn, final int endLine, final int endColumn) {
		final Location location =
				(Location) node.getUserData(Location.USER_DATA_ID);
		assertTrue(location.getSystemId().endsWith(expectedFile));
		assertEquals(startLine, location.getElementStart().getLineNumber());
		assertEquals(startColumn, location.getElementStart().getColumnNumber());
		assertEquals(endLine, location.getElementEnd().getLineNumber());
		assertEquals(endColumn, location.getElementEnd().getColumnNumber());
	}

	/**
	 * Creates an {@link InputSource} whose system id is the external form of
	 * the URL that {@link ResourceUtil} resolves for the given fixture.
	 */
	private static InputSource openStream(final String resource)
			throws FileNotFoundException, MalformedURLException {
		final URL url = ResourceUtil.getUrl(BASE_PATH + resource);
		final String systemId = url.toExternalForm();
		return new InputSource(systemId);
	}

}