/*
* Copyright 2013, 2014 Deutsche Nationalbibliothek
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.culturegraph.mf.metamorph.xml;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.ErrorListener;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import org.culturegraph.mf.commons.ResourceUtil;
import org.culturegraph.mf.framework.MetafactureException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
/**
 * Static utility for reading XML input into a DOM {@link Document}.
 * <p>
 * The input is parsed by a schema-validating, namespace- and XInclude-aware
 * SAX reader, passed through a chain of XML filters and finally copied into
 * a freshly created document. Every problem reported along the way
 * (including warnings) is rethrown as a {@link MetafactureException}.
 *
 * @author Markus Michael Geipel
 * @author Christoph Böhme
 *
 */
public final class DomLoader {

    private static final ErrorHandler SAX_ERROR_HANDLER =
            new SaxErrorHandler();

    private static final ErrorListener TRANSFORMER_ERROR_HANDLER =
            new TransformerErrorHandler();

    private DomLoader() {
        // Pure utility class; instantiation is a programming error.
        throw new AssertionError("No instances allowed");
    }

    /**
     * Parses {@code input} into a DOM document while validating it against
     * the given XSD schema.
     *
     * @param schemaFile location of the schema; resolved through
     *        {@link ResourceUtil#getUrl(String)}
     * @param input the XML to read
     * @return the resulting document, with all whitespace-only text nodes
     *         removed
     * @throws MetafactureException if the schema cannot be located or
     *         loaded, if a parser or transformer cannot be set up, or if
     *         the parser or transformer reports a warning or an error
     */
    public static Document parse(final String schemaFile,
            final InputSource input) {
        final Document target = createEmptyDocument();
        final XMLReader reader = createXmlFilterPipeline(schemaFile, target);
        final Source saxSource = new SAXSource(reader, input);
        final Result domResult = new DOMResult(target);
        process(saxSource, domResult);

        // Xerces decides which whitespace is ignorable based on a DTD,
        // not on the XSD schema. As no DTD is in use here, whitespace has
        // to be stripped from the finished DOM instead.
        //
        // Be aware that this drops every text node made up solely of
        // whitespace, not just the ignorable ones.
        removeEmptyTextNodes(target);
        return target;
    }

    /**
     * Creates a new document without any content.
     *
     * @return an empty DOM document
     * @throws MetafactureException if no document builder can be configured
     */
    private static Document createEmptyDocument() {
        final DocumentBuilderFactory builderFactory =
                DocumentBuilderFactory.newInstance();
        try {
            return builderFactory.newDocumentBuilder().newDocument();
        } catch (final ParserConfigurationException e) {
            throw new MetafactureException(e);
        }
    }

    /**
     * Assembles the chain of XML readers feeding the DOM creation. The
     * validating SAX reader is wrapped, from innermost to outermost, by
     * {@code LocationAnnotator}, {@code IgnorableWhitespaceFilter},
     * {@code CommentsFilter} and {@code CDataFilter}. The shared SAX error
     * handler is registered on the outermost element.
     *
     * @param schemaFile location of the schema used for validation
     * @param document document handed to the {@code LocationAnnotator}
     * @return the outermost reader of the chain
     */
    private static XMLReader createXmlFilterPipeline(final String schemaFile,
            final Document document) {
        final XMLReader saxReader = createSaxReader(loadSchema(schemaFile));
        final XMLReader withLocations =
                new LocationAnnotator(saxReader, document);
        final XMLReader withoutWhitespace =
                new IgnorableWhitespaceFilter(withLocations);
        final XMLReader withoutComments =
                new CommentsFilter(withoutWhitespace);
        final XMLReader outermost = new CDataFilter(withoutComments);
        outermost.setErrorHandler(SAX_ERROR_HANDLER);
        return outermost;
    }

    /**
     * Loads the W3C XML schema stored at {@code schemaFile}.
     *
     * @param schemaFile location of the schema
     * @return the compiled schema
     * @throws MetafactureException if the location cannot be resolved or
     *         the schema cannot be read
     */
    private static Schema loadSchema(final String schemaFile) {
        final SchemaFactory schemaFactory =
                SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
        final URL schemaUrl = getSchemaUrl(schemaFile);
        try {
            return schemaFactory.newSchema(schemaUrl);
        } catch (final SAXException e) {
            throw new MetafactureException(e);
        }
    }

    /**
     * Resolves the schema location to a URL.
     *
     * @param schemaFile location of the schema
     * @return the URL as returned by {@link ResourceUtil#getUrl(String)}
     * @throws MetafactureException if the lookup fails with a
     *         {@link MalformedURLException}
     */
    private static URL getSchemaUrl(final String schemaFile) {
        try {
            return ResourceUtil.getUrl(schemaFile);
        } catch (final MalformedURLException e) {
            throw new MetafactureException("'" + schemaFile + "' not found:", e);
        }
    }

    /**
     * Creates the SAX reader at the bottom of the filter chain. It validates
     * against {@code schema} and is namespace and XInclude aware.
     *
     * @param schema schema to validate the input against
     * @return a newly created reader
     * @throws MetafactureException if the parser cannot be created
     */
    private static XMLReader createSaxReader(final Schema schema) {
        final SAXParserFactory saxFactory = SAXParserFactory.newInstance();
        saxFactory.setSchema(schema);
        saxFactory.setNamespaceAware(true);
        saxFactory.setXIncludeAware(true);
        try {
            return saxFactory.newSAXParser().getXMLReader();
        } catch (final ParserConfigurationException | SAXException e) {
            throw new MetafactureException(e);
        }
    }

    /**
     * Copies {@code source} into {@code result} using a transformer created
     * without a stylesheet. The shared transformer error listener is
     * installed before the transformation starts.
     *
     * @param source where to read from
     * @param result where to write to
     * @throws MetafactureException if the transformer cannot be created or
     *         the transformation fails
     */
    private static void process(final Source source, final Result result) {
        try {
            // TransformerConfigurationException is a TransformerException,
            // so a single catch covers both creation and execution.
            final Transformer copier =
                    TransformerFactory.newInstance().newTransformer();
            copier.setErrorListener(TRANSFORMER_ERROR_HANDLER);
            copier.transform(source, result);
        } catch (final TransformerException e) {
            throw new MetafactureException(e);
        }
    }

    /**
     * Recursively deletes all text nodes below {@code node} whose trimmed
     * value is empty.
     *
     * @param node root of the subtree to clean up
     */
    private static void removeEmptyTextNodes(final Node node) {
        Node current = node.getFirstChild();
        while (current != null) {
            // Remember the successor first: it is no longer reachable
            // from a node that has been detached from its parent.
            final Node successor = current.getNextSibling();
            if (current.getNodeType() != Node.TEXT_NODE) {
                removeEmptyTextNodes(current);
            } else if (current.getNodeValue().trim().isEmpty()) {
                node.removeChild(current);
            }
            current = successor;
        }
    }

    /**
     * SAX error handler which turns every reported problem, warnings
     * included, into a {@link MetafactureException}.
     *
     */
    private static class SaxErrorHandler implements ErrorHandler {

        SaxErrorHandler() {
            // Avoid synthetic accessor methods
        }

        @Override
        public void warning(final SAXParseException exception)
                throws SAXException {
            rethrow(exception);
        }

        @Override
        public void error(final SAXParseException exception)
                throws SAXException {
            rethrow(exception);
        }

        @Override
        public void fatalError(final SAXParseException exception)
                throws SAXException {
            rethrow(exception);
        }

        private void rethrow(final SAXParseException cause) {
            throw new MetafactureException("Error parsing xml: " +
                    cause.getMessage(), cause);
        }

    }

    /**
     * Transformer error listener which turns every reported problem,
     * warnings included, into a {@link MetafactureException}.
     *
     */
    private static class TransformerErrorHandler implements ErrorListener {

        TransformerErrorHandler() {
            // Avoid synthetic accessor methods
        }

        @Override
        public void warning(final TransformerException exception)
                throws TransformerException {
            rethrow(exception);
        }

        @Override
        public void error(final TransformerException exception)
                throws TransformerException {
            rethrow(exception);
        }

        @Override
        public void fatalError(final TransformerException exception)
                throws TransformerException {
            rethrow(exception);
        }

        private void rethrow(final TransformerException cause) {
            throw new MetafactureException("Error during DOM creation: " +
                    cause.getMessage(), cause);
        }

    }

}