/**
* Copyright (C) 2010 Orbeon, Inc.
*
* This program is free software; you can redistribute it and/or modify it under the terms of the
* GNU Lesser General Public License as published by the Free Software Foundation; either version
* 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* The full text of the license is available at http://www.gnu.org/copyleft/lesser.html
*/
package org.orbeon.oxf.xml;
import orbeon.apache.xerces.impl.Constants;
import orbeon.apache.xerces.impl.XMLEntityManager;
import orbeon.apache.xerces.impl.XMLErrorReporter;
import orbeon.apache.xerces.xni.parser.XMLInputSource;
import org.apache.log4j.Logger;
import org.orbeon.oxf.common.OXFException;
import org.orbeon.oxf.common.ValidationException;
import org.orbeon.oxf.processor.URIProcessorOutputImpl;
import org.orbeon.oxf.processor.transformer.TransformerURIResolver;
import org.orbeon.oxf.resources.URLFactory;
import org.orbeon.oxf.util.StringUtils;
import org.orbeon.oxf.util.SequenceReader;
import org.orbeon.oxf.xml.dom4j.LocationData;
import org.orbeon.oxf.xml.xerces.XercesSAXParserFactoryImpl;
import org.w3c.dom.Document;
import org.xml.sax.*;
import javax.xml.parsers.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
public class XMLParsing {
/** Logger used by this class and by its nested resolver and error handler. */
private static final Logger logger = Logger.getLogger(XMLParsing.class);

/** Shared entity resolver installed on the XML readers configured by this class. */
public static final EntityResolver ENTITY_RESOLVER = new EntityResolver();

/** Shared error handler installed on the XML readers configured by this class. */
public static final ErrorHandler ERROR_HANDLER = new ErrorHandler();

/** Content handler given to the reader by isWellFormedXML(), where the SAX events themselves are not needed. */
private static final ContentHandler NULL_CONTENT_HANDLER = new XMLReceiverAdapter();

/** Assigned exactly once by the static initializer; also serves as the monitor guarding DocumentBuilder creation. */
private static final DocumentBuilderFactory documentBuilderFactory;

/** DocumentBuilder instances keyed by thread. Created lazily by getThreadDocumentBuilder(), hence not final. */
private static Map<Thread, DocumentBuilder> documentBuilders = null;

/** SAXParserFactory instances keyed by ParserConfiguration.getKey(); only touched from the synchronized getSAXParserFactory(). */
private static final Map<String, SAXParserFactory> parserFactories = new HashMap<String, SAXParserFactory>();
/**
 * Immutable bundle of the options used to configure an XML parser.
 */
public static class ParserConfiguration {

    /** Configuration with all three boolean options off and no URI references. */
    public static final ParserConfiguration PLAIN = new ParserConfiguration(false, false, false);

    /** Configuration with only the XInclude option on and no URI references. */
    public static final ParserConfiguration XINCLUDE_ONLY = new ParserConfiguration(false, true, false);

    /** Value of the "validating" option. */
    public final boolean validating;

    /** Value of the "XInclude" option; when set, inputSourceToSAX() routes events through an XIncludeReceiver. */
    public final boolean handleXInclude;

    /** Value of the "external entities" option (NOTE(review): interpreted by the parser factory, not visible here). */
    public final boolean externalEntities;

    /** URI references handed to the XInclude receiver; may be null. */
    public final URIProcessorOutputImpl.URIReferences uriReferences;

    /**
     * Create a configuration without URI references.
     *
     * @param validating        value of the "validating" option
     * @param handleXInclude    value of the "XInclude" option
     * @param externalEntities  value of the "external entities" option
     */
    public ParserConfiguration(boolean validating, boolean handleXInclude, boolean externalEntities) {
        this(validating, handleXInclude, externalEntities, null);
    }

    /**
     * Create a fully specified configuration.
     *
     * @param validating        value of the "validating" option
     * @param handleXInclude    value of the "XInclude" option
     * @param externalEntities  value of the "external entities" option
     * @param uriReferences     URI references, or null
     */
    public ParserConfiguration(boolean validating, boolean handleXInclude, boolean externalEntities, URIProcessorOutputImpl.URIReferences uriReferences) {
        this.uriReferences = uriReferences;
        this.externalEntities = externalEntities;
        this.handleXInclude = handleXInclude;
        this.validating = validating;
    }

    /**
     * Copy the boolean options of an existing configuration, substituting the URI references.
     *
     * @param parserConfiguration  configuration to copy the boolean options from
     * @param uriReferences        URI references for the new configuration, or null
     */
    public ParserConfiguration(ParserConfiguration parserConfiguration, URIProcessorOutputImpl.URIReferences uriReferences) {
        this(parserConfiguration.validating, parserConfiguration.handleXInclude, parserConfiguration.externalEntities, uriReferences);
    }

    /**
     * Build a three-character key made of '0'/'1' flags, in the order validating, XInclude, external entities.
     * The URI references do not take part in the key.
     *
     * @return the key, e.g. "010"
     */
    public String getKey() {
        final StringBuilder key = new StringBuilder(3);
        key.append(validating ? "1" : "0");
        key.append(handleXInclude ? "1" : "0");
        key.append(externalEntities ? "1" : "0");
        return key.toString();
    }
}
static {
try {
// Create factory
documentBuilderFactory = (DocumentBuilderFactory) Class.forName
("orbeon.apache.xerces.jaxp.DocumentBuilderFactoryImpl").newInstance();
// Configure factory
documentBuilderFactory.setNamespaceAware(true);
} catch (Exception e) {
throw new OXFException(e);
}
}
/** Not instantiable: this class only has static members. */
private XMLParsing() {
    // Nothing to do
}
/**
 * Create a new DocumentBuilder from the shared factory. The factory is locked while the builder is created.
 *
 * WARNING: Check how this is used in this file first before calling!
 *
 * @return a new DocumentBuilder
 * @throws OXFException if the factory cannot create a builder
 */
private static DocumentBuilder newDocumentBuilder() {
    try {
        synchronized (documentBuilderFactory) {
            return documentBuilderFactory.newDocumentBuilder();
        }
    } catch (ParserConfigurationException e) {
        throw new OXFException(e);
    }
}
/**
 * Create a new, uncached SAX parser factory for the given configuration.
 *
 * WARNING: Use this only in special cases. In general, use newSAXParser().
 *
 * @param parserConfiguration parser configuration
 * @return a new SAXParserFactory
 * @throws OXFException if the factory cannot be created
 */
public static SAXParserFactory createSAXParserFactory(ParserConfiguration parserConfiguration) {
    final SAXParserFactory factory;
    try {
        factory = new XercesSAXParserFactoryImpl(parserConfiguration);
    } catch (Exception e) {
        throw new OXFException(e);
    }
    return factory;
}
/**
 * Get a cached SAXParserFactory for the given configuration, creating and caching it on first request.
 *
 * Factories are cached under ParserConfiguration.getKey(), which is derived from the three boolean options only.
 *
 * @param parserConfiguration parser configuration
 * @return the SAXParserFactory
 */
public static synchronized SAXParserFactory getSAXParserFactory(ParserConfiguration parserConfiguration) {
    final String cacheKey = parserConfiguration.getKey();
    SAXParserFactory factory = parserFactories.get(cacheKey);
    if (factory == null) {
        // First request for this combination of options
        factory = createSAXParserFactory(parserConfiguration);
        parserFactories.put(cacheKey, factory);
    }
    return factory;
}
/**
 * Create a new SAXParser from the cached factory matching the given configuration.
 *
 * @param parserConfiguration parser configuration
 * @return the SAXParser
 * @throws OXFException if the factory or the parser cannot be obtained
 */
public static synchronized SAXParser newSAXParser(ParserConfiguration parserConfiguration) {
    try {
        final SAXParserFactory factory = getSAXParserFactory(parserConfiguration);
        return factory.newSAXParser();
    } catch (Exception e) {
        throw new OXFException(e);
    }
}
/**
 * Create a new XMLReader for the given configuration, with the shared entity resolver and error handler installed.
 *
 * @param parserConfiguration parser configuration
 * @return the configured XMLReader
 * @throws OXFException if the reader cannot be obtained or configured
 */
public static XMLReader newXMLReader(ParserConfiguration parserConfiguration) {
    final SAXParser parser = newSAXParser(parserConfiguration);
    final XMLReader reader;
    try {
        reader = parser.getXMLReader();
        reader.setEntityResolver(ENTITY_RESOLVER);
        reader.setErrorHandler(ERROR_HANDLER);
    } catch (Exception e) {
        throw new OXFException(e);
    }
    return reader;
}
/**
 * Given an input stream, return a reader. This performs encoding detection as per the XML spec, delegating to
 * the Xerces entity manager. Caller must close the resulting Reader when done.
 *
 * @param inputStream InputStream to process
 * @return Reader initialized with the proper encoding
 * @throws IOException if Xerces fails while setting up the entity
 */
public static Reader getReaderFromXMLInputStream(InputStream inputStream) throws IOException {
    // Wrap the stream in a Xerces input source; no ids, base URI or encoding are provided
    final XMLInputSource xercesSource = new XMLInputSource(null, null, null, inputStream, null);

    final XMLEntityManager manager = new XMLEntityManager();
    // An error reporter must be provided, otherwise the entity manager fails with an NPE
    final String errorReporterProperty = Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
    manager.setProperty(errorReporterProperty, new XMLErrorReporter());

    // The call returns the encoding, which is not needed directly: the entity's reader is what we want
    manager.setupCurrentEntity("[xml]", xercesSource, false, true);
    return manager.getCurrentEntity().reader;
}
/**
 * Entity resolver which opens the entity's system id as a URL created through URLFactory.
 *
 * A warning is logged whenever the entity is fetched over HTTP or HTTPS, so that external network access
 * triggered by XML parsing is visible in the logs.
 */
public static class EntityResolver implements org.xml.sax.EntityResolver {
    /**
     * Resolve an entity by opening a stream on its system id.
     *
     * @param publicId  public id of the entity, may be null
     * @param systemId  system id of the entity, used as the URL to open
     * @return an InputSource carrying both ids and the opened byte stream
     * @throws SAXException declared by the interface
     * @throws IOException  if the URL cannot be created or opened
     */
    public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
        final InputSource is = new InputSource();
        is.setSystemId(systemId);
        is.setPublicId(publicId);
        final URL url = URLFactory.createURL(systemId);
        // Would be nice to support XML Catalogs or similar here. See:
        // http://xerces.apache.org/xerces2-j/faq-xcatalogs.html
        final String protocol = url.getProtocol();
        final boolean isHttps = "https".equals(protocol);
        if (isHttps || "http".equals(protocol)) {
            // HTTPS is just as much an external network access as HTTP, so report both
            logger.warn("XML entity resolver for public id: " + publicId + " is accessing external entity via "
                    + (isHttps ? "HTTPS" : "HTTP") + ": " + url.toExternalForm());
        }
        is.setByteStream(url.openStream());
        return is;
    }
}
/**
 * Error handler which logs warnings and recoverable errors at info level, and turns fatal errors into a
 * ValidationException carrying the location of the problem.
 */
public static class ErrorHandler implements org.xml.sax.ErrorHandler {

    /** Log the warning and let parsing continue. */
    public void warning(SAXParseException exception) throws SAXException {
        logger.info("Warning: " + exception);
    }

    /** Log the error and let parsing continue. */
    public void error(SAXParseException exception) throws SAXException {
        // NOTE: We used to throw here, but we probably shouldn't.
        logger.info("Error: " + exception);
    }

    /** Abort parsing by throwing a ValidationException built from the parse exception. */
    public void fatalError(SAXParseException exception) throws SAXException {
        final String message = "Fatal error: " + exception.getMessage();
        final LocationData location = new LocationData(exception);
        throw new ValidationException(message, location);
    }
}
/**
 * Create a new, empty DOM document using the current thread's DocumentBuilder.
 *
 * @return the new Document
 */
public static Document createDocument() {
    final DocumentBuilder builder = getThreadDocumentBuilder();
    return builder.newDocument();
}
/**
 * Parse an XML string into a DOM document using the current thread's DocumentBuilder.
 *
 * @param xml XML string
 * @return the parsed Document
 * @throws OXFException if parsing fails
 */
public static Document stringToDOM(String xml) {
    final DocumentBuilder builder = getThreadDocumentBuilder();
    final InputSource source = new InputSource(new StringReader(xml));
    try {
        return builder.parse(source);
    } catch (SAXException e) {
        throw new OXFException(e);
    } catch (IOException e) {
        throw new OXFException(e);
    }
}
/**
 * Parse a string into SAX events. If the string is empty or only contains white space, output an empty document
 * (a startDocument() immediately followed by an endDocument()).
 *
 * @param xml                   XML string
 * @param urlString             URL of the document, or null
 * @param xmlReceiver           receiver to output to
 * @param parserConfiguration   parser configuration
 * @param handleLexical         whether the XML parser must output SAX LexicalHandler events, including comments
 */
public static void stringToSAX(String xml, String urlString, XMLReceiver xmlReceiver, ParserConfiguration parserConfiguration, boolean handleLexical) {
    final boolean hasContent = StringUtils.trimAllToEmpty(xml).length() != 0;
    if (hasContent) {
        readerToSAX(new StringReader(xml), urlString, xmlReceiver, parserConfiguration, handleLexical);
        return;
    }
    // Nothing to parse: just send an empty document
    try {
        xmlReceiver.startDocument();
        xmlReceiver.endDocument();
    } catch (SAXException e) {
        throw new OXFException(e);
    }
}
/**
 * Read a URL into SAX events. The stream opened on the URL is closed once parsing completes or fails.
 *
 * @param urlString             URL of the document
 * @param xmlReceiver           receiver to output to
 * @param parserConfiguration   parser configuration
 * @param handleLexical         whether the XML parser must output SAX LexicalHandler events, including comments
 * @throws OXFException if the URL cannot be created, opened or closed
 */
public static void urlToSAX(String urlString, XMLReceiver xmlReceiver, ParserConfiguration parserConfiguration, boolean handleLexical) {
    try {
        final InputStream stream = URLFactory.createURL(urlString).openStream();
        try {
            final InputSource source = new InputSource(stream);
            source.setSystemId(urlString);
            inputSourceToSAX(source, xmlReceiver, parserConfiguration, handleLexical);
        } finally {
            stream.close();
        }
    } catch (IOException e) {
        throw new OXFException(e);
    }
}
/**
 * Read an input stream into SAX events. The stream is not closed by this method.
 *
 * @param inputStream           stream containing the XML document
 * @param urlString             URL of the document, used as system id, or null
 * @param xmlReceiver           receiver to output to
 * @param parserConfiguration   parser configuration
 * @param handleLexical         whether the XML parser must output SAX LexicalHandler events, including comments
 */
public static void inputStreamToSAX(InputStream inputStream, String urlString, XMLReceiver xmlReceiver, ParserConfiguration parserConfiguration, boolean handleLexical) {
    final InputSource source = new InputSource(inputStream);
    source.setSystemId(urlString);
    inputSourceToSAX(source, xmlReceiver, parserConfiguration, handleLexical);
}
/**
 * Read a character stream into SAX events. The reader is not closed by this method.
 *
 * @param reader                reader providing the XML document
 * @param urlString             URL of the document, used as system id, or null
 * @param xmlReceiver           receiver to output to
 * @param parserConfiguration   parser configuration
 * @param handleLexical         whether the XML parser must output SAX LexicalHandler events, including comments
 */
public static void readerToSAX(Reader reader, String urlString, XMLReceiver xmlReceiver, ParserConfiguration parserConfiguration, boolean handleLexical) {
    final InputSource source = new InputSource(reader);
    source.setSystemId(urlString);
    inputSourceToSAX(source, xmlReceiver, parserConfiguration, handleLexical);
}
/**
 * Parse an InputSource into SAX events sent to the given receiver.
 *
 * When the configuration asks for XInclude handling, the parser itself is created with XInclude turned off and
 * the events are instead routed through an XIncludeReceiver placed in front of the caller's receiver. The URI
 * resolver created for that receiver is destroyed once parsing is over.
 *
 * @param inputSource           source to parse
 * @param xmlReceiver           receiver to output to
 * @param parserConfiguration   parser configuration
 * @param handleLexical         whether the receiver must also be registered as SAX LexicalHandler
 * @throws ValidationException if the parser reports a SAXParseException
 * @throws OXFException        for any other failure
 */
private static void inputSourceToSAX(InputSource inputSource, XMLReceiver xmlReceiver, ParserConfiguration parserConfiguration, boolean handleLexical) {
    final ParserConfiguration effectiveConfiguration;
    final XMLReceiver effectiveReceiver;
    final TransformerURIResolver xincludeResolver;
    if (!parserConfiguration.handleXInclude) {
        // Plain case: parse straight into the caller's receiver
        effectiveConfiguration = parserConfiguration;
        effectiveReceiver = xmlReceiver;
        xincludeResolver = null;
    } else {
        // Insert XInclude processor: same options, but with XInclude handling disabled on the parser
        effectiveConfiguration = new ParserConfiguration(parserConfiguration.validating, false, parserConfiguration.externalEntities, parserConfiguration.uriReferences);
        xincludeResolver = new TransformerURIResolver(ParserConfiguration.PLAIN);
        effectiveReceiver = new XIncludeReceiver(null, xmlReceiver, effectiveConfiguration.uriReferences, xincludeResolver);
    }

    try {
        final XMLReader reader = newSAXParser(effectiveConfiguration).getXMLReader();
        reader.setContentHandler(effectiveReceiver);
        if (handleLexical) {
            reader.setProperty(XMLConstants.SAX_LEXICAL_HANDLER, effectiveReceiver);
        }
        reader.setEntityResolver(ENTITY_RESOLVER);
        reader.setErrorHandler(ERROR_HANDLER);
        reader.parse(inputSource);
    } catch (SAXParseException e) {
        throw new ValidationException(e.getMessage(), new LocationData(e));
    } catch (Exception e) {
        throw new OXFException(e);
    } finally {
        if (xincludeResolver != null) {
            xincludeResolver.destroy();
        }
    }
}
/**
 * Return whether the given string contains well-formed XML.
 *
 * The string is run through a plain (non-validating, no XInclude) parser whose events are discarded; any error
 * or fatal error reported by the parser, or any other exception, means the string is not well-formed.
 *
 * @param xmlString string to check
 * @return true if and only if the given string contains well-formed XML
 */
public static boolean isWellFormedXML(String xmlString) {
    // Empty string is never well-formed XML
    if (StringUtils.trimAllToEmpty(xmlString).length() == 0) {
        return false;
    }

    boolean wellFormed;
    try {
        final XMLReader reader = newSAXParser(ParserConfiguration.PLAIN).getXMLReader();
        reader.setContentHandler(NULL_CONTENT_HANDLER);
        reader.setEntityResolver(ENTITY_RESOLVER);

        // Errors and fatal errors abort the parse; warnings are ignored
        final org.xml.sax.ErrorHandler strictHandler = new org.xml.sax.ErrorHandler() {
            public void warning(SAXParseException exception) throws SAXException {
            }
            public void error(SAXParseException exception) throws SAXException {
                throw exception;
            }
            public void fatalError(SAXParseException exception) throws SAXException {
                throw exception;
            }
        };
        reader.setErrorHandler(strictHandler);

        reader.parse(new InputSource(new StringReader(xmlString)));
        wellFormed = true;
    } catch (Exception e) {
        // Ideally we would like the parser to not throw as this is time-consuming, but not sure how to achieve that
        wellFormed = false;
    }
    return wellFormed;
}
/**
 * Return the DocumentBuilder associated with the current thread, creating it on first use.
 *
 * One DocumentBuilder is kept per thread so that callers never hold a lock while using the builder (parse() for
 * example may take a lot of time) and so that DocumentBuilder instances are not created all the time. Since
 * typically in an app server we work with a thread pool, not too many instances of DocumentBuilder should be
 * created.
 *
 * Only the map lookup is synchronized; the returned builder is used outside of the lock. Every access to the map
 * goes through the lock, because the map is a plain, unsynchronized Map: reading it while another thread is
 * inserting into it is a data race. The map is created as a WeakHashMap so that the entry of a thread which has
 * terminated can be garbage-collected instead of being retained for the lifetime of this class.
 *
 * @return the current thread's DocumentBuilder
 */
private static DocumentBuilder getThreadDocumentBuilder() {
    final Thread thread = Thread.currentThread();
    synchronized (documentBuilderFactory) {
        if (documentBuilders == null) {
            // Weak keys: do not keep terminated threads (and their builders) alive
            documentBuilders = new java.util.WeakHashMap<Thread, DocumentBuilder>();
        }
        DocumentBuilder documentBuilder = documentBuilders.get(thread);
        if (documentBuilder == null) {
            documentBuilder = newDocumentBuilder();
            documentBuilders.put(thread, documentBuilder);
        }
        return documentBuilder;
    }
}
/**
 * Parse an XML fragment provided by a reader and send the resulting SAX events to the receiver.
 *
 * The fragment is wrapped between "&lt;root&gt;" and "&lt;/root&gt;" so that it can be parsed as a document; the
 * wrapper element and the document start/end events are not forwarded to the receiver.
 *
 * @param reader        reader providing the fragment
 * @param xmlReceiver   receiver to output to
 * @throws SAXException if the parser reports an error
 * @throws OXFException if an I/O error occurs
 */
public static void parseDocumentFragment(Reader reader, XMLReceiver xmlReceiver) throws SAXException {
    try {
        final XMLReader fragmentParser = newSAXParser(ParserConfiguration.PLAIN).getXMLReader();
        fragmentParser.setContentHandler(new XMLFragmentReceiver(xmlReceiver));

        // Opening tag, caller's content, closing tag, read as one continuous stream
        final ArrayList<Reader> parts = new ArrayList<Reader>(3);
        parts.add(new StringReader("<root>"));
        parts.add(reader);
        parts.add(new StringReader("</root>"));
        final InputSource wrappedSource = new InputSource(new SequenceReader(parts.iterator()));

        fragmentParser.parse(wrappedSource);
    } catch (IOException e) {
        throw new OXFException(e);
    }
}
/**
 * Parse an XML fragment provided as a string and send the resulting SAX events to the receiver.
 *
 * A fragment containing neither '&lt;' nor '&amp;' is sent directly as a single characters() event. Otherwise it
 * is wrapped in a "root" element and parsed; the wrapper element and the document start/end events are not
 * forwarded to the receiver.
 *
 * @param fragment      fragment to parse
 * @param xmlReceiver   receiver to output to
 * @throws SAXException if the parser or the receiver reports an error
 * @throws OXFException if an I/O error occurs
 */
public static void parseDocumentFragment(String fragment, XMLReceiver xmlReceiver) throws SAXException {
    final boolean looksLikeText = fragment.indexOf('<') == -1 && fragment.indexOf('&') == -1;
    if (looksLikeText) {
        // Optimization when fragment looks like text
        final char[] chars = fragment.toCharArray();
        xmlReceiver.characters(chars, 0, chars.length);
        return;
    }

    try {
        final XMLReader fragmentParser = newSAXParser(ParserConfiguration.PLAIN).getXMLReader();
        fragmentParser.setContentHandler(new XMLFragmentReceiver(xmlReceiver));
        final String wrapped = "<root>" + fragment + "</root>";
        fragmentParser.parse(new InputSource(new StringReader(wrapped)));
    } catch (IOException e) {
        throw new OXFException(e);
    }
}
/**
 * Receiver which hides the outermost element and the document start/end events from the wrapped receiver, so
 * that only the content of the outermost element is forwarded.
 */
private static class XMLFragmentReceiver extends ForwardingXMLReceiver {

    /** Number of elements currently open, the outermost element included. */
    private int depth = 0;

    public XMLFragmentReceiver(XMLReceiver xmlReceiver) {
        super(xmlReceiver);
    }

    /** Swallowed: the document start is not forwarded. */
    public void startDocument() throws SAXException {
    }

    /** Swallowed: the document end is not forwarded. */
    public void endDocument() throws SAXException {
    }

    /** Forward the event unless it opens the outermost element. */
    public void startElement(String uri, String localname, String qName, Attributes attributes) throws SAXException {
        depth += 1;
        final boolean isOutermost = depth <= 1;
        if (!isOutermost) {
            super.startElement(uri, localname, qName, attributes);
        }
    }

    /** Forward the event unless it closes the outermost element. */
    public void endElement(String uri, String localname, String qName) throws SAXException {
        depth -= 1;
        final boolean isOutermost = depth <= 0;
        if (!isOutermost) {
            super.endElement(uri, localname, qName);
        }
    }
}
}