package net.sourceforge.dita4publishers.util; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.net.URLEncoder; import java.util.Collection; import java.util.Set; import java.util.StringTokenizer; import java.util.TreeSet; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.URIResolver; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.xpath.XPathFactory; import net.sourceforge.dita4publishers.api.bos.BosMemberValidationException; import net.sourceforge.dita4publishers.impl.bos.BosConstructionOptions; import org.apache.log4j.Logger; import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl; import org.apache.xerces.parsers.DOMParser; import org.apache.xerces.parsers.XIncludeAwareParserConfiguration; import org.apache.xerces.util.XMLCatalogResolver; import org.apache.xerces.xni.grammars.XMLGrammarPool; import org.apache.xerces.xni.parser.XMLParserConfiguration; import org.w3c.dom.Attr; import org.w3c.dom.Document; import org.w3c.dom.DocumentType; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import com.sun.org.apache.xml.internal.serialize.OutputFormat; import com.sun.org.apache.xml.internal.serialize.XMLSerializer; /** * Utilities for constructing W3C DOMs. */ public class DomUtil { static Logger logger = Logger.getLogger(DomUtil.class); /** * Constructs a DOM from the specified XML document file. * @param xmlFile Document entity of the file to load. * @param throwExceptionIfInvalid * @return The DOM Document object for the document. * @throws DomException * @throws FileNotFoundException * @throws BosMemberValidationException * @throws UnsupportedEncodingException */ public static Document getDomForDocument(File xmlFile, BosConstructionOptions domOptions, boolean throwExceptionIfInvalid) throws DomException, FileNotFoundException, BosMemberValidationException, UnsupportedEncodingException { InputSource source = new InputSource(new FileInputStream(xmlFile)); source.setSystemId(xmlFile.toURI().toString()); return getDomForSource(source, domOptions, throwExceptionIfInvalid); } public static Document getDomForDocument(File xmlFile, BosConstructionOptions domOptions) throws DomException, FileNotFoundException, BosMemberValidationException, UnsupportedEncodingException { return getDomForDocument(xmlFile, domOptions, false); } /** * Constructs a DOM from the specified XML document file. * @param xmlResource Document entity of the file to load. * @param bosOptions * @return The DOM Document object for the document. * @throws DomException * @throws IOException * @throws MalformedURLException * @throws BosMemberValidationException * @throws FileNotFoundException */ public static Document getDomForUri(URI xmlResource, BosConstructionOptions bosOptions) throws DomException, MalformedURLException, IOException, BosMemberValidationException { InputSource source = new InputSource(xmlResource.toURL().openStream()); source.setSystemId(xmlResource.toString()); return getDomForSource(source, bosOptions, false); } /** * Constructs a DOM from the specified XML document file. * @param stream InputStream containing the document data to be parsed. * @param bosOptions * @return The DOM Document object for the document. * @throws DomException * @throws BosMemberValidationException * @throws UnsupportedEncodingException */ public static Document getDomForStream(InputStream stream, BosConstructionOptions bosOptions) throws DomException, BosMemberValidationException, UnsupportedEncodingException { return getDomForSource(new InputSource(stream), bosOptions, false, true); } /** * Constructs a DOM from the specified XML document file. * @param stream InputStream containing the document data to be parsed. * @param bosOptions * @return The DOM Document object for the document. * @throws DomException * @throws BosMemberValidationException * @throws UnsupportedEncodingException */ public static Document getDomForStream(InputStream stream, BosConstructionOptions bosOptions, boolean validate) throws DomException, BosMemberValidationException, UnsupportedEncodingException { return getDomForSource(new InputSource(stream), bosOptions, false, validate); } /** * Constructs a DOM from the specified XML document file. * @param throwExceptionIfInvalid * @param source InputSource containing the data to be parsed. * @param bosOptions * @return The DOM Document object for the document. * @throws DomException * @throws BosMemberValidationException * @throws UnsupportedEncodingException */ public static Document getDomForSource(InputSource source, BosConstructionOptions bosOptions, boolean throwExceptionIfInvalid) throws DomException, BosMemberValidationException, UnsupportedEncodingException { return getDomForSource(source, bosOptions, throwExceptionIfInvalid, true); } /** * Constructs a DOM from the specified XML document file. * @param throwExceptionIfInvalid * @param source InputSource containing the data to be parsed. * @param bosOptions * @return The DOM Document object for the document. * @throws DomException * @throws BosMemberValidationException * @throws UnsupportedEncodingException */ public static Document getDomForSource(InputSource source, BosConstructionOptions bosOptions, boolean throwExceptionIfInvalid, boolean validate) throws DomException, BosMemberValidationException, UnsupportedEncodingException { URI docUri = null; try { String sysId = source.getSystemId(); if (sysId != null) { docUri = new URI(URLEncoder.encode(sysId, "utf-8")); } } catch (URISyntaxException e) { throw new DomException("Exception constructing URI from system ID \"" + source.getSystemId() + "\" for document source: " + e.getMessage(), e); } if (bosOptions.getDomCache().containsKey(docUri)) { logger.debug("getDomForSource(): Found DOM for \"" + source.getSystemId() + "\" in DOM cache, returning it."); return bosOptions.getDomCache().get(docUri); } logger.debug("getDomForSource(): Parsing input source \"" + source.getSystemId() + "\"..."); XMLCatalogResolver resolver = new XMLCatalogResolver(); String[] catalogs = bosOptions.getCatalogs(); String catalogList = System.getProperty("xml.catalog.files"); if (catalogList != null && !"".equals(catalogList)) { StringTokenizer tokens = new StringTokenizer(catalogList, ";"); if (tokens.hasMoreTokens()) { catalogs = new String[tokens.countTokens()]; int i = 0; while (tokens.hasMoreTokens()) { catalogs[i++] = tokens.nextToken(); } } } XMLGrammarPool grammarPool = bosOptions.getGrammarPool(); if (grammarPool == null) { grammarPool = GrammarPoolManager.getGrammarPool(); } resolver.setCatalogList(catalogs); DOMParser dp = null; try { XMLParserConfiguration config = new XIncludeAwareParserConfiguration(); config.setProperty("http://apache.org/xml/properties/internal/grammar-pool", grammarPool); dp = new org.apache.xerces.parsers.DOMParser(config); dp.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", true); dp.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false); dp.setFeature("http://xml.org/sax/features/validation", validate); dp.setProperty("http://apache.org/xml/properties/internal/entity-resolver", resolver); dp.parse(source); } catch (Exception e) { try { bosOptions.registerInvalidDocument(source.getSystemId()); if (throwExceptionIfInvalid) { throw new BosMemberValidationException("BOS member is not valid: " + source.getSystemId()); } } catch (URISyntaxException e1) { throw new RuntimeException("Unexpected URI syntax exception from system ID \"" + source.getSystemId() + "\": " + e1.getMessage()); } throw new DomException(e); } finally { // if (errorHandler.getGotError()) // try { // domOptions.registerInvalidDocument(source.getSystemId()); // if (throwExceptionIfInvalid) { // throw new BosMemberValidationException(0, "BOS member is not valid: " + source.getSystemId()); // } // } catch (URISyntaxException e1) { // throw new RuntimeException("Unexpected URI syntax exception from system ID \"" + source.getSystemId() + "\": " + e1.getMessage()); // } } Document doc = dp.getDocument(); if (docUri != null) { logger.debug("getDomForSource(): Adding document \"" + docUri.toString() + "\" to DOM cache."); bosOptions.getDomCache().put(docUri, doc); } logger.debug("getDomForSource(): Returning DOM for document \"" + source.getSystemId() + "\""); return doc; } /** * Given an element node and a namespace URI, returns the local prefix associated with that * namespace (that is, the nearest ancestor or self element that declares the namespace). * @param element The element to check. * @param nsURI The URI of the namespace whose prefix you want. * @return Returns the prefix or null if the namespace is not declared. */ public static String getNamespacePrefix(Element element, String nsURI) { NamedNodeMap atts = element.getAttributes(); for (int i = 0; i < atts.getLength(); i++) { Attr att = (Attr)(atts.item(i)); String prefix = att.getPrefix(); if (prefix != null && prefix.equals("xmlns")) { if (att.getValue().equals(nsURI)) { return att.getLocalName(); } } } return null; } /** * Given an element, returns the list of all unique namespace URIs used in the element * tree rooted at that element. * @param elem Element to calculate the namespaces used. */ public static Collection<String> getDocumentNamespaces(Element elem) { Set<String> namespaces = new TreeSet<String>(); getSubtreeNamespaces(elem, namespaces); return namespaces; } /** * Recursive method to get all the namespaces within a subtree of elements. * @param elem * @param namespaces */ public static void getSubtreeNamespaces(Element elem, Set<String> namespaces) { DomUtil.getElementNamespaces(elem, namespaces); if (elem.hasChildNodes()) { NodeList childs = elem.getChildNodes(); for (int i = 0; i < childs.getLength(); i++) { Node child = childs.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { getSubtreeNamespaces((Element)child, namespaces); } } } } /** * Gets the namespaces declared on the element and adds them to the specified set. * @param element The element to examine. * @param namespaces A set to which new namespace URIs will be added. */ public static void getElementNamespaces(Element element, Set<String> namespaces) { NamedNodeMap atts = element.getAttributes(); for (int i = 0; i < atts.getLength(); i++) { Attr att = (Attr)(atts.item(i)); String prefix = att.getPrefix(); if (prefix != null && prefix.equals("xmlns")) { String nsURI = att.getValue(); namespaces.add(nsURI); } else { if (att.getName().equals("xmlns")) { namespaces.add(att.getValue()); } } } } /** * Given a schemaLocation attribute value, replaces the URI for a given namespace * with the specified new URI. * @param originalAttVal * @param schemaUri * @param newSchemaLoc * @return */ public static String UpdateSchemaLocationValue(String originalAttVal, String schemaUri, String newSchemaLoc) { StringBuffer resultVal = new StringBuffer(); StringTokenizer tokenizer = new StringTokenizer(originalAttVal, " "); while (tokenizer.hasMoreTokens()) { String nsName = tokenizer.nextToken(); String schemaLoc = tokenizer.nextToken(); // Will throw a runtime exception if the attribute value is not a sequence of pairs. if (nsName.equals(schemaUri)) { schemaLoc = newSchemaLoc; } resultVal.append(nsName + " " + schemaLoc + " "); } return resultVal.toString(); } /** * Uses a null transform to serialize a DOM into an input stream using the default encoding. * @param doc Document to be serialized. * @return InputStream on the serialized bytes. * @throws Exception */ public static InputStream serializeToInputStream( Document doc) throws Exception { return serializeToInputStream(doc, null); } /** * Uses a null transform to serialize a DOM into an input stream. * @param doc Document to be serialized. * @param encoding Encoding to serialize to. If null, encoding is utf-8 * @return InputStream on the serialized bytes. * @throws Exception */ public static InputStream serializeToInputStream( Document doc, String encoding) throws Exception { if (encoding == null) encoding = "utf-8"; ByteArrayOutputStream bos = new ByteArrayOutputStream(); javax.xml.transform.Result rslt = new StreamResult(bos); serializeToResult(doc, encoding, rslt); return new ByteArrayInputStream(bos.toByteArray()); } /** * @param doc * @param encoding * @param bos * @param rslt * @return * @throws Exception */ public static void serializeToResult(Document doc, String encoding, javax.xml.transform.Result rslt) throws Exception { javax.xml.transform.Source src = new DOMSource(doc); try { Transformer transformer = getTransformerFactory().newTransformer(); transformer.setOutputProperty(OutputKeys.ENCODING, encoding); transformer.setOutputProperty (OutputKeys.OMIT_XML_DECLARATION, "no"); // Set a property on the transformer if the caller wants a doctype and // the node's owning document has the information if (doc.getDoctype() != null) { DocumentType doctype = doc.getDoctype(); if (doctype.getPublicId() != null) transformer.setOutputProperty (OutputKeys.DOCTYPE_PUBLIC, doctype.getPublicId()); if (doctype.getSystemId() != null) transformer.setOutputProperty (OutputKeys.DOCTYPE_SYSTEM, doctype.getSystemId()); } transformer.transform(src, rslt); } catch (TransformerException e) { throw new RuntimeException("Cannot serialize document: ", e); } } /** * Serialize document to XML string. * @param doc Document node. * @return XML string. */ public static String serializeToString( Document doc ) { if (doc == null) { return null; } else { StringWriter stringOut = new StringWriter(); //Writer will be a String; try { OutputFormat format = new OutputFormat(doc); //Serialize DOM format.setOmitXMLDeclaration(false); format.setOmitDocumentType(false); format.setIndenting(false); format.setPreserveSpace(true); format.setEncoding("UTF-8"); XMLSerializer serial = new XMLSerializer( stringOut, format ); serial.asDOMSerializer(); // As a DOM Serializer serial.serialize(doc); } catch (IOException e) {} return stringOut.toString(); } } /** * Gets a transformer factory using the default URI resolver. * @return transformer factory instance. * @throws RSuiteException */ public static TransformerFactory getTransformerFactory() throws Exception { return getTransformerFactory(null); } /** * Gets a transformer fractory configured using the specified URI resolver. * @param uriResolver URI resolver to use with the factory. If null, default URI resolver is used. * @return transformer factory instance. * @throws RSuiteException */ public static TransformerFactory getTransformerFactory(URIResolver uriResolver) throws Exception { TransformerFactory factory = new net.sf.saxon.TransformerFactoryImpl(); // TransformerFactory.newInstance(); // Replace the default URI resolver with a chained resolver // that calls our internal resolver first, and then falls // back to the secondary resolver (set from the original value). if (uriResolver != null) factory.setURIResolver(uriResolver); return factory; } public static Document getNewDom() throws ParserConfigurationException { DocumentBuilder builder = DocumentBuilderFactoryImpl.newInstance().newDocumentBuilder(); return builder.newDocument(); } public static XPathFactory getXPathFactory(){ return new net.sf.saxon.xpath.XPathFactoryImpl(); } }