/** * Copyright (c) 2009 Juwi MacMillan Group GmbH * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.tizzit.util.xml; import java.util.Stack; import org.w3c.dom.*; import org.xml.sax.*; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.Locator; import org.xml.sax.ext.LexicalHandler; /** * This class takes SAX events (in addition to some extra events that SAX doesn't handle yet) and adds the result to a * document or document fragment. * * @xsl.usage general */ public class DOMBuilder implements ContentHandler, LexicalHandler { /** Root document */ private Document doc; /** Current node */ private Node currNode = null; /** First node of document fragment or null if not a DocumentFragment */ private DocumentFragment docFrag = null; /** Vector of element nodes */ private Stack elemStack = new Stack(); /** * DOMBuilder instance constructor... it will add the DOM nodes to the document fragment. * * @param doc Root document * @param node Current node */ public DOMBuilder(Document doc, Node node) { this.doc = doc; this.currNode = node; } /** * DOMBuilder instance constructor... it will add the DOM nodes to the document fragment. * * @param doc Root document * @param docFrag Document fragment */ public DOMBuilder(Document doc, DocumentFragment docFrag) { this.doc = doc; this.docFrag = docFrag; } /** * DOMBuilder instance constructor... it will add the DOM nodes to the document. * * @param doc Root document */ public DOMBuilder(Document doc) { this.doc = doc; } /** * Get the root node of the DOM being created. This is either a Document or a DocumentFragment. * * @return The root document or document fragment if not null */ public Node getRootNode() { return (null != docFrag) ? (Node) docFrag : (Node) getDoc(); } /** * Get the node currently being processed. * * @return the current node being processed */ public Node getCurrentNode() { return this.currNode; } /** * Return null since there is no Writer for this class. * * @return null */ public java.io.Writer getWriter() { return null; } /** * Append a node to the current container. * * @param newNode New node to append */ protected void append(Node newNode) throws org.xml.sax.SAXException { Node currentNode = currNode; if (null != currentNode) { currentNode.appendChild(newNode); } else if (null != docFrag) { docFrag.appendChild(newNode); } else { boolean ok = true; short type = newNode.getNodeType(); if (type == Node.TEXT_NODE) { String data = newNode.getNodeValue(); if ((null != data) && (data.trim().length() > 0)) { throw new org.xml.sax.SAXException("ER_CANT_OUTPUT_TEXT_BEFORE_DOC"); //"Warning: can't output text before document element! // Ignoring..."); } ok = false; } else if (type == Node.ELEMENT_NODE) { if (getDoc().getDocumentElement() != null) { throw new org.xml.sax.SAXException("ER_CANT_HAVE_MORE_THAN_ONE_ROOT"); //"Can't have more than one root on a DOM!"); } } if (ok) getDoc().appendChild(newNode); } } /** * Receive an object for locating the origin of SAX document events. * <p> * SAX parsers are strongly encouraged (though not absolutely required) to supply a locator: if it does so, it must * supply the locator to the application by invoking this method before invoking any of the other methods in the * ContentHandler interface. * </p> * <p> * The locator allows the application to determine the end position of any document-related event, even if the * parser is not reporting an error. Typically, the application will use this information for reporting its own * errors (such as character content that does not match an application's business rules). The information returned * by the locator is probably not sufficient for use with a search engine. * </p> * <p> * Note that the locator will return correct information only during the invocation of the events in this interface. * The application should not attempt to use it at any other time. * </p> * * @param locator An object that can return the location of any SAX document event. * @see org.xml.sax.Locator */ public void setDocumentLocator(Locator locator) { // No action for the moment. } /** * Receive notification of the beginning of a document. * <p> * The SAX parser will invoke this method only once, before any other methods in this interface or in DTDHandler * (except for setDocumentLocator). * </p> */ public void startDocument() throws org.xml.sax.SAXException { // No action for the moment. } /** * Receive notification of the end of a document. * <p> * The SAX parser will invoke this method only once, and it will be the last method invoked during the parse. The * parser shall not invoke this method until it has either abandoned parsing (because of an unrecoverable error) or * reached the end of input. * </p> */ public void endDocument() throws org.xml.sax.SAXException { // No action for the moment. } /** * Receive notification of the beginning of an element. * <p> * The Parser will invoke this method at the beginning of every element in the XML document; there will be a * corresponding endElement() event for every startElement() event (even when the element is empty). All of the * element's content will be reported, in order, before the corresponding endElement() event. * </p> * <p> * If the element name has a namespace prefix, the prefix will still be attached. Note that the attribute list * provided will contain only attributes with explicit values (specified or defaulted): #IMPLIED attributes will be * omitted. * </p> * * @param ns The namespace of the node * @param localName The local part of the qualified name * @param name The element name. * @param atts The attributes attached to the element, if any. * @see #endElement * @see org.xml.sax.Attributes */ public void startElement(String ns, String localName, String name, Attributes atts) throws org.xml.sax.SAXException { Element elem; // Note that the namespace-aware call must be used to correctly // construct a Level 2 DOM, even for non-namespaced nodes. if ((null == ns) || (ns.length() == 0)) { elem = getDoc().createElementNS(null, name); } else { elem = getDoc().createElementNS(ns, name); } append(elem); try { int nAtts = atts.getLength(); if (0 != nAtts) { for (int i = 0; i < nAtts; i++) { // First handle a possible ID attribute if (atts.getType(i).equalsIgnoreCase("ID")) setIDAttribute(atts.getValue(i), elem); String attrNS = atts.getURI(i); if ("".equals(attrNS)) attrNS = null; // DOM represents no-namespace as null // Crimson won't let us set an xmlns: attribute on the DOM. String attrQName = atts.getQName(i); // In SAX, xmlns: attributes have an empty namespace, while in DOM they should have the xmlns // namespace if (attrQName.startsWith("xmlns:")) attrNS = "http://www.w3.org/2000/xmlns/"; // ALWAYS use the DOM Level 2 call! elem.setAttributeNS(attrNS, attrQName, atts.getValue(i)); } } // append(elem); elemStack.push(elem); currNode = elem; // append(elem); } catch (Exception de) { throw new SAXException(de); } } /** * Receive notification of the end of an element. * <p> * The SAX parser will invoke this method at the end of every element in the XML document; there will be a * corresponding startElement() event for every endElement() event (even when the element is empty). * </p> * <p> * If the element name has a namespace prefix, the prefix will still be attached to the name. * </p> * * @param ns the namespace of the element * @param localName The local part of the qualified name of the element * @param name The element name */ public void endElement(String ns, String localName, String name) throws org.xml.sax.SAXException { elemStack.pop(); currNode = elemStack.isEmpty() ? null : (Node) elemStack.peek(); } /** * Set an ID string to node association in the ID table. * * @param id The ID string. * @param elem The associated ID. */ public void setIDAttribute(String id, Element elem) { // Do nothing. This method is meant to be overiden. } /** * Receive notification of character data. * <p> * The Parser will call this method to report each chunk of character data. SAX parsers may return all contiguous * character data in a single chunk, or they may split it into several chunks; however, all of the characters in any * single event must come from the same external entity, so that the Locator provides useful information. * </p> * <p> * The application must not attempt to read from the array outside of the specified range. * </p> * <p> * Note that some parsers will report whitespace using the ignorableWhitespace() method rather than this one * (validating parsers must do so). * </p> * * @param ch The characters from the XML document. * @param start The start position in the array. * @param length The number of characters to read from the array. * @see #ignorableWhitespace * @see org.xml.sax.Locator */ public void characters(char[] ch, int start, int length) throws SAXException { if (isOutsideDocElem() && isWhiteSpace(ch, start, length)) return; // avoid // DOM006 // Hierarchy // request // error if (inCData) { cdata(ch, start, length); return; } String s = new String(ch, start, length); Node childNode; childNode = currNode != null ? currNode.getLastChild() : null; if (childNode != null && childNode.getNodeType() == Node.TEXT_NODE) { ((Text) childNode).appendData(s); } else { Text text = getDoc().createTextNode(s); append(text); } } /** * If available, when the disable-output-escaping attribute is used, output raw text without escaping. A PI will be * inserted in front of the node with the name "lotusxsl-next-is-raw" and a value of "formatter-to-dom". * * @param ch Array containing the characters * @param start Index to start of characters in the array * @param length Number of characters in the array */ public void charactersRaw(char[] ch, int start, int length) throws org.xml.sax.SAXException { if (isOutsideDocElem() && isWhiteSpace(ch, start, length)) return; // avoid // DOM006 // Hierarchy // request // error String s = new String(ch, start, length); append(getDoc().createProcessingInstruction("xslt-next-is-raw", "formatter-to-dom")); append(getDoc().createTextNode(s)); } /** * Report the beginning of an entity. The start and end of the document entity are not reported. The start and end * of the external DTD subset are reported using the pseudo-name "[dtd]". All other events must be properly nested * within start/end entity events. * * @param name The name of the entity. If it is a parameter entity, the name will begin with '%'. * @see #endEntity * @see org.xml.sax.ext.DeclHandler#internalEntityDecl * @see org.xml.sax.ext.DeclHandler#externalEntityDecl */ public void startEntity(String name) throws org.xml.sax.SAXException { // Almost certainly the wrong behavior... // entityReference(name); } /** * Report the end of an entity. * * @param name The name of the entity that is ending. * @see #startEntity */ public void endEntity(String name) throws org.xml.sax.SAXException { } /** * Receive notivication of a entityReference. * * @param name name of the entity reference */ public void entityReference(String name) throws org.xml.sax.SAXException { append(getDoc().createEntityReference(name)); } /** * Receive notification of ignorable whitespace in element content. * <p> * Validating Parsers must use this method to report each chunk of ignorable whitespace (see the W3C XML 1.0 * recommendation, section 2.10): non-validating parsers may also use this method if they are capable of parsing and * using content models. * </p> * <p> * SAX parsers may return all contiguous whitespace in a single chunk, or they may split it into several chunks; * however, all of the characters in any single event must come from the same external entity, so that the Locator * provides useful information. * </p> * <p> * The application must not attempt to read from the array outside of the specified range. * </p> * * @param ch The characters from the XML document. * @param start The start position in the array. * @param length The number of characters to read from the array. * @see #characters */ public void ignorableWhitespace(char[] ch, int start, int length) throws org.xml.sax.SAXException { if (isOutsideDocElem()) return; // avoid DOM006 Hierarchy request error String s = new String(ch, start, length); append(getDoc().createTextNode(s)); } /** * Tell if the current node is outside the document element. * * @return true if the current node is outside the document element. */ private boolean isOutsideDocElem() { return (null == docFrag) && elemStack.size() == 0 && (null == currNode || currNode.getNodeType() == Node.DOCUMENT_NODE); } /** * Receive notification of a processing instruction. * <p> * The Parser will invoke this method once for each processing instruction found: note that processing instructions * may occur before or after the main document element. * </p> * <p> * A SAX parser should never report an XML declaration (XML 1.0, section 2.8) or a text declaration (XML 1.0, * section 4.3.1) using this method. * </p> * * @param target The processing instruction target. * @param data The processing instruction data, or null if none was supplied. */ public void processingInstruction(String target, String data) throws org.xml.sax.SAXException { append(getDoc().createProcessingInstruction(target, data)); } /** * Report an XML comment anywhere in the document. This callback will be used for comments inside or outside the * document element, including comments in the external DTD subset (if read). * * @param ch An array holding the characters in the comment. * @param start The starting position in the array. * @param length The number of characters to use from the array. */ public void comment(char[] ch, int start, int length) throws org.xml.sax.SAXException { append(getDoc().createComment(new String(ch, start, length))); } /** Flag indicating that we are processing a CData section */ private boolean inCData = false; /** * Report the start of a CDATA section. * * @see #endCDATA */ public void startCDATA() throws org.xml.sax.SAXException { inCData = true; append(getDoc().createCDATASection("")); } /** * Report the end of a CDATA section. * * @see #startCDATA */ public void endCDATA() throws org.xml.sax.SAXException { inCData = false; } /** * Receive notification of cdata. * <p> * The Parser will call this method to report each chunk of character data. SAX parsers may return all contiguous * character data in a single chunk, or they may split it into several chunks; however, all of the characters in any * single event must come from the same external entity, so that the Locator provides useful information. * </p> * <p> * The application must not attempt to read from the array outside of the specified range. * </p> * <p> * Note that some parsers will report whitespace using the ignorableWhitespace() method rather than this one * (validating parsers must do so). * </p> * * @param ch The characters from the XML document. * @param start The start position in the array. * @param length The number of characters to read from the array. * @see #ignorableWhitespace * @see org.xml.sax.Locator */ public void cdata(char[] ch, int start, int length) throws org.xml.sax.SAXException { if (isOutsideDocElem() && isWhiteSpace(ch, start, length)) return; // avoid // DOM006 // Hierarchy // request // error String s = new String(ch, start, length); CDATASection section = (CDATASection) currNode.getLastChild(); section.appendData(s); } /** * Report the start of DTD declarations, if any. Any declarations are assumed to be in the internal subset unless * otherwise indicated. * * @param name The document type name. * @param publicId The declared public identifier for the external DTD subset, or null if none was declared. * @param systemId The declared system identifier for the external DTD subset, or null if none was declared. * @see #endDTD * @see #startEntity */ public void startDTD(String name, String publicId, String systemId) throws org.xml.sax.SAXException { // Do nothing for now. } /** * Report the end of DTD declarations. * * @see #startDTD */ public void endDTD() throws org.xml.sax.SAXException { // Do nothing for now. } /** * Begin the scope of a prefix-URI Namespace mapping. * <p> * The information from this event is not necessary for normal Namespace processing: the SAX XML reader will * automatically replace prefixes for element and attribute names when the http://xml.org/sax/features/namespaces * feature is true (the default). * </p> * <p> * There are cases, however, when applications need to use prefixes in character data or in attribute values, where * they cannot safely be expanded automatically; the start/endPrefixMapping event supplies the information to the * application to expand prefixes in those contexts itself, if necessary. * </p> * <p> * Note that start/endPrefixMapping events are not guaranteed to be properly nested relative to each-other: all * startPrefixMapping events will occur before the corresponding startElement event, and all endPrefixMapping events * will occur after the corresponding endElement event, but their order is not guaranteed. * </p> * * @param prefix The Namespace prefix being declared. * @param uri The Namespace URI the prefix is mapped to. * @see #endPrefixMapping * @see #startElement */ public void startPrefixMapping(String prefix, String uri) throws org.xml.sax.SAXException { /* * // Not sure if this is needed or wanted // Also, it fails in the stree. if((null != m_currentNode) && * (m_currentNode.getNodeType() == Node.ELEMENT_NODE)) { String qname; if(((null != prefix) && (prefix.length() == * 0)) || (null == prefix)) qname = "xmlns"; else qname = "xmlns:"+prefix; Element elem = * (Element)m_currentNode; String val = elem.getAttribute(qname); // Obsolete, should be DOM2...? if(val == * null) { elem.setAttributeNS("http://www.w3.org/XML/1998/namespace", qname, uri); } } */ } /** * End the scope of a prefix-URI mapping. * <p> * See startPrefixMapping for details. This event will always occur after the corresponding endElement event, but * the order of endPrefixMapping events is not otherwise guaranteed. * </p> * * @param prefix The prefix that was being mapping. * @see #startPrefixMapping * @see #endElement */ public void endPrefixMapping(String prefix) throws org.xml.sax.SAXException { } /** * Receive notification of a skipped entity. * <p> * The Parser will invoke this method once for each entity skipped. Non-validating processors may skip entities if * they have not seen the declarations (because, for example, the entity was declared in an external DTD subset). * All processors may skip external entities, depending on the values of the * http://xml.org/sax/features/external-general-entities and the * http://xml.org/sax/features/external-parameter-entities properties. * </p> * * @param name The name of the skipped entity. If it is a parameter entity, the name will begin with '%'. */ public void skippedEntity(String name) throws org.xml.sax.SAXException { } /** * @return Returns the doc. */ public Document getDoc() { return doc; } /** * Copied from the Xalan-J_2_7_0 sources. * Returns true if the string is whitespace. * * @param ch Character array to check as XML whitespace. * @param start Start index of characters in the array * @param length Number of characters in the array * * @return True if the characters in the array are XML whitespace; otherwise, false. */ private static boolean isWhiteSpace(char[] ch, int start, int length) { int end = start + length; for (int s = start; s < end; s++) { if (!isWhiteSpace(ch[s])) return false; } return true; } /** * Copied from the Xalan-J_2_7_0 sources. * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition * of whitespace. Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S"> * the definition of <CODE>S</CODE></A> for details. * * @param ch Character to check as XML whitespace. * * @return =true if <var>ch</var> is XML whitespace; otherwise =false. */ private static boolean isWhiteSpace(char ch) { return (ch == 0x20) || (ch == 0x09) || (ch == 0xD) || (ch == 0xA); } }