// DOMBuilder.java example
//
// Explorer
// tizzit-master
/**
 * Copyright (c) 2009 Juwi MacMillan Group GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.tizzit.util.xml;

import java.util.Stack;

import org.w3c.dom.*;
import org.xml.sax.*;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.ext.LexicalHandler;

/**
 * Builds a DOM tree from SAX events (plus a few extra events that SAX itself does not define, such as
 * {@link #charactersRaw}, {@link #entityReference} and {@link #cdata}).
 * <p>
 * The resulting nodes are appended to one of three targets, depending on the constructor used: an existing node, a
 * document fragment, or the document itself. Instances keep mutable parsing state and are meant to be driven by a
 * single stream of SAX events.
 * </p>
 */
public class DOMBuilder implements ContentHandler, LexicalHandler {
	/** Namespace URI that DOM Level 2 requires for every namespace declaration attribute (xmlns and xmlns:*). */
	private static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";

	/** Root document; used as the factory for all created nodes. */
	private final Document doc;

	/** Node that currently receives new children, or null if nodes go to the fragment or the document. */
	private Node currNode;

	/** Node handed to the constructor; it becomes the current node again whenever the element stack is empty. */
	private final Node baseNode;

	/** Target document fragment, or null if not building into a DocumentFragment. */
	private final DocumentFragment docFrag;

	/** Stack of the elements that have been started but not yet ended. */
	private final Stack<Element> elemStack = new Stack<Element>();

	/** Flag indicating that we are processing a CDATA section. */
	private boolean inCData = false;

	/**
	 * Creates a builder that appends the generated DOM nodes as children of the given node.
	 * 
	 * @param doc Root document, used to create the nodes
	 * @param node Node that receives the generated top-level nodes
	 */
	public DOMBuilder(Document doc, Node node) {
		this(doc, node, null);
	}

	/**
	 * Creates a builder that appends the generated DOM nodes to the given document fragment.
	 * 
	 * @param doc Root document, used to create the nodes
	 * @param docFrag Document fragment that receives the generated top-level nodes
	 */
	public DOMBuilder(Document doc, DocumentFragment docFrag) {
		this(doc, null, docFrag);
	}

	/**
	 * Creates a builder that appends the generated DOM nodes directly to the document.
	 * 
	 * @param doc Root document
	 */
	public DOMBuilder(Document doc) {
		this(doc, null, null);
	}

	/**
	 * Shared initialisation for the public constructors.
	 * 
	 * @param doc Root document
	 * @param node Initial current node, may be null
	 * @param docFrag Target document fragment, may be null
	 */
	private DOMBuilder(Document doc, Node node, DocumentFragment docFrag) {
		this.doc = doc;
		this.baseNode = node;
		this.currNode = node;
		this.docFrag = docFrag;
	}

	/**
	 * Get the root node of the DOM being created. This is either a Document or a DocumentFragment.
	 * 
	 * @return The document fragment if one was supplied, otherwise the root document
	 */
	public Node getRootNode() {
		if (null != docFrag) {
			return docFrag;
		}
		return getDoc();
	}

	/**
	 * Get the node currently being processed.
	 * 
	 * @return the node that currently receives new children, or null if there is none
	 */
	public Node getCurrentNode() {
		return this.currNode;
	}

	/**
	 * Return null since there is no Writer for this class.
	 * 
	 * @return null
	 */
	public java.io.Writer getWriter() {
		return null;
	}

	/**
	 * Append a node to the current container: the current node if there is one, otherwise the document fragment,
	 * otherwise the document itself.
	 * 
	 * @param newNode New node to append
	 * @throws org.xml.sax.SAXException if non-whitespace text or a second root element would be added to the
	 *         document
	 */
	protected void append(Node newNode) throws org.xml.sax.SAXException {
		if (null != currNode) {
			currNode.appendChild(newNode);
			return;
		}
		if (null != docFrag) {
			docFrag.appendChild(newNode);
			return;
		}

		// From here on the node would become a direct child of the document.
		short type = newNode.getNodeType();

		if (type == Node.TEXT_NODE) {
			String data = newNode.getNodeValue();

			if ((null != data) && (data.trim().length() > 0)) {
				// Can't output text before the document element.
				throw new org.xml.sax.SAXException("ER_CANT_OUTPUT_TEXT_BEFORE_DOC");
			}

			// Whitespace-only text outside the document element is silently dropped.
			return;
		}

		if ((type == Node.ELEMENT_NODE) && (getDoc().getDocumentElement() != null)) {
			// Can't have more than one root on a DOM.
			throw new org.xml.sax.SAXException("ER_CANT_HAVE_MORE_THAN_ONE_ROOT");
		}

		getDoc().appendChild(newNode);
	}

	/**
	 * Receive an object for locating the origin of SAX document events. The locator is currently ignored.
	 * 
	 * @param locator An object that can return the location of any SAX document event.
	 * @see org.xml.sax.Locator
	 */
	public void setDocumentLocator(Locator locator) {
		// No action for the moment.
	}

	/**
	 * Receive notification of the beginning of a document. Currently a no-op.
	 */
	public void startDocument() throws org.xml.sax.SAXException {
		// No action for the moment.
	}

	/**
	 * Receive notification of the end of a document. Currently a no-op.
	 */
	public void endDocument() throws org.xml.sax.SAXException {
		// No action for the moment.
	}

	/**
	 * Receive notification of the beginning of an element. A new element is created, appended to the current
	 * container, populated with the given attributes and then made the current node.
	 * 
	 * @param ns The namespace of the node; null or empty means no namespace
	 * @param localName The local part of the qualified name
	 * @param name The qualified element name, used as the name of the created element
	 * @param atts The attributes attached to the element, if any; null is treated as "no attributes"
	 * @throws org.xml.sax.SAXException if the element cannot be appended or an attribute cannot be set
	 * @see #endElement
	 * @see org.xml.sax.Attributes
	 */
	public void startElement(String ns, String localName, String name, Attributes atts) throws org.xml.sax.SAXException {
		// Note that the namespace-aware call must be used to correctly
		// construct a Level 2 DOM, even for non-namespaced nodes.
		String elemNS = ((null == ns) || (ns.length() == 0)) ? null : ns;
		Element elem = getDoc().createElementNS(elemNS, name);
		append(elem);

		try {
			int nAtts = (null == atts) ? 0 : atts.getLength();

			for (int i = 0; i < nAtts; i++) {
				// First handle a possible ID attribute
				if ("ID".equalsIgnoreCase(atts.getType(i))) setIDAttribute(atts.getValue(i), elem);

				String attrNS = atts.getURI(i);
				if ("".equals(attrNS)) attrNS = null; // DOM represents no-namespace as null

				String attrQName = atts.getQName(i);

				// In SAX, namespace declarations have an empty namespace, while in DOM they must be in the xmlns
				// namespace. This holds for prefixed declarations (xmlns:p) and for the default one (xmlns);
				// setting a plain "xmlns" attribute with any other namespace raises a NAMESPACE_ERR.
				if ("xmlns".equals(attrQName) || ((null != attrQName) && attrQName.startsWith("xmlns:"))) {
					attrNS = XMLNS_URI;
				}

				// ALWAYS use the DOM Level 2 call!
				elem.setAttributeNS(attrNS, attrQName, atts.getValue(i));
			}

			elemStack.push(elem);
			currNode = elem;
		} catch (RuntimeException de) {
			// DOM failures are unchecked; report them through the SAX channel with the cause preserved.
			throw new SAXException(de);
		}
	}

	/**
	 * Receive notification of the end of an element. The parent element becomes the current node again; once the
	 * outermost element built by this instance is closed, the node supplied to the constructor (if any) is restored
	 * so that following siblings are appended to the same parent.
	 * 
	 * @param ns the namespace of the element
	 * @param localName The local part of the qualified name of the element
	 * @param name The element name
	 */
	public void endElement(String ns, String localName, String name) throws org.xml.sax.SAXException {
		elemStack.pop();
		currNode = elemStack.isEmpty() ? baseNode : elemStack.peek();
	}

	/**
	 * Set an ID string to node association in the ID table. Called from {@link #startElement} for every attribute
	 * whose declared type is ID.
	 * 
	 * @param id The ID string.
	 * @param elem The associated element.
	 */
	public void setIDAttribute(String id, Element elem) {
		// Do nothing. This method is meant to be overridden.
	}

	/**
	 * Receive notification of character data. Inside a CDATA section the data is forwarded to {@link #cdata};
	 * otherwise it is merged into a directly preceding text node of the current node or appended as a new text node.
	 * Whitespace outside the document element is ignored.
	 * 
	 * @param ch The characters from the XML document.
	 * @param start The start position in the array.
	 * @param length The number of characters to read from the array.
	 * @see #ignorableWhitespace
	 */
	public void characters(char[] ch, int start, int length) throws SAXException {
		// Skipping whitespace here avoids a DOM006 hierarchy request error.
		if (isOutsideDocElem() && isWhiteSpace(ch, start, length)) return;

		if (inCData) {
			cdata(ch, start, length);
			return;
		}

		String s = new String(ch, start, length);
		Node lastChild = (currNode != null) ? currNode.getLastChild() : null;

		if ((lastChild != null) && (lastChild.getNodeType() == Node.TEXT_NODE)) {
			// Parsers may split contiguous text into several chunks; keep it in one text node.
			((Text) lastChild).appendData(s);
		} else {
			append(getDoc().createTextNode(s));
		}
	}

	/**
	 * Output raw text without escaping, as requested by the disable-output-escaping attribute. A processing
	 * instruction with the target "xslt-next-is-raw" and the data "formatter-to-dom" is inserted in front of the
	 * text node.
	 * 
	 * @param ch Array containing the characters
	 * @param start Index to start of characters in the array
	 * @param length Number of characters in the array
	 */
	public void charactersRaw(char[] ch, int start, int length) throws org.xml.sax.SAXException {
		// Skipping whitespace here avoids a DOM006 hierarchy request error.
		if (isOutsideDocElem() && isWhiteSpace(ch, start, length)) return;

		String s = new String(ch, start, length);

		append(getDoc().createProcessingInstruction("xslt-next-is-raw", "formatter-to-dom"));
		append(getDoc().createTextNode(s));
	}

	/**
	 * Report the beginning of an entity. Currently a no-op.
	 * 
	 * @param name The name of the entity. If it is a parameter entity, the name will begin with '%'.
	 * @see #endEntity
	 */
	public void startEntity(String name) throws org.xml.sax.SAXException {
		// Creating an entity reference here would almost certainly be the wrong behavior.
	}

	/**
	 * Report the end of an entity. Currently a no-op.
	 * 
	 * @param name The name of the entity that is ending.
	 * @see #startEntity
	 */
	public void endEntity(String name) throws org.xml.sax.SAXException {
	}

	/**
	 * Receive notification of an entity reference; an EntityReference node is appended to the current container.
	 * 
	 * @param name name of the entity reference
	 */
	public void entityReference(String name) throws org.xml.sax.SAXException {
		append(getDoc().createEntityReference(name));
	}

	/**
	 * Receive notification of ignorable whitespace in element content. The whitespace is appended as a text node
	 * unless it occurs outside the document element.
	 * 
	 * @param ch The characters from the XML document.
	 * @param start The start position in the array.
	 * @param length The number of characters to read from the array.
	 * @see #characters
	 */
	public void ignorableWhitespace(char[] ch, int start, int length) throws org.xml.sax.SAXException {
		if (isOutsideDocElem()) return; // avoid DOM006 Hierarchy request error

		append(getDoc().createTextNode(new String(ch, start, length)));
	}

	/**
	 * Tell if the current node is outside the document element.
	 * 
	 * @return true if no fragment is targeted, no element is open and the current node is absent or a document.
	 */
	private boolean isOutsideDocElem() {
		if ((null != docFrag) || !elemStack.isEmpty()) {
			return false;
		}
		return (null == currNode) || (currNode.getNodeType() == Node.DOCUMENT_NODE);
	}

	/**
	 * Receive notification of a processing instruction; it is appended to the current container.
	 * 
	 * @param target The processing instruction target.
	 * @param data The processing instruction data, or null if none was supplied.
	 */
	public void processingInstruction(String target, String data) throws org.xml.sax.SAXException {
		append(getDoc().createProcessingInstruction(target, data));
	}

	/**
	 * Report an XML comment anywhere in the document; it is appended to the current container.
	 * 
	 * @param ch An array holding the characters in the comment.
	 * @param start The starting position in the array.
	 * @param length The number of characters to use from the array.
	 */
	public void comment(char[] ch, int start, int length) throws org.xml.sax.SAXException {
		append(getDoc().createComment(new String(ch, start, length)));
	}

	/**
	 * Report the start of a CDATA section. An empty CDATA section is appended; following character data is added to
	 * it until {@link #endCDATA} is called.
	 * 
	 * @see #endCDATA
	 */
	public void startCDATA() throws org.xml.sax.SAXException {
		inCData = true;
		append(getDoc().createCDATASection(""));
	}

	/**
	 * Report the end of a CDATA section.
	 * 
	 * @see #startCDATA
	 */
	public void endCDATA() throws org.xml.sax.SAXException {
		inCData = false;
	}

	/**
	 * Receive notification of CDATA content. The data is added to the CDATA section that is the last child of the
	 * current container (current node or document fragment); if there is no such section, a new one is appended.
	 * Whitespace outside the document element is ignored.
	 * 
	 * @param ch The characters from the XML document.
	 * @param start The start position in the array.
	 * @param length The number of characters to read from the array.
	 * @see #startCDATA
	 */
	public void cdata(char[] ch, int start, int length) throws org.xml.sax.SAXException {
		// Skipping whitespace here avoids a DOM006 hierarchy request error.
		if (isOutsideDocElem() && isWhiteSpace(ch, start, length)) return;

		String s = new String(ch, start, length);

		// The section opened by startCDATA() lives in the same container that append() writes to,
		// which is the fragment (not the current node) when building into a DocumentFragment.
		Node container = (null != currNode) ? currNode : docFrag;
		Node lastChild = (null != container) ? container.getLastChild() : null;

		if (lastChild instanceof CDATASection) {
			((CDATASection) lastChild).appendData(s);
		} else {
			append(getDoc().createCDATASection(s));
		}
	}

	/**
	 * Report the start of DTD declarations, if any. Currently a no-op.
	 * 
	 * @param name The document type name.
	 * @param publicId The declared public identifier for the external DTD subset, or null if none was declared.
	 * @param systemId The declared system identifier for the external DTD subset, or null if none was declared.
	 * @see #endDTD
	 */
	public void startDTD(String name, String publicId, String systemId) throws org.xml.sax.SAXException {
		// Do nothing for now.
	}

	/**
	 * Report the end of DTD declarations. Currently a no-op.
	 * 
	 * @see #startDTD
	 */
	public void endDTD() throws org.xml.sax.SAXException {
		// Do nothing for now.
	}

	/**
	 * Begin the scope of a prefix-URI Namespace mapping. Currently a no-op: namespace declarations reach the DOM
	 * only through the xmlns attributes passed to {@link #startElement}.
	 * 
	 * @param prefix The Namespace prefix being declared.
	 * @param uri The Namespace URI the prefix is mapped to.
	 * @see #endPrefixMapping
	 * @see #startElement
	 */
	public void startPrefixMapping(String prefix, String uri) throws org.xml.sax.SAXException {
	}

	/**
	 * End the scope of a prefix-URI mapping. Currently a no-op.
	 * 
	 * @param prefix The prefix that was being mapped.
	 * @see #startPrefixMapping
	 * @see #endElement
	 */
	public void endPrefixMapping(String prefix) throws org.xml.sax.SAXException {
	}

	/**
	 * Receive notification of a skipped entity. Currently a no-op.
	 * 
	 * @param name The name of the skipped entity. If it is a parameter entity, the name will begin with '%'.
	 */
	public void skippedEntity(String name) throws org.xml.sax.SAXException {
	}

	/**
	 * @return Returns the doc.
	 */
	public Document getDoc() {
		return doc;
	}

	/**
	 * Copied from the Xalan-J_2_7_0 sources.
	 * Returns true if the given range of the array consists only of XML whitespace.
	 *
	 * @param ch Character array to check as XML whitespace.
	 * @param start Start index of characters in the array
	 * @param length Number of characters in the array
	 *  
	 * @return True if the characters in the array are XML whitespace; otherwise, false.
	 */
	private static boolean isWhiteSpace(char[] ch, int start, int length) {
		int end = start + length;
		for (int s = start; s < end; s++) {
			if (!isWhiteSpace(ch[s])) return false;
		}

		return true;
	}

	/**
	 * Copied from the Xalan-J_2_7_0 sources.
	 * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
	 * of whitespace.  Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
	 * the definition of <CODE>S</CODE></A> for details.
	 * 
	 * @param ch Character to check as XML whitespace.
	 * 
	 * @return true if <var>ch</var> is XML whitespace; otherwise false.
	 */
	private static boolean isWhiteSpace(char ch) {
		return (ch == 0x20) || (ch == 0x09) || (ch == 0xD) || (ch == 0xA);
	}
}