package org.compass.core.xml.javax.converter.support;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
/**
* Builds a DOM {@link org.w3c.dom.Document} using a
* {@link javax.xml.stream.XMLStreamReader}.
*/
public class Stax2DomBuilder {
// // // Configuration settings:
/**
* Whether ignorable white space should be ignored, ie not added
* in the resulting JDOM tree. If true, it will be ignored; if false,
* it will be added in the tree. Default value if false.
*/
protected boolean mCfgIgnoreWs = false;
protected boolean mNsAware = true;
// // Trivial caching...
protected String mLastPrefix = null;
protected String mLastLocalName = null;
protected String mLastQName = null;
/**
* Default constructor.
*/
public Stax2DomBuilder() {
}
/**
* Method used to change whether the build methods will add ignorable
* (element) white space in the DOM tree or not.
* <p>
* Whether all-whitespace text segment is ignorable white space or
* not is based on DTD read in, as per XML specifications (white space
* is only significant in mixed content or pure text elements).
*/
public void setIgnoreWhitespace(boolean state) {
mCfgIgnoreWs = state;
}
/**
* This method will create a {@link org.w3c.dom.Document} instance using
* the default JAXP mechanism and
* populate using the given StAX stream reader.
*
* @param r Stream reader from which input is read.
* @return <code>Document</code> - DOM document object.
* @throws XMLStreamException If the reader threw such exception (to
* indicate a parsing or I/O problem)
*/
public Document build(XMLStreamReader r)
throws ParserConfigurationException, XMLStreamException {
return build(r, DocumentBuilderFactory.newInstance().newDocumentBuilder());
}
public Document build(XMLStreamReader r, DocumentBuilder docbuilder)
throws XMLStreamException {
Document doc = docbuilder.newDocument();
build(r, doc);
return doc;
}
/**
* This method will populate given {@link org.w3c.dom.Document} using
* the given StAX stream reader instance.
*
* @param r Stream reader from which input is read.
* @throws XMLStreamException If the reader threw such exception (to
* indicate a parsing or I/O problem)
*/
public void build(XMLStreamReader r, Document doc)
throws XMLStreamException {
buildTree(r, doc);
}
/**
* This method takes a <code>XMLStreamReader</code> and builds up
* a JDOM tree. Recursion has been eliminated by using nodes'
* parent/child relationship; this improves performance somewhat
* (classic recursion-by-iteration-and-explicit stack transformation)
*/
protected void buildTree(XMLStreamReader r, Document doc)
throws XMLStreamException {
checkReaderSettings(r);
Node current = doc; // At top level
main_loop:
while (true) {
int evtType = r.next();
Node child;
switch (evtType) {
case XMLStreamConstants.CDATA:
child = doc.createCDATASection(r.getText());
break;
case XMLStreamConstants.SPACE:
if (mCfgIgnoreWs) {
continue main_loop;
}
/* Oh great. DOM is brain-dead in that ignorable white space
* can not be added, even though it is legal, and often
* reported by StAX/SAX impls...
*/
if (current == doc) { // better just ignore, thus...
continue;
}
// fall through
case XMLStreamConstants.CHARACTERS:
child = doc.createTextNode(r.getText());
break;
case XMLStreamConstants.COMMENT:
child = doc.createComment(r.getText());
break;
case XMLStreamConstants.END_DOCUMENT:
break main_loop;
case XMLStreamConstants.END_ELEMENT:
current = current.getParentNode();
if (current == null) {
current = doc;
}
continue main_loop;
case XMLStreamConstants.ENTITY_DECLARATION:
case XMLStreamConstants.NOTATION_DECLARATION:
/* Shouldn't really get these, but maybe some stream readers
* do provide the info. If so, better ignore it -- DTD event
* should have most/all we need.
*/
continue main_loop;
case XMLStreamConstants.ENTITY_REFERENCE:
child = doc.createEntityReference(r.getLocalName());
break;
case XMLStreamConstants.PROCESSING_INSTRUCTION:
child = doc.createProcessingInstruction(r.getPITarget(), r.getPIData());
break;
case XMLStreamConstants.START_ELEMENT:
// Ok, need to add a new element...
{
String ln = r.getLocalName();
Element newElem;
if (mNsAware) {
String elemPrefix = r.getPrefix();
// Doh, DOM requires a silly qualified name...
if (elemPrefix != null && elemPrefix.length() > 0) {
newElem = doc.createElementNS(r.getNamespaceURI(),
getQualified(elemPrefix, ln));
} else {
newElem = doc.createElementNS(r.getNamespaceURI(), ln);
}
} else { // if non-ns-aware, things are simpler:
newElem = doc.createElement(ln);
}
/* No need to check namespace bindings, unlikes with some
* other frameworks (JDOM)
*/
// And then the attributes:
for (int i = 0, len = r.getAttributeCount(); i < len; ++i) {
ln = r.getAttributeLocalName(i);
if (mNsAware) {
String prefix = r.getAttributePrefix(i);
if (prefix != null && prefix.length() > 0) {
ln = getQualified(prefix, ln);
}
Attr attr = doc.createAttributeNS(r.getAttributeNamespace(i), ln);
attr.setValue(r.getAttributeValue(i));
newElem.setAttributeNodeNS(attr);
} else {
Attr attr = doc.createAttribute(ln);
attr.setValue(r.getAttributeValue(i));
newElem.setAttributeNode(attr);
}
}
// And then 'push' new element...
current.appendChild(newElem);
current = newElem;
continue main_loop;
}
case XMLStreamConstants.START_DOCUMENT:
/* This should only be received at the beginning of document...
* so, should we indicate the problem or not?
*/
/* For now, let it pass: maybe some (broken) readers pass
* that info as first event in beginning of doc?
*/
continue main_loop;
case XMLStreamConstants.DTD:
/* !!! Note: StAX does not expose enough information about
* doctype declaration (specifically, public and system id!);
* (altough StAX2 would...)
*
* Worse, DOM1/2 do not specify a way to create the DocType
* node, even if StAX provided it. This is pretty silly,
* all in all.
*/
continue main_loop;
// Should never get these, from a stream reader:
/* (commented out entries are just FYI; default catches
* them all)
*/
//case XMLStreamConstants.ATTRIBUTE:
//case XMLStreamConstants.NAMESPACE:
default:
throw new XMLStreamException("Unrecognized iterator event type: " + r.getEventType() + "; should not receive such types (broken stream reader?)");
}
if (child != null) {
current.appendChild(child);
}
}
}
// // // Overridable helper methods:
protected String getQualified(String prefix, String localName) {
/* This mostly/only helps with empty/text-only elements...
* might make sense to do 'real' caching...
*/
if (localName == mLastLocalName &&
prefix == mLastPrefix) {
return mLastQName;
}
String qn = prefix + ":" + localName;
mLastQName = qn;
return qn;
}
protected void checkReaderSettings(XMLStreamReader r)
throws XMLStreamException {
Object o = r.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE);
/* StAX defaults to namespace aware, so let's use similar
* logics (although all compliant implementations really should
* return a valid value)
*/
if ((o instanceof Boolean) && !((Boolean) o).booleanValue()) {
mNsAware = false;
} else {
mNsAware = true;
}
}
// // // Private methods:
// // // Testing
/**
* Trivial test driver for testing functionality.
*/
public static void main(String[] args) throws Exception {
if (args.length != 1) {
System.err.println("Usage: java ... [file]");
System.exit(1);
}
String filename = args[0];
java.io.Reader r = new java.io.FileReader(filename);
javax.xml.stream.XMLInputFactory f = javax.xml.stream.XMLInputFactory.newInstance();
XMLStreamReader sr = f.createXMLStreamReader(r);
Stax2DomBuilder builder = new Stax2DomBuilder();
Document domDoc = builder.build(sr);
System.out.println("Done [with " + sr.getClass() + "]:");
System.out.println("----- Dom -----");
java.io.PrintWriter pw = new java.io.PrintWriter(System.out);
pw.println(domDoc.toString());
pw.flush();
System.out.println("----- /Dom -----");
}
}