package org.docx4j.model.datastorage;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import org.docx4j.model.datastorage.xpathtracker.Histgram;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class DomToXPathMap {
private static Logger log = LoggerFactory.getLogger(DomToXPathMap.class);
private final Stack<Histgram> histgrams = new Stack<Histgram>();
private Document document;
private Map<String, String> pathMap = null;
public DomToXPathMap(Document document) {
this.document = document;
}
public Map<String, String> map() {
histgrams.clear();
histgrams.push(new Histgram());
pathMap = new HashMap<String, String>();
walkTree(document);
return pathMap;
}
private String getLocalName(Node sourceNode) {
if (sourceNode.getLocalName()==null) {
// eg element was created using createElement()
return sourceNode.getNodeName();
} else {
return sourceNode.getLocalName();
}
}
public void walkTree( Node sourceNode ) {
// log.debug("node type" + sourceNode.getNodeType());
switch (sourceNode.getNodeType() ) {
case Node.DOCUMENT_NODE: // type 9
case Node.DOCUMENT_FRAGMENT_NODE: // type 11
// log.debug("DOCUMENT:" + w3CDomNodeToString(sourceNode) );
// if (sourceNode.getChildNodes().getLength()==0) {
// log.debug("..no children!");
// }
// recurse on each child
NodeList nodes = sourceNode.getChildNodes();
if (nodes != null) {
for (int i=0; i<nodes.getLength(); i++) {
log.debug("child " + i + "of DOCUMENT_NODE");
//treeCopy((DTMNodeProxy)nodes.item(i), destParent);
walkTree((Node)nodes.item(i));
}
}
break;
case Node.ELEMENT_NODE:
try {
histgrams.peek().update(
sourceNode.getNamespaceURI(),
getLocalName(sourceNode),
/* qname */ sourceNode.getNodeName() );
histgrams.push(new Histgram());
} catch (java.lang.IllegalArgumentException iae) {
log.error(sourceNode.getClass().getName());
log.error("sourceNode.getNodeName(): " + sourceNode.getNodeName());
log.error("sourceNode.getNamespaceURI(): " + sourceNode.getNamespaceURI());
log.error("sourceNode.getLocalName(): " + sourceNode.getLocalName());
log.error("sourceNode.getPrefix(): " + sourceNode.getPrefix());
log.error("java.vendor="+System.getProperty("java.vendor"));
log.error("java.version="+System.getProperty("java.version"));
throw iae;
}
// recurse on each child
NodeList children = sourceNode.getChildNodes();
if (children == null
|| children.getLength()==0) {
// Record the fact this is an empty leaf node
String xpath = getXPath();
pathMap.put(xpath, "");
} else {
for (int i=0; i<children.getLength(); i++) {
walkTree( (Node)children.item(i));
}
}
histgrams.pop();
break;
case Node.TEXT_NODE:
// better than doing getTextContent() at the element level??
String xpath = getXPath();
String existing = pathMap.get(xpath);
if (existing==null) {
// if (sourceNode.getNodeValue().endsWith("\n")
// || sourceNode.getNodeValue().endsWith("\r")) {
pathMap.put(xpath, sourceNode.getNodeValue()); // some whitespace is significant
} else {
// Happens a lot
//log.debug("concat..");
pathMap.put(xpath, existing + sourceNode.getNodeValue());
}
// log.debug("Put " + xpath + "=" + sourceNode.getNodeValue());
break;
// case Node.CDATA_SECTION_NODE:
// writer.write("<![CDATA[" +
// node.getNodeValue() + "]]>");
// break;
//
// case Node.COMMENT_NODE:
// writer.write(indentLevel + "<!-- " +
// node.getNodeValue() + " -->");
// writer.write(lineSeparator);
// break;
//
// case Node.PROCESSING_INSTRUCTION_NODE:
// writer.write("<?" + node.getNodeName() +
// " " + node.getNodeValue() +
// "?>");
// writer.write(lineSeparator);
// break;
//
// case Node.ENTITY_REFERENCE_NODE:
// writer.write("&" + node.getNodeName() + ";");
// break;
//
// case Node.DOCUMENT_TYPE_NODE:
// DocumentType docType = (DocumentType)node;
// writer.write("<!DOCTYPE " + docType.getName());
// if (docType.getPublicId() != null) {
// System.out.print(" PUBLIC \"" +
// docType.getPublicId() + "\" ");
// } else {
// writer.write(" SYSTEM ");
// }
// writer.write("\"" + docType.getSystemId() + "\">");
// writer.write(lineSeparator);
// break;
}
}
/**
* Gets the XPath to the current element.
*/
public String getXPath() {
StringBuilder buf = new StringBuilder();
for (Histgram h : histgrams) {
h.appendPath(buf);
}
return buf.toString();
}
}