/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-04 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id$
*/
package org.exist.memtree;
import org.apache.log4j.Logger;
import org.exist.EXistException;
import org.exist.Namespaces;
import org.exist.collections.CollectionConfiguration;
import org.exist.dom.*;
import org.exist.dom.CommentImpl;
import org.exist.dom.ElementImpl;
import org.exist.dom.ProcessingInstructionImpl;
import org.exist.dom.TextImpl;
import org.exist.numbering.NodeId;
import org.exist.storage.DBBroker;
import org.exist.storage.IndexSpec;
import org.exist.storage.NodePath;
import org.exist.storage.txn.Txn;
import org.exist.util.pool.NodePool;
import org.w3c.dom.DOMException;
import org.w3c.dom.Node;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
/**
* Helper class to make an in-memory document fragment persistent.
* The class directly accesses the in-memory document structure and writes
* it into a temporary doc on the database. This is much faster than first serializing the
* document tree to SAX and passing it to
* {@link org.exist.collections.Collection#store(org.exist.storage.txn.Txn, org.exist.storage.DBBroker, org.exist.collections.IndexInfo, org.xml.sax.InputSource, boolean)}.
*
* As the in-memory document fragment may not be a well-formed XML doc (having more
* than one root element), a wrapper element is put around the content nodes.
*
* @author wolf
*/
public class DOMIndexer {

    private static final Logger LOG = Logger.getLogger(DOMIndexer.class);

    /** Qualified name of the wrapper element that is put around the stored content nodes. */
    public final static QName ROOT_QNAME = new QName("temp", Namespaces.EXIST_NS, "exist");

    private final DBBroker broker;
    private final Txn transaction;

    /** The in-memory source document whose nodes are copied. */
    private final DocumentImpl doc;

    /** The persistent document that receives the copied nodes. */
    private final org.exist.dom.DocumentImpl targetDoc;

    /** Index configuration of the target collection; stays null when the collection has no configuration. */
    private IndexSpec indexSpec = null;

    /** Elements that are currently open; the top entry is the parent of the next child node. */
    private final Stack stack = new Stack();

    /** Single text node instance that is reused for every stored text node. */
    private final TextImpl text = new TextImpl();

    /** The node most recently appended at the current level, or null right after an element was opened. */
    private StoredNode prevNode = null;

    /** Single comment node instance that is reused for every stored comment. */
    private final CommentImpl comment = new CommentImpl();

    /** Single processing instruction instance that is reused for every stored processing instruction. */
    private final ProcessingInstructionImpl pi = new ProcessingInstructionImpl();

    /**
     * Creates an indexer that copies the nodes of <code>doc</code> into <code>targetDoc</code>.
     * The index specification is read from the configuration of the target document's
     * collection, if the collection has one.
     *
     * @param broker the broker used to store the nodes
     * @param transaction the transaction passed to every store operation
     * @param doc the in-memory document to read from
     * @param targetDoc the persistent document to write to
     */
    public DOMIndexer(DBBroker broker, Txn transaction, DocumentImpl doc, org.exist.dom.DocumentImpl targetDoc) {
        this.broker = broker;
        this.transaction = transaction;
        this.doc = doc;
        this.targetDoc = targetDoc;
        CollectionConfiguration config = targetDoc.getCollection().getConfiguration(broker);
        if (config != null) {
            this.indexSpec = config.getIndexConfiguration();
        }
    }

    /**
     * Prepares the target document by assigning it a dummy document type named "temp".
     * This implementation does not traverse the in-memory tree.
     *
     * @throws EXistException declared for callers; not thrown by the code in this method
     */
    public void scan() throws EXistException {
        //Creates a dummy DOCTYPE
        final DocumentTypeImpl dt = new DocumentTypeImpl("temp", null, "");
        targetDoc.setDocumentType(dt);
    }

    /**
     * Store the nodes.
     *
     * A wrapper element named {@link #ROOT_QNAME} is stored as the root of the target
     * document, then every top-level node of the in-memory document (starting with node
     * number 1) is stored below it, and finally the wrapper element is closed.
     */
    public void store() {
        // create a wrapper element as root node
        ElementImpl elem = new ElementImpl(ROOT_QNAME);
        elem.setNodeId(broker.getBrokerPool().getNodeFactory().createInstance());
        elem.setOwnerDocument(targetDoc);
        elem.setChildCount(doc.getChildCount());
        elem.addNamespaceMapping("exist", Namespaces.EXIST_NS);
        NodePath path = new NodePath();
        path.addComponent(ROOT_QNAME);
        stack.push(elem);
        broker.storeNode(transaction, elem, path, indexSpec);
        targetDoc.appendChild(elem);
        // the child count is reset after the element has been stored
        // NOTE(review): presumably it is rebuilt while children are appended - confirm
        elem.setChildCount(0);

        // store the document nodes; node 0 is skipped, a document of size <= 1 has no content
        int top = doc.size > 1 ? 1 : -1;
        while (top > 0) {
            store(top, path);
            top = doc.getNextSiblingFor(top);
        }

        // close the wrapper element
        stack.pop();
        broker.endElement(elem, path, null);
        path.removeLastComponent();
    }

    /**
     * Stores the subtree rooted at node <code>top</code> using an iterative depth-first walk:
     * each node is started, its first child is visited next, and when a node has no
     * (further) children it is ended and the walk continues with its next sibling or
     * climbs to its parent.
     *
     * @param top number of the in-memory node that forms the root of the subtree
     * @param currentPath the node path of the current position; updated while walking
     */
    private void store(int top, NodePath currentPath) {
        int nodeNr = top;
        while (nodeNr > 0) {
            startNode(nodeNr, currentPath);
            int nextNode = doc.getFirstChildFor(nodeNr);
            while (nextNode == -1) {
                endNode(nodeNr, currentPath);
                if (top == nodeNr) {
                    break;
                }
                nextNode = doc.getNextSiblingFor(nodeNr);
                if (nextNode == -1) {
                    nodeNr = doc.getParentNodeFor(nodeNr);
                    if (nodeNr == -1 || top == nodeNr) {
                        // Only close the node when there is one: -1 is not a valid
                        // node number and must not be used to index the node arrays.
                        if (nodeNr != -1) {
                            endNode(nodeNr, currentPath);
                        }
                        nextNode = -1;
                        break;
                    }
                }
            }
            nodeNr = nextNode;
        }
    }

    /**
     * Creates the persistent counterpart of the given in-memory node, appends it to the
     * element on top of the stack (or to the target document when the stack is empty)
     * and stores it through the broker. Elements are additionally pushed on the stack
     * and their attributes are stored. Node kinds not handled here are skipped with a
     * debug message.
     *
     * @param nodeNr number of the in-memory node to start
     * @param currentPath the node path of the current position; extended for elements
     */
    private void startNode(int nodeNr, NodePath currentPath) {
        ElementImpl last;
        switch (doc.nodeKind[nodeNr]) {
            case Node.ELEMENT_NODE :
                ElementImpl elem = (ElementImpl) NodePool.getInstance().borrowNode(Node.ELEMENT_NODE);
                if (stack.empty()) {
                    // no open element: the new element becomes a child of the target document
                    elem.setNodeId(broker.getBrokerPool().getNodeFactory().createInstance());
                    initElement(nodeNr, elem);
                    stack.push(elem);
                    broker.storeNode(transaction, elem, currentPath, indexSpec);
                    targetDoc.appendChild(elem);
                    elem.setChildCount(0);
                } else {
                    last = (ElementImpl) stack.peek();
                    initElement(nodeNr, elem);
                    last.appendChildInternal(prevNode, elem);
                    stack.push(elem);
                    broker.storeNode(transaction, elem, currentPath, indexSpec);
                    elem.setChildCount(0);
                }
                // a freshly opened element has no previous child yet
                setPrevious(null);
                currentPath.addComponent(elem.getQName());
                storeAttributes(nodeNr, elem, currentPath);
                break;
            case Node.TEXT_NODE :
                // a text node directly following a text or CDATA node is not stored
                if (prevNode != null &&
                        (prevNode.getNodeType() == Node.TEXT_NODE ||
                        prevNode.getNodeType() == Node.CDATA_SECTION_NODE)) {
                    break;
                }
                last = (ElementImpl) stack.peek();
                text.setData(new String(doc.characters, doc.alpha[nodeNr], doc.alphaLen[nodeNr]));
                text.setOwnerDocument(targetDoc);
                last.appendChildInternal(prevNode, text);
                setPrevious(text);
                broker.storeNode(transaction, text, null, indexSpec);
                break;
            case Node.CDATA_SECTION_NODE :
                last = (ElementImpl) stack.peek();
                org.exist.dom.CDATASectionImpl cdata =
                    (org.exist.dom.CDATASectionImpl) NodePool.getInstance().borrowNode(Node.CDATA_SECTION_NODE);
                cdata.setData(doc.characters, doc.alpha[nodeNr], doc.alphaLen[nodeNr]);
                cdata.setOwnerDocument(targetDoc);
                last.appendChildInternal(prevNode, cdata);
                setPrevious(cdata);
                broker.storeNode(transaction, cdata, null, indexSpec);
                break;
            case Node.COMMENT_NODE :
                comment.setData(doc.characters, doc.alpha[nodeNr], doc.alphaLen[nodeNr]);
                comment.setOwnerDocument(targetDoc);
                if (stack.empty()) {
                    comment.setNodeId(NodeId.DOCUMENT_NODE);
                    targetDoc.appendChild(comment);
                    broker.storeNode(transaction, comment, null, indexSpec);
                } else {
                    last = (ElementImpl) stack.peek();
                    last.appendChildInternal(prevNode, comment);
                    broker.storeNode(transaction, comment, null, indexSpec);
                    setPrevious(comment);
                }
                break;
            case Node.PROCESSING_INSTRUCTION_NODE :
                QName qn = doc.nodeName[nodeNr];
                pi.setTarget(qn.getLocalName());
                pi.setData(new String(doc.characters, doc.alpha[nodeNr], doc.alphaLen[nodeNr]));
                pi.setOwnerDocument(targetDoc);
                if (stack.empty()) {
                    pi.setNodeId(NodeId.DOCUMENT_NODE);
                    targetDoc.appendChild(pi);
                } else {
                    last = (ElementImpl) stack.peek();
                    last.appendChildInternal(prevNode, pi);
                    setPrevious(pi);
                }
                broker.storeNode(transaction, pi, null, indexSpec);
                break;
            default:
                LOG.debug("Skipped indexing of in-memory node of type "
                        + doc.nodeKind[nodeNr]);
        }
    }

    /**
     * Copies the basic properties of an in-memory element to its persistent counterpart:
     * owner document, attribute count, child count (children plus attributes), node name
     * and namespace mappings.
     *
     * @param nodeNr number of the in-memory element
     * @param elem the persistent element to initialize
     */
    private void initElement(int nodeNr, ElementImpl elem) {
        short attribs = (short) doc.getAttributesCountFor(nodeNr);
        elem.setOwnerDocument(targetDoc);
        elem.setAttributes(attribs);
        elem.setChildCount(doc.getChildCountFor(nodeNr) + attribs);
        elem.setNodeName(doc.nodeName[nodeNr]);
        Map ns = getNamespaces(nodeNr);
        if (ns != null) {
            elem.setNamespaceMappings(ns);
        }
    }

    /**
     * Collects the namespace declarations of an in-memory element. For an element,
     * <code>doc.alphaLen[nodeNr]</code> is read as the index of its first namespace
     * entry; the consecutive entries whose parent is the element are collected.
     *
     * @param nodeNr number of the in-memory element
     * @return a map from prefix to namespace URI, where a declaration with the local
     *         name "xmlns" is stored under the empty prefix; null if the index read
     *         for the element is negative
     */
    private Map getNamespaces(int nodeNr) {
        int ns = doc.alphaLen[nodeNr];
        if (ns < 0) {
            return null;
        }
        Map map = new HashMap();
        while (ns < doc.nextNamespace && doc.namespaceParent[ns] == nodeNr) {
            QName qn = doc.namespaceCode[ns];
            if ("xmlns".equals(qn.getLocalName())) {
                map.put("", qn.getNamespaceURI());
            } else {
                map.put(qn.getLocalName(), qn.getNamespaceURI());
            }
            ++ns;
        }
        return map;
    }

    /**
     * Stores the attributes of an in-memory element. For an element,
     * <code>doc.alpha[nodeNr]</code> is read as the index of its first attribute; the
     * consecutive attributes whose parent is the element are appended to
     * <code>elem</code> and stored.
     *
     * @param nodeNr number of the in-memory element
     * @param elem the persistent element that receives the attributes
     * @param path the node path passed along when storing each attribute
     * @throws DOMException declared for callers; not thrown directly by the code in this method
     */
    private void storeAttributes(int nodeNr, ElementImpl elem, NodePath path) throws DOMException {
        int attr = doc.alpha[nodeNr];
        if (-1 < attr) {
            while (attr < doc.nextAttr && doc.attrParent[attr] == nodeNr) {
                QName qn = doc.attrName[attr];
                AttrImpl attrib = (AttrImpl) NodePool.getInstance().borrowNode(Node.ATTRIBUTE_NODE);
                attrib.setNodeName(qn);
                attrib.setValue(doc.attrValue[attr]);
                attrib.setOwnerDocument(targetDoc);
                elem.appendChildInternal(prevNode, attrib);
                setPrevious(attrib);
                broker.storeNode(transaction, attrib, path, indexSpec);
                ++attr;
            }
        }
    }

    /**
     * Closes the given in-memory node. Only elements need closing: the matching
     * persistent element is popped from the stack, ended through the broker, removed
     * from the path and remembered as the previous node. Other node kinds are ignored.
     *
     * @param nodeNr number of the in-memory node to end
     * @param currentPath the node path of the current position; shortened for elements
     */
    private void endNode(int nodeNr, NodePath currentPath) {
        if (doc.nodeKind[nodeNr] == Node.ELEMENT_NODE) {
            ElementImpl last = (ElementImpl) stack.pop();
            broker.endElement(last, currentPath, null);
            currentPath.removeLastComponent();
            setPrevious(last);
        }
    }

    /**
     * Remembers <code>previous</code> as the most recently appended node. Before doing
     * so, the old previous node is cleared if it is a text, comment or processing
     * instruction node and the new node is null or of a different node type.
     *
     * @param previous the node that becomes the previous node; may be null
     */
    private void setPrevious(StoredNode previous) {
        if (prevNode != null) {
            if (prevNode.getNodeType() == Node.TEXT_NODE
                    || prevNode.getNodeType() == Node.COMMENT_NODE
                    || prevNode.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
                // text, comment and PI nodes are single reused instances in this class;
                // the instance is not cleared while the next node has the same type
                if (previous == null || prevNode.getNodeType() != previous.getNodeType()) {
                    prevNode.clear();
                }
            }
        }
        prevNode = previous;
    }
}