package net.sf.saxon.tinytree;
/**
* Hacked version of the TinyBuilder class. Changes include:
* (1) Made some members public so they can be read to write a lazy tree file.
* (2) Added straight-to-disk text storage.
* (3) Changed to filter out processing instructions and comments.
*/
import java.io.IOException;
import org.cdlib.xtf.util.PackedByteBuf;
import org.cdlib.xtf.util.StructuredStore;
import org.cdlib.xtf.util.SubStoreWriter;
import net.sf.saxon.event.Builder;
import net.sf.saxon.event.LocationProvider;
import net.sf.saxon.event.ReceiverOptions;
import net.sf.saxon.event.SourceLocationProvider;
import net.sf.saxon.om.StandardNames;
import net.sf.saxon.tinytree.TinyDocumentImpl;
import net.sf.saxon.tinytree.TinyTree;
import net.sf.saxon.trans.DynamicError;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.type.Type;
public class HackedTinyBuilder extends Builder
{
// MCH: Added stuff for straight-to-disk text storage.
private PackedByteBuf textBuf = new PackedByteBuf(1000);
private StructuredStore treeStore;
private SubStoreWriter textStore;
public void setTreeStore(StructuredStore treeStore) {
this.treeStore = treeStore;
}
public StructuredStore getTreeStore() {
return treeStore;
}
public void setTextStore(SubStoreWriter textStore) {
this.textStore = textStore;
}
public SubStoreWriter getTextStore() {
return textStore;
}
// MCH: Get rid of parent pointers, which mess up our node counts.
public static final int PARENT_POINTER_INTERVAL = Integer.MAX_VALUE;
// a lower value allocates more parent pointers which takes more space but reduces
// the length of parent searches
private TinyTree tree;
private int currentDepth = 0;
private int nodeNr = 0; // this is the local sequence within this document
private boolean ended = false;
private int[] sizeParameters; // estimate of number of nodes, attributes, namespaces, characters
public HackedTinyBuilder() {
}
public void setSizeParameters(int[] params) {
sizeParameters = params;
}
public int[] getSizeParameters()
{
int[] params = {
tree.getNumberOfNodes(), tree.getNumberOfAttributes(),
tree.getNumberOfNamespaces(),
tree.getCharacterBuffer().length()
};
return params;
}
private int[] prevAtDepth = new int[100];
// this array is scaffolding used while constructing the tree, it is
// not present in the final tree. For each level of the tree, it records the
// node number of the most recent node at that level.
private int[] siblingsAtDepth = new int[100];
// more scaffolding. For each level of the tree, this array records the
// number of siblings processed at that level. When this exceeds a threshold value,
// a dummy node is inserted into the arrays to contain a parent pointer: this it to
// prevent excessively long searches for a parent node, which is normally found by
// scanning the siblings.
private boolean isIDElement = false;
public TinyTree getTree() {
return tree;
}
/**
* Open the event stream
*/
public void open()
throws XPathException
{
if (started)
{
// this happens when using an IdentityTransformer
return;
}
if (tree == null)
{
if (sizeParameters == null) {
tree = new TinyTree();
}
else {
tree = new TinyTree(sizeParameters[0],
sizeParameters[1],
sizeParameters[2],
sizeParameters[3]);
}
tree.setConfiguration(config);
currentDepth = 0;
if (lineNumbering) {
tree.setLineNumbering();
}
}
super.open();
}
/**
* Write a document node to the tree
*/
public void startDocument(int properties)
throws XPathException
{
// if (currentDepth == 0 && tree.numberOfNodes != 0) {
// System.err.println("**** FOREST DOCUMENT ****");
// }
if ((started && !ended) || currentDepth > 0)
{
// this happens when using an IdentityTransformer, or when copying a document node to form
// the content of an element
return;
}
started = true;
ended = false;
currentRoot = new TinyDocumentImpl(tree);
TinyDocumentImpl doc = (TinyDocumentImpl)currentRoot;
doc.setSystemId(getSystemId());
doc.setBaseURI(getBaseURI());
doc.setConfiguration(config);
currentDepth = 0;
tree.addDocumentNode((TinyDocumentImpl)currentRoot);
prevAtDepth[0] = 0;
prevAtDepth[1] = -1;
siblingsAtDepth[0] = 0;
siblingsAtDepth[1] = 0;
tree.next[0] = -1;
currentDepth++;
super.startDocument(0);
}
/**
* Callback interface for SAX: not for application use
*/
public void endDocument()
throws XPathException
{
// System.err.println("TinyBuilder: " + this + " End document");
if (currentDepth > 1)
return;
// happens when copying a document node as the child of an element
if (ended)
return; // happens when using an IdentityTransformer
ended = true;
prevAtDepth[currentDepth] = -1;
currentDepth--;
}
public void close()
throws XPathException
{
//System.err.println("Tree.close " + tree + " size=" + tree.numberOfNodes);
tree.addNode(Type.STOPPER, 0, 0, 0, -1);
tree.condense();
super.close();
}
/**
* Notify the start tag of an element
*/
public void startElement(int nameCode, int typeCode, int locationId,
int properties)
throws XPathException
{
// if (currentDepth == 0 && tree.numberOfNodes != 0) {
// System.err.println("**** FOREST ELEMENT **** trees=" + tree.rootIndexUsed );
// }
// if the number of siblings exceeds a certain threshold, add a parent pointer, in the form
// of a pseudo-node
if (siblingsAtDepth[currentDepth] > PARENT_POINTER_INTERVAL)
{
nodeNr = tree.addNode(Type.PARENT_POINTER,
currentDepth,
prevAtDepth[currentDepth - 1],
0,
0);
int prev = prevAtDepth[currentDepth];
if (prev > 0) {
tree.next[prev] = nodeNr;
}
tree.next[nodeNr] = prevAtDepth[currentDepth - 1];
prevAtDepth[currentDepth] = nodeNr;
siblingsAtDepth[currentDepth] = 0;
}
// now add the element node itself
nodeNr = tree.addNode(Type.ELEMENT, currentDepth, -1, -1, nameCode);
isIDElement = ((properties & ReceiverOptions.IS_ID) != 0);
if (typeCode != StandardNames.XS_UNTYPED && typeCode != -1)
{
tree.setElementAnnotation(nodeNr, typeCode);
if (!isIDElement && config.getTypeHierarchy().isIdCode(typeCode)) {
isIDElement = true;
}
}
if (currentDepth == 0)
{
prevAtDepth[0] = nodeNr;
prevAtDepth[1] = -1;
//tree.next[0] = -1;
currentRoot = tree.getNode(nodeNr);
}
else {
int prev = prevAtDepth[currentDepth];
if (prev > 0) {
tree.next[prev] = nodeNr;
}
tree.next[nodeNr] = prevAtDepth[currentDepth - 1]; // *O* owner pointer in last sibling
prevAtDepth[currentDepth] = nodeNr;
siblingsAtDepth[currentDepth]++;
}
currentDepth++;
if (currentDepth == prevAtDepth.length) {
int[] p2 = new int[currentDepth * 2];
System.arraycopy(prevAtDepth, 0, p2, 0, currentDepth);
prevAtDepth = p2;
p2 = new int[currentDepth * 2];
System.arraycopy(siblingsAtDepth, 0, p2, 0, currentDepth);
siblingsAtDepth = p2;
}
prevAtDepth[currentDepth] = -1;
siblingsAtDepth[currentDepth] = 0;
LocationProvider locator = pipe.getLocationProvider();
if (locator instanceof SourceLocationProvider)
{
tree.setSystemId(nodeNr, locator.getSystemId(locationId));
if (lineNumbering) {
tree.setLineNumber(nodeNr, locator.getLineNumber(locationId));
}
}
else if (currentDepth == 1) {
tree.setSystemId(nodeNr, systemId);
}
}
public void namespace(int namespaceCode, int properties)
throws XPathException
{
tree.addNamespace(nodeNr, namespaceCode);
}
public void attribute(int nameCode, int typeCode, CharSequence value,
int locationId, int properties)
throws XPathException
{
// System.err.println("attribute " + nameCode + "=" + value);
tree.addAttribute(currentRoot, nodeNr, nameCode, typeCode, value, properties);
}
public void startContent() {
nodeNr++;
}
/**
* Callback interface for SAX: not for application use
*/
public void endElement()
throws XPathException
{
prevAtDepth[currentDepth] = -1;
siblingsAtDepth[currentDepth] = 0;
currentDepth--;
if (isIDElement)
{
// we're relying on the fact that an ID element has no element children!
tree.indexIDElement(currentRoot,
prevAtDepth[currentDepth],
config.getNameChecker());
isIDElement = false;
}
}
/**
* Callback interface for SAX: not for application use
*/
public void characters(CharSequence chars, int locationId, int properties)
throws XPathException
{
// MCH: Added straight-to-disk storage of text
final int len = chars.length();
if (len > 0)
{
long startPos;
textBuf.reset();
textBuf.writeCharSequence(chars);
try {
startPos = textStore.length();
textBuf.output(textStore);
}
catch (IOException e) {
throw new DynamicError(e);
}
nodeNr = tree.addNode(Type.TEXT,
currentDepth,
(int)startPos,
textBuf.length(),
-1);
int prev = prevAtDepth[currentDepth];
if (prev > 0) {
tree.next[prev] = nodeNr;
}
tree.next[nodeNr] = prevAtDepth[currentDepth - 1]; // *O* owner pointer in last sibling
prevAtDepth[currentDepth] = nodeNr;
siblingsAtDepth[currentDepth]++;
}
}
/**
* Callback interface for SAX: not for application use<BR>
*/
public void processingInstruction(String piname, CharSequence remainder,
int locationId, int properties)
throws XPathException
{
// MCH: Filter out processing instructions
}
/**
* Callback interface for SAX: not for application use
*/
public void comment(CharSequence chars, int locationId, int properties)
throws XPathException
{
// MCH: Filter out comments
}
/**
* Set an unparsed entity in the document
*/
public void setUnparsedEntity(String name, String uri, String publicId) {
((TinyDocumentImpl)currentRoot).setUnparsedEntity(name, uri, publicId);
}
}
//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is Michael H. Kay.
//
// Portions created by Martin Haye, marked by "MCH:", are Copyright (C) 2005, Regents of the University of California. All Rights Reserved.
//
// Contributor(s): Martin Haye.
//