package org.cdlib.xtf.lazyTree;
import java.io.IOException;
import java.lang.ref.SoftReference;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Vector;
import net.sf.saxon.Configuration;
import net.sf.saxon.event.Receiver;
import net.sf.saxon.om.Axis;
import net.sf.saxon.om.AxisIterator;
import net.sf.saxon.om.DocumentInfo;
import net.sf.saxon.om.FastStringBuffer;
import net.sf.saxon.om.Item;
import net.sf.saxon.om.NamePool;
import net.sf.saxon.om.NodeInfo;
import net.sf.saxon.om.NodeListIterator;
import net.sf.saxon.om.StrippedNode;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.tree.SystemIdMap;
import net.sf.saxon.type.Type;
import org.cdlib.xtf.util.DiskHashReader;
import org.cdlib.xtf.util.DiskHashWriter;
import org.cdlib.xtf.util.PackedByteBuf;
import org.cdlib.xtf.util.StructuredStore;
import org.cdlib.xtf.util.SubStoreReader;
import org.cdlib.xtf.util.SubStoreWriter;
import org.cdlib.xtf.util.ThreadWatcher;
import org.cdlib.xtf.util.Trace;
/**
* <p>LazyDocument accesses the binary persistent disk file created by
* {@link LazyTreeBuilder}, loading nodes on demand rather than holding all
* of them in RAM.</p>
*
* <p>This class should never be instantiated directly, but rather loaded by
* LazyTreeBuilder.</p>
*
* <p>Once loaded, a soft reference to the node is kept in RAM; if memory runs
* low, these soft references will be thrown away. This behavior can be
* defeated by calling {@link #setAllPermanent(boolean)}.</p>
*
* @author Martin Haye
*/
public class LazyDocument extends ParentNodeImpl implements DocumentInfo,
PersistentTree
{
/** Saxon configuration info */
protected Configuration config;
/** Name pool used to look up namecodes */
protected NamePool namePool;
/** Unique number assigned to each document */
protected int documentNumber;
/** Determines whether this document is using namespaces. Not sure why
 * this works when false, but it does.
 */
protected boolean usesNamespaces = false;
/**
 * This structure supports trees whose root is an element node rather than
 * a document node. The document node still exists, for implementation
 * reasons, but it is not regarded as part of the tree. The variable
 * rootNodeNum holds the node number of the actual root of the tree, which
 * is the document node by default.
 */
protected int rootNodeNum = 0;
/** Flag denoting whether to print out when key indexes are created */
protected boolean debug = false;
/** The structured file that contains all our subfiles */
protected StructuredStore mainStore;
/** Contains all the text, processing instructions, and comments */
protected SubStoreReader textFile;
/** Contains all the nodes */
protected SubStoreReader nodeFile;
/** How many nodes, excluding attributes and namespaces. */
protected int numberOfNodes;
/** Size of the header on the node file. init() reads three ints from the
 * start of that file (root node number, node count, max node size);
 * presumably 3 x 4 bytes -- TODO confirm against the writer.
 */
protected static final int NODE_FILE_HEADER_SIZE = 12;
/** The size of the largest node entry on disk */
protected int maxNodeSize;
/** Byte buffer for reading nodes */
protected byte[] nodeBytes;
/** Buffer for unpacking nodes */
protected PackedByteBuf nodeBuf;
/** Contains all the attributes */
protected SubStoreReader attrFile;
/** The max size of any attribute block */
protected int maxAttrSize;
/** Byte buffer for reading attributes */
protected byte[] attrBytes;
/** Buffer for unpacking attributes */
protected PackedByteBuf attrBuf;
/** Number of namespaces currently declared */
public int numberOfNamespaces = 0;
/** namespaceParent is the index of the element node owning the namespace
 * declaration
 */
public int[] namespaceParent;
/** namespaceCode is the namespace code used by the name pool: the top
 * half is the prefix code, the bottom half the URI code
 */
public int[] namespaceCode;
/** Maps system IDs to nodes in the tree; created lazily on first set */
public SystemIdMap systemIdMap = null;
/** Maps name numbers in the file to namecodes in the current NamePool */
int[] nameNumToCode;
/** Caches nodes in memory so they only have to be loaded once. Keys are
 * Integer node numbers; values are either a NodeImpl (strong reference)
 * or a SoftReference to one.
 */
HashMap nodeCache = new HashMap();
/** True if nodes in the cache should be permanent (strong references),
 * false for soft references
 */
boolean allPermanent = false;
/** Notified of profile-related events; null when not profiling */
private LazyProfilingListener profileListener;
/** Counter to govern periodic checking for thread time limit */
private int killCheckCounter = 0;
/**
* Construct a new (empty) document. Should call
* {@link #init(NamePool, StructuredStore)} afterward.
*/
public LazyDocument(Configuration config) {
this.config = config;
documentNumber = config.getDocumentNumberAllocator()
.allocateDocumentNumber();
// Check if we're being profiled.
if (config.getTraceListener() instanceof LazyProfilingListener)
profileListener = (LazyProfilingListener)config.getTraceListener();
}
/**
 * Open a lazy tree and read in the root node.
 *
 * <p>Reads the name table, opens the node, attribute and text sub-stores,
 * reads their headers, allocates the read buffers and finally forces the
 * root node to be loaded. The "names" sub-store is only needed while the
 * name table is being read, so its reader is closed before returning
 * rather than being left open for the life of the document.</p>
 *
 * @param pool   The name pool to map namecodes with
 * @param store  The file to open
 * @throws IOException if any sub-store cannot be opened or read
 */
public void init(NamePool pool, StructuredStore store)
  throws IOException
{
  this.mainStore = store;
  nodeNum = 0;
  parentNum = -1;
  document = this;

  // Record the name pool.
  namePool = pool;

  synchronized (mainStore)
  {
    // First, read in the names.
    SubStoreReader namesFile = store.openSubStore("names");
    try
    {
      readNames(namesFile);

      // Now open the other files and read their headers.
      nodeFile = store.openSubStore("nodes");
      rootNodeNum = nodeFile.readInt();
      numberOfNodes = nodeFile.readInt();
      maxNodeSize = nodeFile.readInt();
      attrFile = store.openSubStore("attributes");
      maxAttrSize = attrFile.readInt();
      textFile = store.openSubStore("text");

      // Allocate the buffer for reading nodes.
      nodeBytes = new byte[maxNodeSize];
      nodeBuf = new PackedByteBuf(0);

      // Likewise for reading attributes.
      attrBytes = new byte[maxAttrSize];
      attrBuf = new PackedByteBuf(0);

      // Read in the root node. getNode() short-circuits when asked for
      // rootNodeNum, so temporarily clear it to force a real load.
      nodeNum = rootNodeNum;
      rootNodeNum = -1;
      getNode(nodeNum);
      rootNodeNum = nodeNum;
    }
    finally {
      // The name table is fully in memory now (or we failed); either way
      // this reader is never used again. It is closed only after the other
      // sub-stores are open, so the store never sees zero open readers on
      // the success path.
      namesFile.close();
    }
  }
} // init()
/**
 * Records whether cached nodes are meant to be kept until the tree goes
 * away instead of being held by soft references. When the flag is turned
 * on, a strong reference to this document is also stored in the node cache
 * under node number 0.
 *
 * @param flag true to request permanent caching
 */
public void setAllPermanent(boolean flag) {
  allPermanent = flag;
  if (!flag)
    return;
  nodeCache.put(Integer.valueOf(0), this);
}
/**
 * Turns the debugging output that accompanies index creation on or off.
 *
 * @param flag true to print debug lines, false to stay quiet
 */
public void setDebug(boolean flag) {
  debug = flag;
}
/**
 * Reports whether debug lines are printed during index creation.
 *
 * @return the value last passed to {@link #setDebug(boolean)} (false by default)
 */
public boolean getDebug() {
  return this.debug;
}
/**
 * Prints the collected profile. Does nothing when no profiling listener
 * was found at construction time.
 *
 * @throws IOException if the listener fails while printing
 */
public void printProfile()
  throws IOException
{
  if (profileListener == null)
    return;
  profileListener.printProfile();
}
/**
 * Closes all disk files opened by the document. While this will
 * theoretically be done when the LazyDocument is garbage collected, it's
 * a good idea to conserve file handles by closing them promptly as soon
 * as the tree's usefulness is done.
 *
 * <p>Every reader and the main store are closed even if closing an earlier
 * one fails, and anything that was never opened (for instance because
 * {@link #init(NamePool, StructuredStore)} was not called) is skipped.
 * I/O errors while closing are ignored.</p>
 */
public void close()
{
  try
  {
    try {
      closeReaders();
    }
    finally {
      // Always release the main store, even if a reader failed to close.
      if (mainStore != null)
        mainStore.close();
    }
  }
  catch (IOException e) {
    // Not a big deal if we can't close... ignore the error.
  }
} // close()

/**
 * Closes the text, node and attribute readers, attempting each one
 * regardless of whether a previous close failed.
 */
private void closeReaders()
  throws IOException
{
  try {
    if (textFile != null)
      textFile.close();
  }
  finally {
    try {
      if (nodeFile != null)
        nodeFile.close();
    }
    finally {
      if (attrFile != null)
        attrFile.close();
    }
  }
} // closeReaders()
/**
 * Loads the name tables from a sub-file of the persistent disk file.
 *
 * <p>The sub-file is read into memory in one go and then unpacked: first a
 * list of namespace declarations (prefix, URI, owning element number),
 * then a list of names (prefix, URI, local name). Each is registered with
 * the name pool and the resulting codes are kept in
 * {@code namespaceCode} / {@code nameNumToCode}.</p>
 *
 * @param in  The subfile to load from
 * @throws IOException if the sub-file cannot be read
 */
private void readNames(SubStoreReader in)
  throws IOException
{
  // Pull the whole packed table into memory.
  byte[] packed = new byte[(int)in.length()];
  in.read(packed);
  PackedByteBuf names = new PackedByteBuf(packed);

  // Namespace declarations come first.
  numberOfNamespaces = names.readInt();
  namespaceParent = new int[numberOfNamespaces];
  namespaceCode = new int[numberOfNamespaces];
  int ns = 0;
  while (ns < numberOfNamespaces) {
    String nsPrefix = names.readString();
    String nsUri = names.readString();
    namespaceCode[ns] = namePool.allocateNamespaceCode(nsPrefix, nsUri);
    namespaceParent[ns] = names.readInt();
    ns++;
  }

  // Then the names themselves, translated to codes in our name pool.
  final int nameCount = names.readInt();
  nameNumToCode = new int[nameCount];
  int n = 0;
  while (n < nameCount) {
    String prefix = names.readString();
    String uri = names.readString();
    String local = names.readString();
    nameNumToCode[n] = namePool.allocate(prefix, uri, local);
    n++;
  }
} // readNames()
/**
 * Stores an xsl:key index on disk so that {@link #getIndex(String)} can
 * read it back later.
 *
 * <p>For each key, the list of node numbers is written as a count followed
 * by the difference between each node number and the one before it
 * (starting from zero).</p>
 *
 * @param indexName Uniquely computed name
 * @param index     HashMap mapping String -> ArrayList[NodeImpl]
 * @throws IOException if the index cannot be written
 */
public void putIndex(String indexName, Map index)
  throws IOException
{
  DiskHashWriter hashWriter = new DiskHashWriter();
  PackedByteBuf packed = new PackedByteBuf(100);

  // Pack one entry per key into the hash writer.
  Iterator entries = index.entrySet().iterator();
  while (entries.hasNext())
  {
    Map.Entry entry = (Map.Entry)entries.next();
    String key = (String)entry.getKey();
    ArrayList members = (ArrayList)entry.getValue();
    final int count = members.size();

    packed.reset();
    packed.writeInt(count);

    int previous = 0;
    for (int pos = 0; pos < count; pos++)
    {
      Item member = (Item)members.get(pos);

      // Work out the node number, unwrapping stripped nodes if needed.
      int num;
      if (member instanceof NodeImpl)
        num = ((NodeImpl)member).nodeNum;
      else if (member instanceof StrippedNode) {
        NodeImpl underlying =
          (NodeImpl)((StrippedNode)member).getUnderlyingNode();
        num = underlying.nodeNum;
      }
      else {
        assert false : "Cannot get node number";
        num = 1;
      }

      packed.writeInt(num - previous);
      previous = num;
    } // for pos

    hashWriter.put(key, packed);
  } // while entries

  // Write out the full hash while holding the store lock, so that two
  // sub-files are never being written at the same time.
  synchronized (mainStore) {
    hashWriter.outputTo(mainStore.createSubStore(indexName));
  }
} // putIndex()
/**
 * Opens a disk-based xsl:key index previously stored by putIndex(). Only
 * a reader is created here; entries are fetched on demand through the
 * returned DiskHashReader.
 *
 * @param indexName Name of the index to load
 * @return Reader to access the index with, or null if the index could not
 *         be opened for any reason.
 */
public DiskHashReader getIndex(String indexName)
{
  DiskHashReader reader;
  try
  {
    synchronized (mainStore) {
      reader = new DiskHashReader(mainStore.openSubStore(indexName));
    }
  }
  catch (Exception e) {
    // Any failure is reported to the caller as "no such index".
    reader = null;
  }
  return reader;
} // getIndex()
/**
 * Returns the Saxon configuration this document was constructed with.
 */
public Configuration getConfiguration() {
  return this.config;
}
/**
 * Returns the name pool that the names in this document are registered
 * in (the one recorded by init()).
 */
public NamePool getNamePool() {
  return this.namePool;
}
/**
 * Returns the document number that was allocated for this document when
 * it was constructed.
 */
public int getDocumentNumber() {
  return this.documentNumber;
}
/**
 * Set the root node. Parentless elements are implemented using a full
 * tree structure containing a document node, but the document node is not
 * regarded as part of the tree.
 *
 * @param root the new root; must be a NodeImpl
 */
public void setRootNode(NodeInfo root) {
  NodeImpl rootImpl = (NodeImpl)root;
  rootNodeNum = rootImpl.nodeNum;
}
/**
 * Set the type annotation of an element node. Not supported by the lazy
 * tree: fails an assertion when assertions are enabled and otherwise does
 * nothing.
 */
protected void setElementAnnotation(int nodeNum, int typeCode) {
assert false : "LazyTree doesn't support element annotations yet";
}
/**
 * Get the type annotation of a node.
 *
 * @return always -1, meaning there is no type annotation
 */
protected int getTypeAnnotation(int nodeNum) {
return -1;
}
/**
 * Get the type of node this document is -- ie it's a document node.
 *
 * @return Type.DOCUMENT (always)
 */
public int getNodeKind() {
return Type.DOCUMENT;
}
/**
 * Get a node by its node number, loading it from disk if necessary.
 *
 * <p>A freshly loaded node is added to the node cache: as a strong
 * reference when {@link #setAllPermanent(boolean)} has been turned on,
 * otherwise through a soft reference.</p>
 *
 * <p>NOTE(review): the raw bytes are read under the store lock, but the
 * shared nodeBytes/nodeBuf buffers are decoded after the lock is released
 * and the cache is a plain HashMap -- confirm that a document is only ever
 * used by one thread at a time.</p>
 *
 * @param num  The number to get
 * @return     A node, or null if the number is invalid, the node is of a
 *             kind that is not supported (comment, processing instruction
 *             or unknown), or it could not be read from disk.
 */
public NodeImpl getNode(int num)
{
  // If it's in the cache, we need do no more.
  NodeImpl node = checkCache(num);
  if (node != null)
    return node;

  try
  {
    // Validate the number
    if (num >= numberOfNodes || num < 0) {
      assert num == -1;
      return null;
    }

    // Easy out for root node
    if (num == rootNodeNum)
      return this;

    // Bump the count if we're profiling
    if (profileListener != null)
      profileListener.bumpCount(num);

    // Read the most data it could be. The offset is computed in long
    // arithmetic so it cannot wrap around for very large node files.
    synchronized (mainStore) {
      nodeFile.seek(NODE_FILE_HEADER_SIZE + ((long)num * maxNodeSize));
      nodeFile.read(nodeBytes);
    }

    // Get the type and the flags.
    nodeBuf.setBytes(nodeBytes);
    short kind = nodeBuf.readByte();
    int flags = nodeBuf.readInt();

    // Construct the node based on the kind.
    switch (kind) {
      case Type.DOCUMENT:
        node = this;
        break;
      case Type.ELEMENT:
        node = createElementNode();
        break;
      case Type.TEXT:
        node = createTextNode();
        break;
      case Type.COMMENT:
        // No node object exists for this kind; bail out rather than
        // dereferencing null below when assertions are disabled.
        assert false : "comments not yet supported";
        return null;
      case Type.PROCESSING_INSTRUCTION:
        assert false : "processing instructions not yet supported";
        return null;
      default:
        assert false : "Invalid node kind";
        return null;
    }

    // Make sure the node knows how to get back to the document.
    node.nodeNum = num;
    node.document = this;

    // Read other stuff according to the flags. Fields are stored in this
    // fixed order and are present only when their flag is set.
    if ((flags & Flag.HAS_NAMECODE) != 0) {
      int nameIdx = nodeBuf.readInt();
      node.nameCode = nameNumToCode[nameIdx];
    }
    else
      node.nameCode = -1;

    if ((flags & Flag.HAS_PARENT) != 0)
      node.parentNum = nodeBuf.readInt();
    else
      node.parentNum = -1;

    if ((flags & Flag.HAS_PREV_SIBLING) != 0)
      node.prevSibNum = nodeBuf.readInt();
    else
      node.prevSibNum = -1;

    if ((flags & Flag.HAS_NEXT_SIBLING) != 0)
      node.nextSibNum = nodeBuf.readInt();
    else
      node.nextSibNum = -1;

    assert node.prevSibNum != node.nextSibNum || node.prevSibNum < 0;
    assert node.prevSibNum < node.nodeNum;
    assert node.nextSibNum > node.nodeNum || node.nextSibNum < 0;

    if ((flags & Flag.HAS_CHILD) != 0) {
      assert node instanceof ParentNodeImpl;
      ((ParentNodeImpl)node).childNum = nodeBuf.readInt();
      assert ((ParentNodeImpl)node).childNum > 0;
    }
    else if (node instanceof ParentNodeImpl)
      ((ParentNodeImpl)node).childNum = -1;

    int alpha = -1;
    if ((flags & Flag.HAS_ALPHA) != 0)
      alpha = nodeBuf.readInt();

    int beta = -1;
    if ((flags & Flag.HAS_BETA) != 0)
      beta = nodeBuf.readInt();

    node.init(alpha, beta);

    // All done! Cache strongly if the caller asked for permanent nodes
    // (checkCache() understands both forms), softly otherwise.
    Integer key = Integer.valueOf(num);
    if (allPermanent)
      nodeCache.put(key, node);
    else
      nodeCache.put(key, new SoftReference(node));
    return node;
  } // try
  catch (IOException e) {
    return null;
  }
} // getNode()
/**
 * Looks in the node cache for an already-loaded node with the given
 * number.
 *
 * <p>As a side job, roughly every thousand calls this also asks
 * ThreadWatcher whether the current thread has run past its time limit,
 * and aborts the request with a RuntimeException if so.</p>
 *
 * @param num node number to look up
 * @return the cached node, or null if it is not cached (or its soft
 *         reference has been cleared, in which case the stale entry is
 *         removed).
 */
protected NodeImpl checkCache(int num)
{
  // Periodic runaway-thread check.
  if (killCheckCounter++ > 1000) {
    killCheckCounter = 0;
    if (ThreadWatcher.shouldDie(Thread.currentThread()))
      throw new RuntimeException("Runaway request - time limit exceeded");
  }

  final Integer key = Integer.valueOf(num);
  final Object cached = nodeCache.get(key);

  // A strong reference can be handed back directly.
  if (cached instanceof NodeImpl)
    return (NodeImpl)cached;

  // Nothing cached at all.
  if (!(cached instanceof SoftReference))
    return null;

  // Soft reference: it may have been cleared; if so drop the dead entry.
  NodeImpl node = (NodeImpl)((SoftReference)cached).get();
  if (node == null)
    nodeCache.remove(key);
  return node;
} // checkCache()
/**
 * Create an element node. Derived classes can override this to provide their
 * own element implementation.
 *
 * @return a new, uninitialized ElementImpl; getNode() fills in its fields
 */
protected NodeImpl createElementNode() {
return new ElementImpl();
}
/**
 * Create a text node. Derived classes can override this to provide their
 * own text implementation.
 *
 * @return a new, uninitialized TextImpl; getNode() fills in its fields
 */
protected NodeImpl createTextNode() {
return new TextImpl();
}
/**
 * Get the node sequence number (in document order) of this document node.
 *
 * @return 0 always
 */
public long getSequenceNumber() {
return 0;
}
/**
 * Get next sibling of this document node - always null.
 * @return null
 */
public final NodeInfo getNextSibling() {
return null;
}
/**
 * Get previous sibling of this document node - always null.
 * @return null
 */
public final NodeInfo getPreviousSibling() {
return null;
}
/**
 * Appends an identifier for this document node to the given buffer: the
 * letter 'd' followed by the document number.
 *
 * @param buffer a buffer into which the identifier is appended
 */
public void generateId(FastStringBuffer buffer) {
  final String number = Integer.toString(documentNumber);
  buffer.append('d');
  buffer.append(number);
}
/**
 * Reports the value of the usesNamespaces flag for this document.
 */
protected boolean isUsingNamespaces() {
  return this.usesNamespaces;
}
/**
 * Set the system id of this document node. A null URI is stored as the
 * empty string; the system-id map is created on first use.
 *
 * @param uri the system id, may be null
 */
public void setSystemId(String uri) {
  final String effectiveUri = (uri == null) ? "" : uri;
  if (systemIdMap == null)
    systemIdMap = new SystemIdMap();
  systemIdMap.setSystemId(nodeNum, effectiveUri);
}
/**
 * Get the system id of this root node.
 *
 * @return the recorded system id, or null if no system id has ever been
 *         set on this document
 */
public String getSystemId() {
  return (systemIdMap != null) ? systemIdMap.getSystemId(nodeNum) : null;
}
/**
 * Get the base URI of this root node. For a root node the base URI is the same as the
 * System ID.
 *
 * @return whatever {@link #getSystemId()} returns (possibly null)
 */
public String getBaseURI() {
return getSystemId();
}
/**
 * Set the system id of an element in the document. A null URI is stored
 * as the empty string; the system-id map is created on first use.
 *
 * @param seq number identifying the node in the system-id map
 * @param uri the system id, may be null
 */
protected void setSystemId(int seq, String uri)
{
  if (systemIdMap == null)
    systemIdMap = new SystemIdMap();
  systemIdMap.setSystemId(seq, (uri != null) ? uri : "");
}
/**
 * Get the system id of an element in the document.
 *
 * @param seq number identifying the node in the system-id map
 * @return the recorded system id, or null if no system id has ever been
 *         set on this document
 */
protected String getSystemId(int seq)
{
  return (systemIdMap == null) ? null : systemIdMap.getSystemId(seq);
}
/**
 * Set line numbering on. Not supported by the lazy tree: fails an
 * assertion when assertions are enabled and otherwise does nothing.
 */
public void setLineNumbering() {
assert false : "LazyTree does not support line numbering yet";
}
/**
 * Set the line number for an element. Not supported by the lazy tree:
 * fails an assertion when assertions are enabled and otherwise does
 * nothing.
 */
protected void setLineNumber(int sequence, int line) {
assert false : "LazyTree does not support line numbering yet";
}
/**
 * Get the line number for an element. Not supported by the lazy tree:
 * fails an assertion when assertions are enabled.
 *
 * @return -1 always (when assertions are disabled)
 */
protected int getLineNumber(int sequence) {
assert false : "LazyTree does not support line numbering yet";
return -1;
}
/**
 * Get the line number of this root node.
 * @return 0 always
 */
public int getLineNumber() {
return 0;
}
/**
 * Return the type of node.
 * @return Type.DOCUMENT (always)
 */
public final int getItemType() {
return Type.DOCUMENT;
}
/**
 * Get the root node.
 *
 * @return the NodeInfo that is the root of the tree - not necessarily a
 *         document node. This document itself is returned when it is the
 *         root; otherwise the root is fetched by its node number.
 */
public NodeInfo getRoot() {
  if (rootNodeNum == nodeNum)
    return this;
  return getNode(rootNodeNum);
}
/**
 * Get the root (document) node.
 *
 * @return the DocumentInfo representing the document node, or null if the
 *         root of the tree is not a document node
 */
public DocumentInfo getDocumentRoot() {
  if (rootNodeNum != nodeNum)
    return null;
  return this;
}
/**
 * Get a character string that identifies this document node.
 *
 * @return the letter 'd' followed by the document number
 */
public String generateId() {
  return "d" + Integer.toString(documentNumber);
}
/**
 * Get a list of all elements with a given name. This is implemented
 * as a memo function backed by the main store: the first time it is called
 * for a particular name, every node is scanned and the matching node
 * numbers are saved in a sub-store named "all-" plus the display name;
 * later calls just read that sub-store back.
 *
 * <p>Saving the list is best-effort: if the sub-store cannot be written
 * the freshly built list is still returned. Likewise, if a saved list
 * cannot be read it is rebuilt by scanning. In both cases the sub-store
 * reader or writer is closed even when the read or write fails.</p>
 *
 * @param fingerprint name fingerprint of the elements wanted
 * @return iterator over the matching nodes, in node-number order
 */
protected AxisIterator getAllElements(int fingerprint)
{
  synchronized (mainStore)
  {
    // See if there's already a subfile for this name.
    String subName = "all-" + namePool.getDisplayName(fingerprint);
    try
    {
      SubStoreReader indexFile = mainStore.openSubStore(subName);
      try
      {
        PackedByteBuf buf = new PackedByteBuf(indexFile, (int)indexFile.length());
        int nNodes = buf.readInt();
        ArrayList nodes = new ArrayList(nNodes);

        // Node numbers are stored as deltas from the previous number.
        int curNodeNum = 0;
        for (int i = 0; i < nNodes; i++) {
          curNodeNum += buf.readInt();
          nodes.add(getNode(curNodeNum));
        }
        return new NodeListIterator(nodes);
      }
      finally {
        // Release the reader whether or not the list could be decoded.
        indexFile.close();
      }
    }
    catch (IOException e) {
      // No saved list (or it was unreadable): fall through and build it.
    }

    if (debug) {
      Trace.debug(
        "Building list of elements named '" +
        namePool.getDisplayName(fingerprint) + "'.");
    }

    // Okay, we need to build a list by scanning every node.
    ArrayList nodes = new ArrayList(numberOfNodes / 8);
    for (int i = 0; i < numberOfNodes; i++) {
      NodeImpl node = getNode(i);
      if (node == null || (node.getNameCode() & 0xfffff) != fingerprint)
        continue;
      nodes.add(node);
    }

    // Pack up the results as a count followed by node-number deltas.
    PackedByteBuf buf = new PackedByteBuf(nodes.size() * 3);
    buf.writeInt(nodes.size());
    int curNum = 0;
    for (int i = 0; i < nodes.size(); i++) {
      int num = ((NodeImpl)nodes.get(i)).nodeNum;
      buf.writeInt(num - curNum);
      curNum = num;
    }

    try
    {
      // Now write a new sub-file.
      SubStoreWriter indexFile = mainStore.createSubStore(subName);
      try {
        buf.output(indexFile);
      }
      finally {
        indexFile.close();
      }
    }
    catch (IOException e) {
      // Couldn't save the list; not fatal, it will be rebuilt next time.
    }

    // Return the list we made (no need to re-read it).
    return new NodeListIterator(nodes);
  }
} // getAllElements()
/**
 * Get the element with a given ID. Not supported by the lazy tree: fails
 * an assertion when assertions are enabled.
 *
 * @param id The unique ID of the required element
 * @return null always (when assertions are disabled)
 */
public NodeInfo selectID(String id) {
assert false : "LazyTree does not support selectId() yet";
return null;
}
/**
 * Get the unparsed entity with a given name. Not supported by the lazy
 * tree: fails an assertion when assertions are enabled.
 *
 * @param name the name of the entity
 * @return null always (when assertions are disabled)
 */
public String[] getUnparsedEntity(String name) {
assert false : "LazyTree does not support unparsed entities yet";
return null;
}
/**
 * Copy this node to a given outputter. Nothing is emitted for the
 * document node itself; each child is copied in turn with the same
 * options.
 *
 * @throws XPathException if copying a child fails
 */
public void copy(Receiver out, int whichNamespaces, boolean copyAnnotations,
                 int locationId)
  throws XPathException
{
  AxisIterator kids = iterateAxis(Axis.CHILD);
  for (NodeInfo kid = (NodeInfo)kids.next();
       kid != null;
       kid = (NodeInfo)kids.next())
  {
    kid.copy(out, whichNamespaces, copyAnnotations, locationId);
  }
}
} // class LazyDocument
//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is Michael H. Kay.
//
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
// Contributor(s): none.
//