package org.cdlib.xtf.lazyTree;
import net.sf.saxon.Configuration;
import net.sf.saxon.expr.Expression;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.DocumentInfo;
import net.sf.saxon.om.NamePool;
import net.sf.saxon.om.StrippedNode;
import net.sf.saxon.pattern.NodeTestPattern;
import net.sf.saxon.pattern.PatternFinder;
import net.sf.saxon.sort.IntIterator;
import net.sf.saxon.type.BuiltInAtomicType;
import net.sf.saxon.value.StringValue;
import net.sf.saxon.trans.KeyDefinition;
import net.sf.saxon.trans.KeyManager;
import net.sf.saxon.trans.XPathException;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Map;
import java.util.List;
import java.util.Set;
import org.apache.lucene.util.Hash64;
import org.cdlib.xtf.util.DiskHashReader;
import org.cdlib.xtf.util.Trace;
/**
* LazyKeyManager wraps a Saxon KeyManager, but stores keys on disk instead
* of keeping them in RAM. If the same index is accessed later, it need not
* be recomputed.
*
* @author Martin Haye
*/
public class LazyKeyManager extends KeyManager
{
/** Count of keys actually stored on disk */
private int nKeysStored;
/**
* Construct and initialize the manager, grabbing existing key definitions
* from the previous key manager.
*/
public LazyKeyManager(Configuration config, KeyManager prevMgr) {
super(config);
keyList = prevMgr.keyList;
}
// inherit JavaDoc
public synchronized Map buildIndex(int keyNameFingerprint,
BuiltInAtomicType itemType,
Set foundItemTypes, DocumentInfo doc,
XPathContext context)
throws XPathException
{
// If the document isn't a lazy tree, just do normal index building
// (we can't store keys for a non-lazy tree).
//
LazyDocument document = getDocumentImpl(doc);
if (document == null)
return super.buildIndex(keyNameFingerprint, itemType, foundItemTypes, doc, context);
// If the key name has 'dynamic' in it, this is a signal that we
// shouldn't store the index...
//
NamePool pool = context.getController().getNamePool();
String fingerName = pool.getDisplayName(keyNameFingerprint);
if (fingerName.indexOf("dynamic") >= 0)
{
if (document.getDebug()) {
Trace.debug(
"Building dynamic (non-stored) index " +
new File(doc.getSystemId()).getName() + ": '" + fingerName + ":" +
itemType + "'");
}
Map index = super.buildIndex(keyNameFingerprint, itemType, foundItemTypes, doc, context);
if (document.getDebug())
Trace.debug("...done");
return index;
}
// Calculate a string to uniquely describe this index.
List definitions = getKeyDefinitions(keyNameFingerprint);
String indexName = calcIndexName(pool, fingerName, definitions, document.config);
// Do we already have a stored version of this index?
DiskHashReader reader = document.getIndex(indexName);
if (reader != null)
return new LazyHashMap(document, reader);
if (document.getDebug()) {
Trace.info(
"Building key index " + new File(doc.getSystemId()).getName() +
": '" + fingerName + "' {" + indexName + "}...");
}
// Alert! since our indexes on disk are always string, convert to
// string.
//
itemType = BuiltInAtomicType.STRING;
// Use Saxon's method to do the work of computing the nodes
Map index = super.buildIndex(keyNameFingerprint,
itemType,
foundItemTypes,
doc,
context);
// Store it, then return.
try {
document.putIndex(indexName, index);
nKeysStored++;
}
catch (IOException e) {
Trace.error("Error storing persistent index! " + e);
return index;
}
if (document.getDebug()) {
Trace.info("...done");
}
return new LazyHashMap(document, document.getIndex(indexName));
}
/**
* Optimized to use node test directly when possible, for speed.
*/
protected void constructIndex(DocumentInfo doc, Map index,
KeyDefinition keydef,
BuiltInAtomicType soughtItemType,
Set foundItemTypes, XPathContext context,
boolean isFirst)
throws XPathException
{
PatternFinder match = keydef.getMatch();
if (match instanceof NodeTestPattern) {
match = new FastNodeTestPattern(((NodeTestPattern)match).getNodeTest());
KeyDefinition oldDef = keydef;
keydef = new KeyDefinition(match,
oldDef.getUse(),
oldDef.getCollationName(),
oldDef.getCollation());
}
super.constructIndex(doc, index, keydef, soughtItemType,
foundItemTypes, context, isFirst);
}
/**
* Tells whether any keys have been registered.
*/
public boolean isEmpty() {
return keyList.size() == 0;
}
/**
* Called after creation of a lazy tree during the index process.
* Iterates through all registered keys, and builds the associated
* disk-based key indexes on the given tree.
*
* @param doc The LazyTree to work on.
* @param context Context used for name pool, etc.
*
* @return int The number of keys created
*/
public synchronized int createAllKeys(LazyDocument doc, XPathContext context)
throws XPathException
{
StringValue val = new StringValue("1");
NamePool pool = context.getController().getNamePool();
// In debug mode, output keys being created.
if (Trace.getOutputLevel() == Trace.debug)
doc.setDebug(true);
// Create a key for every definition we have, and count how many actually
// get stored on disk.
//
nKeysStored = 0;
IntIterator iter = keyList.keyIterator();
while (iter.hasNext()) {
int fingerprint = iter.next();
String fingerName = pool.getDisplayName(fingerprint);
if (fingerName.indexOf("dynamic") >= 0)
continue;
// Do a fake lookup on this fingerprint, and ignore the results.
// This will have the effect of building the on-disk hash.
//
selectByKey(fingerprint, doc, val, context);
}
return nKeysStored;
} // createAllKeys()
/**
* Retrieve the lazy document for the given doc, if possible.
*/
public static LazyDocument getDocumentImpl(DocumentInfo doc)
{
while (doc instanceof StrippedNode)
doc = (DocumentInfo)((StrippedNode)doc).getUnderlyingNode();
if (doc instanceof LazyDocument)
return (LazyDocument)doc;
else
return null;
} // getDocumentImpl()
/**
* Calculates a string name for a given set of xsl:key definitions. This
* is done very carefully to ensure that the same key will generate the
* same name, regardless of ephemeral things like particular name codes
* or other variables that might be different on a different run.
*
* @param pool Name pool used to look up names
* @param fingerName Fingerprint of the key
* @param definitions List of key definitions
* @param config Associated Saxon configuration
*
* @return A unique string for this xsl:key
*/
private String calcIndexName(NamePool pool,
String fingerName,
List definitions,
Configuration config)
{
StringBuffer sbuf = new StringBuffer();
sbuf.append("key|" + fingerName);
for (int k = 0; k < definitions.size(); k++)
{
KeyDefinition def = (KeyDefinition)definitions.get(k);
// Capture the match pattern.
String matchStr = def.getMatch().toString();
sbuf.append("|" + Long.toString(Hash64.hash(matchStr), 16));
// Capture the 'use' expression
if (def.getUse() instanceof Expression)
{
// Saxon likes to dump debug stuff to a PrintStream, and we need to
// capture to a buffer.
//
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
PrintStream ps = new PrintStream(bytes);
((Expression)def.getUse()).display(10, ps, config);
ps.flush();
String useStr = bytes.toString();
sbuf.append("|" + Long.toString(Hash64.hash(useStr), 16));
}
else
sbuf.append("|non-exp");
} // for k
return sbuf.toString();
} // calcIndexName()
}