/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-06 The eXist Project
*
* This library is free software; you can redistribute it and/or modify it under
* the terms of the GNU Library General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option) any
* later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more
* details.
*
* You should have received a copy of the GNU Library General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* $Id$
*/
package org.exist.storage;
import org.apache.log4j.Logger;
import org.exist.EXistException;
import org.exist.collections.Collection;
import org.exist.dom.AttrImpl;
import org.exist.dom.ByDocumentIterator;
import org.exist.dom.DocumentImpl;
import org.exist.dom.DocumentSet;
import org.exist.dom.ExtNodeSet;
import org.exist.dom.NewArrayNodeSet;
import org.exist.dom.NodeProxy;
import org.exist.dom.NodeSet;
import org.exist.dom.QName;
import org.exist.dom.StoredNode;
import org.exist.dom.SymbolTable;
import org.exist.dom.TextImpl;
import org.exist.numbering.DLN;
import org.exist.numbering.NodeId;
import org.exist.security.Permission;
import org.exist.security.PermissionDeniedException;
import org.exist.security.User;
import org.exist.security.xacml.AccessContext;
import org.exist.storage.btree.BTreeException;
import org.exist.storage.btree.DBException;
import org.exist.storage.btree.IndexQuery;
import org.exist.storage.btree.Value;
import org.exist.storage.index.BFile;
import org.exist.storage.io.VariableByteArrayInput;
import org.exist.storage.io.VariableByteInput;
import org.exist.storage.io.VariableByteOutputStream;
import org.exist.storage.lock.Lock;
import org.exist.util.ByteConversion;
import org.exist.util.Configuration;
import org.exist.util.FastQSort;
import org.exist.util.LockException;
import org.exist.util.Occurrences;
import org.exist.util.ProgressIndicator;
import org.exist.util.ReadOnlyException;
import org.exist.xquery.Constants;
import org.exist.xquery.DescendantSelector;
import org.exist.xquery.Expression;
import org.exist.xquery.NodeSelector;
import org.exist.xquery.TerminatedException;
import org.exist.xquery.XQueryContext;
import org.w3c.dom.Node;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
* Structural index for named nodes. While a document is being loaded into the
* database, this class collects a {@link NodeProxy} (node id plus storage address)
* for every node registered through {@link #addNode(QName, NodeProxy)} and later
* writes the collected lists to disk, keyed by collection id and qualified name.
*/
public class NativeElementIndex extends ElementIndex implements ContentLoadingObserver {
/** Name of the index file created inside the data directory. */
public static final String FILE_NAME = "elements.dbx";
/** Configuration property under which the opened {@link BFile} is cached and looked up. */
public static final String FILE_KEY_IN_CONFIG = "db-connection.elements";
/** Cache growth value handed to the {@link BFile} constructor. */
public static final double DEFAULT_STRUCTURAL_CACHE_GROWTH = 1.25;
/** Cache key threshold handed to the {@link BFile} constructor. */
public static final double DEFAULT_STRUCTURAL_KEY_THRESHOLD = 0.01;
/** Cache value threshold handed to the {@link BFile} constructor. */
public static final double DEFAULT_STRUCTURAL_VALUE_THRESHOLD = 0.04;
/** Flag byte written after the document id: entries were written outside update mode. */
private final static byte ENTRIES_ORDERED = 0;
/** Flag byte written after the document id: entries were written while in update mode. */
private final static byte ENTRIES_UNORDERED = 1;
// The OFFSET_* values are byte positions inside an index key; each one is derived
// from the previous offset plus the length of the previous key component.
//TODO : check
public static int OFFSET_COLLECTION_ID = 0;
//TODO : check
public static int OFFSET_TYPE = OFFSET_COLLECTION_ID + Collection.LENGTH_COLLECTION_ID; //4
/** Position of the local-name symbol; used when decoding keys read back from the index. */
public static int OFFSET_SYMBOL = OFFSET_TYPE + ElementValue.LENGTH_TYPE; //5
/** Position of the namespace symbol; used when decoding keys read back from the index. */
public static int OFFSET_NSSYMBOL = OFFSET_SYMBOL + SymbolTable.LENGTH_LOCAL_NAME; //7
/** The datastore for this node index */
protected BFile dbNodes;
/** Configuration object that also serves as registry for the shared {@link BFile}. */
protected Configuration config;
/**
* Work output stream shared by all writing methods of this instance.
* It must be cleared before every use.
*/
private VariableByteOutputStream os = new VariableByteOutputStream();
/**
 * Creates the index and registers it with the broker as a content loading observer.
 * The backing {@link BFile} is taken from the configuration when one is already
 * registered under {@link #getConfigKeyForFile()}; otherwise a new file is opened
 * in <code>dataDir</code> and registered there.
 *
 * @param broker the broker owning this index
 * @param id identifier passed on to the {@link BFile} constructor
 * @param dataDir directory in which the index file lives
 * @param config configuration used to look up and register the {@link BFile}
 * @throws DBException declared for failures while opening the index file
 */
public NativeElementIndex(DBBroker broker, byte id, String dataDir, Configuration config) throws DBException {
    super(broker);
    this.config = config;
    BFile elementsFile = (BFile) config.getProperty(getConfigKeyForFile());
    if (elementsFile == null) {
        //TODO : read from configuration (key ?)
        final double growth = NativeElementIndex.DEFAULT_STRUCTURAL_CACHE_GROWTH;
        final double keyThreshold = NativeElementIndex.DEFAULT_STRUCTURAL_KEY_THRESHOLD;
        final double valueThreshold = NativeElementIndex.DEFAULT_STRUCTURAL_VALUE_THRESHOLD;
        final File target = new File(dataDir + File.separatorChar + getFileName());
        LOG.debug("Creating '" + target.getName() + "'...");
        elementsFile = new BFile(broker.getBrokerPool(), id, false,
                target, broker.getBrokerPool().getCacheManager(), growth, keyThreshold, valueThreshold);
        config.setProperty(getConfigKeyForFile(), elementsFile);
    }
    this.dbNodes = elementsFile;
    broker.addContentLoadingObserver(getInstance());
}
/**
 * Returns the name of the file backing this index.
 *
 * @return {@link #FILE_NAME}
 */
public String getFileName() {
    return NativeElementIndex.FILE_NAME;
}
/**
 * Returns the configuration property used to register the index file.
 *
 * @return {@link #FILE_KEY_IN_CONFIG}
 */
public String getConfigKeyForFile() {
    return NativeElementIndex.FILE_KEY_IN_CONFIG;
}
/**
 * Returns this index itself; the constructor uses it to register the
 * instance as a content loading observer.
 *
 * @return this instance
 */
public NativeElementIndex getInstance() {
    final NativeElementIndex self = this;
    return self;
}
/**
 * Queues the given node for the next {@link #flush()} or {@link #remove()}.
 * Nodes are grouped per qualified name in the pending map.
 *
 * @param qname The node's identity
 * @param proxy The node's proxy; it must belong to the current document
 * @throws IllegalArgumentException if the proxy belongs to another document
 */
public void addNode(QName qname, NodeProxy proxy) {
    final int currentDocId = doc.getDocId();
    final int proxyDocId = proxy.getDocument().getDocId();
    if (currentDocId != proxyDocId) {
        throw new IllegalArgumentException("Document id ('" + doc.getDocId() + "') and proxy id ('" +
                proxy.getDocument().getDocId() + "') differ !");
    }
    // Look up the node list for this qname, creating it on first use
    ArrayList nodes = (ArrayList) pending.get(qname);
    if (nodes == null) {
        nodes = new ArrayList(50);
        pending.put(qname, nodes);
    }
    nodes.add(proxy);
}
/**
* No-op in this index: the body is an unimplemented stub.
* Presumably required by {@link ContentLoadingObserver} - TODO confirm.
*/
public void storeAttribute(AttrImpl node, NodePath currentPath, int indexingHint, RangeIndexSpec spec, boolean remove) {
// Nothing is done with the attribute here.
}
/**
* No-op in this index: the body is an unimplemented stub.
* Presumably required by {@link ContentLoadingObserver} - TODO confirm.
*/
public void storeText(TextImpl node, NodePath currentPath, int indexingHint) {
// Nothing is done with the text node here.
}
/**
* No-op in this index: the body is an unimplemented stub. Removal of index
* entries is performed by {@link #remove()} on the pending nodes instead.
* Presumably required by {@link ContentLoadingObserver} - TODO confirm.
*/
public void removeNode(StoredNode node, NodePath currentPath, String content) {
// Nothing is done with the node here.
}
/**
 * Flushes the underlying index file while holding its write lock.
 * Lock and database errors are logged and not propagated.
 *
 * @see org.exist.storage.ContentLoadingObserver#sync()
 */
public void sync() {
    final Lock lock = dbNodes.getLock();
    // Only release the lock if acquire() actually succeeded
    boolean locked = false;
    try {
        lock.acquire(Lock.WRITE_LOCK);
        locked = true;
        dbNodes.flush();
    } catch (LockException e) {
        LOG.warn("Failed to acquire lock for '" + dbNodes.getFile().getName() + "'", e);
        //TODO : throw an exception ? -pb
    } catch (DBException e) {
        LOG.error(e.getMessage(), e);
        //TODO : throw an exception ? -pb
    } finally {
        if (locked) {
            lock.release(Lock.WRITE_LOCK);
        }
    }
}
/**
 * Appends all pending node lists of the current document to the index file.
 * <p>
 * For every pending qualified name one entry is written, consisting of the document
 * id, an ordered/unordered flag, the number of nodes, a fixed-width byte length and
 * then, for each node in sorted order, its node id followed by its storage address.
 * An end-of-document marker terminates the entry. Observers are notified about the
 * progress. The pending map is cleared at the end, except when the index file
 * turns out to be read-only.
 *
 * @see org.exist.storage.ContentLoadingObserver#flush()
 */
public void flush() {
    //TODO : return if doc == null? -pb
    if (pending.size() == 0) {
        return;
    }
    final ProgressIndicator progress = new ProgressIndicator(pending.size(), 5);
    final int collectionId = this.doc.getCollection().getId();
    final Lock lock = dbNodes.getLock();
    int count = 0;
    for (Iterator i = pending.entrySet().iterator(); i.hasNext(); count++) {
        Map.Entry entry = (Map.Entry) i.next();
        QName qname = (QName) entry.getKey();
        //TODO : NativeValueIndex uses LongLinkedLists -pb
        ArrayList gids = (ArrayList) entry.getValue();
        int gidsCount = gids.size();
        //Don't forget this one
        FastQSort.sort(gids, 0, gidsCount - 1);
        os.clear();
        os.writeInt(this.doc.getDocId());
        os.writeByte(inUpdateMode ? ENTRIES_UNORDERED : ENTRIES_ORDERED);
        os.writeInt(gidsCount);
        // Reserve room for the byte length; it is patched in once the entry is complete
        int lenOffset = os.position();
        os.writeFixedInt(0);
        //Compute the GIDs list
        NodeId previous = null;
        for (int j = 0; j < gidsCount; j++) {
            NodeProxy storedNode = (NodeProxy) gids.get(j);
            if (doc.getDocId() != storedNode.getDocument().getDocId()) {
                throw new IllegalArgumentException("Document id ('" + doc.getDocId() + "') and proxy id ('" +
                        storedNode.getDocument().getDocId() + "') differ !");
            }
            try {
                // Each node id is written relative to its predecessor
                previous = storedNode.getNodeId().write(previous, os);
            } catch (IOException e) {
                LOG.warn("IO error while writing structural index: " + e.getMessage(), e);
            }
            StorageAddress.write(storedNode.getInternalAddress(), os);
        }
        broker.getBrokerPool().getNodeFactory().writeEndOfDocument(os);
        // Length of everything after the placeholder; the 4 presumably is the
        // width of the fixed int placeholder itself - TODO confirm
        os.writeFixedInt(lenOffset, os.position() - lenOffset - 4);
        // Only release the lock if acquire() actually succeeded
        boolean locked = false;
        try {
            lock.acquire(Lock.WRITE_LOCK);
            locked = true;
            //Store the data
            final Value key = computeKey(collectionId, qname);
            if (dbNodes.append(key, os.data()) == BFile.UNKNOWN_ADDRESS) {
                LOG.error("Could not put index data for node '" + qname + "'");
                //TODO : throw an exception ?
            }
        } catch (LockException e) {
            LOG.warn("Failed to acquire lock for '" + dbNodes.getFile().getName() + "'", e);
            //TODO : return ?
        } catch (IOException e) {
            LOG.error(e.getMessage(), e);
            //TODO : return ?
        } catch (ReadOnlyException e) {
            LOG.warn("Read-only error on '" + dbNodes.getFile().getName() + "'", e);
            //Return without clearing the pending entries
            return;
        } finally {
            if (locked) {
                lock.release(Lock.WRITE_LOCK);
            }
            os.clear();
        }
        progress.setValue(count);
        if (progress.changed()) {
            setChanged();
            notifyObservers(progress);
        }
    }
    progress.finish();
    setChanged();
    notifyObservers(progress);
    pending.clear();
    inUpdateMode = false;
}
/**
 * Removes the pending nodes of the current document from the index.
 * <p>
 * For every pending qualified name the stored value is read and rewritten:
 * entries belonging to other documents are copied unchanged, while the entry of
 * the current document is rebuilt from those stored nodes that are not in the
 * pending list. The pending map is cleared afterwards. Lock, read-only and I/O
 * errors are logged and not propagated.
 */
public void remove() {
    //TODO : return if doc == null? -pb
    if (pending.size() == 0) {
        return;
    }
    final int collectionId = this.doc.getCollection().getId();
    final Lock lock = dbNodes.getLock();
    for (Iterator i = pending.entrySet().iterator(); i.hasNext();) {
        Map.Entry entry = (Map.Entry) i.next();
        List storedGIDList = (ArrayList) entry.getValue();
        QName qname = (QName) entry.getKey();
        final Value key = computeKey(collectionId, qname);
        List newGIDList = new ArrayList();
        os.clear();
        // Only release the lock if acquire() actually succeeded
        boolean locked = false;
        try {
            lock.acquire(Lock.WRITE_LOCK);
            locked = true;
            Value value = dbNodes.get(key);
            //Does the node already exist in the index ?
            if (value != null) {
                //Add its data to the new list
                VariableByteArrayInput is = new VariableByteArrayInput(value.getData());
                try {
                    while (is.available() > 0) {
                        int storedDocId = is.readInt();
                        byte isOrdered = is.readByte();
                        int gidsCount = is.readInt();
                        // Byte length of the node list that follows this header
                        int size = is.readFixedInt();
                        if (storedDocId != this.doc.getDocId()) {
                            // data are related to another document:
                            // append them to any existing data
                            os.writeInt(storedDocId);
                            os.writeByte(isOrdered);
                            os.writeInt(gidsCount);
                            os.writeFixedInt(size);
                            try {
                                is.copyRaw(os, size);
                            } catch (EOFException e) {
                                LOG.error(e.getMessage(), e);
                                //TODO : data will be saved although os is probably corrupted ! -pb
                            }
                        } else {
                            // data are related to our document:
                            // feed the new list with the GIDs
                            NodeId previous = null;
                            NodeId nodeId;
                            long address;
                            for (int j = 0; j < gidsCount; j++) {
                                nodeId = broker.getBrokerPool().getNodeFactory().createFromStream(previous, is);
                                previous = nodeId;
                                address = StorageAddress.read(is);
                                // add the node to the new list if it is not
                                // in the list of removed nodes
                                if (!containsNode(storedGIDList, nodeId)) {
                                    newGIDList.add(new NodeProxy(doc, nodeId, address));
                                }
                            }
                            // consume the end-of-document marker written after the node list
                            broker.getBrokerPool().getNodeFactory().createFromStream(NodeId.ROOT_NODE, is);
                        }
                    }
                } catch (EOFException e) {
                    //TODO : remove this block if unexpected -pb
                    LOG.warn("REPORT ME " + e.getMessage(), e);
                }
                //append the data from the new list
                if (newGIDList.size() > 0) {
                    int gidsCount = newGIDList.size();
                    //Don't forget this one
                    FastQSort.sort(newGIDList, 0, gidsCount - 1);
                    os.writeInt(this.doc.getDocId());
                    os.writeByte(ENTRIES_ORDERED);
                    os.writeInt(gidsCount);
                    // Reserve room for the byte length; patched in below
                    int lenOffset = os.position();
                    os.writeFixedInt(0);
                    NodeId previous = null;
                    NodeProxy storedNode;
                    for (int j = 0; j < gidsCount; j++) {
                        storedNode = (NodeProxy) newGIDList.get(j);
                        if (doc.getDocId() != storedNode.getDocument().getDocId()) {
                            throw new IllegalArgumentException("Document id ('" + doc.getDocId() + "') and proxy id ('" +
                                    storedNode.getDocument().getDocId() + "') differ !");
                        }
                        try {
                            previous = storedNode.getNodeId().write(previous, os);
                        } catch (IOException e) {
                            LOG.warn("IO error while writing structural index: " + e.getMessage(), e);
                            //TODO : throw exception ?
                        }
                        StorageAddress.write(storedNode.getInternalAddress(), os);
                    }
                    broker.getBrokerPool().getNodeFactory().writeEndOfDocument(os);
                    // The 4 presumably is the width of the fixed int placeholder - TODO confirm
                    os.writeFixedInt(lenOffset, os.position() - lenOffset - 4);
                }
            }
            //Store the data
            if (value == null) {
                if (dbNodes.put(key, os.data()) == BFile.UNKNOWN_ADDRESS) {
                    LOG.error("Could not put index data for node '" + qname + "'");
                    //TODO : throw exception ?
                }
            } else {
                if (dbNodes.update(value.getAddress(), key, os.data()) == BFile.UNKNOWN_ADDRESS) {
                    LOG.error("Could not put index data for node '" + qname + "'");
                    //TODO : throw exception ?
                }
            }
        } catch (LockException e) {
            LOG.warn("Failed to acquire lock for '" + dbNodes.getFile().getName() + "'", e);
        } catch (ReadOnlyException e) {
            LOG.warn("Read-only error on '" + dbNodes.getFile().getName() + "'", e);
        } catch (IOException e) {
            LOG.error(e.getMessage(), e);
        } finally {
            if (locked) {
                lock.release(Lock.WRITE_LOCK);
            }
            os.clear();
        }
    }
    pending.clear();
}
/**
 * Drops all index entries for the given collection by removing every key
 * that starts with the collection's id. Lock, B-tree and I/O errors are
 * logged and not propagated.
 *
 * @param collection the collection whose entries are removed
 * @see org.exist.storage.ContentLoadingObserver#dropIndex(org.exist.collections.Collection)
 */
public void dropIndex(Collection collection) {
    final Value ref = new ElementValue(collection.getId());
    final IndexQuery query = new IndexQuery(IndexQuery.TRUNC_RIGHT, ref);
    final Lock lock = dbNodes.getLock();
    // Only release the lock if acquire() actually succeeded
    boolean locked = false;
    try {
        lock.acquire(Lock.WRITE_LOCK);
        locked = true;
        //TODO : flush ? -pb
        dbNodes.removeAll(null, query);
    } catch (LockException e) {
        LOG.warn("Failed to acquire lock for '" + dbNodes.getFile().getName() + "'", e);
    } catch (BTreeException e) {
        LOG.error(e.getMessage(), e);
    } catch (IOException e) {
        LOG.error(e.getMessage(), e);
    } finally {
        if (locked) {
            lock.release(Lock.WRITE_LOCK);
        }
    }
}
/**
 * Drops all index entries for the given document.
 * <p>
 * Every key of the document's collection is scanned. Entries of other documents
 * are copied to the work stream, entries of the given document are skipped. If
 * anything was skipped, the key is either removed (nothing left) or rewritten
 * with the remaining data. Lock, termination, B-tree and I/O errors are logged
 * and not propagated.
 *
 * @param document the document to remove; note that this is *not* this.doc -pb
 * @throws ReadOnlyException declared for writes to a read-only index file
 * @see org.exist.storage.ContentLoadingObserver#dropIndex(org.exist.dom.DocumentImpl)
 */
public void dropIndex(DocumentImpl document) throws ReadOnlyException {
    final int collectionId = document.getCollection().getId();
    final Value ref = new ElementValue(collectionId);
    final IndexQuery query = new IndexQuery(IndexQuery.TRUNC_RIGHT, ref);
    final Lock lock = dbNodes.getLock();
    // Only release the lock if acquire() actually succeeded
    boolean locked = false;
    try {
        lock.acquire(Lock.WRITE_LOCK);
        locked = true;
        ArrayList elements = dbNodes.findKeys(query);
        for (int i = 0; i < elements.size(); i++) {
            boolean changed = false;
            Value key = (Value) elements.get(i);
            VariableByteInput is = dbNodes.getAsStream(key);
            if (is == null) {
                continue;
            }
            os.clear();
            try {
                while (is.available() > 0) {
                    int storedDocId = is.readInt();
                    byte ordered = is.readByte();
                    int gidsCount = is.readInt();
                    // Byte length of the node list that follows this header
                    int size = is.readFixedInt();
                    if (storedDocId != document.getDocId()) {
                        // data are related to another document:
                        // copy them to any existing data
                        os.writeInt(storedDocId);
                        os.writeByte(ordered);
                        os.writeInt(gidsCount);
                        os.writeFixedInt(size);
                        is.copyRaw(os, size);
                    } else {
                        // data are related to our document:
                        // drop them by not copying them
                        changed = true;
                        is.skipBytes(size);
                    }
                }
            } catch (EOFException e) {
                //EOF is expected here
            }
            if (changed) {
                //TODO : no call to dbNodes.remove if no data ? -pb
                //TODO : why not use the same construct as above :
                //dbNodes.update(value.getAddress(), ref, os.data()) -pb
                if (os.data().size() == 0) {
                    dbNodes.remove(key);
                } else if (dbNodes.put(key, os.data()) == BFile.UNKNOWN_ADDRESS) {
                    LOG.error("Could not put index data for value '" + ref + "'");
                    //TODO : thow exception ?
                }
            }
        }
    } catch (LockException e) {
        LOG.warn("Failed to acquire lock for '" + dbNodes.getFile().getName() + "'", e);
    } catch (TerminatedException e) {
        LOG.warn(e.getMessage(), e);
    } catch (BTreeException e) {
        LOG.error(e.getMessage(), e);
    } catch (IOException e) {
        LOG.error(e.getMessage(), e);
    } finally {
        if (locked) {
            lock.release(Lock.WRITE_LOCK);
        }
        os.clear();
    }
    // NOTE(review): this check runs after os.clear(); if size() reports the current
    // content length rather than the allocated capacity it can never be true - confirm.
    if (os.size() > 512000) {
        // garbage collect the output stream if it is larger than 512k, otherwise reuse it
        os = new VariableByteOutputStream();
    }
}
/**
 * Lookup elements or attributes in the index matching a given {@link QName} and
 * {@link NodeSelector}. The NodeSelector argument is optional. If selector is
 * null, all elements or attributes matching qname will be returned.
 * <p>
 * The index is queried once per collection of the document set while holding
 * the read lock. Lock and I/O errors are logged and not propagated; the nodes
 * collected so far are still returned.
 *
 * @param type either {@link ElementValue#ATTRIBUTE}, {@link ElementValue#ELEMENT}.
 * @param docs the set of documents to look up in the index
 * @param qname the QName of the attribute or element
 * @param selector an (optional) NodeSelector
 * @return the matching nodes
 * @throws IllegalArgumentException if type is neither of the two accepted values
 */
public NodeSet findElementsByTagName(byte type, DocumentSet docs, QName qname, NodeSelector selector) {
    short nodeType = getIndexType(type);
    final NewArrayNodeSet result = new NewArrayNodeSet(docs.getDocumentCount(), 256);
    final Lock lock = dbNodes.getLock();
    // true if the output document set is the same as the input document set
    boolean sameDocSet = true;
    boolean descendantAxis = selector instanceof DescendantSelector;
    for (Iterator i = docs.getCollectionIterator(); i.hasNext();) {
        //Compute a key for the node
        Collection collection = (Collection) i.next();
        int collectionId = collection.getId();
        final Value key = computeTypedKey(type, collectionId, qname);
        // Only release the lock if acquire() actually succeeded
        boolean locked = false;
        try {
            lock.acquire(Lock.READ_LOCK);
            locked = true;
            VariableByteInput is = dbNodes.getAsStream(key);
            //Does the node already has data in the index ?
            if (is == null) {
                sameDocSet = false;
                continue;
            }
            while (is.available() > 0) {
                int storedDocId = is.readInt();
                byte ordered = is.readByte();
                int gidsCount = is.readInt();
                // Byte length of the node list that follows this header
                int size = is.readFixedInt();
                DocumentImpl storedDocument = docs.getDoc(storedDocId);
                //Skip the entry if the document is not concerned
                if (storedDocument == null) {
                    is.skipBytes(size);
                    continue;
                }
                //Process the nodes
                NodeId nodeId;
                NodeId previous = null;
                for (int k = 0; k < gidsCount; k++) {
                    nodeId = broker.getBrokerPool().getNodeFactory().createFromStream(previous, is);
                    previous = nodeId;
                    if (selector == null) {
                        long address = StorageAddress.read(is);
                        NodeProxy storedNode = new NodeProxy(storedDocument, nodeId, nodeType, address);
                        result.add(storedNode, gidsCount);
                    } else {
                        //Filter out the node if requested to do so
                        NodeProxy storedNode = selector.match(storedDocument, nodeId);
                        if (storedNode != null) {
                            long address = StorageAddress.read(is);
                            storedNode.setInternalAddress(address);
                            storedNode.setNodeType(nodeType);
                            result.add(storedNode, gidsCount);
                        } else {
                            // Skip the storage address of the rejected node; presumably it
                            // is stored as 3 encoded values - TODO confirm
                            is.skip(3);
                            sameDocSet = false;
                        }
                    }
                }
                // consume the end-of-document marker written after the node list
                nodeId = broker.getBrokerPool().getNodeFactory().createFromStream(NodeId.ROOT_NODE, is);
                result.setSorted(storedDocument, ordered == ENTRIES_ORDERED && !descendantAxis);
            }
        } catch (EOFException e) {
            //EOFExceptions are expected here
        } catch (LockException e) {
            LOG.warn("Failed to acquire lock for '" + dbNodes.getFile().getName() + "'", e);
        } catch (IOException e) {
            LOG.error(e.getMessage(), e);
            //TODO : return ?
        } finally {
            if (locked) {
                lock.release(Lock.READ_LOCK);
            }
        }
    }
    // LOG.debug("Found: " + result.getLength() + " for " + qname);
    if (sameDocSet) {
        result.setDocumentSet(docs);
    }
    return result;
}
/**
 * Optimized lookup method which directly implements the ancestor-descendant join. The algorithm
 * does directly operate on the input stream containing the potential descendant nodes. It thus needs
 * less comparisons than {@link #findElementsByTagName(byte, DocumentSet, QName, NodeSelector)}.
 * <p>
 * The stream returned by the index file is cast to {@link BFile.PageInputStream} because the
 * algorithm needs to remember and rewind stream positions when context nodes are nested.
 * Lock and I/O errors are logged and not propagated; the nodes collected so far are still returned.
 *
 * @param type either {@link ElementValue#ATTRIBUTE} or {@link ElementValue#ELEMENT}
 * @param qname the QName to search for
 * @param axis the axis to follow; compared against {@link Constants#CHILD_AXIS},
 *        {@link Constants#ATTRIBUTE_AXIS}, {@link Constants#DESCENDANT_AXIS},
 *        {@link Constants#DESCENDANT_SELF_AXIS} and {@link Constants#DESCENDANT_ATTRIBUTE_AXIS}
 * @param docs the set of documents to look up in the index
 * @param contextSet the set of ancestor nodes for which the method will try to find descendants
 * @param contextId id of the current context expression as passed by the query engine
 * @return the nodes found below the context nodes
 * @throws IllegalArgumentException if type is neither of the two accepted values
 */
public NodeSet findDescendantsByTagName(byte type, QName qname, int axis,
        DocumentSet docs, ExtNodeSet contextSet, int contextId) {
    // LOG.debug(contextSet.toString());
    short nodeType = getIndexType(type);
    ByDocumentIterator citer = contextSet.iterateByDocument();
    final NewArrayNodeSet result = new NewArrayNodeSet(docs.getDocumentCount(), 256);
    final Lock lock = dbNodes.getLock();
    // true if the output document set is the same as the input document set
    boolean sameDocSet = true;
    for (Iterator i = docs.getCollectionIterator(); i.hasNext();) {
        //Compute a key for the node
        Collection collection = (Collection) i.next();
        int collectionId = collection.getId();
        final Value key = computeTypedKey(type, collectionId, qname);
        // Only release the lock if acquire() actually succeeded
        boolean locked = false;
        try {
            lock.acquire(Lock.READ_LOCK);
            locked = true;
            //TODO : implement lookups for a "*" prefix (qname.getPrefix() == null) by querying
            //all keys with IndexQuery.TRUNC_RIGHT; beware that a null prefix looks to be
            //polysemic (none vs. all)
            VariableByteInput is = dbNodes.getAsStream(key);
            //Does the node already has data in the index ?
            if (is == null) {
                sameDocSet = false;
                continue;
            }
            int lastDocId = DocumentImpl.UNKNOWN_DOCUMENT_ID;
            NodeProxy ancestor = null;
            while (is.available() > 0) {
                int storedDocId = is.readInt();
                byte ordered = is.readByte();
                int gidsCount = is.readInt();
                // Byte length of the node list that follows this header
                int size = is.readFixedInt();
                DocumentImpl storedDocument = docs.getDoc(storedDocId);
                //Skip the entry if the document is not concerned
                if (storedDocument == null) {
                    is.skipBytes(size);
                    continue;
                }
                // position the context iterator on the next document
                if (storedDocId != lastDocId || ordered == ENTRIES_UNORDERED) {
                    citer.nextDocument(storedDocument);
                    lastDocId = storedDocId;
                    ancestor = citer.nextNode();
                }
                // no ancestor node in the context set, skip the document
                if (ancestor == null || gidsCount == 0) {
                    is.skipBytes(size);
                    continue;
                }
                NodeId ancestorId = ancestor.getNodeId();
                long prevPosition = ((BFile.PageInputStream) is).position();
                long markedPosition = prevPosition;
                NodeId markedId = null;
                NodeId previousId = null;
                NodeProxy lastAncestor = null;
                // Process the nodes for the current document
                NodeId nodeId = broker.getBrokerPool().getNodeFactory().createFromStream(previousId, is);
                previousId = nodeId;
                long address = StorageAddress.read(is);
                while (true) {
                    int relation = nodeId.computeRelation(ancestorId);
                    // System.out.println(ancestorId + " -> " + nodeId + ": " + relation);
                    if (relation != -1) {
                        // current node is a descendant. walk through the descendants
                        // and add them to the result
                        if (((axis == Constants.CHILD_AXIS || axis == Constants.ATTRIBUTE_AXIS) && relation == NodeId.IS_CHILD) ||
                                (axis == Constants.DESCENDANT_AXIS && (relation == NodeId.IS_DESCENDANT || relation == NodeId.IS_CHILD)) ||
                                axis == Constants.DESCENDANT_SELF_AXIS || axis == Constants.DESCENDANT_ATTRIBUTE_AXIS
                                ) {
                            NodeProxy storedNode = new NodeProxy(storedDocument, nodeId, nodeType, address);
                            result.add(storedNode, gidsCount);
                            if (Expression.NO_CONTEXT_ID != contextId) {
                                storedNode.deepCopyContext(ancestor, contextId);
                            } else
                                storedNode.copyContext(ancestor);
                            storedNode.addMatches(ancestor);
                        }
                        prevPosition = ((BFile.PageInputStream) is).position();
                        NodeId next = broker.getBrokerPool().getNodeFactory().createFromStream(previousId, is);
                        previousId = next;
                        if (next != DLN.END_OF_DOCUMENT) {
                            // retrieve the next descendant from the stream
                            nodeId = next;
                            address = StorageAddress.read(is);
                        } else {
                            // no more descendants. check if there are more ancestors
                            if (citer.hasNextNode()) {
                                NodeProxy nextNode = citer.peekNode();
                                // reached the end of the input stream:
                                // if the ancestor set has more nodes and the following ancestor
                                // is a descendant of the previous one, we have to rescan the input stream
                                // for further matches
                                if (nextNode.getNodeId().isDescendantOf(ancestorId)) {
                                    prevPosition = markedPosition;
                                    ((BFile.PageInputStream) is).seek(markedPosition);
                                    nodeId = broker.getBrokerPool().getNodeFactory().createFromStream(markedId, is);
                                    previousId = nodeId;
                                    address = StorageAddress.read(is);
                                    ancestor = citer.nextNode();
                                    ancestorId = ancestor.getNodeId();
                                } else {
                                    // ancestorId = ancestor.getNodeId();
                                    break;
                                }
                            } else {
                                break;
                            }
                        }
                    } else {
                        // current node is not a descendant of the ancestor node. Compare the
                        // node ids and proceed with next descendant or ancestor.
                        int cmp = ancestorId.compareTo(nodeId);
                        if (cmp < 0) {
                            // check if we have more ancestors
                            if (citer.hasNextNode()) {
                                NodeProxy next = citer.nextNode();
                                // if the ancestor set has more nodes and the following ancestor
                                // is a descendant of the previous one, we have to rescan the input stream
                                // for further matches
                                if (next.getNodeId().isDescendantOf(ancestorId)) {
                                    // rewind the input stream to the position from where we started
                                    // for the previous ancestor node
                                    prevPosition = markedPosition;
                                    ((BFile.PageInputStream) is).seek(markedPosition);
                                    nodeId = broker.getBrokerPool().getNodeFactory().createFromStream(markedId, is);
                                    previousId = nodeId;
                                    address = StorageAddress.read(is);
                                } else {
                                    // mark the current position in the input stream
                                    markedPosition = prevPosition;
                                    markedId = nodeId;
                                }
                                ancestor = next;
                                ancestorId = ancestor.getNodeId();
                            } else {
                                // no more ancestors: skip the remaining descendants for this document
                                while ((previousId = broker.getBrokerPool().getNodeFactory().createFromStream(previousId, is))
                                        != DLN.END_OF_DOCUMENT) {
                                    StorageAddress.read(is);
                                }
                                break;
                            }
                        } else {
                            // load the next descendant from the input stream
                            prevPosition = ((BFile.PageInputStream) is).position();
                            NodeId nextId = broker.getBrokerPool().getNodeFactory().createFromStream(previousId, is);
                            previousId = nextId;
                            if (nextId != DLN.END_OF_DOCUMENT) {
                                nodeId = nextId;
                                address = StorageAddress.read(is);
                            } else {
                                // We need to remember the last ancestor in case there are more docs to process.
                                // Next document should start with this ancestor.
                                if (lastAncestor == null)
                                    lastAncestor = ancestor;
                                // check if we have more ancestors
                                if (citer.hasNextNode()) {
                                    ancestor = citer.nextNode();
                                    // if the ancestor set has more nodes and the following ancestor
                                    // is a descendant of the previous one, we have to rescan the input stream
                                    // for further matches
                                    if (ancestor.getNodeId().isDescendantOf(ancestorId)) {
                                        // rewind the input stream to the position from where we started
                                        // for the previous ancestor node
                                        prevPosition = markedPosition;
                                        ((BFile.PageInputStream) is).seek(markedPosition);
                                        nodeId = broker.getBrokerPool().getNodeFactory().createFromStream(markedId, is);
                                        previousId = nodeId;
                                        address = StorageAddress.read(is);
                                        ancestorId = ancestor.getNodeId();
                                    } else {
                                        ancestorId = ancestor.getNodeId();
                                        break;
                                    }
                                } else {
                                    break;
                                }
                            }
                        }
                    }
                }
                // result.setSorted(storedDocument, ordered == ENTRIES_ORDERED);
                if (lastAncestor != null) {
                    ancestor = lastAncestor;
                    citer.setPosition(ancestor);
                }
            }
        } catch (EOFException e) {
            //EOFExceptions are expected here
        } catch (LockException e) {
            LOG.warn("Failed to acquire lock for '" + dbNodes.getFile().getName() + "'", e);
        } catch (IOException e) {
            LOG.error(e.getMessage(), e);
            //TODO : return ?
        } finally {
            if (locked) {
                lock.release(Lock.READ_LOCK);
            }
        }
    }
    // LOG.debug("Found: " + result.getLength() + " for " + qname);
    if (sameDocSet) {
        result.setDocumentSet(docs);
    }
    return result;
}
/**
 * Translates an index key type into the matching DOM node type.
 *
 * @param type either {@link ElementValue#ATTRIBUTE} or {@link ElementValue#ELEMENT}
 * @return {@link Node#ATTRIBUTE_NODE} or {@link Node#ELEMENT_NODE}
 * @throws IllegalArgumentException for any other type
 */
private short getIndexType(byte type) {
    if (type == ElementValue.ATTRIBUTE) {
        return Node.ATTRIBUTE_NODE;
    }
    if (type == ElementValue.ELEMENT) {
        return Node.ELEMENT_NODE;
    }
    throw new IllegalArgumentException("Invalid type");
}
/**
 * Collects occurrence counts for the element names indexed in a collection.
 * <p>
 * All element keys of the collection (and of its descendant collections when
 * <code>inclusive</code> is set) are scanned; for each qualified name the node counts
 * of all document entries are summed up. Lock, B-tree, I/O and termination errors
 * are logged and not propagated; the counts gathered so far are still returned.
 *
 * @param collection the collection to scan
 * @param inclusive whether descendant collections are scanned as well
 * @return one {@link Occurrences} object per qualified name, ordered by the map's key order
 * @throws PermissionDeniedException if the broker's user may not read the collection
 */
public Occurrences[] scanIndexedElements(Collection collection, boolean inclusive)
        throws PermissionDeniedException {
    final User user = broker.getUser();
    if (!collection.getPermissions().validate(user, Permission.READ)) {
        throw new PermissionDeniedException("User '" + user.getName() +
                "' has no permission to read collection '" + collection.getURI() + "'");
    }
    List collections;
    if (inclusive) {
        collections = collection.getDescendants(broker, broker.getUser());
    } else {
        collections = new ArrayList();
    }
    // The collection itself is scanned in both cases
    collections.add(collection);
    final SymbolTable symbols = broker.getBrokerPool().getSymbols();
    final TreeMap map = new TreeMap();
    final Lock lock = dbNodes.getLock();
    for (Iterator i = collections.iterator(); i.hasNext();) {
        Collection storedCollection = (Collection) i.next();
        int storedCollectionId = storedCollection.getId();
        ElementValue startKey = new ElementValue(ElementValue.ELEMENT, storedCollectionId);
        IndexQuery query = new IndexQuery(IndexQuery.TRUNC_RIGHT, startKey);
        // Only release the lock if acquire() actually succeeded
        boolean locked = false;
        try {
            lock.acquire(Lock.READ_LOCK);
            locked = true;
            //TODO : NativeValueIndex uses LongLinkedLists -pb
            ArrayList values = dbNodes.findEntries(query);
            for (Iterator j = values.iterator(); j.hasNext();) {
                // val[0] is decoded as the key, val[1] as the stored value
                Value val[] = (Value[]) j.next();
                short sym = ByteConversion.byteToShort(val[0].getData(), OFFSET_SYMBOL);
                short nsSymbol = ByteConversion.byteToShort(val[0].getData(), OFFSET_NSSYMBOL);
                String name = symbols.getName(sym);
                String namespace;
                if (nsSymbol == 0) {
                    namespace = "";
                } else {
                    namespace = symbols.getNamespace(nsSymbol);
                }
                QName qname = new QName(name, namespace);
                Occurrences oc = (Occurrences) map.get(qname);
                if (oc == null) {
                    // required for namespace lookups
                    final XQueryContext context = new XQueryContext(broker, AccessContext.INTERNAL_PREFIX_LOOKUP);
                    qname.setPrefix(context.getPrefixForURI(namespace));
                    oc = new Occurrences(qname);
                    map.put(qname, oc);
                }
                VariableByteArrayInput is = new VariableByteArrayInput(val[1].data(), val[1].start(), val[1].getLength());
                try {
                    while (is.available() > 0) {
                        // document id and ordered flag are not needed for counting
                        is.readInt();
                        is.readByte();
                        int gidsCount = is.readInt();
                        // Byte length of the node list that follows this header
                        int size = is.readFixedInt();
                        is.skipBytes(size);
                        oc.addOccurrences(gidsCount);
                    }
                } catch (EOFException e) {
                    //TODO : remove this block if unexpected -pb
                    LOG.warn("REPORT ME " + e.getMessage(), e);
                }
            }
        } catch (LockException e) {
            LOG.warn("Failed to acquire lock for '" + dbNodes.getFile().getName() + "'", e);
        } catch (BTreeException e) {
            LOG.error(e.getMessage(), e);
            //TODO : return ?
        } catch (IOException e) {
            LOG.error(e.getMessage(), e);
            //TODO : return ?
        } catch (TerminatedException e) {
            LOG.warn(e.getMessage(), e);
        } finally {
            if (locked) {
                lock.release(Lock.READ_LOCK);
            }
        }
    }
    Occurrences[] result = new Occurrences[map.size()];
    return (Occurrences[]) map.values().toArray(result);
}
/**
 * Checks that every index entry of the given document points to a stored
 * element or attribute node.
 * <p>
 * All keys of the document's collection are scanned. Entries of other documents
 * are skipped using their recorded byte length. For entries of the given document
 * each node is loaded through the broker and verified. Lock, B-tree, I/O and
 * termination errors are logged and not propagated.
 *
 * @param document the document to check; note that this is *not* this.doc -pb
 * @throws EXistException if an indexed node cannot be loaded or is neither an
 *         element nor an attribute
 */
public void consistencyCheck(DocumentImpl document) throws EXistException {
    final SymbolTable symbols = broker.getBrokerPool().getSymbols();
    final int collectionId = document.getCollection().getId();
    final Value ref = new ElementValue(collectionId);
    final IndexQuery query = new IndexQuery(IndexQuery.TRUNC_RIGHT, ref);
    final StringBuilder msg = new StringBuilder();
    final Lock lock = dbNodes.getLock();
    // Only release the lock if acquire() actually succeeded
    boolean locked = false;
    try {
        lock.acquire(Lock.WRITE_LOCK);
        locked = true;
        //TODO : NativeValueIndex uses LongLinkedLists -pb
        ArrayList elements = dbNodes.findKeys(query);
        for (int i = 0; i < elements.size(); i++) {
            Value key = (Value) elements.get(i);
            Value value = dbNodes.get(key);
            short sym = ByteConversion.byteToShort(key.data(), key.start() + OFFSET_SYMBOL);
            String nodeName = symbols.getName(sym);
            msg.setLength(0);
            msg.append("Checking ").append(nodeName).append(": ");
            VariableByteArrayInput is = new VariableByteArrayInput(value.getData());
            try {
                while (is.available() > 0) {
                    int storedDocId = is.readInt();
                    is.readByte();
                    int gidsCount = is.readInt();
                    // Byte length of the node list that follows this header
                    int size = is.readFixedInt();
                    if (storedDocId != document.getDocId()) {
                        // data are related to another document:
                        // skip the whole entry by its recorded length, as the other
                        // readers of this index do
                        is.skipBytes(size);
                    } else {
                        // data are related to our document:
                        // check
                        NodeId previous = null;
                        for (int j = 0; j < gidsCount; j++) {
                            NodeId nodeId = broker.getBrokerPool().getNodeFactory().createFromStream(previous, is);
                            previous = nodeId;
                            long address = StorageAddress.read(is);
                            // resolve the node in the document being checked, not in this.doc
                            Node storedNode = broker.objectWith(new NodeProxy(document, nodeId, address));
                            if (storedNode == null) {
                                throw new EXistException("Node " + nodeId + " in document " + document.getFileURI() + " not found.");
                            }
                            if (storedNode.getNodeType() != Node.ELEMENT_NODE && storedNode.getNodeType() != Node.ATTRIBUTE_NODE) {
                                LOG.error("Node " + nodeId + " in document " + document.getFileURI() + " is not an element or attribute node.");
                                LOG.error("Type = " + storedNode.getNodeType() + "; name = " + storedNode.getNodeName() + "; value = " + storedNode.getNodeValue());
                                throw new EXistException("Node " + nodeId + " in document " + document.getURI() + " is not an element or attribute node.");
                            }
                            if (!storedNode.getLocalName().equals(nodeName)) {
                                LOG.error("Node name does not correspond to index entry. Expected " + nodeName + "; found " + storedNode.getLocalName());
                                //TODO : also throw an exception here ?
                            }
                            //TODO : better message (see above) -pb
                            msg.append(StorageAddress.toString(address)).append(" ");
                        }
                        // consume the end-of-document marker written after the node list so
                        // that the next entry header is read from the right position
                        broker.getBrokerPool().getNodeFactory().createFromStream(NodeId.ROOT_NODE, is);
                    }
                }
            } catch (EOFException e) {
                //TODO : remove this block if unexpected -pb
                LOG.warn("REPORT ME " + e.getMessage(), e);
            }
            LOG.debug(msg.toString());
        }
    } catch (LockException e) {
        LOG.warn("Failed to acquire lock for '" + dbNodes.getFile().getName() + "'", e);
        //TODO : throw an exception ? -pb
    } catch (BTreeException e) {
        LOG.error(e.getMessage(), e);
        //TODO : throw an exception ? -pb
    } catch (IOException e) {
        LOG.error(e.getMessage(), e);
        //TODO : throw an exception ? -pb
    } catch (TerminatedException e) {
        LOG.warn(e.getMessage(), e);
        //TODO : throw an exception ? -pb
    } finally {
        if (locked) {
            lock.release(Lock.WRITE_LOCK);
        }
    }
}
/**
 * Builds the index key for a qualified name, taking the key type from the
 * name itself.
 *
 * @param collectionId id of the collection the key belongs to
 * @param qname the qualified name to encode
 * @return the index key
 */
private Value computeKey(int collectionId, QName qname) {
    final byte nameType = qname.getNameType();
    return computeTypedKey(nameType, collectionId, qname);
}
/**
 * Builds the index key from the given type, the collection id and the symbols
 * registered for the local name and the namespace URI of the qualified name.
 *
 * @param type the key type
 * @param collectionId id of the collection the key belongs to
 * @param qname the qualified name to encode
 * @return the index key
 */
private Value computeTypedKey(byte type, int collectionId, QName qname) {
    final SymbolTable table = broker.getBrokerPool().getSymbols();
    final short localNameSymbol = table.getSymbol(qname.getLocalName());
    //TODO : should we truncate the key ?
    //TODO : beware of the polysemy for getPrefix == null
    //if (qname.getPrefix() == null)
    //    return new ElementValue(type, collectionId, localNameSymbol);
    final short namespaceSymbol = table.getNSSymbol(qname.getNamespaceURI());
    return new ElementValue(type, collectionId, localNameSymbol, namespaceSymbol);
}
/**
 * Tells whether the list holds a proxy with the given node id.
 *
 * @param list a list of {@link NodeProxy} objects
 * @param nodeId the node id to look for
 * @return true if a proxy with an equal node id is found
 */
private static boolean containsNode(List list, NodeId nodeId) {
    for (Iterator it = list.iterator(); it.hasNext();) {
        final NodeProxy candidate = (NodeProxy) it.next();
        if (candidate.getNodeId().equals(nodeId)) {
            return true;
        }
    }
    return false;
}
/**
 * Unregisters the index file from the configuration, then closes and
 * removes it. The index stays registered as a content loading observer.
 */
public void closeAndRemove() {
    final String registryKey = getConfigKeyForFile();
    config.setProperty(registryKey, null);
    //Do not uncomment yet !
    //broker.removeContentLoadingObserver(getInstance());
    dbNodes.closeAndRemove();
}
/**
 * Unregisters the index file from the configuration, then closes it.
 * The index stays registered as a content loading observer.
 *
 * @return the result reported by the index file's close()
 * @throws DBException if closing the index file fails
 */
public boolean close() throws DBException {
    final String registryKey = getConfigKeyForFile();
    config.setProperty(registryKey, null);
    //Do not uncomment yet !
    //broker.removeContentLoadingObserver(getInstance());
    final boolean closed = dbNodes.close();
    return closed;
}
/**
 * Delegates to the statistics output of the underlying index file.
 */
public void printStatistics() {
    this.dbNodes.printStatistics();
}
/**
 * Describes this index by its class name, the name of its index file and
 * the broker owning it.
 *
 * @return the description
 */
public String toString() {
    final String className = this.getClass().getName();
    final String fileName = dbNodes.getFile().getName();
    final String owner = broker.toString();
    return className + " at " + fileName + " owned by " + owner;
}
}