/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-07 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* $Id$
*/
package org.exist.indexing;
import org.exist.collections.Collection;
import org.exist.dom.DocumentImpl;
import org.exist.dom.DocumentSet;
import org.exist.dom.NodeProxy;
import org.exist.dom.NodeSet;
import org.exist.dom.StoredNode;
import org.exist.storage.DBBroker;
import org.exist.storage.NodePath;
import org.exist.util.DatabaseConfigurationException;
import org.exist.util.Occurrences;
import org.exist.xquery.XQueryContext;
import org.w3c.dom.NodeList;
import java.util.Map;
/**
* Provide concurrent access to the index structure. Implements the core operations on the index.
* The methods in this class are used in a multi-threaded environment. Every thread accessing the
* database will have exactly one IndexWorker for every index. {@link org.exist.indexing.Index#getWorker(DBBroker)}
* should thus return a new IndexWorker whenever it is called. Implementations of IndexWorker have
* to take care of synchronizing access to shared resources.
*/
public interface IndexWorker {
    /**
     * A key to a QName {@link java.util.List} "hint" to be used when the index scans its index entries.
     */
    String VALUE_COUNT = "value_count";

    /**
     * Returns an ID which uniquely identifies this worker's index.
     *
     * @return a unique ID identifying this worker's index.
     */
    String getIndexId();

    /**
     * Returns a name which uniquely identifies this worker's index.
     *
     * @return a unique name identifying this worker's index.
     */
    String getIndexName();

    /**
     * Read an index configuration from a collection.xconf configuration document.
     *
     * This method is called by the {@link org.exist.collections.CollectionConfiguration} while
     * reading the collection.xconf configuration file for a given collection. The configNodes
     * parameter lists all top-level child nodes below the &lt;index&gt; element in the
     * collection.xconf. The IndexWorker should scan this list and handle those elements
     * it understands.
     *
     * The returned Object will be stored in the collection configuration structure associated
     * with each collection. It can later be retrieved from the collection configuration, e.g. to
     * check if a given node should be indexed or not.
     *
     * @param controller the IndexController on whose behalf the configuration is read
     * @param configNodes lists the top-level child nodes below the &lt;index&gt; element in collection.xconf
     * @param namespaces the active prefix/namespace map
     * @return an arbitrary configuration object to be kept for this index in the collection configuration
     * @throws DatabaseConfigurationException if a configuration error occurs
     */
    Object configure(IndexController controller, NodeList configNodes, Map namespaces) throws DatabaseConfigurationException;

    /**
     * Notify this worker to operate on the specified document.
     *
     * @param doc the document which is processed
     */
    void setDocument(DocumentImpl doc);

    /**
     * Notify this worker to operate on the specified document, using the mode
     * given. Mode will be one of {@link StreamListener#UNKNOWN}, {@link StreamListener#STORE},
     * {@link StreamListener#REMOVE_SOME_NODES} or {@link StreamListener#REMOVE_ALL_NODES}.
     *
     * @param doc the document which is processed
     * @param mode the current operation mode
     */
    void setDocument(DocumentImpl doc, int mode);

    /**
     * Notify this worker to operate using the mode
     * given. Mode will be one of {@link StreamListener#UNKNOWN}, {@link StreamListener#STORE},
     * {@link StreamListener#REMOVE_SOME_NODES} or {@link StreamListener#REMOVE_ALL_NODES}.
     *
     * @param mode the current operation mode
     */
    void setMode(int mode);

    /**
     * Returns the document for the next operation.
     *
     * @return the document
     */
    DocumentImpl getDocument();

    /**
     * Returns the mode for the next operation.
     *
     * @return the mode, one of the {@link StreamListener} mode constants
     */
    int getMode();

    /**
     * When adding or removing nodes to or from the document tree, it might become
     * necessary to reindex some parts of the tree, in particular if indexes are defined
     * on mixed content nodes. This method will call
     * {@link IndexWorker#getReindexRoot(org.exist.dom.StoredNode, org.exist.storage.NodePath, boolean)}
     * on each configured index. It will then return the top-most root.
     *
     * @param node the node to be modified
     * @param path the NodePath of the node
     * @param includeSelf if set to true, the current node itself will be included in the check
     * @return the top-most root node to be reindexed
     */
    StoredNode getReindexRoot(StoredNode node, NodePath path, boolean includeSelf);

    /**
     * Return a stream listener to index the current document in the current mode.
     * The current document and mode are those set via {@link #setDocument(DocumentImpl, int)}
     * or {@link #setMode(int)}.
     * There will never be more than one StreamListener being used per thread, so it is safe
     * for the implementation to reuse a single StreamListener.
     *
     * @return a StreamListener
     */
    StreamListener getListener();

    /**
     * Returns a {@link org.exist.indexing.MatchListener}, which can be used to filter
     * (and manipulate) the XML output generated by the serializer when serializing
     * query results. The method should return null if the implementation is not interested
     * in receiving serialization events.
     *
     * @param broker the broker performing the serialization
     * @param proxy the NodeProxy which is being serialized
     * @return a MatchListener or null if the implementation does not want to receive
     * serialization events
     */
    MatchListener getMatchListener(DBBroker broker, NodeProxy proxy);

    /**
     * Flush the index. This method will be called when indexing a document. The implementation should
     * immediately process all data it has buffered (if there is any), release as many memory resources
     * as it can and prepare for being reused for a different job.
     */
    void flush();

    /**
     * Remove all indexes for the given collection, its subcollections and
     * all resources.
     *
     * @param collection The collection to remove
     * @param broker The broker that will perform the operation
     */
    void removeCollection(Collection collection, DBBroker broker);

    /**
     * Checking an index could be delegated to a worker. Use this method to do so.
     *
     * @param broker The broker that will perform the operation
     * @return Whether or not the index is in a suitable state
     */
    boolean checkIndex(DBBroker broker);

    /**
     * Return <strong>aggregated</strong> (on a document count basis)
     * index entries for the specified document set. Aggregation can only occur if
     * the index entries can be compared, i.e. if the index implements
     * {@link org.exist.indexing.OrderedValuesIndex}, otherwise each entry will be considered
     * as a single occurrence.
     *
     * @param context the XQuery context from which the scan is triggered
     * @param docs The documents to which the index entries belong
     * @param contextSet the node set restricting the scan, or null for no restriction
     * @param hints Some "hints" for retrieving the index entries. See such hints in
     * {@link org.exist.indexing.OrderedValuesIndex} and {@link org.exist.indexing.QNamedKeysIndex}.
     * @return Occurrences objects that contain :
     * <ol>
     * <li>a <strong>string</strong> representation of the index entry. This may change in the future.</li>
     * <li>the number of occurrences for the index entry over all the documents</li>
     * <li>the list of the documents in which the index entry is</li>
     * </ol>
     */
    Occurrences[] scanIndex(XQueryContext context, DocumentSet docs, NodeSet contextSet, Map hints);

    //TODO : a scanIndex() method that would return an unaggregated list of index entries ?
}