IndexWorker.java example

Explorer
exist-master
/*
 *  eXist Open Source Native XML Database
 *  Copyright (C) 2001-2015 The eXist Project
 *  http://exist-db.org
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
package org.exist.indexing;

import org.exist.collections.Collection;
import org.exist.dom.persistent.DocumentImpl;
import org.exist.dom.persistent.DocumentSet;
import org.exist.dom.persistent.IStoredNode;
import org.exist.dom.persistent.NodeProxy;
import org.exist.dom.persistent.NodeSet;
import org.exist.indexing.StreamListener.ReindexMode;
import org.exist.storage.DBBroker;
import org.exist.storage.NodePath;
import org.exist.util.DatabaseConfigurationException;
import org.exist.util.Occurrences;
import org.exist.xquery.QueryRewriter;
import org.exist.xquery.XQueryContext;
import org.w3c.dom.NodeList;

import java.util.Map;
import org.exist.security.PermissionDeniedException;

/**
 * Provide concurrent access to the index structure. Implements the core operations on the index.
 * The methods in this class are used in a multi-threaded environment. Every thread accessing the
 * database will have exactly one IndexWorker for every index. {@link org.exist.indexing.Index#getWorker(DBBroker)}
 * should thus return a new IndexWorker whenever it is  called. Implementations of IndexWorker have
 * to take care of synchronizing access to shared resources.
 */
public interface IndexWorker {

    /**
     * A key to a QName {@link java.util.List} "hint" to be used when the index scans its index entries
     */	
    public static final String VALUE_COUNT = "value_count";

    /**
     * Returns an ID which uniquely identifies this worker's index.
     * @return a unique name identifying this worker's index.
     */
    public String getIndexId();

    /**
     * Returns a name which uniquely identifies this worker's index.
     * @return a unique name identifying this worker's index.
     */
    public String getIndexName();

    /**
     * Read an index configuration from an collection.xconf configuration document.
     *
     * This method is called by the {@link org.exist.collections.CollectionConfiguration} while
     * reading the collection.xconf configuration file for a given collection. The configNodes
     * parameter lists all top-level child nodes below the <index> element in the
     * collection.xconf. The IndexWorker should scan this list and handle those elements
     * it understands.
     *
     * The returned Object will be stored in the collection configuration structure associated
     * with each collection. It can later be retrieved from the collection configuration, e.g. to
     * check if a given node should be indexed or not.
     *
     * @param configNodes lists the top-level child nodes below the <index> element in collection.xconf
     * @param namespaces the active prefix/namespace map
     * @return an arbitrary configuration object to be kept for this index in the collection configuration
     * @throws DatabaseConfigurationException if a configuration error occurs
     */
    Object configure(IndexController controller, NodeList configNodes,
        Map<String, String> namespaces) throws DatabaseConfigurationException;

    /**
     * Notify this worker to operate on the specified document.
     *
     * @param doc the document which is processed
     */
    void setDocument(DocumentImpl doc);

    /**
     * Notify this worker to operate on the specified document, using the mode
     * given. Mode will be one of {@link ReindexMode#UNKNOWN}, {@link ReindexMode#STORE},
     * {@link ReindexMode#REMOVE_SOME_NODES} or {@link ReindexMode#REMOVE_ALL_NODES}.
     *
     * @param doc the document which is processed
     * @param mode the current operation mode
     */
    void setDocument(DocumentImpl doc, ReindexMode mode);

    /**
     * Notify this worker to operate using the mode
     * given. Mode will be one of {@link ReindexMode#UNKNOWN}, {@link ReindexMode#STORE},
     * {@link ReindexMode#REMOVE_SOME_NODES} or {@link ReindexMode#REMOVE_ALL_NODES}.
     *
     * @param mode the current operation mode
     */
    void setMode(final ReindexMode mode);

    /**
     * Returns the document for the next operation.
     * 
     * @return the document
     */
    DocumentImpl getDocument();

    /**
     * Returns the mode for the next operation.
     * 
     * @return the document
     */
    ReindexMode getMode();

    /**
     * When adding or removing nodes to or from the document tree, it might become
     * necessary to re-index some parts of the tree, in particular if indexes are defined
     * on mixed content nodes. It will then return the top-most root.
     *
     * @param node the node to be modified.
     * @param path path the NodePath of the node
     * @param insert true if a node is being inserted or appended. In this case, the method
     *               will be called with the parent node as first argument. Usually a reindex is
     *               not required unless the index is defined on the parent node or an ancestor of it.
     * @param includeSelf if set to true, the current node itself will be included in the check
     * @return the top-most root node to be reindexed
     */
    <T extends IStoredNode> IStoredNode getReindexRoot(IStoredNode<T> node, NodePath path, boolean insert, boolean includeSelf);

    /**
     * Return a stream listener to index the current document in the current mode.
     * There will never be more than one StreamListener being used per thread, so it is safe
     * for the implementation to reuse a single StreamListener.
     *
     * Parameter mode specifies the type of the current operation.
     *
     * @return a StreamListener
     */
    StreamListener getListener();

    /**
     * Returns a {@link org.exist.indexing.MatchListener}, which can be used to filter
     * (and manipulate) the XML output generated by the serializer when serializing
     * query results. The method should return null if the implementation is not interested
     * in receiving serialization events.
     *
     * @param proxy the NodeProxy which is being serialized
     * @return a MatchListener or null if the implementation does not want to receive
     * serialization events
     */
    MatchListener getMatchListener(DBBroker broker, NodeProxy proxy);

    /**
     * Flush the index. This method will be called when indexing a document. The implementation should
     * immediately process all data it has buffered (if there is any), release as many memory resources
     * as it can and prepare for being reused for a different job.
     */
    void flush();

    /**
     * Remove all indexes for the given collection, its subcollections and
     * all resources..
     *
     * @param collection The collection to remove
     * @param broker The broker that will perform the operation
     */
    void removeCollection(Collection collection, DBBroker broker, boolean reindex) throws PermissionDeniedException;

    /** 
     * Checking index could be delegated to a worker. Use this method to do so.
     * @param broker The broker that will perform the operation
     * @return Whether or not the index if in a suitable state
     */
    boolean checkIndex(DBBroker broker);

    /** 
     * Return <strong>aggregated</strong> (on a document count basis) 
     * index entries for the specified document set. Aggregation can only occur if
     * the index entries can be compared, i.e. if the index implements 
     * {@link org.exist.indexing.OrderedValuesIndex}, otherwise each entry will be considered
     * as a single occurrence.
     * @param context 
     * @param docs The documents to which the index entries belong
     * @param contextSet
     * @param hints Some "hints" for retrieving the index entries. See such hints in
     * {@link org.exist.indexing.OrderedValuesIndex} and {@link org.exist.indexing.QNamedKeysIndex}.
     * @return Occurrences objects that contain :
     * <ol>
     * <li>a <strong>string</strong> representation of the index entry. This may change in the future.</li>
     * <li>the number of occurrences for the index entry over all the documents</li>
     * <li>the list of the documents in which the index entry is</li>
     * </ol> 
     */
    public Occurrences[] scanIndex(XQueryContext context, DocumentSet docs, NodeSet contextSet, Map<?,?> hints);

    /**
     * Returns a {@link QueryRewriter} to be called by the query optimizer.
     *
     * @param context the current XQuery context
     * @return the query rewriter or null if the index does no rewriting
     */
    QueryRewriter getQueryRewriter(XQueryContext context);

    //TODO : a scanIndex() method that would return an unaggregated list of index entries ?

}