/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.core.query.lucene;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.jcr.PropertyType;
import javax.jcr.RepositoryException;
import javax.jcr.query.InvalidQueryException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.jackrabbit.core.HierarchyManager;
import org.apache.jackrabbit.core.SessionImpl;
import org.apache.jackrabbit.core.cluster.ChangeLogRecord;
import org.apache.jackrabbit.core.cluster.ClusterNode;
import org.apache.jackrabbit.core.cluster.ClusterRecord;
import org.apache.jackrabbit.core.cluster.ClusterRecordDeserializer;
import org.apache.jackrabbit.core.cluster.ClusterRecordProcessor;
import org.apache.jackrabbit.core.cluster.LockRecord;
import org.apache.jackrabbit.core.cluster.NamespaceRecord;
import org.apache.jackrabbit.core.cluster.NodeTypeRecord;
import org.apache.jackrabbit.core.cluster.PrivilegeRecord;
import org.apache.jackrabbit.core.cluster.WorkspaceRecord;
import org.apache.jackrabbit.core.fs.FileSystem;
import org.apache.jackrabbit.core.fs.FileSystemException;
import org.apache.jackrabbit.core.fs.FileSystemResource;
import org.apache.jackrabbit.core.fs.local.LocalFileSystem;
import org.apache.jackrabbit.core.id.NodeId;
import org.apache.jackrabbit.core.journal.Journal;
import org.apache.jackrabbit.core.journal.JournalException;
import org.apache.jackrabbit.core.journal.Record;
import org.apache.jackrabbit.core.journal.RecordIterator;
import org.apache.jackrabbit.core.query.AbstractQueryHandler;
import org.apache.jackrabbit.core.query.ExecutableQuery;
import org.apache.jackrabbit.core.query.QueryHandler;
import org.apache.jackrabbit.core.query.QueryHandlerContext;
import org.apache.jackrabbit.core.query.lucene.directory.DirectoryManager;
import org.apache.jackrabbit.core.query.lucene.directory.FSDirectoryManager;
import org.apache.jackrabbit.core.query.lucene.hits.AbstractHitCollector;
import org.apache.jackrabbit.core.session.SessionContext;
import org.apache.jackrabbit.core.state.ItemState;
import org.apache.jackrabbit.core.state.ItemStateException;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.core.state.NoSuchItemStateException;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.state.PropertyState;
import org.apache.jackrabbit.spi.Name;
import org.apache.jackrabbit.spi.Path;
import org.apache.jackrabbit.spi.PathFactory;
import org.apache.jackrabbit.spi.commons.name.NameConstants;
import org.apache.jackrabbit.spi.commons.name.PathFactoryImpl;
import org.apache.jackrabbit.spi.commons.query.DefaultQueryNodeFactory;
import org.apache.jackrabbit.spi.commons.query.qom.OrderingImpl;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LimitTokenCountAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Version;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.fork.ForkParser;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;

/**
 * Implements a {@link org.apache.jackrabbit.core.query.QueryHandler} using
 * Lucene.
 */
public class SearchIndex extends AbstractQueryHandler {

    /**
     * Valid node type names under /jcr:system. Used to determine if a
     * query needs to be executed also against the /jcr:system tree.
     */
    public static final Collection<Name> VALID_SYSTEM_INDEX_NODE_TYPE_NAMES =
            Collections.unmodifiableCollection(Arrays.asList(
                    NameConstants.NT_CHILDNODEDEFINITION,
                    NameConstants.NT_FROZENNODE,
                    NameConstants.NT_NODETYPE,
                    NameConstants.NT_PROPERTYDEFINITION,
                    NameConstants.NT_VERSION,
                    NameConstants.NT_VERSIONEDCHILD,
                    NameConstants.NT_VERSIONHISTORY,
                    NameConstants.NT_VERSIONLABELS,
                    NameConstants.REP_NODETYPES,
                    NameConstants.REP_SYSTEM,
                    NameConstants.REP_VERSIONSTORAGE,
                    // Supertypes
                    NameConstants.NT_BASE,
                    NameConstants.MIX_REFERENCEABLE));

    /**
     * Default query node factory.
     */
    private static final DefaultQueryNodeFactory DEFAULT_QUERY_NODE_FACTORY =
            new DefaultQueryNodeFactory(VALID_SYSTEM_INDEX_NODE_TYPE_NAMES);

    /** The logger instance for this class */
    private static final Logger log = LoggerFactory.getLogger(SearchIndex.class);

    /**
     * Name of the file to persist search internal namespace mappings.
     */
    private static final String NS_MAPPING_FILE = "ns_mappings.properties";

    /**
     * The default value for property {@link #minMergeDocs}.
     */
    public static final int DEFAULT_MIN_MERGE_DOCS = 100;

    /**
     * The default value for property {@link #maxMergeDocs}.
     */
    public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;

    /**
     * The default value for property {@link #mergeFactor}.
     */
    public static final int DEFAULT_MERGE_FACTOR = 10;

    /**
     * The default value for property {@link #maxFieldLength}.
     */
    public static final int DEFAULT_MAX_FIELD_LENGTH = 10000;
    /**
     * The default value for property {@link #extractorPoolSize}.
     * @deprecated this value is not used anymore. Instead the default value
     * is calculated as follows: 2 * Runtime.getRuntime().availableProcessors().
     */
    public static final int DEFAULT_EXTRACTOR_POOL_SIZE = 0;

    /**
     * The default value for property {@link #extractorBackLog}.
     */
    public static final int DEFAULT_EXTRACTOR_BACK_LOG = Integer.MAX_VALUE;

    /**
     * The default timeout in milliseconds which is granted to the text
     * extraction process until fulltext indexing is deferred to a background
     * thread.
     */
    public static final long DEFAULT_EXTRACTOR_TIMEOUT = 100;

    /**
     * The default value for {@link #termInfosIndexDivisor}.
     */
    public static final int DEFAULT_TERM_INFOS_INDEX_DIVISOR = 1;

    /**
     * The path factory.
     */
    protected static final PathFactory PATH_FACTORY = PathFactoryImpl.getInstance();

    /**
     * The path of the root node.
     */
    protected static final Path ROOT_PATH;

    /**
     * The path <code>/jcr:system</code>.
     */
    protected static final Path JCR_SYSTEM_PATH;

    static {
        ROOT_PATH = PATH_FACTORY.create(NameConstants.ROOT);
        try {
            JCR_SYSTEM_PATH = PATH_FACTORY.create(ROOT_PATH, NameConstants.JCR_SYSTEM, false);
        } catch (RepositoryException e) {
            // should never happen, path is always valid
            throw new InternalError(e.getMessage());
        }
    }

    /**
     * The actual index
     */
    protected MultiIndex index;

    /**
     * The analyzer we use for indexing.
     */
    private final JackrabbitAnalyzer analyzer = new JackrabbitAnalyzer();

    /**
     * Path of the Tika configuration file used for text extraction.
     */
    private String tikaConfigPath = null;

    /**
     * Java command used to fork external parser processes,
     * or <code>null</code> (the default) for in-process text extraction.
     */
    private String forkJavaCommand = null;

    /**
     * The Tika parser for extracting text content from binary properties.
     * Initialized by the {@link #getParser()} method during first access.
     */
    private Parser parser = null;

    /**
     * The namespace mappings used internally.
     */
    private NamespaceMappings nsMappings;

    /**
     * The location of the search index.
     * <p>
     * Note: This is a <b>mandatory</b> parameter!
     */
    private String path;

    /**
     * minMergeDocs config parameter.
     */
    private int minMergeDocs = DEFAULT_MIN_MERGE_DOCS;

    /**
     * The maximum volatile index size in bytes until it is written to disk.
     * The default value is 1048576 (1MB).
     */
    private long maxVolatileIndexSize = 1024 * 1024;

    /**
     * volatileIdleTime config parameter.
     */
    private int volatileIdleTime = 3;

    /**
     * The maximum age (in seconds) of the index history. The default value is
     * zero, which means index commits are deleted as soon as they are no
     * longer used.
     */
    private long maxHistoryAge = 0;

    /**
     * maxMergeDocs config parameter
     */
    private int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;

    /**
     * mergeFactor config parameter
     */
    private int mergeFactor = DEFAULT_MERGE_FACTOR;

    /**
     * maxFieldLength config parameter
     */
    private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;

    /**
     * maxExtractLength config parameter. Positive values are used as-is,
     * negative values are interpreted as factors of the maxFieldLength
     * parameter.
     */
    private int maxExtractLength = -10;

    /**
     * extractorPoolSize config parameter
     */
    private int extractorPoolSize = 2 * Runtime.getRuntime().availableProcessors();

    /**
     * extractorBackLog config parameter
     */
    private int extractorBackLog = DEFAULT_EXTRACTOR_BACK_LOG;

    /**
     * extractorTimeout config parameter
     */
    private long extractorTimeout = DEFAULT_EXTRACTOR_TIMEOUT;
    /**
     * Number of documents that are buffered before they are added to the
     * index.
     */
    private int bufferSize = 10;

    /**
     * Compound file flag
     */
    private boolean useCompoundFile = true;

    /**
     * Flag indicating whether document order is enabled as the default
     * ordering.
     * <p>
     * Default value is: <code>false</code>.
     */
    private boolean documentOrder = false;

    /**
     * If set <code>true</code> the index is checked for consistency on
     * startup. If <code>false</code> a consistency check is only performed
     * when there are entries in the redo log on startup.
     * <p>
     * Default value is: <code>false</code>.
     */
    private boolean forceConsistencyCheck = false;

    /**
     * If set <code>true</code> the index is checked for consistency depending
     * on the {@link #forceConsistencyCheck} parameter. If set to
     * <code>false</code>, no consistency check is performed, even if the redo
     * log had been applied on startup.
     * <p>
     * Default value is: <code>false</code>.
     */
    private boolean consistencyCheckEnabled = false;

    /**
     * If set <code>true</code> errors detected by the consistency check are
     * repaired. If <code>false</code> the errors are only reported in the log.
     * <p>
     * Default value is: <code>true</code>.
     */
    private boolean autoRepair = true;

    /**
     * The id resolver cache size.
     * <p>
     * Default value is: <code>1000</code>.
     */
    private int cacheSize = 1000;

    /**
     * The number of documents that are pre-fetched when a query is executed.
     * <p>
     * Default value is: {@link Integer#MAX_VALUE}.
     */
    private int resultFetchSize = Integer.MAX_VALUE;

    /**
     * If set to <code>true</code> the fulltext field is stored and a term
     * vector is created with offset information.
     * <p>
     * Default value is: <code>false</code>.
     */
    private boolean supportHighlighting = false;

    /**
     * If enabled, NodeIterator.getSize() may report a larger value than the
     * actual result. This value may shrink when the query result encounters
     * non-existing nodes or the session does not have access to a node. This
     * might be a security problem.
     */
    private boolean sizeEstimate = false;

    /**
     * The excerpt provider class. Implements {@link ExcerptProvider}.
     */
    private Class<?> excerptProviderClass = DefaultHTMLExcerpt.class;

    /**
     * The path to the indexing configuration file (can be an absolute path to
     * a file or a classpath resource).
     */
    private String indexingConfigPath;

    /**
     * The DOM with the indexing configuration or <code>null</code> if there
     * is no such configuration.
     */
    private Element indexingConfiguration;

    /**
     * The indexing configuration.
     */
    private IndexingConfiguration indexingConfig;

    /**
     * The indexing configuration class.
     * Implements {@link IndexingConfiguration}.
     */
    private Class<?> indexingConfigurationClass = IndexingConfigurationImpl.class;

    /**
     * The class that implements {@link SynonymProvider}.
     */
    private Class<?> synonymProviderClass;

    /**
     * The currently set synonym provider.
     */
    private SynonymProvider synProvider;

    /**
     * The configuration path for the synonym provider.
     */
    private String synonymProviderConfigPath;

    /**
     * The FileSystem for the synonym if the query handler context does not
     * provide one.
     */
    private FileSystem synonymProviderConfigFs;

    /**
     * Indicates the index format version which is relevant to a <b>query</b>.
     * This value may be different from what
     * {@link MultiIndex#getIndexFormatVersion()} returns because queries may
     * be executed on two physical indexes with different formats. Index format
     * versions are considered backward compatible. That is, the lower version
     * of the two physical indexes is used for querying.
     */
    private IndexFormatVersion indexFormatVersion;
    /**
     * The class that implements {@link SpellChecker}.
     */
    private Class<?> spellCheckerClass;

    /**
     * The spell checker for this query handler or <code>null</code> if none
     * is configured.
     */
    private SpellChecker spellChecker;

    /**
     * The similarity in use for indexing and searching.
     */
    private Similarity similarity = Similarity.getDefault();

    /**
     * The name of the directory manager class implementation.
     */
    private String directoryManagerClass = FSDirectoryManager.class.getName();

    /**
     * The directory manager.
     */
    private DirectoryManager directoryManager;

    /**
     * Flag that indicates whether the {@link DirectoryManager} should
     * use the <code>SimpleFSDirectory</code> instead of letting Lucene
     * automatically pick an implementation based on the platform we are
     * running on. Note: see JCR-3818 for a discussion on the trade-off.
     */
    private boolean useSimpleFSDirectory = true;

    /**
     * The termInfosIndexDivisor.
     */
    private int termInfosIndexDivisor = DEFAULT_TERM_INFOS_INDEX_DIVISOR;

    /**
     * The field comparator source for indexed properties.
     */
    private SharedFieldComparatorSource scs;

    /**
     * Flag that indicates whether the hierarchy cache should be initialized
     * immediately on startup.
     */
    private boolean initializeHierarchyCache = true;

    /**
     * The name of the redo log factory class implementation.
     */
    private String redoLogFactoryClass = DefaultRedoLogFactory.class.getName();

    /**
     * The redo log factory.
     */
    private RedoLogFactory redoLogFactory;

    /**
     * Indicates if this <code>SearchIndex</code> is closed and cannot be used
     * anymore.
     */
    private boolean closed = false;
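    // The parameters above are normally wired in through the SearchIndex
    // element in workspace.xml. A minimal configuration sketch (parameter
    // names correspond to the setters on this class; the values shown are
    // illustrative assumptions, not defaults):
    //
    //   <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
    //     <param name="path" value="${wsp.home}/index"/>
    //     <param name="supportHighlighting" value="true"/>
    //     <param name="resultFetchSize" value="50"/>
    //   </SearchIndex>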
    /**
     * Initializes this <code>QueryHandler</code>. This implementation
     * requires that a path parameter is set in the configuration. If this
     * condition is not met, an <code>IOException</code> is thrown.
     *
     * @throws IOException if an error occurs while initializing this handler.
     */
    protected void doInit() throws IOException {
        QueryHandlerContext context = getContext();
        if (path == null) {
            throw new IOException("SearchIndex requires 'path' parameter in configuration!");
        }

        Set<NodeId> excludedIDs = new HashSet<NodeId>();
        if (context.getExcludedNodeId() != null) {
            excludedIDs.add(context.getExcludedNodeId());
        }

        synProvider = createSynonymProvider();
        directoryManager = createDirectoryManager();
        redoLogFactory = createRedoLogFactory();

        if (context.getParentHandler() instanceof SearchIndex) {
            // use system namespace mappings
            SearchIndex sysIndex = (SearchIndex) context.getParentHandler();
            nsMappings = sysIndex.getNamespaceMappings();
        } else {
            // read local namespace mappings
            File mapFile = new File(new File(path), NS_MAPPING_FILE);
            if (mapFile.exists()) {
                // be backward compatible and use ns_mappings.properties from
                // index folder
                nsMappings = new FileBasedNamespaceMappings(mapFile);
            } else {
                // otherwise use repository wide stable index prefix from
                // namespace registry
                nsMappings = new NSRegistryBasedNamespaceMappings(
                        context.getNamespaceRegistry());
            }
        }

        scs = new SharedFieldComparatorSource(
                FieldNames.PROPERTIES, context.getItemStateManager(),
                context.getHierarchyManager(), nsMappings);
        indexingConfig = createIndexingConfiguration(nsMappings);
        analyzer.setIndexingConfig(indexingConfig);

        // initialize the Tika parser
        parser = createParser();

        index = new MultiIndex(this, excludedIDs);
        if (index.numDocs() == 0) {
            Path rootPath;
            if (excludedIDs.isEmpty()) {
                // this is the index for jcr:system
                rootPath = JCR_SYSTEM_PATH;
            } else {
                rootPath = ROOT_PATH;
            }
            index.createInitialIndex(context.getItemStateManager(),
                    context.getRootId(), rootPath);
            checkPendingJournalChanges(context);
        }
        if (consistencyCheckEnabled
                && (index.getRedoLogApplied() || forceConsistencyCheck)) {
            log.info("Running consistency check...");
            try {
                ConsistencyCheck check = runConsistencyCheck();
                if (autoRepair) {
                    check.repair(true);
                } else {
                    List<ConsistencyCheckError> errors = check.getErrors();
                    if (errors.size() == 0) {
                        log.info("No errors detected.");
                    }
                    for (ConsistencyCheckError err : errors) {
                        log.info(err.toString());
                    }
                }
            } catch (Exception e) {
                log.warn("Failed to run consistency check on index: " + e);
            }
        }

        // initialize spell checker
        spellChecker = createSpellChecker();

        log.info("Index initialized: {} Version: {}",
                new Object[]{path, index.getIndexFormatVersion()});
        if (!index.getIndexFormatVersion().equals(getIndexFormatVersion())) {
            log.warn("Using Version {} for reading. Please re-index version "
                    + "storage for optimal performance.",
                    getIndexFormatVersion().getVersion());
        }
    }

    /**
     * Adds the <code>node</code> to the search index.
     * @param node the node to add.
     * @throws RepositoryException if an error occurs while indexing the node.
     * @throws IOException if an error occurs while adding the node to the
     *                     index.
     */
    public void addNode(NodeState node) throws RepositoryException, IOException {
        throw new UnsupportedOperationException("addNode");
    }

    /**
     * Removes the node with <code>id</code> from the search index.
     * @param id the id of the node to remove from the index.
     * @throws IOException if an error occurs while removing the node from
     *                     the index.
     */
    public void deleteNode(NodeId id) throws IOException {
        throw new UnsupportedOperationException("deleteNode");
    }
    /**
     * This implementation forwards the call to
     * {@link MultiIndex#update(Collection, Collection)} and
     * transforms the two iterators to the required types.
     *
     * @param remove ids of nodes to remove.
     * @param add    NodeStates to add. Calls to <code>next()</code> on this
     *               iterator may return <code>null</code>, to indicate that a
     *               node could not be indexed successfully.
     * @throws RepositoryException if an error occurs while indexing a node.
     * @throws IOException         if an error occurs while updating the index.
     */
    public void updateNodes(Iterator<NodeId> remove, Iterator<NodeState> add)
            throws RepositoryException, IOException {
        checkOpen();

        Map<NodeId, NodeState> aggregateRoots = new HashMap<NodeId, NodeState>();
        Set<NodeId> removedIds = new HashSet<NodeId>();
        Set<NodeId> addedIds = new HashSet<NodeId>();

        Collection<NodeId> removeCollection = new ArrayList<NodeId>();
        while (remove.hasNext()) {
            NodeId id = remove.next();
            removeCollection.add(id);
            removedIds.add(id);
        }

        Collection<Document> addCollection = new ArrayList<Document>();
        while (add.hasNext()) {
            NodeState state = add.next();
            if (state != null) {
                NodeId id = state.getNodeId();
                addedIds.add(id);
                retrieveAggregateRoot(state, aggregateRoots);

                try {
                    addCollection.add(createDocument(
                            state, getNamespaceMappings(),
                            index.getIndexFormatVersion()));
                } catch (RepositoryException e) {
                    log.warn("Exception while creating document for node: "
                            + state.getNodeId() + ": " + e.toString());
                }
            }
        }

        index.update(removeCollection, addCollection);

        // remove any aggregateRoot nodes that are new
        // and therefore already up-to-date
        aggregateRoots.keySet().removeAll(addedIds);

        // based on removed ids get affected aggregate root nodes
        retrieveAggregateRoot(removedIds, aggregateRoots);

        // update aggregates if there are any affected
        if (!aggregateRoots.isEmpty()) {
            Collection<Document> modified =
                    new ArrayList<Document>(aggregateRoots.size());
            for (NodeState state : aggregateRoots.values()) {
                try {
                    modified.add(createDocument(
                            state, getNamespaceMappings(),
                            index.getIndexFormatVersion()));
                } catch (RepositoryException e) {
                    log.warn("Exception while creating document for node: "
                            + state.getNodeId(), e);
                }
            }
            index.update(aggregateRoots.keySet(), modified);
        }
    }
    /**
     * Creates a new query by specifying the query statement itself and the
     * language in which the query is stated. If the query statement is
     * syntactically invalid, given the language specified, an
     * <code>InvalidQueryException</code> is thrown. <code>language</code>
     * must specify a query language string from among those returned by
     * <code>QueryManager.getSupportedQueryLanguages()</code>; if it is not,
     * an <code>InvalidQueryException</code> is thrown.
     *
     * @param sessionContext component context of the current session
     * @param statement      the query statement.
     * @param language       the syntax of the query statement.
     * @throws InvalidQueryException if statement is invalid or language is
     *                               unsupported.
     * @return A <code>Query</code> object.
     */
    public ExecutableQuery createExecutableQuery(
            SessionContext sessionContext, String statement, String language)
            throws InvalidQueryException {
        QueryImpl query = new QueryImpl(
                sessionContext, this, getContext().getPropertyTypeRegistry(),
                statement, language, getQueryNodeFactory());
        query.setRespectDocumentOrder(documentOrder);
        return query;
    }

    /**
     * {@inheritDoc}
     */
    public Iterable<NodeId> getWeaklyReferringNodes(NodeId id)
            throws RepositoryException, IOException {
        final List<Integer> docs = new ArrayList<Integer>();
        final List<NodeId> ids = new ArrayList<NodeId>();
        final IndexReader reader = getIndexReader();
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                Query q = new TermQuery(new Term(
                        FieldNames.WEAK_REFS, id.toString()));
                searcher.search(q, new AbstractHitCollector() {
                    @Override
                    public void collect(int doc, float score) {
                        docs.add(doc);
                    }
                });
            } finally {
                searcher.close();
            }
            for (Integer doc : docs) {
                Document d = reader.document(doc, FieldSelectors.UUID);
                ids.add(new NodeId(d.get(FieldNames.UUID)));
            }
        } finally {
            Util.closeOrRelease(reader);
        }
        return ids;
    }

    List<Document> getNodeDocuments(NodeId id) throws RepositoryException, IOException {
        final List<Integer> docIds = new ArrayList<Integer>(1);
        final List<Document> docs = new ArrayList<Document>();
        final IndexReader reader = getIndexReader();
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                Query q = new TermQuery(new Term(FieldNames.UUID, id.toString()));
                searcher.search(q, new AbstractHitCollector() {
                    @Override
                    protected void collect(final int doc, final float score) {
                        docIds.add(doc);
                    }
                });
                for (Integer docId : docIds) {
                    docs.add(reader.document(docId, FieldSelectors.UUID_AND_PARENT));
                }
            } finally {
                searcher.close();
            }
        } finally {
            Util.closeOrRelease(reader);
        }
        return docs;
    }

    /**
     * This method returns the QueryNodeFactory used to parse Queries. This
     * method may be overridden to provide a customized QueryNodeFactory.
     *
     * @return the query node factory.
     */
    protected DefaultQueryNodeFactory getQueryNodeFactory() {
        return DEFAULT_QUERY_NODE_FACTORY;
    }

    /**
     * Waits until all pending text extraction tasks have been processed
     * and the updated index has been flushed to disk.
     *
     * @throws RepositoryException if the index update cannot be written
     */
    public void flush() throws RepositoryException {
        try {
            index.waitUntilIndexingQueueIsEmpty();
            index.safeFlush();
            // flush may have pushed nodes into the indexing queue
            // -> wait again
            index.waitUntilIndexingQueueIsEmpty();
        } catch (IOException e) {
            throw new RepositoryException("Failed to flush the index", e);
        }
    }

    /**
     * Closes this <code>QueryHandler</code> and frees resources attached
     * to this handler.
     */
    public void close() throws IOException {
        if (synonymProviderConfigFs != null) {
            try {
                synonymProviderConfigFs.close();
            } catch (FileSystemException e) {
                log.warn("Exception while closing FileSystem", e);
            }
        }
        if (spellChecker != null) {
            spellChecker.close();
        }
        index.close();
        getContext().destroy();
        super.close();
        closed = true;
        log.info("Index closed: " + path);
    }
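    // Readers returned by getIndexReader() must be released by the caller.
    // A minimal usage sketch of the pattern applied throughout this class
    // (the variable names are hypothetical):
    //
    //   IndexReader reader = searchIndex.getIndexReader();
    //   try {
    //       // ... run Lucene queries against the reader ...
    //   } finally {
    //       Util.closeOrRelease(reader);
    //   }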
    /**
     * Executes the query on the search index.
     *
     * @param session         the session that executes the query.
     * @param queryImpl       the query impl.
     * @param query           the lucene query.
     * @param orderProps      name of the properties for sort order.
     * @param orderSpecs      the order specs for the sort order properties.
     *                        <code>true</code> indicates ascending order,
     *                        <code>false</code> indicates descending.
     * @param orderFuncs      functions for the properties for sort order.
     * @param resultFetchHint a hint on how many results should be fetched.
     * @return the query hits.
     * @throws IOException if an error occurs while searching the index.
     */
    public MultiColumnQueryHits executeQuery(SessionImpl session,
                                             AbstractQueryImpl queryImpl,
                                             Query query,
                                             Path[] orderProps,
                                             boolean[] orderSpecs,
                                             String[] orderFuncs,
                                             long resultFetchHint)
            throws IOException {
        checkOpen();

        Sort sort = new Sort(createSortFields(orderProps, orderSpecs, orderFuncs));

        final IndexReader reader = getIndexReader(queryImpl.needsSystemTree());
        JackrabbitIndexSearcher searcher = new JackrabbitIndexSearcher(
                session, reader, getContext().getItemStateManager());
        searcher.setSimilarity(getSimilarity());
        return new FilterMultiColumnQueryHits(
                searcher.execute(query, sort, resultFetchHint,
                        QueryImpl.DEFAULT_SELECTOR_NAME)) {
            public void close() throws IOException {
                try {
                    super.close();
                } finally {
                    Util.closeOrRelease(reader);
                }
            }
        };
    }

    /**
     * Executes the query on the search index.
     *
     * @param session         the session that executes the query.
     * @param query           the query.
     * @param orderings       the order specs for the sort order.
     * @param resultFetchHint a hint on how many results should be fetched.
     * @return the query hits.
     * @throws IOException if an error occurs while searching the index.
     */
    public MultiColumnQueryHits executeQuery(SessionImpl session,
                                             MultiColumnQuery query,
                                             Ordering[] orderings,
                                             long resultFetchHint)
            throws IOException {
        checkOpen();

        final IndexReader reader = getIndexReader();
        JackrabbitIndexSearcher searcher = new JackrabbitIndexSearcher(
                session, reader, getContext().getItemStateManager());
        searcher.setSimilarity(getSimilarity());
        return new FilterMultiColumnQueryHits(
                query.execute(searcher, orderings, resultFetchHint)) {
            public void close() throws IOException {
                try {
                    super.close();
                } finally {
                    Util.closeOrRelease(reader);
                }
            }
        };
    }

    /**
     * Creates an excerpt provider for the given <code>query</code>.
     *
     * @param query the query.
     * @return an excerpt provider for the given <code>query</code>.
     * @throws IOException if the provider cannot be created.
     */
    public ExcerptProvider createExcerptProvider(Query query) throws IOException {
        ExcerptProvider ep;
        try {
            ep = (ExcerptProvider) excerptProviderClass.newInstance();
        } catch (Exception e) {
            throw Util.createIOException(e);
        }
        ep.init(query, this);
        return ep;
    }

    /**
     * Returns the analyzer in use for indexing.
     * @return the analyzer in use for indexing.
     */
    public Analyzer getTextAnalyzer() {
        return new LimitTokenCountAnalyzer(analyzer, getMaxFieldLength());
    }

    /**
     * Returns the path of the Tika configuration used for text extraction.
     *
     * @return path of the Tika configuration file
     */
    public String getTikaConfigPath() {
        return tikaConfigPath;
    }

    /**
     * Sets the path of the Tika configuration used for text extraction.
     * The path can be either a file system or a class resource path.
     * The default setting is the tika-config.xml class resource relative
     * to org.apache.jackrabbit.core.query.lucene.
     *
     * @param tikaConfigPath path of the Tika configuration file
     */
    public void setTikaConfigPath(String tikaConfigPath) {
        this.tikaConfigPath = tikaConfigPath;
    }

    /**
     * Returns the java command used to fork external parser processes,
     * or <code>null</code> (the default) for in-process text extraction.
     *
     * @return fork java command
     */
    public String getForkJavaCommand() {
        return forkJavaCommand;
    }
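    // Forked text extraction is enabled by configuring a java command; the
    // ForkParser created in createParser() then runs Tika in a separate
    // JVM. An illustrative workspace.xml sketch (the path and heap size are
    // assumptions):
    //
    //   <param name="forkJavaCommand" value="/usr/bin/java -Xmx512m"/>
    //   <param name="tikaConfigPath" value="${rep.home}/tika-config.xml"/>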
    /**
     * Sets the java command used to fork external parser processes.
     *
     * @param command fork java command,
     *                or <code>null</code> for in-process extraction
     */
    public void setForkJavaCommand(String command) {
        this.forkJavaCommand = command;
    }

    /**
     * Returns the parser used for extracting text content
     * from binary properties for full text indexing.
     *
     * @return the configured parser
     */
    public Parser getParser() {
        return parser;
    }

    private Parser createParser() {
        URL url = null;
        if (tikaConfigPath != null) {
            File file = new File(tikaConfigPath);
            if (file.exists()) {
                try {
                    url = file.toURI().toURL();
                } catch (MalformedURLException e) {
                    log.warn("Invalid Tika configuration path: " + file, e);
                }
            } else {
                ClassLoader loader = SearchIndex.class.getClassLoader();
                url = loader.getResource(tikaConfigPath);
            }
        }
        if (url == null) {
            url = SearchIndex.class.getResource("tika-config.xml");
        }

        TikaConfig config = null;
        if (url != null) {
            try {
                config = new TikaConfig(url);
            } catch (Exception e) {
                log.warn("Tika configuration not available: " + url, e);
            }
        }
        if (config == null) {
            config = TikaConfig.getDefaultConfig();
        }

        if (forkJavaCommand != null) {
            ForkParser forkParser = new ForkParser(
                    SearchIndex.class.getClassLoader(),
                    new AutoDetectParser(config));
            forkParser.setJavaCommand(forkJavaCommand);
            forkParser.setPoolSize(extractorPoolSize);
            return forkParser;
        } else {
            return new AutoDetectParser(config);
        }
    }

    /**
     * Returns the namespace mappings for the internal representation.
     * @return the namespace mappings for the internal representation.
     */
    public NamespaceMappings getNamespaceMappings() {
        return nsMappings;
    }

    /**
     * @return the indexing configuration or <code>null</code> if there is
     *         none.
     */
    public IndexingConfiguration getIndexingConfig() {
        return indexingConfig;
    }

    /**
     * @return the synonym provider of this search index. If none is set for
     *         this search index the synonym provider of the parent handler is
     *         returned if there is any.
     */
    public SynonymProvider getSynonymProvider() {
        if (synProvider != null) {
            return synProvider;
        } else {
            QueryHandler handler = getContext().getParentHandler();
            if (handler instanceof SearchIndex) {
                return ((SearchIndex) handler).getSynonymProvider();
            } else {
                return null;
            }
        }
    }

    /**
     * @return the spell checker of this search index. If none is configured
     *         this method returns <code>null</code>.
     */
    public SpellChecker getSpellChecker() {
        return spellChecker;
    }

    /**
     * @return the similarity, which should be used for indexing and searching.
     */
    public Similarity getSimilarity() {
        return similarity;
    }

    /**
     * Returns an index reader for this search index. The caller of this
     * method is responsible for closing the index reader when finished
     * using it.
     *
     * @return an index reader for this search index.
     * @throws IOException if the index reader cannot be obtained.
     */
    public IndexReader getIndexReader() throws IOException {
        return getIndexReader(true);
    }
    /**
     * Returns the index format version that this search index is able to
     * support when a query is executed on this index.
     *
     * @return the index format version for this search index.
     */
    public IndexFormatVersion getIndexFormatVersion() {
        if (indexFormatVersion == null) {
            if (getContext().getParentHandler() instanceof SearchIndex) {
                SearchIndex parent = (SearchIndex) getContext().getParentHandler();
                if (parent.getIndexFormatVersion().getVersion()
                        < index.getIndexFormatVersion().getVersion()) {
                    indexFormatVersion = parent.getIndexFormatVersion();
                } else {
                    indexFormatVersion = index.getIndexFormatVersion();
                }
            } else {
                indexFormatVersion = index.getIndexFormatVersion();
            }
        }
        return indexFormatVersion;
    }

    /**
     * @return the directory manager for this search index.
     */
    public DirectoryManager getDirectoryManager() {
        return directoryManager;
    }

    /**
     * @return the redo log factory for this search index.
     */
    public RedoLogFactory getRedoLogFactory() {
        return redoLogFactory;
    }

    /**
     * Runs a consistency check on this search index.
     *
     * @return the result of the consistency check.
     * @throws IOException if an error occurs while running the check.
     */
    public ConsistencyCheck runConsistencyCheck() throws IOException {
        return index.runConsistencyCheck();
    }

    /**
     * Returns an index reader for this search index. The caller of this
     * method is responsible for closing the index reader when finished
     * using it.
     *
     * @param includeSystemIndex if <code>true</code> the index reader will
     *                           cover the complete workspace. If
     *                           <code>false</code> the returned index reader
     *                           will not contain any nodes under /jcr:system.
     * @return an index reader for this search index.
     * @throws IOException if the index reader cannot be obtained.
     */
    protected IndexReader getIndexReader(boolean includeSystemIndex)
            throws IOException {
        QueryHandler parentHandler = getContext().getParentHandler();
        CachingMultiIndexReader parentReader = null;
        if (parentHandler instanceof SearchIndex && includeSystemIndex) {
            parentReader = ((SearchIndex) parentHandler).index.getIndexReader();
        }

        IndexReader reader;
        if (parentReader != null) {
            CachingMultiIndexReader[] readers = {index.getIndexReader(), parentReader};
            reader = new CombinedIndexReader(readers);
        } else {
            reader = index.getIndexReader();
        }
        return new JackrabbitIndexReader(reader);
    }

    /**
     * Creates the SortFields for the order properties.
     *
     * @param orderProps the order properties.
     * @param orderSpecs the order specs for the properties.
     * @param orderFuncs the functions for the properties.
     * @return an array of sort fields
     */
    protected SortField[] createSortFields(Path[] orderProps,
                                           boolean[] orderSpecs,
                                           String[] orderFuncs) {
        List<SortField> sortFields = new ArrayList<SortField>();
        for (int i = 0; i < orderProps.length; i++) {
            if (orderProps[i].getLength() == 1
                    && NameConstants.JCR_SCORE.equals(orderProps[i].getName())) {
                // order on jcr:score does not use the natural order as
                // implemented in lucene. score ascending in lucene means that
                // higher scores are first. The JCR spec says that lower score
                // values come first.
                sortFields.add(new SortField(null, SortField.SCORE, orderSpecs[i]));
            } else {
                if ("upper-case".equals(orderFuncs[i])) {
                    sortFields.add(new SortField(orderProps[i].getString(),
                            new UpperCaseSortComparator(scs), !orderSpecs[i]));
                } else if ("lower-case".equals(orderFuncs[i])) {
                    sortFields.add(new SortField(orderProps[i].getString(),
                            new LowerCaseSortComparator(scs), !orderSpecs[i]));
                } else if ("normalize".equals(orderFuncs[i])) {
                    sortFields.add(new SortField(orderProps[i].getString(),
                            new NormalizeSortComparator(scs), !orderSpecs[i]));
                } else {
                    sortFields.add(new SortField(orderProps[i].getString(),
                            scs, !orderSpecs[i]));
                }
            }
        }
        return sortFields.toArray(new SortField[sortFields.size()]);
    }

    /**
     * Creates internal orderings for the QOM ordering specifications.
     *
     * @param orderings the QOM ordering specifications.
     * @return the internal orderings.
     * @throws RepositoryException if an error occurs.
     */
    protected Ordering[] createOrderings(OrderingImpl[] orderings)
            throws RepositoryException {
        Ordering[] ords = new Ordering[orderings.length];
        for (int i = 0; i < orderings.length; i++) {
            ords[i] = Ordering.fromQOM(orderings[i], scs, nsMappings);
        }
        return ords;
    }

    /**
     * Creates a lucene <code>Document</code> for a node state using the
     * namespace mappings <code>nsMappings</code>.
     *
     * @param node               the node state to index.
     * @param nsMappings         the namespace mappings of the search index.
     * @param indexFormatVersion the index format version that should be used
     *                           to index the passed node state.
     * @return a lucene <code>Document</code> that contains all properties of
     *         <code>node</code>.
     * @throws RepositoryException if an error occurs while indexing the
     *                             <code>node</code>.
     */
    protected Document createDocument(NodeState node,
                                      NamespaceMappings nsMappings,
                                      IndexFormatVersion indexFormatVersion)
            throws RepositoryException {
        NodeIndexer indexer = new NodeIndexer(
                node, getContext().getItemStateManager(), nsMappings,
                getContext().getExecutor(), parser);
        indexer.setSupportHighlighting(supportHighlighting);
        indexer.setIndexingConfiguration(indexingConfig);
        indexer.setIndexFormatVersion(indexFormatVersion);
        indexer.setMaxExtractLength(getMaxExtractLength());
        Document doc = indexer.createDoc();
        mergeAggregatedNodeIndexes(node, doc, indexFormatVersion);
        return doc;
    }

    /**
     * Returns the actual index.
     *
     * @return the actual index.
     */
    protected MultiIndex getIndex() {
        return index;
    }

    /**
     * @return the field comparator source for this index.
     */
    protected SharedFieldComparatorSource getSortComparatorSource() {
        return scs;
    }

    /**
     * @param namespaceMappings The namespace mappings
     * @return the fulltext indexing configuration or <code>null</code> if
     *         there is no configuration.
     */
    protected IndexingConfiguration createIndexingConfiguration(NamespaceMappings namespaceMappings) {
        Element docElement = getIndexingConfigurationDOM();
        if (docElement == null) {
            return null;
        }
        try {
            IndexingConfiguration idxCfg = (IndexingConfiguration)
                    indexingConfigurationClass.newInstance();
            idxCfg.init(docElement, getContext(), namespaceMappings);
            return idxCfg;
        } catch (Exception e) {
            log.warn("Exception initializing indexing configuration from: "
                    + indexingConfigPath, e);
        }
        log.warn(indexingConfigPath + " ignored.");
        return null;
    }
    /**
     * @return the configured synonym provider or <code>null</code> if none is
     *         configured or an error occurs.
     */
    protected SynonymProvider createSynonymProvider() {
        SynonymProvider sp = null;
        if (synonymProviderClass != null) {
            try {
                sp = (SynonymProvider) synonymProviderClass.newInstance();
                sp.initialize(createSynonymProviderConfigResource());
            } catch (Exception e) {
                log.warn("Exception initializing synonym provider: "
                        + synonymProviderClass, e);
                sp = null;
            }
        }
        return sp;
    }

    /**
     * @return an initialized {@link DirectoryManager}.
     * @throws IOException if the directory manager cannot be instantiated or
     *                     an exception occurs while initializing the manager.
     */
    protected DirectoryManager createDirectoryManager() throws IOException {
        try {
            Class<?> clazz = Class.forName(directoryManagerClass);
            if (!DirectoryManager.class.isAssignableFrom(clazz)) {
                throw new IOException(directoryManagerClass
                        + " is not a DirectoryManager implementation");
            }
            DirectoryManager df = (DirectoryManager) clazz.newInstance();
            df.init(this);
            return df;
        } catch (IOException e) {
            throw e;
        } catch (Exception e) {
            IOException ex = new IOException();
            ex.initCause(e);
            throw ex;
        }
    }

    /**
     * Creates a redo log factory based on {@link #getRedoLogFactoryClass()}.
     *
     * @return the redo log factory.
     * @throws IOException if an error occurs while creating the factory.
     */
    protected RedoLogFactory createRedoLogFactory() throws IOException {
        try {
            Class<?> clazz = Class.forName(redoLogFactoryClass);
            if (!RedoLogFactory.class.isAssignableFrom(clazz)) {
                throw new IOException(redoLogFactoryClass
                        + " is not a RedoLogFactory implementation");
            }
            return (RedoLogFactory) clazz.newInstance();
        } catch (Exception e) {
            IOException ex = new IOException();
            ex.initCause(e);
            throw ex;
        }
    }

    /**
     * Creates a file system resource to the synonym provider configuration.
     *
     * @return a file system resource or <code>null</code> if no path was
     *         configured.
     * @throws FileSystemException if an exception occurs accessing the file
     *                             system.
     * @throws IOException         if another exception occurs.
     */
    protected FileSystemResource createSynonymProviderConfigResource()
            throws FileSystemException, IOException {
        if (synonymProviderConfigPath != null) {
            FileSystemResource fsr;
            // simple sanity check
            if (synonymProviderConfigPath.endsWith(FileSystem.SEPARATOR)) {
                throw new FileSystemException(
                        "Invalid synonymProviderConfigPath: "
                        + synonymProviderConfigPath);
            }
            // use the file system of the query handler context, if any
            FileSystem fs = getContext().getFileSystem();
            if (fs == null) {
                fs = new LocalFileSystem();
                int lastSeparator = synonymProviderConfigPath.lastIndexOf(
                        FileSystem.SEPARATOR_CHAR);
                if (lastSeparator != -1) {
                    File root = new File(path,
                            synonymProviderConfigPath.substring(0, lastSeparator));
                    ((LocalFileSystem) fs).setRoot(root.getCanonicalFile());
                    fs.init();
                    fsr = new FileSystemResource(fs,
                            synonymProviderConfigPath.substring(lastSeparator + 1));
                } else {
                    ((LocalFileSystem) fs).setPath(path);
                    fs.init();
                    fsr = new FileSystemResource(fs, synonymProviderConfigPath);
                }
                synonymProviderConfigFs = fs;
            } else {
                fsr = new FileSystemResource(fs, synonymProviderConfigPath);
            }
            return fsr;
        } else {
            // path not configured
            return null;
        }
    }
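    // A synonym provider is wired up through a class name and an optional
    // configuration path. A sketch, assuming the properties-file based
    // provider shipped with jackrabbit-core and a hypothetical file name:
    //
    //   <param name="synonymProviderClass"
    //          value="org.apache.jackrabbit.core.query.lucene.PropertiesSynonymProvider"/>
    //   <param name="synonymProviderConfigPath" value="synonyms.properties"/>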
    /**
     * Creates a spell checker for this query handler.
     *
     * @return the spell checker or <code>null</code> if none is configured or
     *         an error occurs.
     */
    protected SpellChecker createSpellChecker() {
        SpellChecker spCheck = null;
        if (spellCheckerClass != null) {
            try {
                spCheck = (SpellChecker) spellCheckerClass.newInstance();
                spCheck.init(this);
            } catch (Exception e) {
                log.warn("Exception initializing spell checker: "
                        + spellCheckerClass, e);
            }
        }
        return spCheck;
    }

    /**
     * Returns the document element of the indexing configuration or
     * <code>null</code> if there is no indexing configuration.
     *
     * @return the indexing configuration or <code>null</code> if there is
     *         none.
     */
    protected Element getIndexingConfigurationDOM() {
        if (indexingConfiguration != null) {
            return indexingConfiguration;
        }
        if (indexingConfigPath == null) {
            return null;
        }
        File config = new File(indexingConfigPath);
        InputStream configStream = null;

        if (!config.exists()) {
            // check if it's a classpath resource
            configStream = getClass().getResourceAsStream(indexingConfigPath);
            if (configStream == null) {
                // only warn if not available also in the classpath
                log.warn("File does not exist: " + indexingConfigPath);
                return null;
            }
        } else if (!config.canRead()) {
            log.warn("Cannot read file: " + indexingConfigPath);
            return null;
        }
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            builder.setEntityResolver(new IndexingConfigurationEntityResolver());

            if (configStream != null) {
                indexingConfiguration = builder
                        .parse(configStream).getDocumentElement();
            } else {
                indexingConfiguration = builder
                        .parse(config).getDocumentElement();
            }
        } catch (ParserConfigurationException e) {
            log.warn("Unable to create XML parser", e);
        } catch (IOException e) {
            log.warn("Exception parsing " + indexingConfigPath, e);
        } catch (SAXException e) {
            log.warn("Exception parsing " + indexingConfigPath, e);
        } finally {
            if (configStream != null) {
                try {
                    configStream.close();
                } catch (IOException e) {
                    // ignore
                }
            }
        }
        return indexingConfiguration;
    }
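    // Aggregate rules are read from the indexing configuration file set via
    // setIndexingConfiguration(String). A sketch of a rule that indexes a
    // file's jcr:content child as part of the nt:file parent (node types
    // chosen for illustration):
    //
    //   <aggregate primaryType="nt:file">
    //     <include>jcr:content</include>
    //   </aggregate>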
    /**
     * Merges the fulltext indexed fields of the aggregated node states into
     * <code>doc</code>.
     *
     * @param state the node state on which <code>doc</code> was created.
     * @param doc   the lucene document with index fields from
     *              <code>state</code>.
     * @param ifv   the current index format version.
     */
    protected void mergeAggregatedNodeIndexes(NodeState state, Document doc,
                                              IndexFormatVersion ifv) {
        if (indexingConfig != null) {
            AggregateRule[] aggregateRules = indexingConfig.getAggregateRules();
            if (aggregateRules == null) {
                return;
            }
            try {
                ItemStateManager ism = getContext().getItemStateManager();
                for (AggregateRule aggregateRule : aggregateRules) {
                    boolean ruleMatched = false;
                    // node includes
                    NodeState[] aggregates =
                            aggregateRule.getAggregatedNodeStates(state);
                    if (aggregates != null) {
                        ruleMatched = true;
                        for (NodeState aggregate : aggregates) {
                            Document aDoc = createDocument(aggregate,
                                    getNamespaceMappings(), ifv);
                            // transfer fields to doc if there are any
                            Fieldable[] fulltextFields =
                                    aDoc.getFieldables(FieldNames.FULLTEXT);
                            if (fulltextFields != null) {
                                for (Fieldable fulltextField : fulltextFields) {
                                    doc.add(fulltextField);
                                }
                                doc.add(new Field(FieldNames.AGGREGATED_NODE_UUID,
                                        false,
                                        aggregate.getNodeId().toString(),
                                        Field.Store.NO,
                                        Field.Index.NOT_ANALYZED_NO_NORMS,
                                        Field.TermVector.NO));
                            }
                        }
                        // make sure that fulltext fields are aligned properly
                        // first all stored fields, then remaining
                        Fieldable[] fulltextFields =
                                doc.getFieldables(FieldNames.FULLTEXT);
                        doc.removeFields(FieldNames.FULLTEXT);
                        Arrays.sort(fulltextFields, FIELDS_COMPARATOR_STORED);
                        for (Fieldable f : fulltextFields) {
                            doc.add(f);
                        }
                    }
                    // property includes
                    PropertyState[] propStates =
                            aggregateRule.getAggregatedPropertyStates(state);
                    if (propStates != null) {
                        ruleMatched = true;
                        for (PropertyState propState : propStates) {
                            String namePrefix = FieldNames.createNamedValue(
                                    getNamespaceMappings().translateName(
                                            propState.getName()), "");
                            NodeState parent = (NodeState) ism.getItemState(
                                    propState.getParentId());
                            Document aDoc = createDocument(parent,
                                    getNamespaceMappings(), ifv);
                            try {
                                // find the right fields to transfer
                                Fieldable[] fields =
                                        aDoc.getFieldables(FieldNames.PROPERTIES);
                                for (Fieldable field : fields) {
                                    // assume properties fields use SingleTokenStream
                                    TokenStream tokenStream = field.tokenStreamValue();
                                    TermAttribute termAttribute =
                                            tokenStream.addAttribute(TermAttribute.class);
                                    PayloadAttribute payloadAttribute =
                                            tokenStream.addAttribute(PayloadAttribute.class);
                                    tokenStream.incrementToken();
                                    tokenStream.end();
                                    tokenStream.close();
                                    String value = new String(
                                            termAttribute.termBuffer(), 0,
                                            termAttribute.termLength());
                                    if (value.startsWith(namePrefix)) {
                                        // extract value
                                        String rawValue =
                                                value.substring(namePrefix.length());
                                        // create new named value
                                        Path p = getRelativePath(state, propState);
                                        String path = getNamespaceMappings()
                                                .translatePath(p);
                                        value = FieldNames.createNamedValue(
                                                path, rawValue);
                                        termAttribute.setTermBuffer(value);
                                        PropertyMetaData pdm = PropertyMetaData
                                                .fromByteArray(payloadAttribute
                                                        .getPayload().getData());
                                        doc.add(new Field(field.name(),
                                                new SingletonTokenStream(
                                                        value,
                                                        pdm.getPropertyType())));
                                        doc.add(new Field(
                                                FieldNames.AGGREGATED_NODE_UUID,
                                                false,
                                                parent.getNodeId().toString(),
                                                Field.Store.NO,
                                                Field.Index.NOT_ANALYZED_NO_NORMS,
                                                Field.TermVector.NO));
                                        if (pdm.getPropertyType() == PropertyType.STRING) {
                                            // add to fulltext index
                                            Field ft = new Field(
                                                    FieldNames.FULLTEXT,
                                                    false,
                                                    rawValue,
                                                    Field.Store.YES,
                                                    Field.Index.ANALYZED_NO_NORMS,
                                                    Field.TermVector.NO);
                                            doc.add(ft);
                                        }
                                    }
                                }
                            } finally {
                                Util.disposeDocument(aDoc);
                            }
                        }
                    }
                    // only use first aggregate definition that matches
                    if (ruleMatched) {
                        break;
                    }
                }
            } catch (NoSuchItemStateException e) {
                // do not fail if aggregate cannot be created
                log.info("Exception while building indexing aggregate for {}."
                        + " Node is not available {}.",
                        state.getNodeId(), e.getMessage());
            } catch (Exception e) {
                // do not fail if aggregate cannot be created
                log.warn("Exception while building indexing aggregate for "
                        + state.getNodeId(), e);
            }
        }
    }

    private static final Comparator<Fieldable> FIELDS_COMPARATOR_STORED =
            new Comparator<Fieldable>() {
                public int compare(Fieldable o1, Fieldable o2) {
                    return Boolean.valueOf(o2.isStored()).compareTo(o1.isStored());
                }
            };

    /**
     * Returns the relative path from <code>nodeState</code> to
     * <code>propState</code>.
     *
     * @param nodeState a node state.
     * @param propState a property state.
     * @return the relative path.
     * @throws RepositoryException if an error occurs while resolving paths.
     * @throws ItemStateException  if an error occurs while reading item
     *                             states.
     */
    protected Path getRelativePath(NodeState nodeState, PropertyState propState)
            throws RepositoryException, ItemStateException {
        HierarchyManager hmgr = getContext().getHierarchyManager();
        Path nodePath = hmgr.getPath(nodeState.getId());
        Path propPath = hmgr.getPath(propState.getId());
        Path p = nodePath.computeRelativePath(propPath);
        // make sure it does not contain indexes
        boolean clean = true;
        Path.Element[] elements = p.getElements();
        for (int i = 0; i < elements.length; i++) {
            if (elements[i].getIndex() != 0) {
                elements[i] = PATH_FACTORY.createElement(elements[i].getName());
                clean = false;
            }
        }
        if (!clean) {
            p = PATH_FACTORY.create(elements);
        }
        return p.getNormalizedPath();
    }

    /**
     * Retrieves the root of the indexing aggregate for <code>state</code> and
     * puts it into <code>aggregates</code> map.
     *
     * @param state      the node state for which we want to retrieve the
     *                   aggregate root.
     * @param aggregates aggregate roots are collected in this map.
     */
    protected void retrieveAggregateRoot(NodeState state,
                                         Map<NodeId, NodeState> aggregates) {
        retrieveAggregateRoot(state, aggregates, state.getNodeId().toString(), 0);
    }
    /**
     * Retrieves the root of the indexing aggregate for <code>state</code> and
     * puts it into <code>aggregates</code> map.
     *
     * @param state        the node state for which we want to retrieve the
     *                     aggregate root.
     * @param aggregates   aggregate roots are collected in this map.
     * @param originNodeId the originating node, used for reporting only
     * @param level        current aggregation level, used to limit recursive
     *                     aggregation of nodes that have the same type
     */
    private void retrieveAggregateRoot(NodeState state,
                                       Map<NodeId, NodeState> aggregates,
                                       String originNodeId, long level) {
        if (indexingConfig == null) {
            return;
        }
        AggregateRule[] aggregateRules = indexingConfig.getAggregateRules();
        if (aggregateRules == null) {
            return;
        }
        for (AggregateRule aggregateRule : aggregateRules) {
            NodeState root = null;
            try {
                root = aggregateRule.getAggregateRoot(state);
            } catch (Exception e) {
                log.warn("Unable to get aggregate root for "
                        + state.getNodeId(), e);
            }
            if (root == null) {
                continue;
            }
            if (root.getNodeTypeName().equals(state.getNodeTypeName())) {
                level++;
            } else {
                level = 0;
            }
            // JCR-2989 Support for embedded index aggregates
            if ((aggregateRule.getRecursiveAggregationLimit() == 0)
                    || (aggregateRule.getRecursiveAggregationLimit() != 0
                            && level <= aggregateRule.getRecursiveAggregationLimit())) {
                // check if the update parent is already in the
                // map, then all its parents are already there so I can
                // skip this update subtree
                if (aggregates.put(root.getNodeId(), root) == null) {
                    retrieveAggregateRoot(root, aggregates, originNodeId, level);
                }
            } else {
                log.warn("Reached {} levels of recursive aggregation for"
                        + " nodeId {}, type {}, will stop at nodeId {}."
                        + " Are you sure this did not occur by mistake?"
                        + " Please check the indexing-configuration.xml.",
                        new Object[] { level, originNodeId,
                                root.getNodeTypeName(), root.getNodeId() });
            }
        }
    }

    /**
     * Retrieves the root of the indexing aggregate for <code>removedIds</code>
     * and puts it into <code>map</code>.
     *
     * @param removedIds the ids of removed nodes.
     * @param aggregates aggregate roots are collected in this map
     */
    protected void retrieveAggregateRoot(
            Set<NodeId> removedIds, Map<NodeId, NodeState> aggregates) {
        if (removedIds.isEmpty() || indexingConfig == null) {
            return;
        }
        AggregateRule[] aggregateRules = indexingConfig.getAggregateRules();
        if (aggregateRules == null) {
            return;
        }
        int found = 0;
        long time = System.currentTimeMillis();
        try {
            CachingMultiIndexReader reader = index.getIndexReader();
            try {
                Term aggregateIds = new Term(FieldNames.AGGREGATED_NODE_UUID, "");
                TermDocs tDocs = reader.termDocs();
                try {
                    ItemStateManager ism = getContext().getItemStateManager();
                    for (NodeId id : removedIds) {
                        aggregateIds = aggregateIds.createTerm(id.toString());
                        tDocs.seek(aggregateIds);
                        while (tDocs.next()) {
                            Document doc = reader.document(
                                    tDocs.doc(), FieldSelectors.UUID);
                            NodeId nId = new NodeId(doc.get(FieldNames.UUID));
                            NodeState nodeState = (NodeState) ism.getItemState(nId);
                            aggregates.put(nId, nodeState);
                            found++;
                            // JCR-2989 Support for embedded index aggregates
                            int sizeBefore = aggregates.size();
                            retrieveAggregateRoot(nodeState, aggregates);
                            found += aggregates.size() - sizeBefore;
                        }
                    }
                } finally {
                    tDocs.close();
                }
            } finally {
                reader.release();
            }
        } catch (NoSuchItemStateException e) {
            log.info("Exception while retrieving aggregate roots."
                    + " Node is not available {}.", e.getMessage());
        } catch (Exception e) {
            log.warn("Exception while retrieving aggregate roots", e);
        }
        time = System.currentTimeMillis() - time;
        log.debug("Retrieved {} aggregate roots in {} ms.", found, time);
    }

    //----------------------------< internal >----------------------------------
    /**
     * Combines multiple {@link CachingMultiIndexReader} into a
     * <code>MultiReader</code> with {@link HierarchyResolver} support.
     */
    protected static final class CombinedIndexReader
            extends MultiReader
            implements HierarchyResolver, MultiIndexReader {

        /**
         * The sub readers.
         */
        private final CachingMultiIndexReader[] subReaders;

        public CombinedIndexReader(CachingMultiIndexReader[] indexReaders) {
            super(indexReaders);
            this.subReaders = indexReaders;
        }

        /**
         * {@inheritDoc}
         */
        public int[] getParents(int n, int[] docNumbers) throws IOException {
            int i = readerIndex(n);
            DocId id = subReaders[i].getParentDocId(n - starts[i]);
            id = id.applyOffset(starts[i]);
            return id.getDocumentNumbers(this, docNumbers);
        }

        //-------------------------< MultiIndexReader >-------------------------

        /**
         * {@inheritDoc}
         */
        public IndexReader[] getIndexReaders() {
            IndexReader[] readers = new IndexReader[subReaders.length];
            System.arraycopy(subReaders, 0, readers, 0, subReaders.length);
            return readers;
        }

        /**
         * {@inheritDoc}
         */
        public void release() throws IOException {
            for (CachingMultiIndexReader subReader : subReaders) {
                subReader.release();
            }
        }

        public boolean equals(Object obj) {
            if (obj instanceof CombinedIndexReader) {
                CombinedIndexReader other = (CombinedIndexReader) obj;
                return Arrays.equals(subReaders, other.subReaders);
            }
            return false;
        }

        public int hashCode() {
            int hash = 0;
            for (CachingMultiIndexReader subReader : subReaders) {
                hash = 31 * hash + subReader.hashCode();
            }
            return hash;
        }

        /**
         * {@inheritDoc}
         */
        public ForeignSegmentDocId createDocId(NodeId id) throws IOException {
            for (CachingMultiIndexReader subReader : subReaders) {
                ForeignSegmentDocId doc = subReader.createDocId(id);
                if (doc != null) {
                    return doc;
                }
            }
            return null;
        }

        /**
         * {@inheritDoc}
         */
        public int getDocumentNumber(ForeignSegmentDocId docId) {
            for (int i = 0; i < subReaders.length; i++) {
                CachingMultiIndexReader subReader = subReaders[i];
                int realDoc = subReader.getDocumentNumber(docId);
                if (realDoc >= 0) {
                    return realDoc + starts[i];
                }
            }
            return -1;
        }
    }

    //--------------------------< properties >----------------------------------

    /**
     * Sets the default analyzer in use for indexing. The given analyzer
     * class name must satisfy the following conditions:
     * <ul>
     * <li>the class must exist in the class path</li>
     * <li>the class must have a public default constructor, or
     *     a constructor that takes a Lucene {@link Version} argument</li>
     * <li>the class must be a Lucene Analyzer</li>
     * </ul>
     * <p>
     * If the above conditions are met, then a new instance of the class is
     * set as the analyzer. Otherwise a warning is logged and the current
     * analyzer is not changed.
     * <p>
     * This property setter method is normally invoked by the Jackrabbit
     * configuration mechanism if the "analyzer" parameter is set in the
     * search configuration.
     *
     * @param analyzerClassName the analyzer class name
     */
    public void setAnalyzer(String analyzerClassName) {
        analyzer.setDefaultAnalyzerClass(analyzerClassName);
    }

    /**
     * Returns the class name of the default analyzer that is currently in use.
     *
     * @return class name of analyzer in use.
     */
    public String getAnalyzer() {
        return analyzer.getDefaultAnalyzerClass();
    }
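    // Selecting a custom analyzer in workspace.xml, assuming the Lucene
    // StandardAnalyzer (which has the Version constructor required above):
    //
    //   <param name="analyzer" value="org.apache.lucene.analysis.standard.StandardAnalyzer"/>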
    /**
     * Sets the location of the search index.
     *
     * @param path the location of the search index.
     */
    public void setPath(String path) {
        this.path = path;
    }

    /**
     * Returns the location of the search index. Returns <code>null</code> if
     * not set.
     *
     * @return the location of the search index.
     */
    public String getPath() {
        return path;
    }

    /**
     * The lucene index writer property: useCompoundFile
     */
    public void setUseCompoundFile(boolean b) {
        useCompoundFile = b;
    }

    /**
     * Returns the current value for useCompoundFile.
     *
     * @return the current value for useCompoundFile.
     */
    public boolean getUseCompoundFile() {
        return useCompoundFile;
    }

    /**
     * The lucene index writer property: minMergeDocs
     */
    public void setMinMergeDocs(int minMergeDocs) {
        this.minMergeDocs = minMergeDocs;
    }

    /**
     * Returns the current value for minMergeDocs.
     *
     * @return the current value for minMergeDocs.
     */
    public int getMinMergeDocs() {
        return minMergeDocs;
    }

    /**
     * Sets the property: volatileIdleTime
     *
     * @param volatileIdleTime idle time in seconds
     */
    public void setVolatileIdleTime(int volatileIdleTime) {
        this.volatileIdleTime = volatileIdleTime;
    }

    /**
     * Returns the current value for volatileIdleTime.
     *
     * @return the current value for volatileIdleTime.
     */
    public int getVolatileIdleTime() {
        return volatileIdleTime;
    }

    /**
     * The lucene index writer property: maxMergeDocs
     */
    public void setMaxMergeDocs(int maxMergeDocs) {
        this.maxMergeDocs = maxMergeDocs;
    }

    /**
     * Returns the current value for maxMergeDocs.
     *
     * @return the current value for maxMergeDocs.
     */
    public int getMaxMergeDocs() {
        return maxMergeDocs;
    }

    /**
     * The lucene index writer property: mergeFactor
     */
    public void setMergeFactor(int mergeFactor) {
        this.mergeFactor = mergeFactor;
    }

    /**
     * Returns the current value for the merge factor.
     *
     * @return the current value for the merge factor.
     */
    public int getMergeFactor() {
        return mergeFactor;
    }

    /**
     * @see VolatileIndex#setBufferSize(int)
     */
    public void setBufferSize(int size) {
        bufferSize = size;
    }

    /**
     * Returns the current value for the buffer size.
     *
     * @return the current value for the buffer size.
     */
    public int getBufferSize() {
        return bufferSize;
    }

    public void setRespectDocumentOrder(boolean docOrder) {
        documentOrder = docOrder;
    }

    public boolean getRespectDocumentOrder() {
        return documentOrder;
    }

    public void setForceConsistencyCheck(boolean b) {
        forceConsistencyCheck = b;
    }

    public boolean getForceConsistencyCheck() {
        return forceConsistencyCheck;
    }

    public void setAutoRepair(boolean b) {
        autoRepair = b;
    }

    public boolean getAutoRepair() {
        return autoRepair;
    }

    public void setCacheSize(int size) {
        cacheSize = size;
    }

    public int getCacheSize() {
        return cacheSize;
    }

    public void setMaxFieldLength(int length) {
        maxFieldLength = length;
    }

    public int getMaxFieldLength() {
        return maxFieldLength;
    }

    public void setMaxExtractLength(int length) {
        maxExtractLength = length;
    }

    public int getMaxExtractLength() {
        if (maxExtractLength < 0) {
            return -maxExtractLength * maxFieldLength;
        } else {
            return maxExtractLength;
        }
    }
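    // Example: with the defaults maxExtractLength = -10 and
    // maxFieldLength = 10000, getMaxExtractLength() returns
    // -(-10) * 10000 = 100000 extracted characters.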
    /**
     * @return the number of results the query handler will fetch initially
     *         when a query is executed.
     */
    public int getResultFetchSize() {
        return resultFetchSize;
    }

    /**
     * Sets the number of background threads for the extractor pool.
     *
     * @param numThreads the number of threads.
     */
    public void setExtractorPoolSize(int numThreads) {
        if (numThreads < 0) {
            numThreads = 0;
        }
        extractorPoolSize = numThreads;
    }

    /**
     * @return the size of the thread pool which is used to run the text
     *         extractors when binary content is indexed.
     */
    public int getExtractorPoolSize() {
        return extractorPoolSize;
    }

    /**
     * Sets the number of extractor jobs that may be queued before a new job
     * is executed on the calling thread instead of in the thread pool.
     *
     * @param backLog size of the extractor job queue.
     */
    public void setExtractorBackLogSize(int backLog) {
        extractorBackLog = backLog;
    }

    /**
     * @return the size of the extractor queue back log.
     */
    public int getExtractorBackLogSize() {
        return extractorBackLog;
    }

    /**
     * Sets the timeout in milliseconds granted to the text extraction process
     * before fulltext indexing is deferred to a background thread.
     *
     * @param timeout the timeout in milliseconds.
     */
    public void setExtractorTimeout(long timeout) {
        extractorTimeout = timeout;
    }

    /**
     * @return the extractor timeout in milliseconds.
     */
    public long getExtractorTimeout() {
        return extractorTimeout;
    }

    /**
     * If enabled, NodeIterator.getSize() may initially report a value larger
     * than the actual result size. The reported size shrinks as the query
     * result encounters nodes that no longer exist or that the session is not
     * allowed to read. Because the inflated count can reveal the existence of
     * nodes the session cannot access, enabling this might be a security
     * problem.
     *
     * @param b <code>true</code> to enable
     */
    public void setSizeEstimate(boolean b) {
        if (b) {
            log.info("Size estimation is enabled");
        }
        this.sizeEstimate = b;
    }

    /**
     * Get the size estimate setting.
     *
     * @return the setting
     */
    public boolean getSizeEstimate() {
        return sizeEstimate;
    }

    /**
     * If set to <code>true</code>, additional information is stored in the
     * index to support highlighting using the rep:excerpt pseudo property.
     *
     * @param b <code>true</code> to enable highlighting support.
     */
    public void setSupportHighlighting(boolean b) {
        supportHighlighting = b;
    }

    /**
     * @return <code>true</code> if highlighting support is enabled.
     */
    public boolean getSupportHighlighting() {
        return supportHighlighting;
    }

    /**
     * Sets the class name for the {@link ExcerptProvider} that should be used
     * for the rep:excerpt pseudo property in a query.
     *
     * @param className the name of a class that implements
     *                  {@link ExcerptProvider}.
     */
    public void setExcerptProviderClass(String className) {
        try {
            Class<?> clazz = Class.forName(className);
            if (ExcerptProvider.class.isAssignableFrom(clazz)) {
                excerptProviderClass = clazz;
            } else {
                log.warn("Invalid value for excerptProviderClass, {} does "
                        + "not implement ExcerptProvider interface.", className);
            }
        } catch (ClassNotFoundException e) {
            log.warn("Invalid value for excerptProviderClass, class {} not found.",
                    className);
        }
    }

    /**
     * @return the class name of the excerpt provider implementation.
     */
    public String getExcerptProviderClass() {
        return excerptProviderClass.getName();
    }

    /**
     * Sets the path to the indexing configuration file.
     *
     * @param path the path to the configuration file.
     */
    public void setIndexingConfiguration(String path) {
        indexingConfigPath = path;
    }

    /**
     * @return the path to the indexing configuration file.
     */
    public String getIndexingConfiguration() {
        return indexingConfigPath;
    }
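    // Sketch of how the text-extraction properties above combine (behaviour
    // as described by their javadoc; values illustrative): a pool of
    // extractorPoolSize threads extracts text from binary properties. If a
    // job does not finish within extractorTimeout milliseconds, the node is
    // indexed without the extracted text first and the fulltext is added in
    // the background; once more than extractorBackLogSize jobs are queued,
    // new jobs run on the calling thread instead.
    //
    //   <param name="extractorPoolSize" value="2"/>
    //   <param name="extractorTimeout" value="100"/>
    //   <param name="extractorBackLogSize" value="100"/>
    //   <param name="supportHighlighting" value="true"/>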
    /**
     * Sets the name of the class that implements {@link IndexingConfiguration}.
     * The default value is
     * <code>org.apache.jackrabbit.core.query.lucene.IndexingConfigurationImpl</code>.
     *
     * @param className the name of the class that implements
     *                  {@link IndexingConfiguration}.
     */
    public void setIndexingConfigurationClass(String className) {
        try {
            Class<?> clazz = Class.forName(className);
            if (IndexingConfiguration.class.isAssignableFrom(clazz)) {
                indexingConfigurationClass = clazz;
            } else {
                log.warn("Invalid value for indexingConfigurationClass, {} "
                        + "does not implement IndexingConfiguration interface.",
                        className);
            }
        } catch (ClassNotFoundException e) {
            log.warn("Invalid value for indexingConfigurationClass, class {} not found.",
                    className);
        }
    }

    /**
     * @return the class name of the indexing configuration implementation.
     */
    public String getIndexingConfigurationClass() {
        return indexingConfigurationClass.getName();
    }

    /**
     * Sets the name of the class that implements {@link SynonymProvider}. The
     * default value is <code>null</code> (none set).
     *
     * @param className name of the class that implements
     *                  {@link SynonymProvider}.
     */
    public void setSynonymProviderClass(String className) {
        try {
            Class<?> clazz = Class.forName(className);
            if (SynonymProvider.class.isAssignableFrom(clazz)) {
                synonymProviderClass = clazz;
            } else {
                log.warn("Invalid value for synonymProviderClass, {} "
                        + "does not implement SynonymProvider interface.",
                        className);
            }
        } catch (ClassNotFoundException e) {
            log.warn("Invalid value for synonymProviderClass, class {} not found.",
                    className);
        }
    }

    /**
     * @return the class name of the synonym provider implementation or
     *         <code>null</code> if none is set.
     */
    public String getSynonymProviderClass() {
        if (synonymProviderClass != null) {
            return synonymProviderClass.getName();
        } else {
            return null;
        }
    }

    /**
     * Sets the name of the class that implements {@link SpellChecker}. The
     * default value is <code>null</code> (none set).
     *
     * @param className name of the class that implements {@link SpellChecker}.
     */
    public void setSpellCheckerClass(String className) {
        try {
            Class<?> clazz = Class.forName(className);
            if (SpellChecker.class.isAssignableFrom(clazz)) {
                spellCheckerClass = clazz;
            } else {
                log.warn("Invalid value for spellCheckerClass, {} "
                        + "does not implement SpellChecker interface.",
                        className);
            }
        } catch (ClassNotFoundException e) {
            log.warn("Invalid value for spellCheckerClass,"
                    + " class {} not found.", className);
        }
    }

    /**
     * @return the class name of the spell checker implementation or
     *         <code>null</code> if none is set.
     */
    public String getSpellCheckerClass() {
        if (spellCheckerClass != null) {
            return spellCheckerClass.getName();
        } else {
            return null;
        }
    }

    /**
     * Enables or disables the consistency check on startup. Consistency
     * checks are disabled by default.
     *
     * @param b <code>true</code> enables consistency checks.
     * @see #setForceConsistencyCheck(boolean)
     */
    public void setEnableConsistencyCheck(boolean b) {
        this.consistencyCheckEnabled = b;
    }

    /**
     * @return <code>true</code> if consistency checks are enabled.
     */
    public boolean getEnableConsistencyCheck() {
        return consistencyCheckEnabled;
    }

    /**
     * Sets the configuration path for the synonym provider.
     *
     * @param path the configuration path for the synonym provider.
     */
    public void setSynonymProviderConfigPath(String path) {
        synonymProviderConfigPath = path;
    }

    /**
     * @return the configuration path for the synonym provider. If none is set
     *         this method returns <code>null</code>.
     */
    public String getSynonymProviderConfigPath() {
        return synonymProviderConfigPath;
    }
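    // Wiring sketch for the pluggable classes above (the provider class name
    // is real; the file name is illustrative, and how the config path is
    // resolved is an assumption to verify against the deployment): the
    // PropertiesSynonymProvider shipped in this package reads its synonym
    // pairs from the properties file configured via synonymProviderConfigPath.
    //
    //   <param name="synonymProviderClass"
    //          value="org.apache.jackrabbit.core.query.lucene.PropertiesSynonymProvider"/>
    //   <param name="synonymProviderConfigPath" value="synonyms.properties"/>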
    /**
     * Sets the similarity implementation, which will be used for indexing and
     * searching. The implementation must extend {@link Similarity}.
     *
     * @param className a {@link Similarity} implementation.
     */
    public void setSimilarityClass(String className) {
        try {
            Class<?> similarityClass = Class.forName(className);
            similarity = (Similarity) similarityClass.newInstance();
        } catch (Exception e) {
            log.warn("Invalid Similarity class: " + className, e);
        }
    }

    /**
     * @return the name of the similarity class.
     */
    public String getSimilarityClass() {
        return similarity.getClass().getName();
    }

    /**
     * Sets a new maxVolatileIndexSize value.
     *
     * @param maxVolatileIndexSize the new value.
     */
    public void setMaxVolatileIndexSize(long maxVolatileIndexSize) {
        this.maxVolatileIndexSize = maxVolatileIndexSize;
    }

    /**
     * @return the maxVolatileIndexSize in bytes.
     */
    public long getMaxVolatileIndexSize() {
        return maxVolatileIndexSize;
    }

    /**
     * @return the name of the directory manager class.
     */
    public String getDirectoryManagerClass() {
        return directoryManagerClass;
    }

    /**
     * Sets the name of the directory manager class. The class must implement
     * {@link DirectoryManager}.
     *
     * @param className the name of the class that implements directory manager.
     */
    public void setDirectoryManagerClass(String className) {
        this.directoryManagerClass = className;
    }

    /**
     * If set to <code>true</code>, tells the {@link DirectoryManager} to use
     * <code>SimpleFSDirectory</code> instead of automatically picking an
     * implementation based on the current platform.
     *
     * @param useSimpleFSDirectory whether to use <code>SimpleFSDirectory</code>
     *                             or automatically pick an implementation
     *                             based on the current platform.
     */
    public void setUseSimpleFSDirectory(boolean useSimpleFSDirectory) {
        this.useSimpleFSDirectory = useSimpleFSDirectory;
    }

    /**
     * @return <code>true</code> if the {@link DirectoryManager} should use
     *         the <code>SimpleFSDirectory</code>.
     */
    public boolean isUseSimpleFSDirectory() {
        return useSimpleFSDirectory;
    }

    /**
     * @return the current value for termInfosIndexDivisor.
     */
    public int getTermInfosIndexDivisor() {
        return termInfosIndexDivisor;
    }

    /**
     * Sets a new value for termInfosIndexDivisor.
     *
     * @param termInfosIndexDivisor the new value.
     */
    public void setTermInfosIndexDivisor(int termInfosIndexDivisor) {
        this.termInfosIndexDivisor = termInfosIndexDivisor;
    }

    /**
     * @return <code>true</code> if the hierarchy cache should be initialized
     *         immediately on startup.
     */
    public boolean isInitializeHierarchyCache() {
        return initializeHierarchyCache;
    }

    /**
     * Whether the hierarchy cache should be initialized immediately on
     * startup.
     *
     * @param initializeHierarchyCache <code>true</code> if the cache should be
     *                                 initialized immediately.
     */
    public void setInitializeHierarchyCache(boolean initializeHierarchyCache) {
        this.initializeHierarchyCache = initializeHierarchyCache;
    }

    /**
     * @return the maximum age in seconds for outdated generations of
     *         {@link IndexInfos}.
     */
    public long getMaxHistoryAge() {
        return maxHistoryAge;
    }

    /**
     * Sets a new value for the maximum age in seconds for outdated
     * generations of {@link IndexInfos}.
     *
     * @param maxHistoryAge age in seconds.
     */
    public void setMaxHistoryAge(long maxHistoryAge) {
        this.maxHistoryAge = maxHistoryAge;
    }

    /**
     * @return the name of the redo log factory class.
     */
    public String getRedoLogFactoryClass() {
        return redoLogFactoryClass;
    }

    /**
     * Sets the name of the redo log factory class. The class must implement
     * {@link RedoLogFactory}.
     *
     * @param className the name of the redo log factory class.
     */
    public void setRedoLogFactoryClass(String className) {
        this.redoLogFactoryClass = className;
    }
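    // Sketch of what setSimilarityClass above expects: a class with a public
    // default constructor extending org.apache.lucene.search.Similarity. A
    // hypothetical subclass that disables length normalization could look
    // like this (class and package name are illustrative only):
    //
    //   public class NoLengthNormSimilarity
    //           extends org.apache.lucene.search.DefaultSimilarity {
    //       public float lengthNorm(String fieldName, int numTokens) {
    //           return 1.0f; // rank hits independently of field length
    //       }
    //   }
    //
    //   <param name="similarityClass" value="com.example.NoLengthNormSimilarity"/>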
    /**
     * In the case of an initial index build operation, checks if any newly
     * added nodes are still pending in the cluster journal and preemptively
     * deletes them from the index, to keep the index consistent.
     *
     * See JCR-3162.
     *
     * @param context the query handler context.
     */
    private void checkPendingJournalChanges(QueryHandlerContext context) {
        ClusterNode cn = context.getClusterNode();
        if (cn == null) {
            return;
        }

        List<NodeId> addedIds = new ArrayList<NodeId>();
        long rev = cn.getRevision();

        List<ChangeLogRecord> changes = getChangeLogRecords(rev, context.getWorkspace());
        for (ChangeLogRecord record : changes) {
            for (ItemState state : record.getChanges().addedStates()) {
                if (!state.isNode()) {
                    continue;
                }
                addedIds.add((NodeId) state.getId());
            }
        }
        if (!addedIds.isEmpty()) {
            Collection<NodeState> empty = Collections.emptyList();
            try {
                // preemptively remove the pending nodes from the index
                // (see JCR-3162)
                updateNodes(addedIds.iterator(), empty.iterator());
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
        }
    }

    //----------------------------< internal >----------------------------------

    /**
     * Checks if this <code>SearchIndex</code> is open, otherwise throws
     * an <code>IOException</code>.
     *
     * @throws IOException if this <code>SearchIndex</code> has been closed.
     */
    protected void checkOpen() throws IOException {
        if (closed) {
            throw new IOException("query handler closed and cannot be used anymore.");
        }
    }

    /**
     * Polls the underlying journal for events of the type ChangeLogRecord that
     * happened after a given revision, on a given workspace.
     *
     * @param revision  starting revision
     * @param workspace the workspace name
     * @return the change log records for the given workspace, in journal order.
     */
    private List<ChangeLogRecord> getChangeLogRecords(long revision,
            final String workspace) {
        log.debug(
                "Get changes from the Journal for revision {} and workspace {}.",
                revision, workspace);
        ClusterNode cn = getContext().getClusterNode();
        if (cn == null) {
            return Collections.emptyList();
        }
        Journal journal = cn.getJournal();
        final List<ChangeLogRecord> events = new ArrayList<ChangeLogRecord>();
        ClusterRecordDeserializer deserializer = new ClusterRecordDeserializer();
        RecordIterator records = null;
        try {
            records = journal.getRecords(revision);
            while (records.hasNext()) {
                Record record = records.nextRecord();
                if (!record.getProducerId().equals(cn.getId())) {
                    // only consider records produced by this cluster node
                    continue;
                }
                ClusterRecord r = null;
                try {
                    r = deserializer.deserialize(record);
                } catch (JournalException e) {
                    log.error(
                            "Unable to read revision '" + record.getRevision() + "'.",
                            e);
                }
                if (r == null) {
                    continue;
                }
                r.process(new ClusterRecordProcessor() {

                    public void process(ChangeLogRecord record) {
                        // keep only change logs that belong to this workspace
                        String eventW = record.getWorkspace();
                        if (eventW != null ? eventW.equals(workspace) : workspace == null) {
                            events.add(record);
                        }
                    }

                    public void process(LockRecord record) {
                    }

                    public void process(NamespaceRecord record) {
                    }

                    public void process(NodeTypeRecord record) {
                    }

                    public void process(PrivilegeRecord record) {
                    }

                    public void process(WorkspaceRecord record) {
                    }
                });
            }
        } catch (JournalException e1) {
            log.error(e1.getMessage(), e1);
        } finally {
            if (records != null) {
                records.close();
            }
        }
        return events;
    }
}