IndexMerger.java example

Explorer
jackrabbit-master
- jackrabbit-trunk
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.core.query.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Merges indexes in the background. Each merge runs as a worker task that is
 * handed to the repository executor.
 */
class IndexMerger implements IndexListener {

    /**
     * Logger instance for this class.
     */
    private static final Logger log = LoggerFactory.getLogger(IndexMerger.class);

    /**
     * minMergeDocs config parameter. Used by {@link #indexAdded(String, int)}
     * as the upper document limit of the first bucket.
     */
    private int minMergeDocs = SearchIndex.DEFAULT_MIN_MERGE_DOCS;

    /**
     * maxMergeDocs config parameter. Bucket limits are grown until they reach
     * this value and the target size of a merge is capped by it.
     */
    private int maxMergeDocs = SearchIndex.DEFAULT_MAX_MERGE_DOCS;

    /**
     * mergeFactor config parameter. Used as the growth factor between the
     * limits of consecutive buckets and as the number of indexes a bucket must
     * contain before a merge is considered.
     */
    private int mergeFactor = SearchIndex.DEFAULT_MERGE_FACTOR;

    /**
     * List of <code>IndexBucket</code>s in ascending document limit. The list
     * is filled lazily by the first call to {@link #indexAdded(String, int)}
     * and is only accessed while holding {@link #lock}.
     */
    private final List<IndexBucket> indexBuckets = new ArrayList<IndexBucket>();

    /**
     * The <code>MultiIndex</code> this index merger is working on.
     */
    private final MultiIndex multiIndex;

    /**
     * The executor of the repository. Every merge worker is submitted to it.
     */
    private final Executor executor;

    /**
     * Flag that indicates that this index merger is shutting down and should
     * quit. It is set by {@link #dispose()}; once set, no new merge tasks are
     * accepted.
     */
    private final AtomicBoolean quit = new AtomicBoolean(false);

    /**
     * Flag that indicates if this index merger has already been started.
     * Workers created before the flag is set stay blocked until
     * {@link #start()} unblocks them.
     * @see #start()
     */
    private final AtomicBoolean isStarted = new AtomicBoolean(false);

    /**
     * Monitor object to synchronize merge calculation.
     */
    private final Object lock = new Object();

    /**
     * Read/write lock for index segment replacement. A shared read lock is
     * acquired for an index replacement. An exclusive write lock is acquired
     * when this index merger is shutting down, to prevent further index
     * replacements. The write lock is not released anywhere in this class.
     */
    private final ReadWriteLock indexReplacement = new ReentrantReadWriteLock();

    /**
     * List of merge workers that have been scheduled and have not finished
     * yet. All access is synchronized on the list itself, which also serves as
     * the monitor that {@link #waitUntilIdle()} waits on.
     */
    private final List<Worker> busyMergers = new ArrayList<Worker>();

    /**
     * Constructs an index merger that operates on the given multi index and
     * schedules its merge work on the given executor. The merger does not run
     * any merge until {@link #start()} has been called.
     *
     * @param multiIndex the <code>MultiIndex</code> whose indexes are merged.
     * @param executor   the repository executor that runs the merge workers.
     */
    IndexMerger(MultiIndex multiIndex, Executor executor) {
        this.executor = executor;
        this.multiIndex = multiIndex;
    }

    /**
     * Marks this index merger as started and releases every worker that was
     * scheduled before this call and is still waiting for the start signal.
     */
    void start() {
        isStarted.set(true);
        synchronized (busyMergers) {
            Iterator<Worker> waiting = busyMergers.iterator();
            while (waiting.hasNext()) {
                waiting.next().unblock();
            }
        }
    }

    /**
     * Informs the index merger that an index was added / created.
     * <p>
     * The index is sorted into a bucket according to its document count. When
     * a mergeable bucket holds at least <code>mergeFactor</code> indexes, its
     * indexes are selected for a merge until their accumulated document count
     * exceeds the target size (bucket limit times merge factor, capped by
     * <code>maxMergeDocs</code>). A merge task is only requested when more
     * than two indexes were selected.
     *
     * @param name the name of the index.
     * @param numDocs the number of documents it contains. Zero is accepted.
     * @throws IllegalArgumentException if <code>numDocs</code> is negative.
     */
    void indexAdded(String name, int numDocs) {
        if (numDocs < 0) {
            throw new IllegalArgumentException("numDocs must be positive");
        }
        // multiple threads may enter this method:
        // - the background thread of this IndexMerger, when it replaces indexes
        //   after a successful merge
        // - a regular thread that updates the workspace
        //
        // therefore we have to synchronize this block
        synchronized (lock) {
            // initially create buckets
            if (indexBuckets.isEmpty()) {
                long lower = 0;
                long upper = minMergeDocs;
                while (upper < maxMergeDocs) {
                    indexBuckets.add(new IndexBucket(lower, upper, true));
                    lower = upper + 1;
                    long next = upper * mergeFactor;
                    if (next <= upper) {
                        // a merge factor <= 1 or a non-positive minMergeDocs
                        // never reaches maxMergeDocs; stop instead of adding
                        // buckets forever
                        log.warn("bucket limits do not grow with mergeFactor="
                                + mergeFactor + " and minMergeDocs=" + minMergeDocs);
                        break;
                    }
                    upper = next;
                }
                // one with upper = maxMergeDocs
                indexBuckets.add(new IndexBucket(lower, maxMergeDocs, false));
                // and another one as overflow, just in case...
                // (long arithmetic: maxMergeDocs may be Integer.MAX_VALUE)
                indexBuckets.add(new IndexBucket(maxMergeDocs + 1L, Long.MAX_VALUE, false));
            }

            // put index in bucket; falls back to the last bucket if none fits
            IndexBucket bucket = indexBuckets.get(indexBuckets.size() - 1);
            for (IndexBucket indexBucket : indexBuckets) {
                bucket = indexBucket;
                if (bucket.fits(numDocs)) {
                    break;
                }
            }
            bucket.add(new Index(name, numDocs));

            if (log.isDebugEnabled()) {
                log.debug("index added: name=" + name + ", numDocs=" + numDocs);
            }

            // if bucket does not allow merge, we don't have to continue
            if (!bucket.allowsMerge()) {
                return;
            }

            // check if we need a merge
            if (bucket.size() >= mergeFactor) {
                long targetMergeDocs = bucket.upper;
                targetMergeDocs = Math.min(targetMergeDocs * mergeFactor, maxMergeDocs);
                // sum up docs in bucket; stop selecting once the accumulated
                // document count exceeds the target
                List<Index> indexesToMerge = new ArrayList<Index>();
                long mergeDocs = 0;
                for (Iterator<Index> it = bucket.iterator(); it.hasNext() && mergeDocs <= targetMergeDocs;) {
                    Index candidate = it.next();
                    indexesToMerge.add(candidate);
                    mergeDocs += candidate.numDocs;
                }
                if (indexesToMerge.size() > 2) {
                    // found merge
                    Index[] idxs = indexesToMerge.toArray(new Index[indexesToMerge.size()]);
                    bucket.removeAll(indexesToMerge);
                    if (log.isDebugEnabled()) {
                        log.debug("requesting merge for " + indexesToMerge);
                    }
                    addMergeTask(new Merge(idxs));
                    if (log.isDebugEnabled()) {
                        int numBusy;
                        synchronized (busyMergers) {
                            numBusy = busyMergers.size();
                        }
                        log.debug("# of busy merge workers: " + numBusy);
                    }
                }
            }
        }
    }

    /**
     * Forwards the deletion of a document to every merge worker that is
     * currently registered as busy, so that each of them can record the id.
     *
     * @param id the id term of the deleted document.
     */
    public void documentDeleted(Term id) {
        log.debug("document deleted: " + id.text());
        synchronized (busyMergers) {
            Iterator<Worker> workers = busyMergers.iterator();
            while (workers.hasNext()) {
                workers.next().documentDeleted(id);
            }
        }
    }

    /**
     * Blocks the calling thread until no merge worker is registered as busy
     * anymore. Returns immediately when there is no busy worker at the time
     * of the call.
     *
     * @throws InterruptedException if this thread is interrupted while waiting
     *                              for the worker threads to become idle.
     */
    void waitUntilIdle() throws InterruptedException {
        synchronized (busyMergers) {
            for (;;) {
                if (busyMergers.isEmpty()) {
                    return;
                }
                // woken up by a worker that removed itself from the list
                busyMergers.wait();
            }
        }
    }

    /**
     * Signals this <code>IndexMerger</code> to stop and gives each busy worker
     * a short time to terminate.
     * <p>
     * The exclusive index replacement lock acquired here is intentionally not
     * released: it keeps workers from replacing indexes after disposal. If the
     * calling thread is interrupted during this method, shutdown continues on
     * a best-effort basis and the interrupt status of the thread is restored
     * before the method returns.
     */
    void dispose() {
        log.debug("dispose IndexMerger");
        // remember an interrupt so it can be restored for the caller
        boolean interrupted = false;
        // get exclusive lock on index replacements
        try {
            indexReplacement.writeLock().lockInterruptibly();
        } catch (InterruptedException e) {
            interrupted = true;
            log.warn("Interrupted while acquiring index replacement exclusive lock: " + e);
            // try to stop IndexMerger without the sync
        }

        // set quit
        quit.set(true);
        log.debug("quit flag set");

        try {
            // give the merger threads some time to quit,
            // it is possible that the mergers are busy working on a large index.
            // if that is the case we will just ignore it and the daemon will
            // die without being able to finish the merge.
            // unless the lock acquisition above was interrupted, it is not
            // possible anymore to replace indexes on the MultiIndex at this
            // point because we hold the exclusive indexReplacement lock.
            Worker[] workers;
            synchronized (busyMergers) {
                workers = busyMergers.toArray(new Worker[busyMergers.size()]);
            }
            for (Worker w : workers) {
                w.join(500);
                if (w.isAlive()) {
                    log.info("Unable to stop IndexMerger.Worker. Daemon is busy.");
                } else {
                    log.debug("IndexMerger.Worker thread stopped");
                }
            }
        } catch (InterruptedException e) {
            interrupted = true;
            log.warn("Interrupted while waiting for IndexMerger threads to terminate.");
        } finally {
            if (interrupted) {
                // do not swallow the interrupt request of the caller
                Thread.currentThread().interrupt();
            }
        }
    }

    //-----------------------< merge properties >-------------------------------

    /**
     * Sets the merge factor.
     * <p>
     * {@link #indexAdded(String, int)} uses this value as the growth factor
     * between the document limits of consecutive buckets and as the number of
     * indexes a bucket must hold before a merge is considered. The buckets are
     * created once, when the first index is added; a later change no longer
     * affects the bucket limits. This setter is not synchronized.
     *
     * @param mergeFactor the merge factor.
     */
    public void setMergeFactor(int mergeFactor) {
        this.mergeFactor = mergeFactor;
    }


    /**
     * Sets the initial threshold for number of documents to merge to a new
     * index.
     * <p>
     * {@link #indexAdded(String, int)} uses this value as the upper document
     * limit of the first bucket. The buckets are created once, when the first
     * index is added; a later change has no effect on them. This setter is not
     * synchronized.
     *
     * @param minMergeDocs the min merge docs number.
     */
    public void setMinMergeDocs(int minMergeDocs) {
        this.minMergeDocs = minMergeDocs;
    }

    /**
     * Sets the maximum number of documents to merge.
     * <p>
     * {@link #indexAdded(String, int)} grows the bucket limits until they
     * reach this value and caps the target size of a merge with it. Buckets
     * whose upper limit is this value or larger do not allow merges. This
     * setter is not synchronized.
     *
     * @param maxMergeDocs the max merge docs number.
     */
    public void setMaxMergeDocs(int maxMergeDocs) {
        this.maxMergeDocs = maxMergeDocs;
    }

    //------------------------------< internal >--------------------------------

    /**
     * Schedules a merge task on the executor unless this index merger is
     * already shutting down. The worker created for the task is registered as
     * busy before it is handed to the executor.
     *
     * @param task the merge task to schedule.
     */
    private void addMergeTask(Merge task) {
        if (quit.get()) {
            // shutting down, do not accept any more work
            return;
        }
        Worker mergeWorker = new Worker(task);
        if (isStarted.get()) {
            // merger is running already, the worker need not wait for start()
            mergeWorker.unblock();
        }
        synchronized (busyMergers) {
            busyMergers.add(mergeWorker);
        }
        executor.execute(mergeWorker);
    }

    /**
     * Immutable pair of an index name and the number of documents the index
     * contains. The natural order sorts by document count first and by name
     * second.
     */
    private static final class Index implements Comparable<Index> {

        /**
         * Name of the index.
         */
        private final String name;

        /**
         * Number of documents in the index.
         */
        private final int numDocs;

        /**
         * Creates the descriptor of an index.
         *
         * @param name name of the index.
         * @param numDocs number of documents the index contains.
         */
        Index(String name, int numDocs) {
            this.numDocs = numDocs;
            this.name = name;
        }

        /**
         * Compares by {@link #numDocs} and, if both are equal, by
         * {@link #name}.
         *
         * @param other the <code>Index</code> to compare with.
         * @return a negative integer, zero, or a positive integer as this
         *         Index is less than, equal to, or greater than the specified
         *         Index.
         */
        public int compareTo(Index other) {
            if (numDocs < other.numDocs) {
                return -1;
            }
            if (numDocs > other.numDocs) {
                return 1;
            }
            // same size, the name decides
            return name.compareTo(other.name);
        }

        /**
         * @return the name and the document count, separated by a colon.
         */
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append(name).append(":").append(numDocs);
            return text.toString();
        }
    }

    /**
     * A merge task: the set of indexes that are to be combined into one new
     * index.
     */
    private static final class Merge {

        /**
         * The indexes to merge; a private copy of the array passed to the
         * constructor.
         */
        private final Index[] indexes;

        /**
         * Creates a merge task for the given indexes. The array is copied, so
         * later changes to the argument do not affect this task.
         *
         * @param indexes the indexes to merge.
         */
        Merge(Index[] indexes) {
            this.indexes = indexes.clone();
        }
    }

    /**
     * A list of {@link Index}es that is associated with a range of document
     * counts. Both ends of the range are inclusive. A bucket additionally
     * carries a flag that tells whether its indexes may be merged.
     */
    private static final class IndexBucket extends ArrayList<Index> {

        private static final long serialVersionUID = 2985514550083374904L;

        /**
         * Smallest document count that belongs to this bucket.
         */
        private final long lower;

        /**
         * Largest document count that belongs to this bucket.
         */
        private final long upper;

        /**
         * Whether indexes in this bucket can be merged.
         */
        private final boolean allowMerge;

        /**
         * Creates an empty bucket for the inclusive range
         * <code>lower..upper</code>.
         *
         * @param lower lower document limit.
         * @param upper upper document limit.
         * @param allowMerge if indexes in this bucket can be merged.
         */
        IndexBucket(long lower, long upper, boolean allowMerge) {
            this.allowMerge = allowMerge;
            this.upper = upper;
            this.lower = lower;
        }

        /**
         * Tells whether a document count lies within the range of this
         * bucket.
         *
         * @param numDocs the number of documents.
         * @return <code>true</code> if <code>numDocs</code> is neither below
         *         the lower nor above the upper limit.
         */
        boolean fits(long numDocs) {
            if (numDocs < lower) {
                return false;
            }
            return numDocs <= upper;
        }

        /**
         * Tells whether the indexes in this bucket are eligible for a merge.
         *
         * @return <code>true</code> if indexes in this bucket can be merged.
         */
        boolean allowsMerge() {
            return allowMerge;
        }
    }

    /**
     * Executes a single {@link Merge} task. A worker is created blocked and
     * only starts merging after {@link #unblock()} has been called. While it is
     * registered as busy it records the ids of deleted documents it is told
     * about, and passes them on when the merged index replaces its sources.
     */
    private class Worker implements Runnable, IndexListener {

        /**
         * List of id <code>Term</code> that identify documents that were deleted
         * while a merge was running.
         */
        private final List<Term> deletedDocuments = Collections.synchronizedList(new ArrayList<Term>());

        /**
         * A latch that is set to zero when this worker is unblocked.
         */
        private final CountDownLatch start = new CountDownLatch(1);

        /**
         * Flag that indicates whether this worker has finished its work. The
         * object also serves as the monitor {@link #join(long)} waits on.
         */
        private final AtomicBoolean terminated = new AtomicBoolean(false);

        /**
         * The merge task.
         */
        private final Merge task;

        /**
         * Creates a new worker which is initially blocked. Call
         * {@link #unblock()} to unblock it.
         *
         * @param task the merge task.
         */
        private Worker(Merge task) {
            this.task = task;
        }

        /**
         * Implements the index merging: waits for the start signal, merges the
         * source indexes into a newly created index and asks the multi index
         * to replace the sources with it. If the merge does not complete, the
         * new index is deleted and the task is scheduled again (unless the
         * merger is shutting down). On exit the worker always marks itself
         * terminated and removes itself from the busy list.
         */
        public void run() {
            // worker is initially suspended
            try {
                try {
                    start.await();
                } catch (InterruptedException e) {
                    // check if we should quit
                    if (!quit.get()) {
                        // enqueue task again and retry with another thread
                        addMergeTask(task);
                    }
                    return;
                }

                log.debug("accepted merge request");

                // collect the names of the indexes to merge
                String[] names = new String[task.indexes.length];
                for (int i = 0; i < task.indexes.length; i++) {
                    names[i] = task.indexes[i].name;
                }
                try {
                    log.debug("create new index");
                    PersistentIndex index = multiIndex.getOrCreateIndex(null);
                    boolean success = false;
                    try {

                        log.debug("get index readers from MultiIndex");
                        // the enclosing IndexMerger is passed as listener; its
                        // documentDeleted() forwards to all busy workers
                        IndexReader[] readers = multiIndex.getIndexReaders(names, IndexMerger.this);
                        try {
                            // do the merge
                            long time = System.currentTimeMillis();
                            index.addIndexes(readers);
                            time = System.currentTimeMillis() - time;
                            int docCount = 0;
                            for (IndexReader reader : readers) {
                                docCount += reader.numDocs();
                            }
                            log.info("merged " + docCount + " documents in " + time + " ms into " + index.getName() + ".");
                        } finally {
                            // readers are released even if the merge failed
                            for (IndexReader reader : readers) {
                                try {
                                    Util.closeOrRelease(reader);
                                } catch (IOException e) {
                                    log.warn("Unable to close IndexReader: " + e);
                                }
                            }
                        }

                        // inform multi index
                        // if we cannot get the sync immediately we have to quit
                        // (dispose() holds the exclusive lock while shutting down)
                        Lock shared = indexReplacement.readLock();
                        if (!shared.tryLock()) {
                            log.debug("index merging canceled");
                            return;
                        }
                        try {
                            log.debug("replace indexes");
                            multiIndex.replaceIndexes(names, index, deletedDocuments);
                        } finally {
                            shared.unlock();
                        }

                        success = true;

                    } finally {
                        if (!success) {
                            // delete index
                            log.debug("deleting index " + index.getName());
                            multiIndex.deleteIndex(index);
                            // add task again and retry; the retry worker is
                            // registered as busy before this worker removes
                            // itself below
                            addMergeTask(task);
                        }
                    }
                } catch (Throwable e) {
                    log.error("Error while merging indexes: ", e);
                }
            } finally {
                // wake up threads blocked in join()
                synchronized (terminated) {
                    terminated.set(true);
                    terminated.notifyAll();
                }
                // wake up threads blocked in waitUntilIdle()
                synchronized (busyMergers) {
                    busyMergers.remove(this);
                    busyMergers.notifyAll();
                }
                log.debug("Worker finished");
            }
        }

        /**
         * Records the id of a document that was deleted, so that it can be
         * passed on when the merged index replaces its source indexes.
         *
         * @param id the id term of the deleted document.
         */
        public void documentDeleted(Term id) {
            log.debug("document deleted: " + id.text());
            deletedDocuments.add(id);
        }

        /**
         * Unblocks this worker and allows it to start with the index merging.
         */
        void unblock() {
            start.countDown();
        }

        /**
         * Waits until this worker is finished or the specified amount of time
         * has elapsed, whichever happens first. A timeout of zero waits until
         * the worker is finished, without time limit.
         *
         * @param timeout the timeout in milliseconds.
         * @throws InterruptedException if the current thread is interrupted
         *                              while waiting for this worker to
         *                              terminate.
         * @throws IllegalArgumentException if the timeout is negative and the
         *                              worker has not terminated yet.
         */
        void join(long timeout) throws InterruptedException {
            synchronized (terminated) {
                if (timeout <= 0) {
                    // zero means no time limit; a negative value is rejected
                    // by Object.wait(long) itself
                    while (!terminated.get()) {
                        terminated.wait(timeout);
                    }
                    return;
                }
                // wait against a fixed deadline so that early or spurious
                // wake-ups neither extend nor cut short the total wait
                final long deadline = System.nanoTime() + timeout * 1000000L;
                while (!terminated.get()) {
                    long remainingNanos = deadline - System.nanoTime();
                    if (remainingNanos <= 0) {
                        // timed out, worker is still running
                        return;
                    }
                    // round up so that wait(0), i.e. "forever", is never used
                    terminated.wait((remainingNanos + 999999L) / 1000000L);
                }
            }
        }

        /**
         * @return <code>true</code> if this worker is still alive and not yet
         *         terminated.
         */
        boolean isAlive() {
            return !terminated.get();
        }
    }
}