/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jackrabbit.core.query.lucene; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Merges indexes in a separate daemon thread. */ class IndexMerger implements IndexListener { /** * Logger instance for this class. */ private static final Logger log = LoggerFactory.getLogger(IndexMerger.class); /** * minMergeDocs config parameter. */ private int minMergeDocs = SearchIndex.DEFAULT_MIN_MERGE_DOCS; /** * maxMergeDocs config parameter */ private int maxMergeDocs = SearchIndex.DEFAULT_MAX_MERGE_DOCS; /** * mergeFactor config parameter */ private int mergeFactor = SearchIndex.DEFAULT_MERGE_FACTOR; /** * List of <code>IndexBucket</code>s in ascending document limit. */ private final List<IndexBucket> indexBuckets = new ArrayList<IndexBucket>(); /** * The <code>MultiIndex</code> this index merger is working on. */ private final MultiIndex multiIndex; /** * The executor of the repository. */ private final Executor executor; /** * Flag that indicates that this index merger is shuting down and should * quit. */ private final AtomicBoolean quit = new AtomicBoolean(false); /** * Flag that indicates if this index merger has already been started. * @see #start() */ private final AtomicBoolean isStarted = new AtomicBoolean(false); /** * Monitor object to synchronize merge calculation. */ private final Object lock = new Object(); /** * Read/write lock for index segment replacement. A shared read lock is * acquired for an index replacement. An exclusive write lock is acquired * when this index merger is shutting down, to prevent further index * replacements. */ private final ReadWriteLock indexReplacement = new ReentrantReadWriteLock(); /** * List of merger threads that are currently busy. */ private final List<Worker> busyMergers = new ArrayList<Worker>(); /** * Creates an <code>IndexMerger</code>. * * @param multiIndex the <code>MultiIndex</code>. * @param executor the executor of the repository. */ IndexMerger(MultiIndex multiIndex, Executor executor) { this.multiIndex = multiIndex; this.executor = executor; } /** * Starts this index merger. */ void start() { isStarted.set(true); synchronized (busyMergers) { for (Worker worker : busyMergers) { worker.unblock(); } } } /** * Informs the index merger that an index was added / created. * * @param name the name of the index. * @param numDocs the number of documents it contains. */ void indexAdded(String name, int numDocs) { if (numDocs < 0) { throw new IllegalArgumentException("numDocs must be positive"); } // multiple threads may enter this method: // - the background thread of this IndexMerger, when it replaces indexes // after a successful merge // - a regular thread that updates the workspace // // therefore we have to synchronize this block synchronized (lock) { // initially create buckets if (indexBuckets.size() == 0) { long lower = 0; long upper = minMergeDocs; while (upper < maxMergeDocs) { indexBuckets.add(new IndexBucket(lower, upper, true)); lower = upper + 1; upper *= mergeFactor; } // one with upper = maxMergeDocs indexBuckets.add(new IndexBucket(lower, maxMergeDocs, false)); // and another one as overflow, just in case... indexBuckets.add(new IndexBucket(maxMergeDocs + 1, Long.MAX_VALUE, false)); } // put index in bucket IndexBucket bucket = indexBuckets.get(indexBuckets.size() - 1); for (IndexBucket indexBucket : indexBuckets) { bucket = indexBucket; if (bucket.fits(numDocs)) { break; } } bucket.add(new Index(name, numDocs)); if (log.isDebugEnabled()) { log.debug("index added: name=" + name + ", numDocs=" + numDocs); } // if bucket does not allow merge, we don't have to continue if (!bucket.allowsMerge()) { return; } // check if we need a merge if (bucket.size() >= mergeFactor) { long targetMergeDocs = bucket.upper; targetMergeDocs = Math.min(targetMergeDocs * mergeFactor, maxMergeDocs); // sum up docs in bucket List<Index> indexesToMerge = new ArrayList<Index>(); int mergeDocs = 0; for (Iterator<Index> it = bucket.iterator(); it.hasNext() && mergeDocs <= targetMergeDocs;) { indexesToMerge.add(it.next()); } if (indexesToMerge.size() > 2) { // found merge Index[] idxs = indexesToMerge.toArray(new Index[indexesToMerge.size()]); bucket.removeAll(indexesToMerge); if (log.isDebugEnabled()) { log.debug("requesting merge for " + indexesToMerge); } addMergeTask(new Merge(idxs)); if (log.isDebugEnabled()) { int numBusy; synchronized (busyMergers) { numBusy = busyMergers.size(); } log.debug("# of busy merge workers: " + numBusy); } } } } } /** * @inheritDoc */ public void documentDeleted(Term id) { log.debug("document deleted: " + id.text()); synchronized (busyMergers) { for (Worker w : busyMergers) { w.documentDeleted(id); } } } /** * When the calling thread returns this index merger will be idle, that is * there will be no merge tasks pending anymore. The method returns * immediately if there are currently no tasks pending at all. * * @throws InterruptedException if this thread is interrupted while waiting * for the worker threads to become idle. */ void waitUntilIdle() throws InterruptedException { synchronized (busyMergers) { while (!busyMergers.isEmpty()) { busyMergers.wait(); } } } /** * Signals this <code>IndexMerger</code> to stop and waits until it * has terminated. */ void dispose() { log.debug("dispose IndexMerger"); // get exclusive lock on index replacements try { indexReplacement.writeLock().lockInterruptibly(); } catch (InterruptedException e) { log.warn("Interrupted while acquiring index replacement exclusive lock: " + e); // try to stop IndexMerger without the sync } // set quit quit.set(true); log.debug("quit flag set"); try { // give the merger threads some time to quit, // it is possible that the mergers are busy working on a large index. // if that is the case we will just ignore it and the daemon will // die without being able to finish the merge. // at this point it is not possible anymore to replace indexes // on the MultiIndex because we hold all indexReplacement permits. Worker[] workers; synchronized (busyMergers) { workers = busyMergers.toArray(new Worker[busyMergers.size()]); } for (Worker w : workers) { w.join(500); if (w.isAlive()) { log.info("Unable to stop IndexMerger.Worker. Daemon is busy."); } else { log.debug("IndexMerger.Worker thread stopped"); } } } catch (InterruptedException e) { log.warn("Interrupted while waiting for IndexMerger threads to terminate."); } } //-----------------------< merge properties >------------------------------- /** * The merge factor. * * @param mergeFactor the merge factor. */ public void setMergeFactor(int mergeFactor) { this.mergeFactor = mergeFactor; } /** * The initial threshold for number of documents to merge to a new index. * * @param minMergeDocs the min merge docs number. */ public void setMinMergeDocs(int minMergeDocs) { this.minMergeDocs = minMergeDocs; } /** * The maximum number of document to merge. * * @param maxMergeDocs the max merge docs number. */ public void setMaxMergeDocs(int maxMergeDocs) { this.maxMergeDocs = maxMergeDocs; } //------------------------------< internal >-------------------------------- private void addMergeTask(Merge task) { // only enqueue if still running if (!quit.get()) { Worker worker = new Worker(task); if (isStarted.get()) { // immediately unblock if this index merger is already started worker.unblock(); } synchronized (busyMergers) { busyMergers.add(worker); } executor.execute(worker); } } /** * Implements a simple struct that holds the name of an index and how * many document it contains. <code>Index</code> is comparable using the * number of documents it contains. */ private static final class Index implements Comparable<Index> { /** * The name of the index. */ private final String name; /** * The number of documents the index contains. */ private final int numDocs; /** * Creates a new index struct. * * @param name name of an index. * @param numDocs number of documents it contains. */ Index(String name, int numDocs) { this.name = name; this.numDocs = numDocs; } /** * Indexes are first ordered by {@link #numDocs} and then by {@link * #name}. * * @param o the other <code>Index</code>. * @return a negative integer, zero, or a positive integer as this * Index is less than, equal to, or greater than the specified * Index. */ public int compareTo(Index other) { int val = numDocs < other.numDocs ? -1 : (numDocs == other.numDocs ? 0 : 1); if (val != 0) { return val; } else { return name.compareTo(other.name); } } /** * @inheritDoc */ public String toString() { return name + ":" + numDocs; } } /** * Defines a merge task, to merge a couple of indexes into a new index. */ private static final class Merge { private final Index[] indexes; /** * Merge task, to merge <code>indexes</code> into a new index with * <code>name</code>. * * @param indexes the indexes to merge. */ Merge(Index[] indexes) { this.indexes = new Index[indexes.length]; System.arraycopy(indexes, 0, this.indexes, 0, indexes.length); } } /** * Implements a <code>List</code> with a document limit value. An * <code>IndexBucket</code> contains {@link Index}es with documents less * or equal the document limit of the bucket. */ private static final class IndexBucket extends ArrayList<Index> { private static final long serialVersionUID = 2985514550083374904L; /** * The lower document limit. */ private final long lower; /** * The upper document limit. */ private final long upper; /** * Flag indicating if indexes in this bucket can be merged. */ private final boolean allowMerge; /** * Creates a new <code>IndexBucket</code>. Limits are both inclusive. * * @param lower document limit. * @param upper document limit. * @param allowMerge if indexes in this bucket can be merged. */ IndexBucket(long lower, long upper, boolean allowMerge) { this.lower = lower; this.upper = upper; this.allowMerge = allowMerge; } /** * Returns <code>true</code> if the number of documents fit in this * <code>IndexBucket</code>; otherwise <code>false</code> * * @param numDocs the number of documents. * @return <code>true</code> if <code>numDocs</code> fit. */ boolean fits(long numDocs) { return numDocs >= lower && numDocs <= upper; } /** * Returns <code>true</code> if indexes in this bucket can be merged. * * @return <code>true</code> if indexes in this bucket can be merged. */ boolean allowsMerge() { return allowMerge; } } private class Worker implements Runnable, IndexListener { /** * List of id <code>Term</code> that identify documents that were deleted * while a merge was running. */ private final List<Term> deletedDocuments = Collections.synchronizedList(new ArrayList<Term>()); /** * A latch that is set to zero when this worker is unblocked. */ private final CountDownLatch start = new CountDownLatch(1); /** * Flag that indicates whether this worker has finished its work. */ private final AtomicBoolean terminated = new AtomicBoolean(false); /** * The merge task. */ private final Merge task; /** * Creates a new worker which is initially blocked. Call * {@link #unblock()} to unblock it. * * @param task the merge task. */ private Worker(Merge task) { this.task = task; } /** * Implements the index merging. */ public void run() { // worker is initially suspended try { try { start.await(); } catch (InterruptedException e) { // check if we should quit if (!quit.get()) { // enqueue task again and retry with another thread addMergeTask(task); } return; } log.debug("accepted merge request"); // get readers String[] names = new String[task.indexes.length]; for (int i = 0; i < task.indexes.length; i++) { names[i] = task.indexes[i].name; } try { log.debug("create new index"); PersistentIndex index = multiIndex.getOrCreateIndex(null); boolean success = false; try { log.debug("get index readers from MultiIndex"); IndexReader[] readers = multiIndex.getIndexReaders(names, IndexMerger.this); try { // do the merge long time = System.currentTimeMillis(); index.addIndexes(readers); time = System.currentTimeMillis() - time; int docCount = 0; for (IndexReader reader : readers) { docCount += reader.numDocs(); } log.info("merged " + docCount + " documents in " + time + " ms into " + index.getName() + "."); } finally { for (IndexReader reader : readers) { try { Util.closeOrRelease(reader); } catch (IOException e) { log.warn("Unable to close IndexReader: " + e); } } } // inform multi index // if we cannot get the sync immediately we have to quit Lock shared = indexReplacement.readLock(); if (!shared.tryLock()) { log.debug("index merging canceled"); return; } try { log.debug("replace indexes"); multiIndex.replaceIndexes(names, index, deletedDocuments); } finally { shared.unlock(); } success = true; } finally { if (!success) { // delete index log.debug("deleting index " + index.getName()); multiIndex.deleteIndex(index); // add task again and retry addMergeTask(task); } } } catch (Throwable e) { log.error("Error while merging indexes: ", e); } } finally { synchronized (terminated) { terminated.set(true); terminated.notifyAll(); } synchronized (busyMergers) { busyMergers.remove(this); busyMergers.notifyAll(); } log.debug("Worker finished"); } } /** * @inheritDoc */ public void documentDeleted(Term id) { log.debug("document deleted: " + id.text()); deletedDocuments.add(id); } /** * Unblocks this worker and allows it to start with the index merging. */ void unblock() { start.countDown(); } /** * Waits until this worker is finished or the specified amount of time * has elapsed. * * @param timeout the timeout in milliseconds. * @throws InterruptedException if the current thread is interrupted * while waiting for this worker to * terminate. */ void join(long timeout) throws InterruptedException { synchronized (terminated) { while (!terminated.get()) { terminated.wait(timeout); } } } /** * @return <code>true</code> if this worker is still alive and not yet * terminated. */ boolean isAlive() { return !terminated.get(); } } }