/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.exoplatform.services.jcr.impl.core.query.lucene;
import EDU.oswego.cs.dl.util.concurrent.Mutex;
import EDU.oswego.cs.dl.util.concurrent.Sync;
import org.apache.commons.collections.Buffer;
import org.apache.commons.collections.BufferUtils;
import org.apache.commons.collections.buffer.UnboundedFifoBuffer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
/**
* Merges indexes in a separate daemon thread.
*/
class IndexMerger extends Thread implements IndexListener
{
/**
* Logger instance for this class, registered under the name
* "exo.jcr.component.core.IndexMerger".
*/
private static final Logger log = LoggerFactory.getLogger("exo.jcr.component.core.IndexMerger");
/**
* Marker task to signal the background thread to quit. It carries an empty
* index array and is recognized in {@link #run()} by reference identity.
*/
private static final Merge QUIT = new Merge(new Index[0]);
/**
* minMergeDocs config parameter. Used as the upper document limit of the
* first bucket when the buckets are created in <code>indexAdded</code>.
*/
private int minMergeDocs = SearchIndex.DEFAULT_MIN_MERGE_DOCS;
/**
* maxMergeDocs config parameter. Bucket creation in <code>indexAdded</code>
* stops multiplying limits once this value is reached; it is also the upper
* limit of the last regular bucket.
*/
private int maxMergeDocs = SearchIndex.DEFAULT_MAX_MERGE_DOCS;
/**
* mergeFactor config parameter. Used in <code>indexAdded</code> as the
* multiplier between consecutive bucket limits and as the number of indexes
* a bucket must hold before a merge is considered.
*/
private int mergeFactor = SearchIndex.DEFAULT_MERGE_FACTOR;
/**
* Queue of merge tasks: a blocking wrapper around an unbounded FIFO buffer.
* Tasks are added by <code>indexAdded</code> and {@link #dispose()} and
* removed by {@link #run()}.
*/
private final Buffer mergeTasks = BufferUtils.blockingBuffer(new UnboundedFifoBuffer());
/**
* List of id <code>Term</code> that identify documents that were deleted
* while a merge was running. The list is a synchronized list; it is cleared
* by {@link #run()} before each merge and handed to the
* <code>MultiIndex</code> when the merged indexes are replaced.
*/
private final List<Term> deletedDocuments = Collections.synchronizedList(new ArrayList<Term>());
/**
* List of <code>IndexBucket</code>s in ascending document limit. The list
* starts empty and is filled on the first call to <code>indexAdded</code>;
* it is only accessed while holding {@link #lock}.
*/
private final List<IndexBucket> indexBuckets = new ArrayList<IndexBucket>();
/**
* The <code>MultiIndex</code> this index merger is working on.
*/
private final MultiIndex multiIndex;
/**
* Monitor object to synchronize merge calculation, i.e. the bucket
* bookkeeping done in <code>indexAdded</code>.
*/
private final Object lock = new Object();
/**
* Mutex that is acquired when replacing indexes on MultiIndex.
* {@link #dispose()} acquires it and does not release it again, so that
* {@link #run()} can no longer replace indexes afterwards.
*/
private final Sync indexReplacement = new Mutex();
/**
* When released, indicates that this index merger is idle. It is acquired
* in the constructor, released by {@link #run()} when the task queue is
* empty, and briefly acquired by {@link #waitUntilIdle()}.
*/
private final Sync mergerIdle = new Mutex();
/**
* Creates an <code>IndexMerger</code>.
*
* @param multiIndex the <code>MultiIndex</code>.
*/
IndexMerger(MultiIndex multiIndex)
{
this.multiIndex = multiIndex;
setName("Index Merger" + (multiIndex.workspaceId == null ? "" : " " + multiIndex.workspaceId));
setDaemon(true);
try
{
mergerIdle.acquire();
}
catch (InterruptedException e)
{
// will never happen, lock is free upon construction
throw new InternalError("Unable to acquire mutex after construction");
}
}
/**
 * Informs the index merger that an index was added / created.
 * <p>
 * The index is put into the bucket whose document range contains
 * <code>numDocs</code>. When that bucket allows merging and holds at least
 * <code>mergeFactor</code> indexes, and more than two indexes are selected,
 * they are removed from the bucket and queued as one merge task for the
 * background thread.
 *
 * @param name the name of the index.
 * @param numDocs the number of documents it contains; zero is allowed.
 * @throws IllegalArgumentException if <code>numDocs</code> is negative.
 */
@SuppressWarnings("unchecked")
void indexAdded(String name, int numDocs)
{
   if (numDocs < 0)
   {
      throw new IllegalArgumentException("numDocs must not be negative");
   }
   // multiple threads may enter this method:
   // - the background thread of this IndexMerger, when it replaces indexes
   //   after a successful merge
   // - a regular thread that updates the workspace
   //
   // therefore we have to synchronize this block
   synchronized (lock)
   {
      // initially create buckets
      if (indexBuckets.isEmpty())
      {
         long lower = 0;
         long upper = minMergeDocs;
         while (upper < maxMergeDocs)
         {
            indexBuckets.add(new IndexBucket(lower, upper, true));
            lower = upper + 1;
            long next = upper * mergeFactor;
            if (next <= upper)
            {
               // the limit does not grow (mergeFactor <= 1 or a non-positive
               // minMergeDocs); stop here instead of adding buckets forever
               break;
            }
            upper = next;
         }
         // one with upper = maxMergeDocs
         indexBuckets.add(new IndexBucket(lower, maxMergeDocs, false));
         // and another one as overflow, just in case...
         // (long arithmetic: maxMergeDocs may be Integer.MAX_VALUE)
         indexBuckets.add(new IndexBucket(maxMergeDocs + 1L, Long.MAX_VALUE, false));
      }

      // put index in the first bucket it fits in, the last one is the fallback
      IndexBucket bucket = indexBuckets.get(indexBuckets.size() - 1);
      for (IndexBucket candidate : indexBuckets)
      {
         if (candidate.fits(numDocs))
         {
            bucket = candidate;
            break;
         }
      }
      bucket.add(new Index(name, numDocs));
      if (log.isDebugEnabled())
      {
         log.debug("index added: name=" + name + ", numDocs=" + numDocs);
      }

      // if bucket does not allow merge, we don't have to continue
      if (!bucket.allowsMerge())
      {
         return;
      }

      // check if we need a merge
      if (bucket.size() >= mergeFactor)
      {
         // NOTE(review): the previous implementation computed a target
         // document count and a running sum here, but the sum was never
         // updated, so every index of the bucket has always been selected.
         // That effective behavior is kept; the dead bookkeeping is gone.
         List<Index> indexesToMerge = new ArrayList<Index>(bucket);
         if (indexesToMerge.size() > 2)
         {
            // found merge
            Index[] idxs = indexesToMerge.toArray(new Index[indexesToMerge.size()]);
            bucket.removeAll(indexesToMerge);
            if (log.isDebugEnabled())
            {
               log.debug("requesting merge for " + indexesToMerge);
            }
            mergeTasks.add(new Merge(idxs));
            if (log.isDebugEnabled())
            {
               log.debug("merge queue now contains " + mergeTasks.size() + " tasks.");
            }
         }
      }
   }
}
/**
 * {@inheritDoc}
 * <p>
 * The id term is remembered in the list of deleted documents, which
 * {@link #run()} passes on when it replaces the merged indexes.
 */
public void documentDeleted(Term id)
{
   final String deletedId = id.text();
   log.debug("document deleted: " + deletedId);
   deletedDocuments.add(id);
}
/**
 * Blocks the calling thread until this index merger is idle, that is until
 * there are no merge tasks pending anymore. Returns right away when nothing
 * is pending.
 *
 * @throws InterruptedException if the calling thread is interrupted while
 *             waiting for the merger to become idle.
 */
void waitUntilIdle() throws InterruptedException
{
   final Sync idle = mergerIdle;
   // the sync can only be obtained while the merger is idle ...
   idle.acquire();
   // ... and is handed back at once, it is not meant to be held
   idle.release();
}
/**
 * Signals this <code>IndexMerger</code> to stop and waits up to 500 ms for
 * its thread to terminate.
 * <p>
 * The index replacement sync is acquired and intentionally kept, so the
 * merger thread can no longer replace indexes on the <code>MultiIndex</code>
 * afterwards. Pending merge tasks are dropped and replaced by the quit
 * marker. When the calling thread is interrupted during this method, the
 * shutdown is still attempted and the interrupt status is restored before
 * returning.
 */
@SuppressWarnings("unchecked")
void dispose()
{
   log.debug("dispose IndexMerger");
   // remembers an interrupt received on the way, so that it is not lost
   boolean interrupted = false;
   // get mutex for index replacements
   try
   {
      indexReplacement.acquire();
   }
   catch (InterruptedException e)
   {
      interrupted = true;
      log.warn("Interrupted while acquiring index replacement sync: " + e);
      // try to stop IndexMerger without the sync
   }
   try
   {
      // clear task queue
      mergeTasks.clear();
      // send quit
      mergeTasks.add(QUIT);
      log.debug("quit sent");
      // give the merger thread some time to quit,
      // it is possible that the merger is busy working on a large index.
      // if that is the case we will just ignore it and the daemon will
      // die without being able to finish the merge.
      // at this point it is not possible anymore to replace indexes
      // on the MultiIndex because we hold the indexReplacement Sync.
      this.join(500);
      if (isAlive())
      {
         log.info("Unable to stop IndexMerger. Daemon is busy.");
      }
      else
      {
         log.debug("IndexMerger thread stopped");
      }
      log.debug("merge queue size: " + mergeTasks.size());
   }
   catch (InterruptedException e)
   {
      interrupted = true;
      log.warn("Interrupted while waiting for IndexMerger thread to terminate.");
   }
   finally
   {
      if (interrupted)
      {
         // hand the interrupt back to the caller
         Thread.currentThread().interrupt();
      }
   }
}
/**
 * Implements the index merging.
 * <p>
 * Takes merge tasks from the queue until the quit marker is received or a
 * merge is canceled because the index replacement sync is not available.
 * For each task a new index is created, the readers of the source indexes
 * are merged into it and the source indexes are then replaced on the
 * <code>MultiIndex</code>. A new index that could not be handed over is
 * deleted again. Errors during a merge are logged and the loop continues
 * with the next task.
 * <p>
 * Whenever the queue is empty the <code>mergerIdle</code> sync is released;
 * it is also released when this method terminates, so that threads blocked
 * in {@link #waitUntilIdle()} never wait for a merger that is gone.
 */
public void run()
{
   try
   {
      for (;;)
      {
         boolean isIdle = false;
         if (mergeTasks.size() == 0)
         {
            mergerIdle.release();
            isIdle = true;
         }
         // blocks until a task is available
         Merge task = (Merge)mergeTasks.remove();
         if (task == QUIT) // NOSONAR
         {
            break;
         }
         if (isIdle)
         {
            // there is work again, take the idle sync back
            try
            {
               mergerIdle.acquire();
            }
            catch (InterruptedException e)
            {
               Thread.interrupted();
               log.warn("Unable to acquire mergerIdle sync");
            }
         }
         log.debug("accepted merge request");
         // reset deleted documents
         deletedDocuments.clear();
         // get readers
         String[] names = new String[task.indexes.length];
         for (int i = 0; i < task.indexes.length; i++)
         {
            names[i] = task.indexes[i].name;
         }
         try
         {
            log.debug("create new index");
            PersistentIndex index = multiIndex.getOrCreateIndex(null);
            boolean success = false;
            try
            {
               log.debug("get index readers from MultiIndex");
               IndexReader[] readers = multiIndex.getIndexReaders(names, this);
               try
               {
                  // do the merge
                  long time = System.currentTimeMillis();
                  index.addIndexes(readers);
                  time = System.currentTimeMillis() - time;
                  int docCount = 0;
                  for (int i = 0; i < readers.length; i++)
                  {
                     docCount += readers[i].numDocs();
                  }
                  if (log.isDebugEnabled())
                  {
                     log.debug("merged " + docCount + " documents in " + time + " ms into " + index.getName() + ".");
                  }
               }
               finally
               {
                  for (int i = 0; i < readers.length; i++)
                  {
                     try
                     {
                        Util.closeOrRelease(readers[i]);
                     }
                     catch (IOException e)
                     {
                        log.warn("Unable to close IndexReader: " + e);
                     }
                  }
               }
               // inform multi index
               // if we cannot get the sync immediately we have to quit
               if (!indexReplacement.attempt(0))
               {
                  log.debug("index merging canceled");
                  // if index not passed to multiIndex, then it will never be closed
                  index.close();
                  break;
               }
               try
               {
                  log.debug("replace indexes");
                  multiIndex.replaceIndexes(names, index, deletedDocuments);
               }
               finally
               {
                  indexReplacement.release();
               }
               success = true;
            }
            finally
            {
               if (!success)
               {
                  // delete index
                  log.debug("deleting index " + index.getName());
                  multiIndex.deleteIndex(index);
               }
            }
         }
         catch (Throwable e) //NOSONAR
         {
            log.error("Error while merging indexes: ", e);
         }
      }
   }
   finally
   {
      // Whatever ended the loop (quit marker, canceled merge or an unexpected
      // failure): no task will be processed anymore, so let go of the idle
      // sync. Releasing a sync that is already free has no effect, which is
      // why this is safe even if the queue was empty right before.
      mergerIdle.release();
   }
   if (log.isDebugEnabled())
   {
      log.debug("IndexMerger terminated");
   }
}
//-----------------------< merge properties >-------------------------------
/**
* Sets the merge factor. The value is stored as is, without validation.
* <code>indexAdded</code> uses it as the multiplier between consecutive
* bucket limits when the buckets are first created, and as the number of
* indexes a bucket must hold before a merge is considered.
*
* @param mergeFactor the merge factor.
*/
public void setMergeFactor(int mergeFactor)
{
this.mergeFactor = mergeFactor;
}
/**
* Sets the initial threshold for number of documents to merge to a new
* index. The value is stored as is, without validation. It is read when
* <code>indexAdded</code> creates the buckets on its first call, where it
* becomes the upper document limit of the first bucket.
*
* @param minMergeDocs the initial document threshold.
*/
public void setMinMergeDocs(int minMergeDocs)
{
this.minMergeDocs = minMergeDocs;
}
/**
* Sets the maximum number of documents to merge. The value is stored as
* is, without validation. When <code>indexAdded</code> creates the buckets
* on its first call, this value is the upper document limit of the last
* regular bucket; buckets above the mergeable ones do not allow merging.
*
* @param maxMergeDocs the maximum number of documents to merge.
*/
public void setMaxMergeDocs(int maxMergeDocs)
{
this.maxMergeDocs = maxMergeDocs;
}
//------------------------------< internal >--------------------------------
/**
 * Simple immutable value holder for the name of an index and the number of
 * documents it contains. The natural order of <code>Index</code> instances
 * is by document count, ties are broken by name.
 */
private static final class Index implements Comparable<Index>
{
   /**
    * The name of the index.
    */
   private final String name;

   /**
    * The number of documents the index contains.
    */
   private final int numDocs;

   /**
    * Creates a new index struct.
    *
    * @param name name of an index.
    * @param numDocs number of documents it contains.
    */
   Index(String name, int numDocs)
   {
      this.name = name;
      this.numDocs = numDocs;
   }

   /**
    * Orders indexes by {@link #numDocs} first and by {@link #name} second.
    *
    * @param other the other <code>Index</code>.
    * @return a negative integer, zero, or a positive integer as this
    *         Index is less than, equal to, or greater than the specified
    *         Index.
    */
   public int compareTo(Index other)
   {
      if (numDocs != other.numDocs)
      {
         return numDocs < other.numDocs ? -1 : 1;
      }
      // same size, the name decides
      return name.compareTo(other.name);
   }

   /**
    * Returns the index name and the document count, separated by a colon.
    *
    * @return <code>name:numDocs</code>.
    */
   public String toString()
   {
      StringBuilder text = new StringBuilder();
      text.append(name).append(":").append(numDocs);
      return text.toString();
   }
}
/**
 * A merge task: the set of indexes that are to be merged into a single new
 * index.
 */
private static final class Merge
{
   /**
    * The indexes to merge; a private copy of the array passed in.
    */
   private final Index[] indexes;

   /**
    * Creates a merge task for the given <code>indexes</code>. The array is
    * copied, later changes to the passed array do not affect this task.
    *
    * @param indexes the indexes to merge.
    */
   Merge(Index[] indexes)
   {
      this.indexes = indexes.clone();
   }
}
/**
 * A <code>List</code> of {@link Index}es that carries a document range. An
 * index belongs into an <code>IndexBucket</code> when its number of
 * documents lies within the lower and upper limit of the bucket, both
 * limits included.
 */
private static final class IndexBucket extends ArrayList<Index>
{
   private static final long serialVersionUID = 1885162315017837466L;

   /**
    * The lower document limit (inclusive).
    */
   private final long lower;

   /**
    * The upper document limit (inclusive).
    */
   private final long upper;

   /**
    * Flag indicating if indexes in this bucket can be merged.
    */
   private final boolean allowMerge;

   /**
    * Creates a new, empty <code>IndexBucket</code>. Limits are both
    * inclusive.
    *
    * @param lower document limit.
    * @param upper document limit.
    * @param allowMerge if indexes in this bucket can be merged.
    */
   IndexBucket(long lower, long upper, boolean allowMerge)
   {
      this.allowMerge = allowMerge;
      this.upper = upper;
      this.lower = lower;
   }

   /**
    * Tells whether an index with the given number of documents belongs
    * into this bucket.
    *
    * @param numDocs the number of documents.
    * @return <code>true</code> if <code>numDocs</code> lies within the
    *         limits of this bucket; <code>false</code> otherwise.
    */
   boolean fits(long numDocs)
   {
      if (numDocs < lower)
      {
         return false;
      }
      return numDocs <= upper;
   }

   /**
    * Tells whether the indexes in this bucket can be merged.
    *
    * @return <code>true</code> if indexes in this bucket can be merged.
    */
   boolean allowsMerge()
   {
      return allowMerge;
   }
}
}