package com.krickert.lucene;
import static com.google.common.base.Preconditions.checkNotNull;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
/**
 * Scaffolding that makes it easier to inject a fully configured index writer.
 *
 * <p>
 * The index writer exposes many setters (RAM buffer size, merge factor and
 * other indexing knobs). This class applies the chosen settings once, in its
 * constructor, so collaborators can simply ask for the ready-to-use writer via
 * {@link #getWriter()} and finish the run with {@link #finishIndex()}.
 *
 * @author krickert
 */
public class IndexWriterManager {

    /** The writer created and configured by the constructor; never reassigned. */
    private final IndexWriter writer;

    /**
     * Creates a new index writer to be used by the application and applies the
     * given tuning parameters to it.
     *
     * <p>
     * The writer is opened in "create" mode with an unlimited field length. If
     * applying any of the settings fails, the freshly opened writer is rolled
     * back before the exception propagates, so a rejected parameter does not
     * leave an open writer behind.
     *
     * @param directory
     *            the directory object for the indexer; must not be null
     * @param analyzer
     *            the analyzer type
     * @param numThreads
     *            the number of threads to create to concurrently run in the
     *            indexer
     * @param queueSize
     *            the size of the queue for new documents to add
     * @param ramBufferSizeMb
     *            the size, in MB, of the RAM buffer to use before flushing to
     *            disk
     * @param mergeFactor
     *            the merge factor - don't make this too big or you'll run out
     *            of file handles
     * @throws CorruptIndexException
     *             when the index is corrupt
     * @throws LockObtainFailedException
     *             there's another indexer running if you see this
     * @throws IOException
     *             when something is wrong with the FS - permissions of the
     *             file or you run out of space
     */
    public IndexWriterManager(Directory directory, Analyzer analyzer, int numThreads, int queueSize, int ramBufferSizeMb, int mergeFactor)
            throws CorruptIndexException, LockObtainFailedException, IOException {
        checkNotNull(directory);

        final IndexWriter created = new ThreadedIndexWriter(directory, analyzer, true, numThreads, queueSize, MaxFieldLength.UNLIMITED);

        // From here on the writer is open. Any failure while configuring it
        // must close it again, because no caller will ever get a reference to
        // a writer whose constructor threw.
        boolean configured = false;
        try {
            // NOTE: max buffered docs is set to DISABLE_AUTO_FLUSH because it
            // helps maximize indexing performance.
            created.setMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);

            // The size of the RAM buffer before flushing everything to disk.
            // Makes indexing faster at the cost of memory.
            created.setRAMBufferSizeMB(ramBufferSizeMb);

            // The number of segments that are merged by add document. Don't go
            // too crazy: large values cost file handles.
            created.setMergeFactor(mergeFactor);

            configured = true;
        } finally {
            if (!configured) {
                try {
                    // rollback() closes the writer without committing, so
                    // nothing from this aborted setup is written to the index.
                    created.rollback();
                } catch (IOException ignored) {
                    // Best-effort cleanup: the configuration failure that got
                    // us here is the exception the caller needs to see.
                }
            }
        }

        this.writer = created;
    }

    /**
     * Returns the writer that was created and configured by the constructor.
     *
     * @return the managed index writer; the same instance on every call
     */
    public IndexWriter getWriter() {
        return writer;
    }

    /**
     * Commits all pending changes and then closes the writer.
     *
     * <p>
     * {@code close()} is attempted even when {@code commit()} fails, so a
     * failed commit does not skip the close. If both calls throw, the
     * exception from {@code close()} is the one that propagates.
     *
     * @throws CorruptIndexException
     *             when the index is corrupt
     * @throws IOException
     *             when committing or closing hits a low-level I/O error
     */
    public void finishIndex() throws CorruptIndexException, IOException {
        try {
            this.writer.commit();
        } finally {
            this.writer.close();
        }
    }
}