/*
* Copyright 2008 The Topaz Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*
* Contributions:
*/
package org.mulgara.resolver.lucene;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.mulgara.util.io.MappingUtil;
/**
* A cache of lucene index-readers and index-writers. Opening a lucene index-reader or writer is
* fairly expensive, so caching them can provide substantial performance gains. No cache-expiry
* has been implemented, however; the assumption is that there will be a limited number of lucene
* models.
*
* <p>This also manages the setting up and removal of the index directory.
*
* @created 2008-09-28
* @author Ronald Tschalär
* @licence Apache License v2.0
*/
public class LuceneIndexerCache {
  private static final Logger logger = Logger.getLogger(LuceneIndexerCache.class);

  /** Number of additional attempts made to delete a file after the first attempt failed. */
  private static final int MAX_RETRIES = 10;

  // All methods that read or modify the pools below are synchronized on this cache instance.

  /** Readers that are pooled and currently not handed out. */
  private final Stack<ReaderInfo> freeReaders = new Stack<ReaderInfo>();

  /** Writers that are pooled and currently not handed out. */
  private final Stack<WriterInfo> freeWriters = new Stack<WriterInfo>();

  /** Readers that have been handed out via {@link #getReader} and not yet returned. */
  private final Map<IndexReader,ReaderInfo> allocdReaders = new HashMap<IndexReader,ReaderInfo>();

  /** Writers that have been handed out via {@link #getWriter} and not yet returned. */
  private final Map<IndexWriter,WriterInfo> allocdWriters = new HashMap<IndexWriter,WriterInfo>();

  /** The Directory for Lucene. */
  private final FSDirectory luceneIndexDirectory;

  /** Whether this cache has been closed */
  private boolean closed = false;

  /**
   * Create a new cache. Any stale write lock on the directory is forcibly removed, and an empty
   * index is created if the directory does not contain one yet.
   *
   * @param directory the directory to use for the indexes; it is created if it does not exist.
   * @throws IOException if the directory is not readable, writable, or some other error occurs
   *                     trying to access it
   */
  public LuceneIndexerCache(String directory) throws IOException {
    luceneIndexDirectory = FSDirectory.open(createOrValidateDirectory(directory));
    clearLocks();

    if (!IndexReader.indexExists(luceneIndexDirectory)) {
      logger.debug("Fulltext string index does not yet exist in directory '" + directory + "', creating it now.");
      createIndex();
    }

    if (logger.isDebugEnabled()) {
      logger.debug("Fulltext string indexer cache initialized; directory =" + directory);
    }
  }

  /**
   * Create an empty index by opening a writer and committing it. On success the writer is kept
   * in the pool for later reuse.
   *
   * @throws IOException if the commit fails; non-IO failures are wrapped in an IOException
   */
  private void createIndex() throws IOException {
    IndexWriter writer = getWriter();
    try {
      writer.commit();
    } catch (Throwable t) {
      // Hand the writer back for closing rather than closing it directly: this removes it from
      // the allocated map, and a failure while closing is logged instead of masking 't'.
      returnWriter(writer, true);
      throw (t instanceof IOException) ? (IOException)t : new IOException("Error creating new index", t);
    }
    returnWriter(writer, false);
  }

  /**
   * Make sure the given path is a writeable directory, creating it if it does not exist.
   *
   * @param directory the path of the index directory
   * @return the directory as a File
   * @throws IOException if the path is not a directory or is not writeable
   */
  private static File createOrValidateDirectory(String directory) throws IOException {
    File indexDirectory = new File(directory);

    // make the directory if it doesn't exist; a failure here is caught by the checks below
    if (!indexDirectory.exists()) {
      indexDirectory.mkdirs();
    }

    // ensure the index directory is a directory
    if (!indexDirectory.isDirectory()) {
      logger.fatal("The fulltext string index directory '" + directory + "' is not a directory!");
      throw new IOException("The fulltext string index directory '" + directory +
                            "' is not a directory!");
    }

    // ensure the directory is writeable
    if (!indexDirectory.canWrite()) {
      logger.fatal("The fulltext string index directory '" + directory + "' is not writeable!");
      throw new IOException("The fulltext string index directory '" + directory +
                            "' is not writeable!");
    }

    return indexDirectory;
  }

  /**
   * Forcibly remove a write lock left behind on the index directory.
   *
   * @throws IOException if the directory is still locked after the forced unlock
   */
  private void clearLocks() throws IOException {
    if (IndexWriter.isLocked(luceneIndexDirectory)) {
      /* This shouldn't happen unless mulgara was shut down abruptly since mulgara has a single
       * writer lock.
       */
      logger.warn("Fulltext index directory '" + luceneIndexDirectory + "' is locked; forcibly unlocking");
      IndexWriter.unlock(luceneIndexDirectory);
    }

    if (IndexWriter.isLocked(luceneIndexDirectory)) {
      throw new IOException("Fulltext index directory '" + luceneIndexDirectory + "' is locked; " +
                            "forced unlock failed; giving up");
    }
  }

  /**
   * Get an index-reader. It must be returned via {@link #returnReader}.
   *
   * @return the index-reader
   * @throws IOException if a reader could not be opened or refreshed
   * @throws IllegalStateException if this cache has been closed
   */
  public synchronized IndexReader getReader() throws IOException {
    if (closed) throw new IllegalStateException("IndexerCache has been closed: " + luceneIndexDirectory);

    ReaderInfo ri = freeReaders.pop();
    if (ri == null) {
      ri = new ReaderInfo(luceneIndexDirectory);
      if (logger.isDebugEnabled()) logger.debug("Creating new index-reader: " + ri.reader);
    } else if (ri.needsRefresh) {
      ri = refreshReader(ri);
    } else {
      if (logger.isDebugEnabled()) logger.debug("Reusing index-reader: " + ri.reader);
    }

    allocdReaders.put(ri.reader, ri);
    return ri.reader;
  }

  /**
   * Bring a pooled reader that was marked stale up to date.
   *
   * @param stale the pooled reader info that needs refreshing; it has already been removed from
   *              the free pool, so it must either be returned from here or be closed
   * @return the same info if the reader was still current, otherwise a new info wrapping the
   *         reopened reader
   * @throws IOException if reopening the reader or closing the outdated reader fails
   */
  private static ReaderInfo refreshReader(ReaderInfo stale) throws IOException {
    IndexReader fresh;
    try {
      fresh = stale.reader.reopen();
    } catch (IOException ioe) {
      // nobody references the stale reader any more, so release it instead of leaking it
      closeAbandoned(stale.reader);
      throw ioe;
    }

    if (fresh == stale.reader) {
      stale.needsRefresh = false;
      return stale;
    }

    try {
      forceClose(stale.reader);
    } catch (IOException ioe) {
      // the new reader will never be handed out, so don't leave it open
      closeAbandoned(fresh);
      throw ioe;
    }

    ReaderInfo refreshed = new ReaderInfo(fresh);
    if (logger.isDebugEnabled()) logger.debug("Refreshed index-reader: " + refreshed.reader);
    return refreshed;
  }

  /**
   * Close a reader that is being discarded on an error path; a failure to close is only logged
   * so that it does not replace the exception that is already being propagated.
   *
   * @param reader the reader to close
   */
  private static void closeAbandoned(IndexReader reader) {
    try {
      forceClose(reader);
    } catch (IOException ioe) {
      logger.warn("Error closing index-reader: " + reader, ioe);
    }
  }

  /**
   * Get an index-writer. It must be returned via {@link #returnWriter}.
   *
   * @return the index-writer
   * @throws IOException if a writer could not be opened, or a stale writer could not be closed
   * @throws IllegalStateException if this cache has been closed
   */
  public synchronized IndexWriter getWriter() throws IOException {
    if (closed) throw new IllegalStateException("IndexerCache has been closed: " + luceneIndexDirectory);

    WriterInfo wi = freeWriters.pop();
    if (wi == null) {
      wi = new WriterInfo(luceneIndexDirectory);
      if (logger.isDebugEnabled()) logger.debug("Created new index-writer: " + wi.writer);
    } else if (wi.needsRefresh) {
      // a stale writer is replaced by closing it and opening a new one
      wi.writer.close();
      wi = new WriterInfo(luceneIndexDirectory);
      if (logger.isDebugEnabled()) logger.debug("Refreshed index-writer: " + wi.writer);
    } else {
      if (logger.isDebugEnabled()) logger.debug("Reusing index-writer: " + wi.writer);
    }

    allocdWriters.put(wi.writer, wi);
    return wi.writer;
  }

  /**
   * Return an index-reader to the cache. It must have been previously retrieved through {@link
   * #getReader}. A reader that is not currently handed out by this cache (for example one that
   * is returned twice) is never pooled: it is closed if <var>close</var> is true or the cache
   * has been closed, and otherwise ignored with a warning.
   *
   * @param reader the reader to return
   * @param close  if true the reader is closed and not returned to the pool
   */
  public synchronized void returnReader(IndexReader reader, boolean close) {
    ReaderInfo ri = allocdReaders.remove(reader);

    if (close || closed) {
      try {
        forceClose(reader);
        if (logger.isDebugEnabled()) logger.debug("Closed index-reader: " + reader);
      } catch (IOException ioe) {
        logger.warn("Error closing index-reader: " + reader, ioe);
      }
    } else if (ri == null) {
      // Pooling a null entry would later break indexModified() and close(); closing the reader
      // could invalidate a pooled instance if this is a double return. So just drop it.
      logger.warn("Ignoring returned index-reader that is not allocated from this cache: " + reader);
    } else {
      freeReaders.push(ri);
      if (logger.isDebugEnabled()) logger.debug("Returned index-reader: " + reader);
    }
  }

  /**
   * Return an index-writer to the cache. It must have been previously retrieved through {@link
   * #getWriter}. A writer that is not currently handed out by this cache (for example one that
   * is returned twice) is never pooled: it is closed if <var>close</var> is true or the cache
   * has been closed, and otherwise ignored with a warning.
   *
   * @param writer the writer to return
   * @param close  if true the writer is closed and not returned to the pool
   */
  public synchronized void returnWriter(IndexWriter writer, boolean close) {
    WriterInfo wi = allocdWriters.remove(writer);

    if (close || closed) {
      try {
        writer.close();
        if (logger.isDebugEnabled()) logger.debug("Closed index-writer: " + writer);
      } catch (IOException ioe) {
        logger.warn("Error closing index-writer: " + writer, ioe);
      }
    } else if (wi == null) {
      // see returnReader(): never pool a null entry, and don't close on a possible double return
      logger.warn("Ignoring returned index-writer that is not allocated from this cache: " + writer);
    } else {
      freeWriters.push(wi);
      if (logger.isDebugEnabled()) logger.debug("Returned index-writer: " + writer);
    }
  }

  /**
   * Notify the cache that the index has been modified. All newly returned indexers will be
   * appropriately refreshed.
   *
   * @param writer the writer that made the modification; it is the only indexer that is not
   *               marked for refresh
   */
  public synchronized void indexModified(IndexWriter writer) {
    for (RefreshableObject ro : freeReaders) ro.needsRefresh = true;
    for (RefreshableObject ro : freeWriters) ro.needsRefresh = true;
    for (RefreshableObject ro : allocdReaders.values()) ro.needsRefresh = true;
    for (WriterInfo wi : allocdWriters.values()) {
      if (wi.writer != writer) wi.needsRefresh = true;
    }

    if (logger.isDebugEnabled()) logger.debug("All indexers marked for refresh");
  }

  /**
   * @return the directory being used for the indexes
   */
  public String getDirectory() {
    return luceneIndexDirectory.toString();
  }

  /**
   * Remove all index files from the current initialised directory. WARNING : All
   * files are removed in the specified directory. This is probably only useful for
   * testing. See {@link FullTextStringIndex#removeAll} for an alternate solution.
   *
   * @return return true if successful at removing all index files
   * @throws IOException if an exception occurs while attempting to delete the files
   */
  public synchronized boolean removeAllIndexes() throws IOException {
    if (allocdWriters.size() > 0 || allocdReaders.size() > 0) {
      logger.warn("Attempting to remove all indexes while readers or writers are still active");
    }

    if (logger.isDebugEnabled()) {
      logger.debug("Removing all indexes from " + luceneIndexDirectory);
    }

    for (String file : luceneIndexDirectory.listAll()) {
      delete(luceneIndexDirectory, file);
    }

    return luceneIndexDirectory.getDirectory().delete();
  }

  /**
   * Close this cache. All pooled index readers/writers are closed; readers/writers that are still
   * in use will be closed upon being returned.
   */
  public synchronized void close() {
    if (allocdWriters.size() > 0 || allocdReaders.size() > 0) {
      logger.warn("Attempting to close indexer-cache while readers or writers are still active");
    }

    closed = true;

    for (ReaderInfo ri : freeReaders) {
      try {
        forceClose(ri.reader);
      } catch (IOException ioe) {
        logger.error("Error closing index-reader: " + ri.reader, ioe);
      }
    }
    freeReaders.clear();    // Let them be GC'ed. This is important for mapped files.

    for (WriterInfo wi : freeWriters) {
      try {
        wi.writer.close();
      } catch (IOException ioe) {
        logger.error("Error closing index-writer: " + wi.writer, ioe);
      }
    }
    freeWriters.clear();    // Let them be GC'ed. This is important for mapped files.

    if (logger.isDebugEnabled()) logger.debug("IndexerCacher closed: " + luceneIndexDirectory);
  }

  /** Base for pool entries; carries the flag that marks an entry as stale. */
  private static abstract class RefreshableObject {
    /** Set by {@link LuceneIndexerCache#indexModified}; checked when the entry is handed out. */
    public boolean needsRefresh = false;
  }

  /** Pool entry for an index-reader. */
  private static class ReaderInfo extends RefreshableObject {
    public final IndexReader reader;

    /** Open a new read-only reader on the given directory. */
    public ReaderInfo(Directory directory) throws IOException {
      reader = IndexReader.open(directory, true);
    }

    /** Wrap an already opened reader. */
    public ReaderInfo(IndexReader reader) {
      this.reader = reader;
    }
  }

  /** Pool entry for an index-writer. */
  private static class WriterInfo extends RefreshableObject {
    public final IndexWriter writer;

    /** Open a new writer on the given directory, using a standard analyzer. */
    public WriterInfo(Directory directory) throws IOException {
      Version v = LuceneResolver.LUCENE_VERSION;
      writer = new IndexWriter(directory, new IndexWriterConfig(v, new StandardAnalyzer(v)));
    }
  }

  /** A minimal LIFO stack on top of an ArrayList whose pop() signals "empty" with null. */
  private static class Stack<T> extends ArrayList<T> {
    /** Serialization ID */
    private static final long serialVersionUID = -8597253123267228667L;

    /** Put an element on top of the stack. */
    public void push(T obj) {
      add(obj);
    }

    /** @return the most recently pushed element, or null if the stack is empty */
    public T pop() {
      return size() > 0 ? remove(size() - 1) : null;
    }
  }

  /**
   * Attempt to force the deletion of a file. In the case of memory-mapped files, this might take a
   * few tries because mapped byte buffers aren't unmapped until they're garbage collected.
   *
   * @param dir  the directory containing the file
   * @param file the name of the file to delete
   * @throws IOException if the file still could not be deleted after {@link #MAX_RETRIES} retries
   */
  private static void delete(FSDirectory dir, String file) throws IOException {
    for (int retriesLeft = MAX_RETRIES; ; retriesLeft--) {
      try {
        dir.deleteFile(file);
        return;
      } catch (IOException e) {
        if (retriesLeft == 0) {
          logger.warn("Couldn't delete file '" + file + "' from directory " + dir.getDirectory() +
                      " after " + MAX_RETRIES + " retries", e);
          throw e;
        }
        // give the system a chance to release mappings before trying again
        MappingUtil.systemCleanup();
      }
    }
  }

  /**
   * Force the reader closed by cleaning up outstanding references. The reader is closed even if
   * dropping the extra references fails.
   *
   * @param reader the reader to close
   * @throws IOException if releasing a reference or closing the reader fails
   */
  private static void forceClose(IndexReader reader) throws IOException {
    try {
      if (reader.getRefCount() > 1) {
        // This likely indicates a FullTextStringIndexTuples that was not properly closed.
        // Closing it now is likely to break any existing references to it.
        logger.warn("Forcing close of a reader that was returned to the cache with active references: " + System.identityHashCode(reader));
        while (reader.getRefCount() > 1) {
          reader.decRef();
        }
      }
    } catch (IOException e) {
      logger.error("Can't decrement reference count to abandoned reader", e);
      throw e;
    } finally {
      reader.close();
    }
  }
}