/*
* JBoss, Home of Professional Open Source
* Copyright 2009 Red Hat Inc. and/or its affiliates and other
* contributors as indicated by the @author tags. All rights reserved.
* See the copyright.txt in the distribution for a full listing of
* individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.infinispan.lucene;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.LockFactory;
import org.infinispan.AdvancedCache;
import org.infinispan.Cache;
import org.infinispan.context.Flag;
import org.infinispan.lucene.locking.BaseLockFactory;
import org.infinispan.lucene.readlocks.DistributedSegmentReadLocker;
import org.infinispan.lucene.readlocks.SegmentReadLocker;
import org.infinispan.util.logging.Log;
import org.infinispan.util.logging.LogFactory;
/**
* An implementation of Lucene's {@link org.apache.lucene.store.Directory} which uses Infinispan to store Lucene indexes.
* As with the RAMDirectory, the data is stored in memory, but this implementation provides some additional flexibility:
* <p><b>Passivation, LRU or LIRS</b> Bigger indexes can be configured to passivate cleverly selected chunks of data to a cache store.
* This can be a local filesystem, a network filesystem, a database or custom cloud stores like S3. See Infinispan's core documentation for a full list of available implementations, or {@link org.infinispan.loaders.CacheStore} to implement more.</p>
* <p><b>Non-volatile memory</b> The contents of the index can be stored in its entirety in such a store, so that on shutdown or crash of the system data is not lost.
* A copy of the index will be copied to the store in sync or async depending on configuration; In case you enable
* Infinispan's clustering even in case of async the segments are always duplicated synchronously to other nodes, so you can
* benefit from good reliability even while choosing the asynchronous mode to write the index to the slowest store implementations.</p>
* <p><b>Real-time change propagation</b> All changes done on a node are propagated at low latency to other nodes of the cluster; this was designed especially for
* interactive usage of Lucene, so that after an IndexWriter commits on one node new IndexReaders opened on any node of the cluster
* will be able to deliver updated search results.</p>
* <p><b>Distributed heap</b> Infinispan acts as a shared heap for the purpose of total memory consumption, so you can avoid hitting the slower disks even
* if the total size of the index can't fit in the memory of a single node: network is faster than disks, especially if the index
* is bigger than the memory available to cache it.</p>
* <p><b>Distributed locking</b>
* As with the default Lucene Directory implementations, a global lock needs to protect the index from having more than one IndexWriter open; in case of a
* replicated or distributed index you need to enable a cluster-wide {@link org.apache.lucene.store.LockFactory}.
* This implementation uses by default {@link org.infinispan.lucene.locking.BaseLockFactory}; in case you want to apply changes during a JTA transaction
* see also {@link org.infinispan.lucene.locking.TransactionalLockFactory}.
* </p>
* <p><b>Combined store patterns</b> It's possible to combine different stores and passivation policies, so that each node shares the index changes
* quickly with other nodes, offloads less frequently used data to a per-node local filesystem, and the cluster also coordinates to keep a safe copy on a shared store.</p>
*
* @since 4.0
* @author Sanne Grinovero
* @author Lukasz Moren
* @see org.apache.lucene.store.Directory
* @see org.apache.lucene.store.LockFactory
* @see org.infinispan.lucene.locking.BaseLockFactory
* @see org.infinispan.lucene.locking.TransactionalLockFactory
*/
@SuppressWarnings("unchecked")
public class InfinispanDirectory extends Directory {
/**
* Used as default chunk size, can be overriden at construction time.
* Each Lucene index segment is splitted into parts with default size defined here
*/
public final static int DEFAULT_BUFFER_SIZE = 16 * 1024;
private static final Log log = LogFactory.getLog(InfinispanDirectory.class);
// own flag required if we are not in this same package what org.apache.lucene.store.Directory,
// access type will be changed in the next Lucene version
volatile boolean isOpen = true;
private final AdvancedCache<FileCacheKey, FileMetadata> metadataCache;
private final AdvancedCache<ChunkCacheKey, Object> chunksCache;
// indexName is required when one common cache is used
private final String indexName;
// chunk size used in this directory, static filed not used as we want to have different chunk
// size per dir
private final int chunkSize;
private final FileListOperations fileOps;
private final SegmentReadLocker readLocks;
/**
* @param metadataCache the cache to be used for all smaller metadata: prefer replication over distribution, avoid eviction
* @param chunksCache the cache to use for the space consuming segments: prefer distribution, enable eviction if needed
* @param indexName the unique index name, useful to store multiple indexes in the same caches
* @param lf the LockFactory to be used by IndexWriters. @see org.infinispan.lucene.locking
* @param chunkSize segments are fragmented in chunkSize bytes; larger values are more efficient for searching but less for distribution and network replication
* @param readLocker @see org.infinispan.lucene.readlocks for some implementations; you might be able to provide more efficient implementations by controlling the IndexReader's lifecycle.
*/
public InfinispanDirectory(Cache<?, ?> metadataCache, Cache<?, ?> chunksCache, String indexName, LockFactory lf, int chunkSize, SegmentReadLocker readLocker) {
checkNotNull(metadataCache, "metadataCache");
checkNotNull(chunksCache, "chunksCache");
checkNotNull(indexName, "indexName");
checkNotNull(lf, "LockFactory");
checkNotNull(readLocker, "SegmentReadLocker");
if (chunkSize <= 0)
throw new IllegalArgumentException("chunkSize must be a positive integer");
this.metadataCache = (AdvancedCache<FileCacheKey, FileMetadata>) metadataCache.getAdvancedCache();
this.chunksCache = (AdvancedCache<ChunkCacheKey, Object>) chunksCache.getAdvancedCache();
this.indexName = indexName;
this.lockFactory = lf;
this.lockFactory.setLockPrefix(this.getLockID());
this.chunkSize = chunkSize;
this.fileOps = new FileListOperations(this.metadataCache, indexName);
this.readLocks = readLocker;
}
public InfinispanDirectory(Cache<?, ?> cache, String indexName, int chunkSize, SegmentReadLocker readLocker) {
this(cache, cache, indexName, makeDefaultLockFactory(cache, indexName), chunkSize, readLocker);
}
/**
* This constructor assumes that three different caches are being used with specialized configurations for each
* cache usage
* @param metadataCache contains the metadata of stored elements
* @param chunksCache cache containing the bulk of the index; this is the larger part of data
* @param distLocksCache cache to store locks; should be replicated and not using a persistent CacheStore
* @param indexName identifies the index; you can store different indexes in the same set of caches using different identifiers
* @param chunkSize the maximum size in bytes for each chunk of data: larger sizes offer better search performance
* but might be problematic to handle during network replication or storage
*/
public InfinispanDirectory(Cache<?, ?> metadataCache, Cache<?, ?> chunksCache, Cache<?, ?> distLocksCache, String indexName, int chunkSize) {
this(metadataCache, chunksCache, indexName, makeDefaultLockFactory(distLocksCache, indexName),
chunkSize, makeDefaultSegmentReadLocker(metadataCache, chunksCache, distLocksCache, indexName));
}
/**
* @param cache the cache to use to store the index
* @param indexName identifies the index; you can store different indexes in the same set of caches using different identifiers
*/
public InfinispanDirectory(Cache<?, ?> cache, String indexName) {
this(cache, cache, cache, indexName, DEFAULT_BUFFER_SIZE);
}
public InfinispanDirectory(Cache<?, ?> cache) {
this(cache, cache, cache, "", DEFAULT_BUFFER_SIZE);
}
/**
* {@inheritDoc}
*/
public String[] list() {
checkIsOpen();
Set<String> filesList = fileOps.getFileList();
String[] array = filesList.toArray(new String[0]);
return array;
}
/**
* {@inheritDoc}
*/
@Override
public boolean fileExists(String name) {
checkIsOpen();
return fileOps.getFileList().contains(name);
}
/**
* {@inheritDoc}
*/
@Override
public long fileModified(String name) {
checkIsOpen();
FileMetadata fileMetadata = fileOps.getFileMetadata(name);
if (fileMetadata == null) {
return 0L;
}
else {
return fileMetadata.getLastModified();
}
}
/**
* {@inheritDoc}
*/
@Override
public void touchFile(String fileName) {
checkIsOpen();
FileMetadata file = fileOps.getFileMetadata(fileName);
if (file == null) {
return;
}
else {
FileCacheKey key = new FileCacheKey(indexName, fileName);
file.touch();
metadataCache.put(key, file);
}
}
/**
* {@inheritDoc}
*/
@Override
public void deleteFile(String name) {
checkIsOpen();
fileOps.deleteFileName(name);
readLocks.deleteOrReleaseReadLock(name);
if (log.isDebugEnabled()) {
log.debugf("Removed file: %s from index: %s", name, indexName);
}
}
/**
* {@inheritDoc}
*/
public void renameFile(String from, String to) {
checkIsOpen();
// preparation: copy all chunks to new keys
int i = -1;
Object ob;
do {
ChunkCacheKey fromChunkKey = new ChunkCacheKey(indexName, from, ++i);
ob = chunksCache.get(fromChunkKey);
if (ob == null) {
break;
}
ChunkCacheKey toChunkKey = new ChunkCacheKey(indexName, to, i);
chunksCache.withFlags(Flag.SKIP_REMOTE_LOOKUP, Flag.SKIP_CACHE_LOAD).put(toChunkKey, ob);
} while (true);
// rename metadata first
boolean batching = metadataCache.startBatch();
FileCacheKey fromKey = new FileCacheKey(indexName, from);
FileMetadata metadata = (FileMetadata) metadataCache.get(fromKey);
metadataCache.put(new FileCacheKey(indexName, to), metadata);
fileOps.removeAndAdd(from, to);
if (batching) metadataCache.endBatch(true);
// now trigger deletion of old file chunks:
readLocks.deleteOrReleaseReadLock(from);
if (log.isTraceEnabled()) {
log.tracef("Renamed file from: %s to: %s in index %s", from, to, indexName);
}
}
/**
* {@inheritDoc}
*/
@Override
public long fileLength(String name) {
checkIsOpen();
FileMetadata fileMetadata = fileOps.getFileMetadata(name);
if (fileMetadata == null) {
return 0L;//as in FSDirectory (RAMDirectory throws an exception instead)
}
else {
return fileMetadata.getSize();
}
}
/**
* {@inheritDoc}
*/
@Override
public IndexOutput createOutput(String name) {
final FileCacheKey key = new FileCacheKey(indexName, name);
// creating new file, metadata is added on flush() or close() of IndexOutPut
return new InfinispanIndexOutput(metadataCache, chunksCache, key, chunkSize, fileOps);
}
/**
* {@inheritDoc}
*/
@Override
public IndexInput openInput(String name) throws IOException {
final FileCacheKey fileKey = new FileCacheKey(indexName, name);
FileMetadata fileMetadata = (FileMetadata) metadataCache.get(fileKey);
if (fileMetadata == null) {
throw new FileNotFoundException("Error loading medatada for index file: " + fileKey);
}
else if (fileMetadata.getSize() <= fileMetadata.getBufferSize()) {
//files smaller than chunkSize don't need a readLock
return new SingleChunkIndexInput(chunksCache, fileKey, fileMetadata);
}
else {
boolean locked = readLocks.acquireReadLock(name);
if (!locked) {
// safest reaction is to tell this file doesn't exist anymore.
throw new FileNotFoundException("Error loading medatada for index file: " + fileKey);
}
return new InfinispanIndexInput(chunksCache, fileKey, fileMetadata, readLocks);
}
}
/**
* {@inheritDoc}
*/
@Override
public void close() {
isOpen = false;
}
private void checkIsOpen() throws AlreadyClosedException {
if (!isOpen) {
throw new AlreadyClosedException("this Directory is closed");
}
}
@Override
public String toString() {
return "InfinispanDirectory{" + "indexName='" + indexName + '\'' + '}';
}
/** new name for list() in Lucene 3.0 **/
@Override
public String[] listAll() {
return list();
}
/**
* @return The value of indexName, same constant as provided to the constructor.
*/
public String getIndexName() {
return indexName;
}
private static LockFactory makeDefaultLockFactory(Cache<?, ?> cache, String indexName) {
checkNotNull(cache, "cache");
checkNotNull(indexName, "indexName");
return new BaseLockFactory(cache, indexName);
}
private static SegmentReadLocker makeDefaultSegmentReadLocker(Cache<?, ?> metadataCache, Cache<?, ?> chunksCache, Cache<?, ?> distLocksCache, String indexName) {
checkNotNull(distLocksCache, "distLocksCache");
checkNotNull(indexName, "indexName");
return new DistributedSegmentReadLocker((Cache<Object, Integer>) distLocksCache, chunksCache, metadataCache, indexName);
}
private static void checkNotNull(Object v, String objectname) {
if (v == null)
throw new IllegalArgumentException(objectname + " must not be null");
}
}