/*
* Copyright (c) 2002-2009 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.index.lucene;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.transaction.Synchronization;
import javax.transaction.SystemException;
import javax.transaction.Transaction;
import javax.transaction.TransactionManager;
import javax.transaction.xa.XAResource;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.neo4j.commons.iterator.CombiningIterator;
import org.neo4j.commons.iterator.IteratorAsIterable;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.NotInTransactionException;
import org.neo4j.index.IndexHits;
import org.neo4j.index.IndexService;
import org.neo4j.index.impl.GenericIndexService;
import org.neo4j.index.impl.IdToNodeIterator;
import org.neo4j.index.impl.SimpleIndexHits;
import org.neo4j.kernel.EmbeddedGraphDatabase;
import org.neo4j.kernel.impl.cache.LruCache;
import org.neo4j.kernel.impl.transaction.LockManager;
import org.neo4j.kernel.impl.transaction.TxModule;
import org.neo4j.kernel.impl.util.ArrayMap;
/**
* An implementation of {@link IndexService} which uses Lucene as backend.
* Additional features to {@link IndexService} is:
* <ul>
* <li>{@link #enableCache(String, int)} will enable a LRU cache for the
* specific key and will boost performance in performance-critical areas.</li>
* <li>{@link #getNodes(String, Object, Sort)} where you can pass in a
* {@link Sort} option to control in which order Lucene returns the results</li>
* <li>{@link #setLazySearchResultThreshold(int)} will control the threshold for
* when a search result is considered big enough to be returned as a lazy
* iteration, making {@link #getNodes(String, Object)} return very fast, but
* skips caching</li>
* </ul>
*
* See more information at
* http://wiki.neo4j.org/content/Indexing_with_IndexService
*/
public class LuceneIndexService extends GenericIndexService
{
/**
* The default value for {@link #getLazySearchResultThreshold()}
*/
public static final int DEFAULT_LAZY_SEARCH_RESULT_THRESHOLD = 100;
protected static final String DOC_ID_KEY = "id";
protected static final String DOC_INDEX_KEY = "index";
protected static final String DIR_NAME = "lucene";
private final TransactionManager txManager;
private final ConnectionBroker broker;
private final LuceneDataSource xaDs;
private int lazynessThreshold = DEFAULT_LAZY_SEARCH_RESULT_THRESHOLD;
/**
* @param graphDb the {@link GraphDatabaseService} to use.
*/
public LuceneIndexService( GraphDatabaseService graphDb )
{
super( graphDb );
EmbeddedGraphDatabase embeddedGraphDb = ( (EmbeddedGraphDatabase) graphDb );
String luceneDirectory = embeddedGraphDb.getConfig().getTxModule().getTxLogDirectory()
+ "/" + getDirName();
TxModule txModule = embeddedGraphDb.getConfig().getTxModule();
txManager = txModule.getTxManager();
byte resourceId[] = getXaResourceId();
Map<Object, Object> params = getDefaultParams();
params.put( "dir", luceneDirectory );
params.put( LockManager.class,
embeddedGraphDb.getConfig().getLockManager() );
xaDs = (LuceneDataSource) txModule.registerDataSource( getDirName(),
getDataSourceClass().getName(), resourceId, params, true );
broker = new ConnectionBroker( txManager, xaDs );
xaDs.setIndexService( this );
}
protected Class<? extends LuceneDataSource> getDataSourceClass()
{
return LuceneDataSource.class;
}
protected String getDirName()
{
return DIR_NAME;
}
protected byte[] getXaResourceId()
{
return "162373".getBytes();
}
private Map<Object, Object> getDefaultParams()
{
Map<Object, Object> params = new HashMap<Object, Object>();
params.put( LuceneIndexService.class, this );
return params;
}
/**
* Enables an LRU cache for a specific index (specified by {@code key}) so
* that the {@code maxNumberOfCachedEntries} number of results found with
* {@link #getNodes(String, Object)} are cached for faster consecutive
* lookups. It's preferred to enable cache at construction time.
*
* @param key the index to enable cache for.
* @param maxNumberOfCachedEntries the max size of the cache before old ones
* are flushed from the cache.
*/
public void enableCache( String key, int maxNumberOfCachedEntries )
{
xaDs.enableCache( key, maxNumberOfCachedEntries );
}
/**
* Sets the threshold for when a result is considered big enough to skip
* cache and be returned as a fully lazy iterator so that
* {@link #getNodes(String, Object)} will return very fast and all the
* reading and fetching of nodes is done lazily before each step in the
* iteration of the returned result. The default value is
* {@link #DEFAULT_LAZY_SEARCH_RESULT_THRESHOLD}.
*
* @param numberOfHitsBeforeLazyLoading the threshold where results which
* are bigger than that threshold becomes lazy.
*/
public void setLazySearchResultThreshold( int numberOfHitsBeforeLazyLoading )
{
this.lazynessThreshold = numberOfHitsBeforeLazyLoading;
xaDs.invalidateCache();
}
/**
* Returns the threshold for when a result is considered big enough to skip
* cache and be returned as a fully lazy iterator so that
* {@link #getNodes(String, Object)} will return very fast and all the
* reading and fetching of nodes is done lazily before each step in the
* iteration of the returned result. The default value is
* {@link #DEFAULT_LAZY_SEARCH_RESULT_THRESHOLD}.
*
* @return the threshold for when a result is considered big enough to be
* returned as a lazy iteration.
*/
public int getLazySearchResultThreshold()
{
return this.lazynessThreshold;
}
/**
* {@inheritDoc}
* <p>
* Note that this implementation will cast objects given as the value to
* {@link java.lang.String}.
*/
@Override
public void index( Node node, String key, Object value )
{
super.index( node, key, value );
}
@Override
protected void indexThisTx( Node node, String key, Object value )
{
getConnection().index( node, key, value );
}
/**
* {@inheritDoc}
* <p>
* Note that this implementation will cast objects given as the value to
* {@link java.lang.String}.
*/
public IndexHits<Node> getNodes( String key, Object value )
{
return getNodes( key, value, null );
}
/**
* Just like {@link #getNodes(String, Object)}, but with sorted result.
*
* @param key the index to query.
* @param value the value to query for.
* @param sortingOrNull lucene sorting behaviour for the result. Ignored if
* {@code null}.
* @return nodes that has been indexed with key and value, optionally sorted
* with {@code sortingOrNull}.
*/
public IndexHits<Node> getNodes( String key, Object value,
Sort sortingOrNull )
{
List<Long> nodeIds = new ArrayList<Long>();
LuceneTransaction luceneTx = getConnection().getLuceneTx();
Set<Long> addedNodes = Collections.emptySet();
Set<Long> deletedNodes = Collections.emptySet();
if ( luceneTx != null )
{
addedNodes = luceneTx.getNodesFor( key, value );
nodeIds.addAll( addedNodes );
deletedNodes = luceneTx.getDeletedNodesFor( key, value );
}
xaDs.getReadLock();
Iterator<Long> nodeIdIterator = null;
Integer nodeIdIteratorSize = null;
IndexSearcherRef searcher = null;
boolean isLazy = false;
try
{
searcher = xaDs.getIndexSearcher( key );
if ( searcher != null )
{
LruCache<String, Collection<Long>> cachedNodesMap = xaDs.getFromCache( key );
String valueAsString = value.toString();
boolean foundInCache = fillFromCache( cachedNodesMap, nodeIds,
key, valueAsString, deletedNodes );
if ( !foundInCache )
{
DocToIdIterator searchedNodeIds = searchForNodes( searcher,
key, value, sortingOrNull, deletedNodes );
if ( searchedNodeIds.size() >= this.lazynessThreshold )
{
// Instantiate a lazy iterator
isLazy = true;
if ( cachedNodesMap != null )
{
cachedNodesMap.remove( valueAsString );
}
Collection<Iterator<Long>> iterators = new ArrayList<Iterator<Long>>();
iterators.add( nodeIds.iterator() );
iterators.add( searchedNodeIds );
nodeIdIterator = new CombiningIterator<Long>( iterators );
nodeIdIteratorSize = nodeIds.size()
+ searchedNodeIds.size();
}
else
{
// Loop through result here (and cache it if possible)
readNodesFromHits( searchedNodeIds, nodeIds,
cachedNodesMap, valueAsString );
}
}
}
}
finally
{
// The DocToIdIterator closes the IndexSearchRef instance anyways,
// or the LazyIterator if it's a lazy one. So no need here.
xaDs.releaseReadLock();
}
if ( nodeIdIterator == null )
{
nodeIdIterator = nodeIds.iterator();
nodeIdIteratorSize = nodeIds.size();
}
IndexHits<Node> hits = new SimpleIndexHits<Node>(
new IteratorAsIterable<Node>(
instantiateIdToNodeIterator( nodeIdIterator ) ),
nodeIdIteratorSize );
if ( isLazy )
{
hits = new LazyIndexHits<Node>( hits, searcher );
}
return hits;
}
private void readNodesFromHits( DocToIdIterator searchedNodeIds,
Collection<Long> nodeIds,
LruCache<String, Collection<Long>> cachedNodesMap,
String valueAsString )
{
ArrayList<Long> readNodeIds = new ArrayList<Long>();
while ( searchedNodeIds.hasNext() )
{
Long readNodeId = searchedNodeIds.next();
nodeIds.add( readNodeId );
readNodeIds.add( readNodeId );
}
if ( cachedNodesMap != null )
{
cachedNodesMap.put( valueAsString, readNodeIds );
}
}
private boolean fillFromCache(
LruCache<String, Collection<Long>> cachedNodesMap,
List<Long> nodeIds, String key, String valueAsString,
Set<Long> deletedNodes )
{
boolean found = false;
if ( cachedNodesMap != null )
{
Collection<Long> cachedNodes = cachedNodesMap.get( valueAsString );
if ( cachedNodes != null )
{
found = true;
for ( Long cachedNodeId : cachedNodes )
{
if ( !deletedNodes.contains( cachedNodeId ) )
{
nodeIds.add( cachedNodeId );
}
}
}
}
return found;
}
protected Iterator<Node> instantiateIdToNodeIterator(
final Iterator<Long> ids )
{
return new IdToNodeIterator( ids, getGraphDb() );
}
protected Query formQuery( String key, Object value )
{
return new TermQuery( new Term( DOC_INDEX_KEY, value.toString() ) );
}
/**
* Returns a lazy iterator with the node ids.
*/
private DocToIdIterator searchForNodes( IndexSearcherRef searcher,
String key, Object value, Sort sortingOrNull, Set<Long> deletedNodes )
{
Query query = formQuery( key, value );
try
{
searcher.incRef();
Hits hits = sortingOrNull != null ? searcher.getSearcher().search(
query, sortingOrNull ) : searcher.getSearcher().search(
query );
return new DocToIdIterator( new HitsIterator( hits ), deletedNodes,
searcher );
}
catch ( IOException e )
{
throw new RuntimeException( "Unable to search for " + key + ","
+ value, e );
}
}
public Node getSingleNode( String key, Object value )
{
IndexHits<Node> hits = null;
try
{
hits = getNodes( key, value );
Iterator<Node> nodes = hits.iterator();
Node node = nodes.hasNext() ? nodes.next() : null;
if ( nodes.hasNext() )
{
throw new RuntimeException( "More than one node for " + key
+ "=" + value );
}
return node;
}
finally
{
if ( hits != null )
{
hits.close();
}
}
}
@Override
protected void removeIndexThisTx( Node node, String key, Object value )
{
if ( value == null )
{
throw new IllegalArgumentException( "Value is null" );
}
getConnection().removeIndex( node, key, value );
}
@Override
public synchronized void shutdown()
{
super.shutdown();
EmbeddedGraphDatabase embeddedGraphDb = ( (EmbeddedGraphDatabase) getGraphDb() );
TxModule txModule = embeddedGraphDb.getConfig().getTxModule();
if ( txModule.getXaDataSourceManager().hasDataSource( getDirName() ) )
{
txModule.getXaDataSourceManager().unregisterDataSource(
getDirName() );
}
xaDs.close();
}
LuceneXaConnection getConnection()
{
return broker.acquireResourceConnection();
}
private static class ConnectionBroker
{
private final ArrayMap<Transaction, LuceneXaConnection> txConnectionMap = new ArrayMap<Transaction, LuceneXaConnection>(
5, true, true );
private final TransactionManager transactionManager;
private final LuceneDataSource xaDs;
ConnectionBroker( TransactionManager transactionManager,
LuceneDataSource xaDs )
{
this.transactionManager = transactionManager;
this.xaDs = xaDs;
}
LuceneXaConnection acquireResourceConnection()
{
LuceneXaConnection con = null;
Transaction tx = this.getCurrentTransaction();
con = txConnectionMap.get( tx );
if ( con == null )
{
try
{
con = (LuceneXaConnection) xaDs.getXaConnection();
if ( !tx.enlistResource( con.getXaResource() ) )
{
throw new RuntimeException( "Unable to enlist '"
+ con.getXaResource()
+ "' in " + tx );
}
tx.registerSynchronization( new TxCommitHook( tx ) );
txConnectionMap.put( tx, con );
}
catch ( javax.transaction.RollbackException re )
{
String msg = "The transaction is marked for rollback only.";
throw new RuntimeException( msg, re );
}
catch ( javax.transaction.SystemException se )
{
String msg = "TM encountered an unexpected error condition.";
throw new RuntimeException( msg, se );
}
}
return con;
}
void releaseResourceConnectionsForTransaction( Transaction tx )
throws NotInTransactionException
{
LuceneXaConnection con = txConnectionMap.remove( tx );
if ( con != null )
{
con.destroy();
}
}
void delistResourcesForTransaction() throws NotInTransactionException
{
Transaction tx = this.getCurrentTransaction();
LuceneXaConnection con = txConnectionMap.get( tx );
if ( con != null )
{
try
{
tx.delistResource( con.getXaResource(),
XAResource.TMSUCCESS );
}
catch ( IllegalStateException e )
{
throw new RuntimeException(
"Unable to delist lucene resource from tx", e );
}
catch ( SystemException e )
{
throw new RuntimeException(
"Unable to delist lucene resource from tx", e );
}
}
}
private Transaction getCurrentTransaction()
throws NotInTransactionException
{
try
{
Transaction tx = transactionManager.getTransaction();
if ( tx == null )
{
throw new NotInTransactionException(
"No transaction found for current thread" );
}
return tx;
}
catch ( SystemException se )
{
throw new NotInTransactionException(
"Error fetching transaction for current thread", se );
}
}
private class TxCommitHook implements Synchronization
{
private final Transaction tx;
TxCommitHook( Transaction tx )
{
this.tx = tx;
}
public void afterCompletion( int param )
{
releaseResourceConnectionsForTransaction( tx );
}
public void beforeCompletion()
{
delistResourcesForTransaction();
}
}
}
}