LuceneIndexService.java example

Explorer
neo4j-components-svn-master
/*
 * Copyright (c) 2002-2009 "Neo Technology,"
 *     Network Engine for Objects in Lund AB [http://neotechnology.com]
 *
 * This file is part of Neo4j.
 * 
 * Neo4j is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.neo4j.index.lucene;

import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.transaction.Synchronization;
import javax.transaction.SystemException;
import javax.transaction.Transaction;
import javax.transaction.TransactionManager;
import javax.transaction.xa.XAResource;

import org.apache.lucene.Hits;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.NotInTransactionException;
import org.neo4j.helpers.collection.CombiningIterator;
import org.neo4j.helpers.collection.IteratorUtil;
import org.neo4j.index.IndexHits;
import org.neo4j.index.IndexService;
import org.neo4j.index.impl.GenericIndexService;
import org.neo4j.index.impl.IdToNodeIterator;
import org.neo4j.index.impl.SimpleIndexHits;
import org.neo4j.kernel.AbstractGraphDatabase;
import org.neo4j.kernel.Config;
import org.neo4j.kernel.impl.cache.LruCache;
import org.neo4j.kernel.impl.transaction.LockManager;
import org.neo4j.kernel.impl.transaction.TxModule;
import org.neo4j.kernel.impl.util.ArrayMap;

/**
 * An implementation of {@link IndexService} which uses Lucene as backend.
 * Additional features to {@link IndexService} is:
 * <ul>
 * <li>{@link #enableCache(String, int)} will enable a LRU cache for the
 * specific key and will boost performance in performance-critical areas.</li>
 * <li>{@link #getNodes(String, Object, Sort)} where you can pass in a
 * {@link Sort} option to control in which order Lucene returns the results</li>
 * <li>{@link #setLazySearchResultThreshold(int)} will control the threshold for
 * when a search result is considered big enough to be returned as a lazy
 * iteration, making {@link #getNodes(String, Object)} return very fast, but
 * skips caching</li>
 * </ul>
 * 
 * See more information at
 * http://wiki.neo4j.org/content/Indexing_with_IndexService
 */
public class LuceneIndexService extends GenericIndexService
{
    /**
     * The default value for {@link #getLazySearchResultThreshold()}
     */
    public static final int DEFAULT_LAZY_SEARCH_RESULT_THRESHOLD = 100;

    protected static final String DOC_ID_KEY = "id";
    protected static final String DOC_INDEX_KEY = "index";
    protected static final String DIR_NAME = "lucene";

    private final TransactionManager txManager;
    private final ConnectionBroker broker;
    private final LuceneDataSource xaDs;

    private int lazynessThreshold = DEFAULT_LAZY_SEARCH_RESULT_THRESHOLD;

    /**
     * @param graphDb the {@link GraphDatabaseService} to use.
     */
    public LuceneIndexService( GraphDatabaseService graphDb )
    {
        super( graphDb );
        Config config = ((AbstractGraphDatabase) graphDb).getConfig();
        String luceneDirectory = config.getTxModule().getTxLogDirectory()
                                 + "/" + getDirName();
        TxModule txModule = config.getTxModule();
        txManager = txModule.getTxManager();
        byte resourceId[] = getXaResourceId();
        Map<Object, Object> params = new HashMap<Object, Object>(
                config.getParams() );
        params.put( LuceneIndexService.class, this );
        params.put( "dir", luceneDirectory );
        params.put( LockManager.class,
                config.getLockManager() );
        xaDs = (LuceneDataSource) txModule.registerDataSource( getDirName(),
                getDataSourceClass().getName(), resourceId, params, true );
        broker = new ConnectionBroker( txManager, xaDs );
        xaDs.setIndexService( this );
    }

    protected Class<? extends LuceneDataSource> getDataSourceClass()
    {
        return LuceneDataSource.class;
    }

    protected String getDirName()
    {
        return DIR_NAME;
    }

    protected byte[] getXaResourceId()
    {
        return "162373".getBytes();
    }

    /**
     * Enables an LRU cache for a specific index (specified by {@code key}) so
     * that the {@code maxNumberOfCachedEntries} number of results found with
     * {@link #getNodes(String, Object)} are cached for faster consecutive
     * lookups. It's preferred to enable cache at construction time.
     * 
     * @param key the index to enable cache for.
     * @param maxNumberOfCachedEntries the max size of the cache before old ones
     *            are flushed from the cache.
     */
    public void enableCache( String key, int maxNumberOfCachedEntries )
    {
        xaDs.enableCache( key, maxNumberOfCachedEntries );
    }
    
    /**
     * Returns the enabled LRU cache size for {@code key}. Cache is enabled
     * using {@link #enableCache(String, int)}. If cache hasn't been enabled
     * for {@code key} then {@code null} is returned.
     * 
     * @param key the key to get the enabled cache size for.
     * @return the max cache size for {@code key} or {@code null} if not
     * enabled for that key.
     */
    public Integer getEnabledCacheSize( String key )
    {
        return xaDs.getEnabledCacheSize( key );
    }

    /**
     * Sets the threshold for when a result is considered big enough to skip
     * cache and be returned as a fully lazy iterator so that
     * {@link #getNodes(String, Object)} will return very fast and all the
     * reading and fetching of nodes is done lazily before each step in the
     * iteration of the returned result. The default value is
     * {@link #DEFAULT_LAZY_SEARCH_RESULT_THRESHOLD}.
     * 
     * @param numberOfHitsBeforeLazyLoading the threshold where results which
     *            are bigger than that threshold becomes lazy.
     */
    public void setLazySearchResultThreshold( int numberOfHitsBeforeLazyLoading )
    {
        this.lazynessThreshold = numberOfHitsBeforeLazyLoading;
        xaDs.invalidateCache();
    }

    /**
     * Returns the threshold for when a result is considered big enough to skip
     * cache and be returned as a fully lazy iterator so that
     * {@link #getNodes(String, Object)} will return very fast and all the
     * reading and fetching of nodes is done lazily before each step in the
     * iteration of the returned result. The default value is
     * {@link #DEFAULT_LAZY_SEARCH_RESULT_THRESHOLD}.
     * 
     * @return the threshold for when a result is considered big enough to be
     *         returned as a lazy iteration.
     */
    public int getLazySearchResultThreshold()
    {
        return this.lazynessThreshold;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Note that this implementation will cast objects given as the value to
     * {@link java.lang.String}.
     */
    @Override
    public void index( Node node, String key, Object value )
    {
        super.index( node, key, value );
    }

    @Override
    protected void indexThisTx( Node node, String key, Object value )
    {
        assertArgumentNotNull( node, "node" );
        assertArgumentNotNull( key, "key" );
        assertArgumentNotNull( value, "value" );
        for ( Object arrayItem : asArray( value ) )
        {
            getConnection().index( node, key, arrayItem );
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Note that this implementation will cast objects given as the value to
     * {@link java.lang.String}.
     */
    public IndexHits<Node> getNodes( String key, Object value )
    {
        return getNodes( key, value, null );
    }
    
    /**
     * Returns hits from the index (see {@link #getNodes(String, Object)}).
     * The result is sorted using {@code sortingOrNull}.
     * 
     * @param key the index to search in.
     * @param value the value to match hits for.
     * @param sortingOrNull how the result should be sorted.
     * @return the (sorted) results from this index lookup.
     */
    public IndexHits<Node> getNodes( String key, Object value, Sort sortingOrNull )
    {
        return getNodes( key, value, null, sortingOrNull );
    }
    
    /**
     * A method for calling {@link #getNodes(String, Object)} using exact
     * matching. For this class it's equivalent to calling
     * {@link #getNodes(String, Object)}, but for subclasses, such as
     * {@link LuceneFulltextIndexService} it is useful for it to be able to
     * do queries with exact matching, even though it's a fulltext index.
     * @param key the index to search in.
     * @param value the value to match hits for.
     * @return nodes that have been indexed with key and value
     */
    public IndexHits<Node> getNodesExactMatch( String key, Object value )
    {
        return getNodes( key, value, null );
    }
    
    /**
     * Just like {@link #getNodes(String, Object)}, but with sorted result.
     * 
     * @param key the index to query.
     * @param value the value to query for.
     * @param sortingOrNull lucene sorting behaviour for the result. Ignored if
     *            {@code null}.
     * @return nodes that has been indexed with key and value, optionally sorted
     *         with {@code sortingOrNull}.
     */
    protected IndexHits<Node> getNodes( String key, Object value, Object matching,
            Sort sortingOrNull )
    {
        List<Long> nodeIds = new ArrayList<Long>();
        LuceneXaConnection con = getReadOnlyConnection();
        LuceneTransaction luceneTx = null;
        if ( con != null )
        {
            luceneTx = getReadOnlyConnection().getLuceneTx();
        }
        Set<Long> addedNodes = Collections.emptySet();
        Set<Long> deletedNodes = Collections.emptySet();
        boolean deleted = false;
        if ( luceneTx != null && luceneTx.hasModifications( key ) )
        {
            addedNodes = luceneTx.getNodesFor( key, value, matching );
            nodeIds.addAll( addedNodes );
            deletedNodes = luceneTx.getDeletedNodesFor( key, value, matching );
            deleted = luceneTx.getIndexDeleted( key );
        }
        xaDs.getReadLock();
        Iterator<Long> nodeIdIterator = null;
        Integer nodeIdIteratorSize = null;
        IndexSearcherRef searcher = null;
        boolean isLazy = false;
        try
        {
            searcher = xaDs.getIndexSearcher( key );
            if ( searcher != null && !deleted )
            {
                LruCache<String, Collection<Long>> cachedNodesMap = xaDs.getFromCache( key );
                String valueAsString = value.toString();
                boolean foundInCache = fillFromCache( cachedNodesMap, nodeIds,
                        key, valueAsString, deletedNodes );
                if ( !foundInCache )
                {
                    DocToIdIterator searchedNodeIds = searchForNodes( searcher,
                            key, value, matching, sortingOrNull, deletedNodes );
                    if ( searchedNodeIds.size() >= this.lazynessThreshold )
                    {
                        // Instantiate a lazy iterator
                        isLazy = true;
                        if ( cachedNodesMap != null )
                        {
                            cachedNodesMap.remove( valueAsString );
                        }

                        Collection<Iterator<Long>> iterators = new ArrayList<Iterator<Long>>();
                        iterators.add( nodeIds.iterator() );
                        iterators.add( searchedNodeIds );
                        nodeIdIterator = new CombiningIterator<Long>( iterators );
                        nodeIdIteratorSize = nodeIds.size() + searchedNodeIds.size();
                    }
                    else
                    {
                        // Loop through result here (and cache it if possible)
                        readNodesFromHits( searchedNodeIds, nodeIds,
                                cachedNodesMap, valueAsString );
                    }
                }
            }
        }
        finally
        {
            // The DocToIdIterator closes the IndexSearchRef instance anyways,
            // or the LazyIterator if it's a lazy one. So no need here.
            xaDs.releaseReadLock();
        }

        if ( nodeIdIterator == null )
        {
            nodeIdIterator = nodeIds.iterator();
            nodeIdIteratorSize = nodeIds.size();
        }

        IndexHits<Node> hits = new SimpleIndexHits<Node>( IteratorUtil.asIterable(
                instantiateIdToNodeIterator( nodeIdIterator ) ), nodeIdIteratorSize );
        if ( isLazy )
        {
            hits = new LazyIndexHits<Node>( hits, searcher );
        }
        return hits;
    }

    private void readNodesFromHits( DocToIdIterator searchedNodeIds,
            Collection<Long> nodeIds,
            LruCache<String, Collection<Long>> cachedNodesMap,
            String valueAsString )
    {
        ArrayList<Long> readNodeIds = new ArrayList<Long>();
        while ( searchedNodeIds.hasNext() )
        {
            Long readNodeId = searchedNodeIds.next();
            nodeIds.add( readNodeId );
            readNodeIds.add( readNodeId );
        }
        if ( cachedNodesMap != null )
        {
            cachedNodesMap.put( valueAsString, readNodeIds );
        }
    }

    private boolean fillFromCache(
            LruCache<String, Collection<Long>> cachedNodesMap,
            List<Long> nodeIds, String key, String valueAsString,
            Set<Long> deletedNodes )
    {
        boolean found = false;
        if ( cachedNodesMap != null )
        {
            Collection<Long> cachedNodes = cachedNodesMap.get( valueAsString );
            if ( cachedNodes != null )
            {
                found = true;
                for ( Long cachedNodeId : cachedNodes )
                {
                    if ( deletedNodes == null ||
                            !deletedNodes.contains( cachedNodeId ) )
                    {
                        nodeIds.add( cachedNodeId );
                    }
                }
            }
        }
        return found;
    }

    protected Iterator<Node> instantiateIdToNodeIterator(
            final Iterator<Long> ids )
    {
        return new IdToNodeIterator( ids, getGraphDb() );
    }

    /**
     * 
     * @param key the key
     * @param value the value
     * @param matching an object describing what kind of matching to do.
     * The type this object is is solely up to the implementation.
     * @return the {@link Query} formed from key/value.
     */
    protected Query formQuery( String key, Object value, Object matching )
    {
        return new TermQuery( new Term( DOC_INDEX_KEY, value.toString() ) );
    }

    /**
     * Returns a lazy iterator with the node ids.
     */
    private DocToIdIterator searchForNodes( IndexSearcherRef searcher,
            String key, Object value, Object matching, Sort sortingOrNull, Set<Long> deletedNodes )
    {
        Query query = formQuery( key, value, matching );
        try
        {
            searcher.incRef();
            Hits hits = new Hits( searcher.getSearcher(), query, null, sortingOrNull );
            return new DocToIdIterator( new HitsIterator( hits ), deletedNodes,
                    searcher );
        }
        catch ( IOException e )
        {
            throw new RuntimeException( "Unable to search for " + key + ","
                                        + value, e );
        }
    }
    
    /**
     * A method for calling {@link #getSingleNode(String, Object)} using exact
     * matching. For this class it's equivalent to calling
     * {@link #getSingleNode(String, Object)}, but for subclasses, such as
     * {@link LuceneFulltextIndexService} it is useful for it to be able to
     * do queries with exact matching, even though it's a fulltext index.
     * @param key the index to search in.
     * @param value the value to match hits for.
     * @return the single node for the query, or {@code null} if no hit found.
     * If more than one hit was found a {@link RuntimeException} is thrown.
     */
    public Node getSingleNodeExactMatch( String key, Object value )
    {
        return getSingleNode( key, value, null );
    }
    
    public Node getSingleNode( String key, Object value )
    {
        return getSingleNode( key, value, null );
    }

    protected Node getSingleNode( String key, Object value, Object matching )
    {
        IndexHits<Node> hits = null;
        try
        {
            hits = getNodes( key, value, matching, null );
            Iterator<Node> nodes = hits.iterator();
            Node node = nodes.hasNext() ? nodes.next() : null;
            if ( nodes.hasNext() )
            {
                throw new RuntimeException( "More than one node for " + key
                                            + "=" + value );
            }
            return node;
        }
        finally
        {
            if ( hits != null )
            {
                hits.close();
            }
        }
    }

    @Override
    protected void removeIndexThisTx( Node node, String key, Object value )
    {
        assertArgumentNotNull( node, "node" );
        assertArgumentNotNull( key, "key" );
        assertArgumentNotNull( value, "value" );
        for ( Object arrayItem : asArray( value ) )
        {
            getConnection().removeIndex( node, key, arrayItem );
        }
    }

    private Object[] asArray( Object propertyValue )
    {
        if ( propertyValue.getClass().isArray() )
        {
            int length = Array.getLength( propertyValue );
            Object[] result = new Object[ length ];
            for ( int i = 0; i < length; i++ )
            {
                result[ i ] = Array.get( propertyValue, i );
            }
            return result;
        }
        else
        {
            return new Object[] { propertyValue };
        }
    }
    
    @Override
    public synchronized void shutdown()
    {
        super.shutdown();
        TxModule txModule = ((AbstractGraphDatabase) getGraphDb()).getConfig().getTxModule();
        if ( txModule.getXaDataSourceManager().hasDataSource( getDirName() ) )
        {
            txModule.getXaDataSourceManager().unregisterDataSource(
                    getDirName() );
        }
        xaDs.close();
    }

    LuceneXaConnection getConnection()
    {
        return broker.acquireResourceConnection();
    }

    LuceneXaConnection getReadOnlyConnection()
    {
        return broker.acquireReadOnlyResourceConnection();
    }
    
    private static class ConnectionBroker
    {
        private final ArrayMap<Transaction, LuceneXaConnection> txConnectionMap = new ArrayMap<Transaction, LuceneXaConnection>(
                5, true, true );
        private final TransactionManager transactionManager;
        private final LuceneDataSource xaDs;

        ConnectionBroker( TransactionManager transactionManager,
                LuceneDataSource xaDs )
        {
            this.transactionManager = transactionManager;
            this.xaDs = xaDs;
        }

        LuceneXaConnection acquireResourceConnection()
        {
            LuceneXaConnection con = null;
            Transaction tx = this.getCurrentTransaction();
            if ( tx == null )
            {
                throw new NotInTransactionException();
            }
            con = txConnectionMap.get( tx );
            if ( con == null )
            {
                try
                {
                    con = (LuceneXaConnection) xaDs.getXaConnection();
                    if ( !tx.enlistResource( con.getXaResource() ) )
                    {
                        throw new RuntimeException( "Unable to enlist '"
                                                    + con.getXaResource()
                                                    + "' in " + tx );
                    }
                    tx.registerSynchronization( new TxCommitHook( tx ) );
                    txConnectionMap.put( tx, con );
                }
                catch ( javax.transaction.RollbackException re )
                {
                    String msg = "The transaction is marked for rollback only.";
                    throw new RuntimeException( msg, re );
                }
                catch ( javax.transaction.SystemException se )
                {
                    String msg = "TM encountered an unexpected error condition.";
                    throw new RuntimeException( msg, se );
                }
            }
            return con;
        }

        LuceneXaConnection acquireReadOnlyResourceConnection()
        {
            Transaction tx = this.getCurrentTransaction();
            return tx != null ? txConnectionMap.get( tx ) : null;
        }
        
        void releaseResourceConnectionsForTransaction( Transaction tx )
                throws NotInTransactionException
        {
            LuceneXaConnection con = txConnectionMap.remove( tx );
            if ( con != null )
            {
                con.destroy();
            }
        }

        void delistResourcesForTransaction() throws NotInTransactionException
        {
            Transaction tx = this.getCurrentTransaction();
            if ( tx == null )
            {
                throw new NotInTransactionException();
            }
            LuceneXaConnection con = txConnectionMap.get( tx );
            if ( con != null )
            {
                try
                {
                    tx.delistResource( con.getXaResource(),
                            XAResource.TMSUCCESS );
                }
                catch ( IllegalStateException e )
                {
                    throw new RuntimeException(
                            "Unable to delist lucene resource from tx", e );
                }
                catch ( SystemException e )
                {
                    throw new RuntimeException(
                            "Unable to delist lucene resource from tx", e );
                }
            }
        }

        private Transaction getCurrentTransaction()
                throws NotInTransactionException
        {
            try
            {
                return transactionManager.getTransaction();
            }
            catch ( SystemException se )
            {
                throw new NotInTransactionException(
                        "Error fetching transaction for current thread", se );
            }
        }

        private class TxCommitHook implements Synchronization
        {
            private final Transaction tx;

            TxCommitHook( Transaction tx )
            {
                this.tx = tx;
            }

            public void afterCompletion( int param )
            {
                releaseResourceConnectionsForTransaction( tx );
            }

            public void beforeCompletion()
            {
                delistResourcesForTransaction();
            }
        }
    }

    public void removeIndex( Node node, String key )
    {
        assertArgumentNotNull( node, "node" );
        assertArgumentNotNull( key, "key" );
        getConnection().removeIndex( node, key, null );
    }

    private void assertArgumentNotNull( Object object, String name )
    {
        if ( object == null )
        {
            throw new IllegalArgumentException( name + " is null" );
        }
    }

    public void removeIndex( String key )
    {
        assertArgumentNotNull( key, "key" );
        getConnection().removeIndex( null, key, null );
    }
}