/*
* Copyright (c) 2002-2009 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.index.lucene;
import java.io.IOException;
import java.io.StringReader;
import java.util.Locale;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.index.IndexHits;
/**
 * A {@link LuceneIndexService} which indexes the values with fulltext indexing.
 * Fulltext means that the indexing process takes the values you throw in and
 * tokenizes those into words so that you can query for those individual words
 * in {@link #getNodes(String, Object)}. Also queries are case-insensitive.
 *
 * It stores more data per Lucene entry to make this possible. This makes it
 * incompatible with {@link LuceneIndexService} so it has got its own XA
 * resource ID. This means that you can have one {@link LuceneIndexService} and
 * one {@link LuceneFulltextIndexService} for a {@link GraphDatabaseService}.
 *
 * See more information at
 * http://wiki.neo4j.org/content/Indexing_with_IndexService#Fulltext_indexing
 */
public class LuceneFulltextIndexService extends LuceneIndexService
{
    /** Name of the Lucene field which is queried for exact matches. */
    protected static final String DOC_INDEX_SOURCE_KEY = "index_source";

    /** Suffix appended to the directory name of the super class. */
    protected static final String FULLTEXT_DIR_NAME_POSTFIX = "-fulltext";

    /**
     * @param graphDb the {@link GraphDatabaseService} to use.
     */
    public LuceneFulltextIndexService( GraphDatabaseService graphDb )
    {
        super( graphDb );
    }

    /**
     * @return the data source class used by this index service, i.e.
     * {@link LuceneFulltextDataSource}.
     */
    @Override
    protected Class<? extends LuceneDataSource> getDataSourceClass()
    {
        return LuceneFulltextDataSource.class;
    }

    /**
     * @return the directory name of the super class with
     * {@link #FULLTEXT_DIR_NAME_POSTFIX} appended.
     */
    @Override
    protected String getDirName()
    {
        return super.getDirName() + FULLTEXT_DIR_NAME_POSTFIX;
    }

    /**
     * @return the XA resource ID of this index service, as the bytes of the
     * string "262374".
     */
    @Override
    protected byte[] getXaResourceId()
    {
        // NOTE(review): getBytes() uses the platform default charset. The ID
        // consists of ASCII digits only, so the bytes are the same on every
        // ASCII-compatible platform; the call is deliberately left untouched
        // so that the ID of existing stores cannot change.
        return "262374".getBytes();
    }

    /**
     * Since this is a "fulltext" index it changes the contract of this method
     * slightly. It treats the {@code value} more like a query in that you can
     * query for individual words in your indexed values.
     *
     * So if you've indexed node (1) with value "Andy Wachowski" and node (2)
     * with "Larry Wachowski" you can expect this behaviour if you query for:
     *
     * <ul>
     * <li>"andy" --&gt; (1)</li>
     * <li>"Andy" --&gt; (1)</li>
     * <li>"wachowski" --&gt; (1), (2)</li>
     * <li>"andy larry" --&gt;</li>
     * <li>"larry Wachowski" --&gt; (2)</li>
     * <li>"wachowski Andy" --&gt; (1)</li>
     * </ul>
     */
    @Override
    public IndexHits<Node> getNodes( String key, Object value )
    {
        return super.getNodes( key, value );
    }

    /**
     * Does a {@link #getNodes(String, Object)} using exact matching, so that
     * it for this call behaves like {@link LuceneIndexService}.
     * @param key the key.
     * @param value the query.
     * @return the result of the query.
     */
    @Override
    public IndexHits<Node> getNodesExactMatch( String key, Object value )
    {
        return getNodes( key, value, MatchingType.EXACT, null );
    }

    /**
     * Looks up a single node using exact matching, see
     * {@link #getNodesExactMatch(String, Object)}.
     * @param key the key.
     * @param value the query.
     * @return the result of the lookup.
     */
    @Override
    public Node getSingleNodeExactMatch( String key, Object value )
    {
        return getSingleNode( key, value, MatchingType.EXACT );
    }

    /**
     * Builds the Lucene query for a lookup.
     *
     * For {@link MatchingType#EXACT} the result is a single {@link TermQuery}
     * for {@code value.toString()} on the {@link #DOC_INDEX_SOURCE_KEY} field.
     * For any other {@code matching} the value is lower-cased and split into
     * tokens by {@link LuceneFulltextDataSource#LOWER_CASE_WHITESPACE_ANALYZER}
     * and the result is a {@link BooleanQuery} in which every token must occur
     * in the {@code DOC_INDEX_KEY} field. A value without tokens results in a
     * {@link BooleanQuery} without clauses.
     *
     * @param key the index key.
     * @param value the value (or query) to look for.
     * @param matching a {@link MatchingType}, where anything but
     * {@link MatchingType#EXACT} means fulltext matching.
     * @return the query to execute.
     * @throws RuntimeException wrapping an {@link IOException} if the
     * tokenization fails.
     */
    @Override
    protected Query formQuery( String key, Object value, Object matching )
    {
        if ( matching == MatchingType.EXACT )
        {
            return new TermQuery( new Term( DOC_INDEX_SOURCE_KEY, value.toString() ) );
        }

        // Lower-case with an explicit locale: the no-arg toLowerCase() follows
        // the JVM default locale, so with e.g. a Turkish locale "I" would turn
        // into a dotless "ı" and the same query would produce different terms
        // on different machines.
        String loweredValue = value.toString().toLowerCase( Locale.ENGLISH );
        BooleanQuery booleanQuery = new BooleanQuery();
        try
        {
            TokenStream stream = LuceneFulltextDataSource.LOWER_CASE_WHITESPACE_ANALYZER.tokenStream(
                    DOC_INDEX_KEY, new StringReader( loweredValue ) );
            try
            {
                while ( stream.incrementToken() )
                {
                    String term = stream.getAttribute( TermAttribute.class ).term();
                    booleanQuery.add( new TermQuery( new Term( DOC_INDEX_KEY, term ) ), Occur.MUST );
                }
            }
            finally
            {
                // Release the stream (and the reader it wraps) when done.
                stream.close();
            }
        }
        catch ( IOException e )
        {
            throw new RuntimeException( e );
        }
        return booleanQuery;
    }

    /**
     * Caching isn't supported by the fulltext index service.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public void enableCache( String key, int maxNumberOfCachedEntries )
    {
        // Not supported for now, or is it just not feasible?
        throw new UnsupportedOperationException();
    }

    /**
     * How a value is matched against the index in
     * {@link #formQuery(String, Object, Object)}.
     */
    enum MatchingType
    {
        /** Fulltext matching on the individual tokens of the value. */
        DEFAULT,
        /** Matching on the value as a whole. */
        EXACT
    }
}