package prefuse.data.search;
import java.io.IOException;
import java.util.HashMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
/**
 * Adapter class for interfacing with the Lucene search engine. By default,
 * instances of this class use an in-memory search index for English language
 * text, for use within a single application session. The class can, however,
 * be parameterized for any number of other configurations, including accessing
 * persistent search indices.
 *
 * <p>A LuceneSearcher is always in exactly one of two modes: <em>read mode</em>,
 * in which searches can be issued, or <em>write mode</em>, in which new
 * Documents can be indexed. Use {@link #setReadMode(boolean)} to switch.
 * This class performs no synchronization of its own.</p>
 *
 * @version 1.0
 * @author <a href="http://jheer.org">jeffrey heer</a>
 */
public class LuceneSearcher {

    /** Default Document field used to index text. */
    public static final String FIELD = "prefuse-text";
    /** Document field used to store the document ID number. */
    public static final String ID = "prefuse-id";

    private Directory directory;
    private Analyzer analyzer;
    private String[] fields;

    /** Open only while in read mode; null otherwise. */
    private Searcher searcher;
    /** Open only while in read mode; null otherwise. */
    private IndexReader reader;
    /** Open only while in write mode; null otherwise. */
    private IndexWriter writer;

    private boolean m_readMode = true;
    private boolean m_readOnly = false;

    /** Maps query Strings to Integer hit counts; see {@link #numHits(String)}. */
    private HashMap m_hitCountCache;

    /**
     * Create a new LuceneSearcher using an in-memory search index.
     */
    public LuceneSearcher() {
        this(new RAMDirectory(), FIELD, false);
    }

    /**
     * Create a new LuceneSearcher using the specified search index location.
     * @param dir the Lucene Directory indicating the search index to use.
     */
    public LuceneSearcher(Directory dir) {
        this(dir, FIELD, false);
    }

    /**
     * Create a new LuceneSearcher using a specified search index location,
     * a particular Document field to index, and given read/write status.
     * @param dir the Lucene Directory indicating the search index to use.
     * @param field the Lucene Document field that should be indexed.
     * @param readOnly if this index is read-only or is writable.
     */
    public LuceneSearcher(Directory dir, String field, boolean readOnly) {
        this(dir, new String[]{field}, readOnly);
    }

    /**
     * Create a new LuceneSearcher using a specified search index location,
     * particular Document fields to index, and given read/write status.
     * A writable searcher starts out in write mode; a read-only searcher
     * starts out in read mode. I/O errors encountered while opening the
     * index are printed to standard error rather than thrown.
     * @param dir the Lucene Directory indicating the search index to use.
     * @param fields the Lucene Document fields that should be indexed.
     * The array is copied.
     * @param readOnly if this index is read-only or is writable.
     */
    public LuceneSearcher(Directory dir, String[] fields, boolean readOnly) {
        m_hitCountCache = new HashMap();
        directory = dir;
        analyzer = new StandardAnalyzer();
        this.fields = (String[])fields.clone();

        // Open and immediately close a writer so that the index files exist
        // before any reader is opened. The writer's "create" flag is
        // !readOnly, i.e. a writable searcher is opened with create == true.
        // A local is used so that a failed close() can not leave a stale
        // writer in the instance field.
        try {
            IndexWriter init = new IndexWriter(directory, analyzer, !readOnly);
            init.close();
        } catch (IOException e1) {
            e1.printStackTrace();
        }

        m_readOnly = readOnly;
        if ( !readOnly ) {
            setReadMode(false);
        } else {
            // pretend to be in write mode so that setReadMode(true)
            // actually opens the reader and searcher
            m_readMode = false;
            setReadMode(true);
        }
    }

    // ------------------------------------------------------------------------

    /**
     * Sets if this LuceneSearcher is in read mode or write mode. In read mode
     * searches can be issued, in write mode new Documents can be indexed.
     * Read-only LuceneSearcher instances can not be put into write mode.
     * I/O errors are printed to standard error and reported through the
     * return value.
     * @param mode true for read mode, false for write mode.
     * @return true if the mode was successfully set, false otherwise.
     */
    public boolean setReadMode(boolean mode) {
        // return false if this is read-only
        if ( m_readOnly && !mode ) return false;
        // do nothing if already in the mode
        if ( m_readMode == mode ) return true;

        // otherwise switch modes
        boolean switched = mode ? openForReading() : openForWriting();
        if ( switched ) {
            m_readMode = mode;
        }
        return switched;
    }

    /**
     * Closes any open searcher and reader, then opens an index writer.
     * @return true on success, false if an error occurred
     */
    private boolean openForWriting() {
        // Close the searcher and the reader independently, so that a failure
        // closing one does not leak the other, and drop both references so
        // that closed instances are never handed out by the getters.
        boolean closed = true;
        try {
            if ( searcher != null ) searcher.close();
        } catch ( Exception e ) {
            e.printStackTrace();
            closed = false;
        } finally {
            searcher = null;
        }
        try {
            if ( reader != null ) reader.close();
        } catch ( Exception e ) {
            e.printStackTrace();
            closed = false;
        } finally {
            reader = null;
        }
        if ( !closed ) return false;

        // open the writer
        try {
            writer = new IndexWriter(directory, analyzer, false);
        } catch (IOException e1) {
            e1.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Optimizes and closes any open writer, then opens an index reader and
     * searcher.
     * @return true on success, false if an error occurred
     */
    private boolean openForReading() {
        // optimize index and close writer; on failure the writer reference
        // is kept, as the searcher remains in write mode
        try {
            if ( writer != null ) {
                writer.optimize();
                writer.close();
                writer = null;
            }
        } catch (IOException e1) {
            e1.printStackTrace();
            return false;
        }

        // open the reader and searcher
        try {
            reader = IndexReader.open(directory);
            searcher = new IndexSearcher(reader);
        } catch ( Exception e ) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Searches the Lucene index using the given query String, returns an object
     * which provides access to the search results. The query is parsed
     * against the current fields using the current analyzer.
     * @param query the search query
     * @return the search Hits
     * @throws ParseException if the query is not parsed successfully
     * @throws IOException if an input/output error occurs
     * @throws IllegalStateException if the searcher is in write mode, or if
     * no index searcher could be opened
     */
    public Hits search(String query) throws ParseException, IOException {
        if ( !m_readMode ) {
            throw new IllegalStateException(
                    "Searches can only be performed when " +
                    "the LuceneSearcher is in read mode");
        }
        if ( searcher == null ) {
            // an earlier attempt to open the index for reading failed
            throw new IllegalStateException(
                    "No index searcher is available; " +
                    "the search index could not be opened for reading");
        }

        Query q;
        if ( fields.length == 1 ) {
            q = QueryParser.parse(query, fields[0], analyzer);
        } else {
            q = MultiFieldQueryParser.parse(query, fields, analyzer);
        }
        return searcher.search(q);
    }

    /**
     * Return the result count for the given search query. To allow quick
     * repeated look ups, the hit count is cached (this cache is cleared
     * whenever a document is added or the fields or analyzer are changed).
     * @param query the search query
     * @return the number of matches to the query
     * @throws ParseException if the query is not parsed successfully
     * @throws IOException if an input/output error occurs
     * @throws IllegalStateException if the query is not cached and the
     * searcher is in write mode
     */
    public int numHits(String query) throws ParseException, IOException {
        Integer count = (Integer)m_hitCountCache.get(query);
        if ( count == null ) {
            Hits hits = search(query);
            count = new Integer(hits.length());
            m_hitCountCache.put(query, count);
        }
        return count.intValue();
    }

    /**
     * Add a document to the Lucene search index. I/O errors raised while
     * adding the document are printed to standard error rather than thrown.
     * @param d the Document to add
     * @throws IllegalStateException if the searcher is not in write mode, or
     * if no index writer could be opened
     */
    public void addDocument(Document d) {
        if ( m_readMode ) {
            throw new IllegalStateException(
                    "Documents can not be added to the index unless " +
                    "the LuceneSearcher is not in read mode");
        }
        if ( writer == null ) {
            // an earlier attempt to open the index for writing failed
            throw new IllegalStateException(
                    "No index writer is available; " +
                    "the search index could not be opened for writing");
        }
        try {
            writer.addDocument(d);
            // the index changed, so cached hit counts may be out of date
            m_hitCountCache.clear();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the Analyzer used to process text. See Lucene documentation
     * for more details.
     * @return returns the analyzer.
     */
    public Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Sets the Analyzer used to process text. See Lucene documentation
     * for more details. The new analyzer is used for subsequent query
     * parsing and for index writers opened after this call; an index
     * writer that is already open is not affected. Cached hit counts
     * are discarded.
     * @param analyzer the analyzer to set
     */
    public void setAnalyzer(Analyzer analyzer) {
        this.analyzer = analyzer;
        // the same query String may now parse differently
        m_hitCountCache.clear();
    }

    /**
     * Returns the indexed Document fields. These are the fields that are
     * queried over when searches are issued.
     * @return returns a copy of the indexed Document fields
     */
    public String[] getFields() {
        return (String[])fields.clone();
    }

    /**
     * Sets the indexed Document fields. These are the fields that are
     * queried over when searches are issued. Cached hit counts are
     * discarded.
     * @param fields the indexed Document fields to use. The array is copied.
     */
    public void setFields(String[] fields) {
        this.fields = (String[])fields.clone();
        // the same query String now searches over different fields
        m_hitCountCache.clear();
    }

    /**
     * Returns the Lucene IndexReader. See Lucene documentation
     * for more details.
     * @return returns the IndexReader, or null if no reader is currently
     * open (for example, while in write mode).
     */
    public IndexReader getIndexReader() {
        return reader;
    }

    /**
     * Returns the Lucene IndexSearcher. See Lucene documentation
     * for more details.
     * @return returns the IndexSearcher, or null if no searcher is currently
     * open (for example, while in write mode).
     */
    public Searcher getIndexSearcher() {
        return searcher;
    }

    /**
     * Indicates if this LuceneSearcher is read-only.
     * @return true if read-only, false if writes are allowed
     */
    public boolean isReadOnly() {
        return m_readOnly;
    }

} // end of class LuceneSearcher