/*
* Copyright 2000-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jetspeed.services.search.lucene;
// Java imports
import java.io.File;
import java.io.IOException;
import java.net.URL;
import javax.servlet.ServletConfig;
import java.util.Collection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
// Jetspeed imports
import org.apache.commons.collections.MultiHashMap;
import org.apache.commons.collections.MultiMap;
import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
import org.apache.jetspeed.services.logging.JetspeedLogger;
import org.apache.jetspeed.services.search.HandlerFactory;
import org.apache.jetspeed.services.search.ObjectHandler;
import org.apache.jetspeed.services.search.ParsedObject;
import org.apache.jetspeed.services.search.BaseParsedObject;
import org.apache.jetspeed.services.search.SearchResults;
import org.apache.jetspeed.services.search.SearchService;
// Turbine imports
import org.apache.turbine.services.InitializationException;
import org.apache.turbine.services.resources.ResourceService;
import org.apache.turbine.services.servlet.TurbineServlet;
import org.apache.turbine.services.TurbineBaseService;
import org.apache.turbine.services.TurbineServices;
// Lucene imports
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
/**
* Lucene implementation of search service.
*
* @author <a href="mailto:taylor@apache.org">David Sean taylor</a>
* @author <a href="mailto:caius1440@hotmail.com">Jeremy Ford</a>
* @author <a href="mailto:morciuch@apache.org">Mark Orciuch</a>
* @version $Id: LuceneSearchService.java,v 1.10 2004/03/05 03:49:15 jford Exp $
*/
public class LuceneSearchService extends TurbineBaseService implements SearchService
{
/**
* Static initialization of the logger for this class
*/
private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(LuceneSearchService.class.getName());
private static final int KEYWORD = 0;
private static final int TEXT = 1;
private static final String CONFIG_DIRECTORY = "directory";
private File rootDir = null;
private String indexRoot = null;
/**
* This is the early initialization method called by the
* Turbine <code>Service</code> framework
* @param conf The <code>ServletConfig</code>
* @exception throws a <code>InitializationException</code> if the service
* fails to initialize
*/
public synchronized void init(ServletConfig conf) throws InitializationException
{
// already initialized
if (getInit())
{
return;
}
initConfiguration(conf);
// initialization done
setInit(true);
}
/**
* This is the lateinitialization method called by the
* Turbine <code>Service</code> framework
*
* @exception throws a <code>InitializationException</code> if the service
* fails to initialize
*/
public void init() throws InitializationException
{
logger.info("Late init for " + SearchService.SERVICE_NAME + " called");
while (!getInit())
{
//Not yet...
try
{
Thread.sleep(100);
logger.info("Waiting for init of " + SearchService.SERVICE_NAME + "...");
}
catch (InterruptedException ie)
{
logger.error("Exception", ie);
}
}
}
/**
* This is the shutdown method called by the
* Turbine <code>Service</code> framework
*/
public void shutdown()
{
}
/**
* Loads the configuration parameters for this service from the
* JetspeedResources.properties file.
*
* @exception throws a <code>InitializationException</code> if the service
* fails to initialize
*/
private void initConfiguration(ServletConfig conf) throws InitializationException
{
if (getInit())
{
return;
}
// get configuration parameters from Jetspeed Resources
ResourceService serviceConf = ((TurbineServices) TurbineServices.getInstance())
.getResources(SearchService.SERVICE_NAME);
// Get config properties
indexRoot = serviceConf.getString(CONFIG_DIRECTORY);
//
// The following section opens or creates the search index
//
//
rootDir = new File(indexRoot);
//If the rootDir does not exist, treat it as context relative
if (!rootDir.exists())
{
if (indexRoot != null)
{
String rootDirPath = TurbineServlet.getRealPath("") + indexRoot;
rootDir = new File(rootDirPath);
if (!rootDir.exists())
{
rootDir.mkdir();
logger.info("Created index directory '" + rootDir.getPath() + "'");
}
}
}
try
{
Searcher searcher = null;
searcher = new IndexSearcher(rootDir.getPath());
searcher.close();
}
catch (Exception e)
{
try
{
IndexWriter indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), true);
indexWriter.close();
indexWriter = null;
logger.info("Created Lucene Index in " + rootDir.getPath());
}
catch (Exception e1)
{
logger.error(this.getClass().getName() + ".initConfiguration - Getting or creating IndexSearcher", e);
throw new InitializationException("Getting or creating Index Searcher");
}
}
//Mark that we are done
setInit(true);
}
/**
* Search
*
* @task Parse content into title and description fields
* @param searchString
* is the what is being searched for
* @return Hits, if no hits then null.
*/
public SearchResults search(String searchString)
{
Searcher searcher = null;
Hits hits = null;
try
{
searcher = new IndexSearcher(rootDir.getPath());
}
catch (IOException e)
{
logger.error("Failed to create index search using path " + rootDir.getPath());
return null;
}
Analyzer analyzer = new StandardAnalyzer();
String[] searchFields = {ParsedObject.FIELDNAME_CONTENT, ParsedObject.FIELDNAME_DESCRIPTION, ParsedObject.FIELDNAME_FIELDS,
ParsedObject.FIELDNAME_KEY, ParsedObject.FIELDNAME_KEYWORDS, ParsedObject.FIELDNAME_LANGUAGE,
ParsedObject.FIELDNAME_SCORE, ParsedObject.FIELDNAME_TITLE, ParsedObject.FIELDNAME_TYPE,
ParsedObject.FIELDNAME_URL, ParsedObject.FIELDNAME_CLASSNAME};
Query query= null;
try
{
query = MultiFieldQueryParser.parse(searchString, searchFields, analyzer);
// Query query = QueryParser.parse(searchString, ParsedObject.FIELDNAME_CONTENT, analyzer);
}
catch (ParseException e)
{
logger.info("Failed to parse query " + searchString);
return null;
}
try
{
hits = searcher.search(query);
}
catch (IOException e)
{
logger.error("Error while peforming search.", e);
return null;
}
// Copy hits to the result list
int hitCount = hits.length();
Document doc = null;
SearchResults results = new SearchResults(hitCount);
for (int counter = 0; counter < hitCount; counter++)
{
ParsedObject result = new BaseParsedObject();
try
{
doc = hits.doc(counter);
addFieldsToParsedObject(doc, result);
result.setScore(hits.score(counter));
result.setType(doc.getField(ParsedObject.FIELDNAME_TYPE).stringValue());
result.setKey(doc.getField(ParsedObject.FIELDNAME_KEY).stringValue());
result.setDescription(doc.getField(ParsedObject.FIELDNAME_DESCRIPTION).stringValue());
result.setTitle(doc.getField(ParsedObject.FIELDNAME_TITLE).stringValue());
result.setContent(doc.getField(ParsedObject.FIELDNAME_CLASSNAME).stringValue());
Field language = doc.getField(ParsedObject.FIELDNAME_LANGUAGE);
if (language != null)
{
result.setLanguage(language.stringValue());
}
Field classname = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
if (classname != null)
{
result.setClassName(classname.stringValue());
}
Field url = doc.getField(ParsedObject.FIELDNAME_URL);
if (url != null)
{
result.setURL(new URL(url.stringValue()));
}
results.add(counter, result);
}
catch (Exception ioe)
{
logger.error("Exception", ioe);
}
}
if (searcher != null)
{
try
{
searcher.close();
}
catch (IOException ioe)
{
logger.error("Closing Searcher", ioe);
}
}
return results;
}
private void addFieldsToParsedObject(Document doc, ParsedObject o)
{
try
{
MultiMap multiKeywords = new MultiHashMap();
MultiMap multiFields = new MultiHashMap();
HashMap fieldMap = new HashMap();
Field classNameField = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
if(classNameField != null)
{
String className = classNameField.stringValue();
o.setClassName(className);
ObjectHandler handler = HandlerFactory.getHandler(className);
Set fields = handler.getFields();
addFieldsToMap(doc, fields, multiFields);
addFieldsToMap(doc, fields, fieldMap);
Set keywords = handler.getKeywords();
addFieldsToMap(doc, keywords, multiKeywords);
}
o.setMultiKeywords(multiKeywords);
o.setMultiFields(multiFields);
o.setFields(fieldMap);
}
catch(Exception e)
{
logger.error("Error trying to add fields to parsed object.", e);
}
}
private void addFieldsToMap(Document doc, Set fieldNames, Map fields)
{
Iterator fieldIter = fieldNames.iterator();
while(fieldIter.hasNext())
{
String fieldName = (String)fieldIter.next();
Field[] docFields = doc.getFields(fieldName);
if(fields != null)
{
for(int i=0; i<docFields.length; i++)
{
Field field = docFields[i];
if(field != null)
{
String value = field.stringValue();
fields.put(fieldName, value);
}
}
}
}
}
/**
*
* @return
*/
public String[] getSearchSets()
{
return null;
}
/**
*
* @see org.apache.jetspeed.services.search.SearchService#add(java.lang.Object)
* @param o
* @return
*/
public boolean add(Object o)
{
Collection c = new ArrayList(1);
c.add(o);
return add(c);
}
/**
*
* @see org.apache.jetspeed.services.search.SearchService#add(java.lang.Collection)
* @param c
* @return
*/
public boolean add(Collection c)
{
boolean result = false;
IndexWriter indexWriter;
try
{
indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), false);
}
catch (IOException e)
{
logger.error("Error while creating index writer. Skipping add...", e);
return result;
}
Iterator it = c.iterator();
while (it.hasNext())
{
Object o = it.next();
// Look up appropriate handler
ObjectHandler handler = null;
try
{
handler = HandlerFactory.getHandler(o);
}
catch (Exception e)
{
logger.error("Failed to create hanlder for object " + o.getClass().getName());
continue;
}
// Parse the object
ParsedObject parsedObject = handler.parseObject(o);
// Create document
Document doc = new Document();
// Populate document from the parsed object
if (parsedObject.getKey() != null)
{
doc.add(Field.Keyword(ParsedObject.FIELDNAME_KEY, parsedObject.getKey()));
}
if (parsedObject.getType() != null)
{
doc.add(Field.Text(ParsedObject.FIELDNAME_TYPE, parsedObject.getType()));
}
if (parsedObject.getTitle() != null)
{
doc.add(Field.Text(ParsedObject.FIELDNAME_TITLE, parsedObject.getTitle()));
}
if (parsedObject.getDescription() != null)
{
doc.add(Field.Text(ParsedObject.FIELDNAME_DESCRIPTION, parsedObject.getDescription()));
}
if (parsedObject.getContent() != null)
{
doc.add(Field.Text(ParsedObject.FIELDNAME_CONTENT, parsedObject.getContent()));
}
if (parsedObject.getLanguage() != null)
{
doc.add(Field.Text(ParsedObject.FIELDNAME_LANGUAGE, parsedObject.getLanguage()));
}
if (parsedObject.getURL() != null)
{
doc.add(Field.Text(ParsedObject.FIELDNAME_URL, parsedObject.getURL().toString()));
}
if(parsedObject.getClassName() != null)
{
doc.add(Field.Text(ParsedObject.FIELDNAME_CLASSNAME, parsedObject.getClassName()));
}
MultiMap multiKeywords = parsedObject.getMultiKeywords();
addFieldsToDocument(doc, multiKeywords, KEYWORD);
MultiMap multiFields = parsedObject.getMultiFields();
addFieldsToDocument(doc, multiFields, TEXT);
Map fields = parsedObject.getFields();
addFieldsToDocument(doc, fields, TEXT);
// Add the document to search index
try
{
indexWriter.addDocument(doc);
}
catch (IOException e)
{
logger.error("Error adding document to index.", e);
}
logger.debug("Index Document Count = " + indexWriter.docCount());
logger.info("Added '" + parsedObject.getTitle() + "' to index");
result = true;
}
try
{
indexWriter.optimize();
}
catch (IOException e)
{
logger.error("Error while trying to optimize index.");
}
finally
{
try
{
indexWriter.close();
}
catch (IOException e)
{
logger.error("Error while closing index writer.", e);
}
}
return result;
}
private void addFieldsToDocument(Document doc, Map fields, int type)
{
if(fields != null)
{
Iterator keyIter = fields.keySet().iterator();
while(keyIter.hasNext())
{
Object key = keyIter.next();
if(key != null)
{
Object values = fields.get(key);
if(values != null)
{
if(values instanceof Collection)
{
Iterator valueIter = ((Collection)values).iterator();
while(valueIter.hasNext())
{
Object value = valueIter.next();
if(value != null)
{
if(type == TEXT)
{
doc.add(Field.Text(key.toString(), value.toString()));
}
else
{
doc.add(Field.Keyword(key.toString(), value.toString()));
}
}
}
}
else
{
if(type == TEXT)
{
doc.add(Field.Text(key.toString(), values.toString()));
}
else
{
doc.add(Field.Keyword(key.toString(), values.toString()));
}
}
}
}
}
}
}
/**
*
* @see org.apache.jetspeed.services.search.SearchService#remove(java.lang.Object)
* @param o
* @return
*/
public boolean remove(Object o)
{
Collection c = new ArrayList(1);
c.add(o);
return remove(c);
}
/**
*
* @see org.apache.jetspeed.services.search.SearchService#remove(java.lang.Collection)
* @param c
* @return
*/
public boolean remove(Collection c)
{
boolean result = false;
try
{
IndexReader indexReader = IndexReader.open(this.rootDir);
Iterator it = c.iterator();
while (it.hasNext())
{
Object o = it.next();
// Look up appropriate handler
ObjectHandler handler = HandlerFactory.getHandler(o);
// Parse the object
ParsedObject parsedObject = handler.parseObject(o);
// Create term
Term term = null;
if (parsedObject.getKey() != null)
{
term = new Term(ParsedObject.FIELDNAME_KEY, parsedObject.getKey());
// Remove the document from search index
int rc = indexReader.delete(term);
logger.info("Attempted to delete '" + term.toString() + "' from index, documents deleted = " + rc);
//System.out.println("Attempted to delete '" + term.toString() + "' from index, documents deleted = " + rc);
result = rc > 0;
}
}
indexReader.close();
IndexWriter indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), false);
indexWriter.optimize();
indexWriter.close();
}
catch (Exception e)
{
logger.error("Exception", e);
result = false;
}
return result;
}
/**
*
* @see org.apache.jetspeed.services.search.SearchService#update(java.lang.Object)
* @param o
* @return
*/
public boolean update(Object o)
{
Collection c = new ArrayList(1);
c.add(o);
return update(c);
}
/**
* Updates an index entry. For now, it's a remove and add.
*
* @param c
* @return
* @see org.apache.jetspeed.services.search.SearchService#update(java.lang.Collection)
*/
public boolean update(Collection c)
{
boolean result = false;
try
{
// Delete entries from index
remove(c);
result = true;
}
catch (Throwable e)
{
logger.error("Exception", e);
}
try
{
// Add entries to index
add(c);
result = true;
}
catch (Throwable e)
{
logger.error("Exception", e);
}
return false;
}
}