/*
* Copyright (c) 2009-2010 Lockheed Martin Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.eurekastreams.commons.search.bridge;
import java.io.IOException;
import java.io.StringReader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.hibernate.search.bridge.StringBridge;
/**
* Field bridge that converts the input object to a String, then runs it through an Analyzer. This can be used with a
* StandardAnalyzer, for example, to lowercase and remove stop words and punctuation, which will be faster than runnin a
* custom SortComparator.
*/
public class StandardAnalyzerSortFieldBridge implements StringBridge
{
/**
* Instance of the logger.
*/
private Log log = LogFactory.getLog(StandardAnalyzerSortFieldBridge.class);
/**
* The analyzer to run the field through for indexing.
*/
private StandardAnalyzer analyzer = new StandardAnalyzer();
/**
* Convert the input object to String, tokenize it with the analyzer, then join the chunks together spaces.
*
* @param obj
* the object to convert
* @return a sortable string representation of the input object
*/
@Override
public String objectToString(final Object obj)
{
log.info("...");
if (obj == null)
{
return null;
}
log.info("Parsing '" + obj.toString() + "'");
StringBuilder outputSb = new StringBuilder();
TokenStream stream = analyzer.tokenStream(null, new StringReader(obj.toString()));
Token token = new Token();
try
{
while ((token = stream.next(token)) != null)
{
if (outputSb.length() > 0)
{
outputSb.append(" ");
}
outputSb.append(token.term());
}
}
catch (IOException e)
{
log.error("Error parsing '" + obj.toString() + "'", e);
return null;
}
String output = outputSb.toString();
if (log.isInfoEnabled())
{
log.info("Parsed '" + obj.toString() + "' as '" + output + "'");
}
return output;
}
}