package org.apache.solr.spelling.suggest;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.List;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.spelling.SolrSpellChecker;
import org.apache.solr.spelling.SpellingOptions;
import org.apache.solr.spelling.SpellingResult;
import org.apache.solr.spelling.suggest.Lookup.LookupResult;
import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
import org.apache.solr.util.HighFrequencyDictionary;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * {@link SolrSpellChecker} implementation that answers suggestion requests from
 * a {@link Lookup} structure. The lookup is populated from a {@link Dictionary}
 * that is based either on the terms of an index field (when no
 * {@link #LOCATION} is configured) or on a resource opened through the core's
 * resource loader. When {@link #STORE_DIR} is configured, the lookup data is
 * also stored to, and on reload read back from, that directory.
 */
public class Suggester extends SolrSpellChecker {
  private static final Logger LOG = LoggerFactory.getLogger(Suggester.class);

  /** Location of the source data - either a path to a file, or null for the
   * current IndexReader.
   */
  public static final String LOCATION = "sourceLocation";
  /** Field to use as the source of terms if using IndexReader. */
  public static final String FIELD = "field";
  /** Fully-qualified class of the {@link Lookup} implementation. */
  public static final String LOOKUP_IMPL = "lookupImpl";
  /**
   * Minimum frequency of terms to consider when building the dictionary.
   */
  public static final String THRESHOLD_TOKEN_FREQUENCY = "threshold";
  /**
   * Name of the location where to persist the dictionary. If this location
   * is relative then the data will be stored under the core's dataDir. If this
   * is null the storing will be disabled.
   */
  public static final String STORE_DIR = "storeDir";

  /** Configured value of {@link #LOCATION}; null means "use the index". */
  protected String sourceLocation;
  /** Resolved directory for persisted lookup data; null disables storing. */
  protected File storeDir;
  /** Configured value of {@link #FIELD}. */
  protected String field;
  /** Configured value of {@link #THRESHOLD_TOKEN_FREQUENCY}; 0 when absent. */
  protected float threshold;
  /** Dictionary used for the most recent build; null until the first build. */
  protected Dictionary dictionary;
  /** Reader of the searcher used for the most recent index-based build. */
  protected IndexReader reader;
  /** Current lookup structure; null until built or loaded. */
  protected Lookup lookup;
  /** Class name of the {@link Lookup} implementation to instantiate. */
  protected String lookupImpl;
  /** Not assigned by this class; the core is passed to each method explicitly. */
  protected SolrCore core;

  /** Shared result returned by {@link #getSuggestions} when no lookup is available. */
  static final SpellingResult EMPTY_RESULT = new SpellingResult();

  /**
   * Reads the component configuration.
   *
   * @param config component configuration; see the public constants of this
   *               class for the recognized keys
   * @param core   core whose data directory is used to resolve a relative
   *               {@link #STORE_DIR}
   * @return the name returned by the superclass initialization
   */
  @Override
  public String init(NamedList config, SolrCore core) {
    LOG.info("init: {}", config);
    String name = super.init(config, core);
    threshold = parseThreshold(config.get(THRESHOLD_TOKEN_FREQUENCY));
    sourceLocation = (String) config.get(LOCATION);
    field = (String) config.get(FIELD);
    lookupImpl = (String) config.get(LOOKUP_IMPL);
    if (lookupImpl == null) {
      lookupImpl = JaspellLookup.class.getName();
    }
    String store = (String) config.get(STORE_DIR);
    if (store != null) {
      File dir = new File(store);
      if (!dir.isAbsolute()) {
        // relative locations live under the core's data directory
        dir = new File(core.getDataDir(), store);
      }
      if (!dir.exists() && !dir.mkdirs() && !dir.isDirectory()) {
        LOG.warn("Could not create store directory {}", dir);
      }
      storeDir = dir;
    }
    return name;
  }

  /**
   * Converts the configured threshold to a float. Accepts any {@link Number}
   * as well as a textual value so that the config does not have to use an
   * exact Float; a missing value means no threshold.
   */
  private static float parseThreshold(Object raw) {
    if (raw == null) {
      return 0.0f;
    }
    if (raw instanceof Number) {
      return ((Number) raw).floatValue();
    }
    return Float.parseFloat(raw.toString().trim());
  }

  /** Instantiates the configured {@link Lookup} implementation via the core's resource loader. */
  private Lookup createLookup(SolrCore core) {
    return (Lookup) core.getResourceLoader().newInstance(lookupImpl);
  }

  /**
   * Builds a fresh lookup from the configured source and, when a store
   * directory is configured, stores it there. Failures are logged rather than
   * propagated.
   *
   * @param core     core providing the resource loader
   * @param searcher searcher whose reader supplies terms when no source
   *                 location is configured
   */
  @Override
  public void build(SolrCore core, SolrIndexSearcher searcher) {
    LOG.info("build()");
    if (sourceLocation == null) {
      reader = searcher.getReader();
      dictionary = new HighFrequencyDictionary(reader, field, threshold);
    } else {
      try {
        dictionary = new FileDictionary(new InputStreamReader(
            core.getResourceLoader().openResource(sourceLocation), "UTF-8"));
      } catch (UnsupportedEncodingException e) {
        // Without a dictionary there is nothing to build from; keep whatever
        // lookup is currently in place instead of replacing it with an empty one.
        LOG.error("Cannot read " + sourceLocation + " as UTF-8", e);
        return;
      }
    }
    lookup = createLookup(core);
    try {
      lookup.build(dictionary);
      if (storeDir != null) {
        lookup.store(storeDir);
      }
    } catch (Exception e) {
      LOG.error("Error while building or storing the suggester lookup", e);
    }
  }

  /**
   * Refreshes the lookup. If nothing has been built yet and a store directory
   * is configured, tries to load previously stored data. Otherwise, when the
   * current dictionary is index-based, rebuilds it from the given searcher's
   * reader and stores the result if a store directory is configured.
   *
   * @param core     core providing the resource loader
   * @param searcher the searcher to take the current reader from
   * @throws IOException if loading fails, or if rebuilding/storing fails
   */
  @Override
  public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException {
    LOG.info("reload()");
    if (dictionary == null && storeDir != null) {
      // this may be a firstSearcher event, try loading it; build() may not
      // have run yet, so the lookup has to be created on demand
      Lookup candidate = (lookup != null) ? lookup : createLookup(core);
      if (candidate.load(storeDir)) {
        lookup = candidate;
        return; // loaded ok
      }
    }
    // dictionary based on the current index may need refreshing
    if (dictionary instanceof HighFrequencyDictionary) {
      // use the reader owned by the given searcher rather than reopening the
      // previous one, which would create a reader nobody closes
      reader = searcher.getReader();
      dictionary = new HighFrequencyDictionary(reader, field, threshold);
      if (lookup == null) {
        lookup = createLookup(core);
      }
      try {
        lookup.build(dictionary);
        if (storeDir != null) {
          lookup.store(storeDir);
        }
      } catch (Exception e) {
        if (e instanceof IOException) {
          throw (IOException) e;
        }
        throw new IOException(e);
      }
    }
  }

  /**
   * Adds a single entry to the current lookup. Does nothing (apart from
   * logging) when no lookup has been built or loaded yet.
   *
   * @param query   key to add
   * @param numHits value stored with the key
   */
  public void add(String query, int numHits) {
    LOG.info("add {}, {}", query, numHits);
    if (lookup == null) {
      LOG.warn("Lookup is null - invoke spellchecker.build first");
      return;
    }
    lookup.add(query, Integer.valueOf(numHits));
  }

  /**
   * Looks up suggestions for every token in the request.
   *
   * @param options request options; the tokens, the onlyMorePopular flag and
   *                the count are used
   * @return the collected suggestions, or {@link #EMPTY_RESULT} when no lookup
   *         is available
   */
  @Override
  public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    LOG.debug("getSuggestions: {}", options.tokens);
    if (lookup == null) {
      LOG.info("Lookup is null - invoke spellchecker.build first");
      return EMPTY_RESULT;
    }
    SpellingResult res = new SpellingResult();
    for (Token t : options.tokens) {
      String term = new String(t.buffer(), 0, t.length());
      List<LookupResult> suggestions = lookup.lookup(term,
          options.onlyMorePopular, options.count);
      if (suggestions == null) {
        continue; // nothing known for this token
      }
      for (LookupResult lr : suggestions) {
        res.add(t, lr.key, ((Number) lr.value).intValue());
      }
    }
    return res;
  }
}