package org.apache.solr.spelling.suggest; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.util.List; import org.apache.lucene.analysis.Token; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.spell.Dictionary; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.spelling.SolrSpellChecker; import org.apache.solr.spelling.SpellingOptions; import org.apache.solr.spelling.SpellingResult; import org.apache.solr.spelling.suggest.Lookup.LookupResult; import org.apache.solr.spelling.suggest.jaspell.JaspellLookup; import org.apache.solr.util.HighFrequencyDictionary; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class Suggester extends SolrSpellChecker { private static final Logger LOG = LoggerFactory.getLogger(Suggester.class); /** Location of the source data - either a path to a file, or null for the * current IndexReader. */ public static final String LOCATION = "sourceLocation"; /** Field to use as the source of terms if using IndexReader. */ public static final String FIELD = "field"; /** Fully-qualified class of the {@link Lookup} implementation. */ public static final String LOOKUP_IMPL = "lookupImpl"; /** * Minimum frequency of terms to consider when building the dictionary. */ public static final String THRESHOLD_TOKEN_FREQUENCY = "threshold"; /** * Name of the location where to persist the dictionary. If this location * is relative then the data will be stored under the core's dataDir. If this * is null the storing will be disabled. */ public static final String STORE_DIR = "storeDir"; protected String sourceLocation; protected File storeDir; protected String field; protected float threshold; protected Dictionary dictionary; protected IndexReader reader; protected Lookup lookup; protected String lookupImpl; protected SolrCore core; @Override public String init(NamedList config, SolrCore core) { LOG.info("init: " + config); String name = super.init(config, core); threshold = config.get(THRESHOLD_TOKEN_FREQUENCY) == null ? 0.0f : (Float) config.get(THRESHOLD_TOKEN_FREQUENCY); sourceLocation = (String) config.get(LOCATION); field = (String)config.get(FIELD); lookupImpl = (String)config.get(LOOKUP_IMPL); if (lookupImpl == null) { lookupImpl = JaspellLookup.class.getName(); } String store = (String)config.get(STORE_DIR); if (store != null) { storeDir = new File(store); if (!storeDir.isAbsolute()) { storeDir = new File(core.getDataDir() + File.separator + storeDir); } if (!storeDir.exists()) { storeDir.mkdirs(); } } return name; } @Override public void build(SolrCore core, SolrIndexSearcher searcher) { LOG.info("build()"); if (sourceLocation == null) { reader = searcher.getReader(); dictionary = new HighFrequencyDictionary(reader, field, threshold); } else { try { dictionary = new FileDictionary(new InputStreamReader( core.getResourceLoader().openResource(sourceLocation), "UTF-8")); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } } lookup = (Lookup) core.getResourceLoader().newInstance(lookupImpl); try { lookup.build(dictionary); if (storeDir != null) { lookup.store(storeDir); } } catch (Exception e) { e.printStackTrace(); } } @Override public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException { LOG.info("reload()"); if (dictionary == null && storeDir != null) { // this may be a firstSearcher event, try loading it if (lookup.load(storeDir)) { return; // loaded ok } } // dictionary based on the current index may need refreshing if (dictionary instanceof HighFrequencyDictionary) { reader = reader.reopen(); dictionary = new HighFrequencyDictionary(reader, field, threshold); try { lookup.build(dictionary); if (storeDir != null) { lookup.store(storeDir); } } catch (Exception e) { throw new IOException(e); } } } public void add(String query, int numHits) { LOG.info("add " + query + ", " + numHits); lookup.add(query, new Integer(numHits)); } static SpellingResult EMPTY_RESULT = new SpellingResult(); @Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); if (lookup == null) { LOG.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; } SpellingResult res = new SpellingResult(); for (Token t : options.tokens) { String term = new String(t.buffer(), 0, t.length()); List<LookupResult> suggestions = lookup.lookup(term, options.onlyMorePopular, options.count); if (suggestions == null) { continue; } for (LookupResult lr : suggestions) { res.add(t, lr.key, ((Number)lr.value).intValue()); } } return res; } }