/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.spelling; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.search.spell.LevensteinDistance; import org.apache.lucene.search.spell.StringDistance; import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.search.spell.SuggestWordQueue; import org.apache.solr.client.solrj.response.SpellCheckResponse; import org.apache.solr.common.params.SpellingParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.component.SpellCheckMergeData; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.search.SolrIndexSearcher; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; /** * <p> * Refer to <a href="http://wiki.apache.org/solr/SpellCheckComponent">SpellCheckComponent</a> * for more details. * </p> * * @since solr 1.3 */ public abstract class SolrSpellChecker { public static final String DICTIONARY_NAME = "name"; public static final String DEFAULT_DICTIONARY_NAME = "default"; public static final String FIELD = "field"; public static final String FIELD_TYPE = "fieldType"; /** Dictionary name */ protected String name; protected Analyzer analyzer; protected String field; protected String fieldTypeName; public String init(NamedList config, SolrCore core) { name = (String) config.get(DICTIONARY_NAME); if (name == null) { name = DEFAULT_DICTIONARY_NAME; } field = (String)config.get(FIELD); IndexSchema schema = core.getLatestSchema(); if (field != null && schema.getFieldTypeNoEx(field) != null) { analyzer = schema.getFieldType(field).getQueryAnalyzer(); } fieldTypeName = (String) config.get(FIELD_TYPE); if (schema.getFieldTypes().containsKey(fieldTypeName)) { FieldType fieldType = schema.getFieldTypes().get(fieldTypeName); analyzer = fieldType.getQueryAnalyzer(); } if (analyzer == null) { analyzer = new WhitespaceAnalyzer(); } return name; } /** * Integrate spelling suggestions from the various shards in a distributed environment. */ public SpellingResult mergeSuggestions(SpellCheckMergeData mergeData, int numSug, int count, boolean extendedResults) { float min = 0.5f; try { min = getAccuracy(); } catch(UnsupportedOperationException uoe) { //just use .5 as a default } StringDistance sd = null; try { sd = getStringDistance() == null ? new LevensteinDistance() : getStringDistance(); } catch(UnsupportedOperationException uoe) { sd = new LevensteinDistance(); } SpellingResult result = new SpellingResult(); for (Map.Entry<String, HashSet<String>> entry : mergeData.origVsSuggested.entrySet()) { String original = entry.getKey(); //Only use this suggestion if all shards reported it as misspelled, //unless it was not a term original to the user's query //(WordBreakSolrSpellChecker can add new terms to the response, and we want to keep these) Integer numShards = mergeData.origVsShards.get(original); if(numShards<mergeData.totalNumberShardResponses && mergeData.isOriginalToQuery(original)) { continue; } HashSet<String> suggested = entry.getValue(); SuggestWordQueue sugQueue = new SuggestWordQueue(numSug); for (String suggestion : suggested) { SuggestWord sug = mergeData.suggestedVsWord.get(suggestion); sug.score = sd.getDistance(original, sug.string); if (sug.score < min) continue; sugQueue.insertWithOverflow(sug); if (sugQueue.size() == numSug) { // if queue full, maintain the minScore score min = sugQueue.top().score; } } // create token SpellCheckResponse.Suggestion suggestion = mergeData.origVsSuggestion.get(original); Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset()); // get top 'count' suggestions out of 'sugQueue.size()' candidates SuggestWord[] suggestions = new SuggestWord[Math.min(count, sugQueue.size())]; // skip the first sugQueue.size() - count elements for (int k=0; k < sugQueue.size() - count; k++) sugQueue.pop(); // now collect the top 'count' responses for (int k = Math.min(count, sugQueue.size()) - 1; k >= 0; k--) { suggestions[k] = sugQueue.pop(); } if (extendedResults) { Integer o = mergeData.origVsFreq.get(original); if (o != null) result.addFrequency(token, o); for (SuggestWord word : suggestions) result.add(token, word.string, word.freq); } else { List<String> words = new ArrayList<>(sugQueue.size()); for (SuggestWord word : suggestions) words.add(word.string); result.add(token, words); } } return result; } public Analyzer getQueryAnalyzer() { return analyzer; } public String getDictionaryName() { return name; } /** * Reloads the index. Useful if an external process is responsible for building the spell checker. * * @throws IOException If there is a low-level I/O error. */ public abstract void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException; /** * (re)Builds the spelling index. May be a NOOP if the implementation doesn't require building, or can't be rebuilt. */ public abstract void build(SolrCore core, SolrIndexSearcher searcher) throws IOException; /** * Get the value of {@link SpellingParams#SPELLCHECK_ACCURACY} if supported. * Otherwise throws UnsupportedOperationException. */ protected float getAccuracy() { throw new UnsupportedOperationException(); } /** * Get the distance implementation used by this spellchecker, or NULL if not applicable. */ protected StringDistance getStringDistance() { throw new UnsupportedOperationException(); } /** * Get suggestions for the given query. Tokenizes the query using a field appropriate Analyzer. * The {@link SpellingResult#getSuggestions()} suggestions must be ordered by best suggestion first. * * @param options The {@link SpellingOptions} to use * @return The {@link SpellingResult} suggestions * @throws IOException if there is an error producing suggestions */ public abstract SpellingResult getSuggestions(SpellingOptions options) throws IOException; public boolean isSuggestionsMayOverlap() { return false; } }