/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.spelling; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.Collections; import java.util.Comparator; import java.util.List; import org.apache.lucene.analysis.Token; import org.apache.lucene.index.Term; import org.apache.lucene.search.spell.DirectSpellChecker; import org.apache.lucene.search.spell.StringDistance; import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; import org.apache.lucene.search.spell.SuggestWordQueue; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.search.SolrIndexSearcher; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Spellchecker implementation that uses {@link DirectSpellChecker} * <p> * Requires no auxiliary index or data structure. * <p> * Supported options: * <ul> * <li>field: Used as the source of terms. * <li>distanceMeasure: Sets {@link DirectSpellChecker#setDistance(StringDistance)}. * Note: to set the default {@link DirectSpellChecker#INTERNAL_LEVENSHTEIN}, use "internal". * <li>accuracy: Sets {@link DirectSpellChecker#setAccuracy(float)}. * <li>maxEdits: Sets {@link DirectSpellChecker#setMaxEdits(int)}. * <li>minPrefix: Sets {@link DirectSpellChecker#setMinPrefix(int)}. * <li>maxInspections: Sets {@link DirectSpellChecker#setMaxInspections(int)}. * <li>comparatorClass: Sets {@link DirectSpellChecker#setComparator(Comparator)}. * Note: score-then-frequency can be specified as "score" and frequency-then-score * can be specified as "freq". * <li>thresholdTokenFrequency: sets {@link DirectSpellChecker#setThresholdFrequency(float)}. * <li>minQueryLength: sets {@link DirectSpellChecker#setMinQueryLength(int)}. * <li>maxQueryFrequency: sets {@link DirectSpellChecker#setMaxQueryFrequency(float)}. * </ul> * @see DirectSpellChecker */ public class DirectSolrSpellChecker extends SolrSpellChecker { private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); // configuration params shared with other spellcheckers public static final String COMPARATOR_CLASS = AbstractLuceneSpellChecker.COMPARATOR_CLASS; public static final String SCORE_COMP = AbstractLuceneSpellChecker.SCORE_COMP; public static final String FREQ_COMP = AbstractLuceneSpellChecker.FREQ_COMP; public static final String STRING_DISTANCE = AbstractLuceneSpellChecker.STRING_DISTANCE; public static final String ACCURACY = AbstractLuceneSpellChecker.ACCURACY; public static final String THRESHOLD_TOKEN_FREQUENCY = IndexBasedSpellChecker.THRESHOLD_TOKEN_FREQUENCY; public static final String INTERNAL_DISTANCE = "internal"; public static final float DEFAULT_ACCURACY = 0.5f; public static final float DEFAULT_THRESHOLD_TOKEN_FREQUENCY = 0.0f; public static final String MAXEDITS = "maxEdits"; public static final int DEFAULT_MAXEDITS = 2; // params specific to this implementation public static final String MINPREFIX = "minPrefix"; public static final int DEFAULT_MINPREFIX = 1; public static final String MAXINSPECTIONS = "maxInspections"; public static final int DEFAULT_MAXINSPECTIONS = 5; public static final String MINQUERYLENGTH = "minQueryLength"; public static final int DEFAULT_MINQUERYLENGTH = 4; public static final String MAXQUERYFREQUENCY = "maxQueryFrequency"; public static final float DEFAULT_MAXQUERYFREQUENCY = 0.01f; private DirectSpellChecker checker = new DirectSpellChecker(); @Override public String init(NamedList config, SolrCore core) { SolrParams params = SolrParams.toSolrParams(config); LOG.info("init: " + config); String name = super.init(config, core); Comparator<SuggestWord> comp = SuggestWordQueue.DEFAULT_COMPARATOR; String compClass = (String) config.get(COMPARATOR_CLASS); if (compClass != null) { if (compClass.equalsIgnoreCase(SCORE_COMP)) comp = SuggestWordQueue.DEFAULT_COMPARATOR; else if (compClass.equalsIgnoreCase(FREQ_COMP)) comp = new SuggestWordFrequencyComparator(); else //must be a FQCN comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass, Comparator.class); } StringDistance sd = DirectSpellChecker.INTERNAL_LEVENSHTEIN; String distClass = (String) config.get(STRING_DISTANCE); if (distClass != null && !distClass.equalsIgnoreCase(INTERNAL_DISTANCE)) sd = core.getResourceLoader().newInstance(distClass, StringDistance.class); float minAccuracy = DEFAULT_ACCURACY; Float accuracy = params.getFloat(ACCURACY); if (accuracy != null) minAccuracy = accuracy; int maxEdits = DEFAULT_MAXEDITS; Integer edits = params.getInt(MAXEDITS); if (edits != null) maxEdits = edits; int minPrefix = DEFAULT_MINPREFIX; Integer prefix = params.getInt(MINPREFIX); if (prefix != null) minPrefix = prefix; int maxInspections = DEFAULT_MAXINSPECTIONS; Integer inspections = params.getInt(MAXINSPECTIONS); if (inspections != null) maxInspections = inspections; float minThreshold = DEFAULT_THRESHOLD_TOKEN_FREQUENCY; Float threshold = params.getFloat(THRESHOLD_TOKEN_FREQUENCY); if (threshold != null) minThreshold = threshold; int minQueryLength = DEFAULT_MINQUERYLENGTH; Integer queryLength = params.getInt(MINQUERYLENGTH); if (queryLength != null) minQueryLength = queryLength; float maxQueryFrequency = DEFAULT_MAXQUERYFREQUENCY; Float queryFreq = params.getFloat(MAXQUERYFREQUENCY); if (queryFreq != null) maxQueryFrequency = queryFreq; checker.setComparator(comp); checker.setDistance(sd); checker.setMaxEdits(maxEdits); checker.setMinPrefix(minPrefix); checker.setAccuracy(minAccuracy); checker.setThresholdFrequency(minThreshold); checker.setMaxInspections(maxInspections); checker.setMinQueryLength(minQueryLength); checker.setMaxQueryFrequency(maxQueryFrequency); checker.setLowerCaseTerms(false); return name; } @Override public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException {} @Override public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException {} @Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); SpellingResult result = new SpellingResult(); float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy; for (Token token : options.tokens) { String tokenText = token.toString(); Term term = new Term(field, tokenText); int freq = options.reader.docFreq(term); int count = (options.alternativeTermCount > 0 && freq > 0) ? options.alternativeTermCount: options.count; SuggestWord[] suggestions = checker.suggestSimilar(term, count,options.reader, options.suggestMode, accuracy); result.addFrequency(token, freq); // If considering alternatives to "correctly-spelled" terms, then add the // original as a viable suggestion. if (options.alternativeTermCount > 0 && freq > 0) { boolean foundOriginal = false; SuggestWord[] suggestionsWithOrig = new SuggestWord[suggestions.length + 1]; for (int i = 0; i < suggestions.length; i++) { if (suggestions[i].string.equals(tokenText)) { foundOriginal = true; break; } suggestionsWithOrig[i + 1] = suggestions[i]; } if (!foundOriginal) { SuggestWord orig = new SuggestWord(); orig.freq = freq; orig.string = tokenText; suggestionsWithOrig[0] = orig; suggestions = suggestionsWithOrig; } } if(suggestions.length==0 && freq==0) { List<String> empty = Collections.emptyList(); result.add(token, empty); } else { for (SuggestWord suggestion : suggestions) { result.add(token, suggestion.string, suggestion.freq); } } } return result; } @Override public float getAccuracy() { return checker.getAccuracy(); } @Override public StringDistance getStringDistance() { return checker.getDistance(); } }