/* Copyright (2006-2012) Schibsted ASA * This file is part of Possom. * * Possom is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Possom is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Possom. If not, see <http://www.gnu.org/licenses/>. */ package no.sesat.search.result.handler; import no.sesat.search.result.BasicSuggestion; import org.apache.log4j.Logger; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; import no.sesat.search.datamodel.DataModel; import no.sesat.search.result.BasicResultList; import no.sesat.search.result.ResultItem; import no.sesat.search.result.ResultList; import no.sesat.search.result.WeightedSuggestion; /** * Spelling suggestions are chosen in the following way: * * <ul> * <li>Discard all suggestions with score less than minimumScore</li> * <li>For each term, remove suggestions with lowest score so that the number * of suggestions is less than maxSuggestions. If the query is long the limit is maxSuggestionsForLongQueries</li> * <li>Remove all suggestions whose score differs more than maxDistance from the suggestion with the highest score</li> * <li>If the query is long and if two terms have suggestions, remove all suggestions unless the best suggestion * is much better than the second best</li> * </ul> * * A new query is then created using the chosen suggestions. * * * @version <tt>$Id$</tt> */ public final class SpellingSuggestionChooser implements ResultHandler { private static final Logger LOG = Logger.getLogger(SpellingSuggestionChooser.class); private final SpellingSuggestionChooserResultHandlerConfig config; /** * Create a new SpellingSuggestionChooser. * @param config */ public SpellingSuggestionChooser(final ResultHandlerConfig config) { this.config = (SpellingSuggestionChooserResultHandlerConfig)config; } /** * {@inherit} */ public void handleResult(final Context cxt, final DataModel datamodel) { final ResultList<ResultItem> result = cxt.getSearchResult(); final Map<String,List<WeightedSuggestion>> weightedSuggestionsMap = ((BasicResultList)result).getSpellingSuggestionsMap(); if (LOG.isDebugEnabled()) { LOG.debug("Number of corrected terms are " + numberOfCorrectedTerms(weightedSuggestionsMap)); } final int numberOfTermsInQuery = datamodel.getQuery().getQuery().getTermCount(); if (numberOfTermsInQuery >= config.getVeryLongQuery() && numberOfCorrectedTerms(weightedSuggestionsMap) > 1) { result.getSpellingSuggestions().clear(); } for (Iterator<List<WeightedSuggestion>> terms = weightedSuggestionsMap.values().iterator(); terms.hasNext();) { final List<WeightedSuggestion> suggestionList = terms.next(); Collections.sort(suggestionList); removeSuggestionsWithTooLowScore(suggestionList); limitNumberOfSuggestions(suggestionList, config.getMaxSuggestions()); removeSuggestionsWithTooHighDifference(suggestionList); if (numberOfTermsInQuery >= config.getLongQuery()) { if (numberOfCorrectedTerms(weightedSuggestionsMap) == 1) { limitNumberOfSuggestions(suggestionList, config.getLongQueryMaxSuggestions()); } else if (numberOfCorrectedTerms(weightedSuggestionsMap) == 2 && numberOfTermsInQuery < config.getVeryLongQuery()) { if (suggestionList.size() > 1) { removeAllIfOneIsNotMuchBetter(suggestionList); } } } if (suggestionList.isEmpty()) { terms.remove(); } } final int numberOfCorrections = numberOfCorrectedTerms(weightedSuggestionsMap); final String newQuery = datamodel.getQuery().getString().toLowerCase(datamodel.getSite().getSite().getLocale()); if (numberOfCorrections == 1) { for (List<WeightedSuggestion> spellingSuggestions : weightedSuggestionsMap.values()) { for (WeightedSuggestion suggestion : spellingSuggestions) { String query = newQuery; String displayQuery = newQuery; query = query.replaceAll(suggestion.getOriginal(), suggestion.getSuggestion()); displayQuery = displayQuery .replaceAll(suggestion.getOriginal(), "<b>" + suggestion.getSuggestion() + "</b>"); result.addQuerySuggestion(BasicSuggestion.instanceOf(suggestion.getOriginal(), query, displayQuery)); } } } else if (numberOfCorrections == 2 && numberOfTermsInQuery < config.getVeryLongQuery() ) { String original = newQuery; String query = newQuery; String displayQuery = newQuery; for (List<WeightedSuggestion> spellingSuggestions : weightedSuggestionsMap.values()) { for (WeightedSuggestion spellingSuggestion : spellingSuggestions) { original = spellingSuggestion.getOriginal(); query = query.replaceAll(spellingSuggestion.getOriginal(), spellingSuggestion.getSuggestion()); displayQuery = displayQuery.replaceAll( spellingSuggestion.getOriginal(), "<b>" + spellingSuggestion.getSuggestion() + "</b>"); } } result.addQuerySuggestion(BasicSuggestion.instanceOf(original, query, displayQuery)); } } private void removeAllIfOneIsNotMuchBetter(final List<WeightedSuggestion> suggestionList) { final WeightedSuggestion best = suggestionList.get(0); final WeightedSuggestion nextBest = suggestionList.get(1); if (best.getWeight() < nextBest.getWeight() + config.getMuchBetter()) { suggestionList.clear(); if (LOG.isDebugEnabled()) { LOG.debug("All suggestions removed because the best is not much better than second best"); LOG.debug("Best " + best); LOG.debug("Second best " + nextBest); } } else { suggestionList.clear(); suggestionList.add(best); if (LOG.isDebugEnabled()) { LOG.debug("Only the best suggestion kept"); } } } private int numberOfCorrectedTerms(final Map<String, List<WeightedSuggestion>> spellingSuggestions) { return spellingSuggestions.keySet().size(); } private void removeSuggestionsWithTooHighDifference(final List<WeightedSuggestion> suggestionList) { int lastScore = -1; for (final Iterator<WeightedSuggestion> iterator = suggestionList.iterator(); iterator.hasNext();) { final WeightedSuggestion suggestion = iterator.next(); if (suggestion.getWeight() + config.getMaxDistance() < lastScore) { iterator.remove(); LOG.debug("Suggestion " + suggestion + " because difference too high"); } else { lastScore = suggestion.getWeight(); } } } private void limitNumberOfSuggestions(final List<WeightedSuggestion> suggestionList, final int limit) { if (suggestionList.size() > limit) { final int numberToRemove = suggestionList.size() - limit; for (int i = 0; i < numberToRemove; i++) { final WeightedSuggestion removed = suggestionList.remove(suggestionList.size() - 1); if (LOG.isDebugEnabled()) { LOG.debug("Suggestion " + removed + " to reach maximum number of suggestions"); } } } } private void removeSuggestionsWithTooLowScore(final List<WeightedSuggestion> suggestionList) { for (final Iterator<WeightedSuggestion> suggestions = suggestionList.iterator(); suggestions.hasNext();) { final WeightedSuggestion suggestion = suggestions.next(); if (suggestion.getWeight() < config.getMinScore()) { suggestions.remove(); if (LOG.isDebugEnabled()) { LOG.debug("Suggestion " + suggestion + " removed due to low score"); } } } } }