/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.spelling; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Token; import org.apache.lucene.search.spell.StringDistance; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.component.SpellCheckMergeData; import org.apache.solr.search.SolrIndexSearcher; /** * <p>This class lets a query be run through multiple spell checkers. * The initial use-case is to use {@link WordBreakSolrSpellChecker} * in conjunction with a "standard" spell checker * (such as {@link DirectSolrSpellChecker} * </p> */ public class ConjunctionSolrSpellChecker extends SolrSpellChecker { private StringDistance stringDistance = null; private Float accuracy = null; private String dictionaryName = null; private Analyzer queryAnalyzer = null; private List<SolrSpellChecker> checkers = new ArrayList<>(); private boolean initialized = false; public void addChecker(SolrSpellChecker checker) { if (initialized) { throw new IllegalStateException( "Need to add checkers before calling init()"); } try { if (stringDistance == null) { stringDistance = checker.getStringDistance(); } else if (!stringDistance.equals(checker.getStringDistance())) { throw new IllegalArgumentException( "All checkers need to use the same StringDistance."); } } catch (UnsupportedOperationException uoe) { // ignore } try { if (accuracy == null) { accuracy = checker.getAccuracy(); } else if (accuracy != checker.getAccuracy()) { throw new IllegalArgumentException( "All checkers need to use the same Accuracy."); } } catch (UnsupportedOperationException uoe) { // ignore } if (queryAnalyzer == null) { queryAnalyzer = checker.getQueryAnalyzer(); } else if (queryAnalyzer != checker.getQueryAnalyzer()) { throw new IllegalArgumentException( "All checkers need to use the same Analyzer."); } checkers.add(checker); } @SuppressWarnings("unchecked") @Override public String init(NamedList config, SolrCore core) { for (int i = 0; i < checkers.size(); i++) { SolrSpellChecker c = checkers.get(i); String dn = c.init(config, core); //TODO: in the future, we could develop this further to allow // multiple spellcheckers with per-field dictionaries... if (dictionaryName != null && !dictionaryName.equals(dn)) { throw new IllegalArgumentException( "Cannot have more than one dictionary. (" + dn + " , " + dictionaryName + ")"); } dictionaryName = dn; } if (dictionaryName == null) { dictionaryName = DEFAULT_DICTIONARY_NAME; } initialized = true; return dictionaryName; } @Override public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException { for (SolrSpellChecker c : checkers) { c.build(core, searcher); } } @Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { SpellingResult[] results = new SpellingResult[checkers.size()]; for (int i = 0; i < checkers.size(); i++) { results[i] = checkers.get(i).getSuggestions(options); } return mergeCheckers(results, options.count); } @Override public SpellingResult mergeSuggestions(SpellCheckMergeData mergeData, int numSug, int count, boolean extendedResults) { SpellingResult[] results = new SpellingResult[checkers.size()]; for (int i = 0; i < checkers.size(); i++) { results[i] = checkers.get(i).mergeSuggestions(mergeData, numSug, count, extendedResults); } return mergeCheckers(results, numSug); } //TODO: This just interleaves the results. In the future, we might want to let users give each checker its // own weight and use that in combination to score & frequency to sort the results ? private SpellingResult mergeCheckers(SpellingResult[] results, int numSug) { Map<Token, Integer> combinedTokenFrequency = new HashMap<>(); Map<Token, List<LinkedHashMap<String, Integer>>> allSuggestions = new LinkedHashMap<>(); for(SpellingResult result : results) { if(result.getTokenFrequency()!=null) { combinedTokenFrequency.putAll(result.getTokenFrequency()); } for(Map.Entry<Token, LinkedHashMap<String, Integer>> entry : result.getSuggestions().entrySet()) { List<LinkedHashMap<String, Integer>> allForThisToken = allSuggestions.get(entry.getKey()); if(allForThisToken==null) { allForThisToken = new ArrayList<>(); allSuggestions.put(entry.getKey(), allForThisToken); } allForThisToken.add(entry.getValue()); } } SpellingResult combinedResult = new SpellingResult(); for(Map.Entry<Token, List<LinkedHashMap<String, Integer>>> entry : allSuggestions.entrySet()) { Token original = entry.getKey(); List<Iterator<Map.Entry<String,Integer>>> corrIters = new ArrayList<>(entry.getValue().size()); for(LinkedHashMap<String, Integer> corrections : entry.getValue()) { corrIters.add(corrections.entrySet().iterator()); } int numberAdded = 0; while(numberAdded < numSug) { boolean anyData = false; for(Iterator<Map.Entry<String,Integer>> iter : corrIters) { if(iter.hasNext()) { anyData = true; Map.Entry<String,Integer> corr = iter.next(); combinedResult.add(original, corr.getKey(), corr.getValue()); Integer tokenFrequency = combinedTokenFrequency.get(original); combinedResult.addFrequency(original, tokenFrequency==null ? 0 : tokenFrequency); if(++numberAdded==numSug) { break; } } } if(!anyData) { if(numberAdded==0) { combinedResult.add(original, Collections.<String>emptyList()); Integer tokenFrequency = combinedTokenFrequency.get(original); combinedResult.addFrequency(original, tokenFrequency==null ? 0 : tokenFrequency); } break; } } } return combinedResult; } @Override public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException { for (SolrSpellChecker c : checkers) { c.reload(core, searcher); } } @Override public Analyzer getQueryAnalyzer() { return queryAnalyzer; } @Override public String getDictionaryName() { return dictionaryName; } @Override protected float getAccuracy() { if (accuracy == null) { return super.getAccuracy(); } return accuracy; } @Override protected StringDistance getStringDistance() { if (stringDistance == null) { return super.getStringDistance(); } return stringDistance; } @Override public boolean isSuggestionsMayOverlap() { return true; } }