/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.spelling.suggest.fst; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.analyzing.FuzzySuggester; import org.apache.lucene.util.automaton.LevenshteinAutomata; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.schema.FieldType; import org.apache.solr.spelling.suggest.LookupFactory; /** * Factory for {@link FuzzySuggester} * @lucene.experimental */ public class FuzzyLookupFactory extends LookupFactory { /** * If <code>true</code>, maxEdits, minFuzzyLength, transpositions and nonFuzzyPrefix * will be measured in Unicode code points (actual letters) instead of bytes. */ public static final String UNICODE_AWARE = "unicodeAware"; /** * Maximum number of edits allowed, used by {@link LevenshteinAutomata#toAutomaton(int)} * in bytes or Unicode code points (if {@link #UNICODE_AWARE} option is set to true). */ public static final String MAX_EDITS = "maxEdits"; /** * If transpositions are allowed, Fuzzy suggestions will be computed based on a primitive * edit operation. If it is false, it will be based on the classic Levenshtein algorithm. * Transpositions of bytes or Unicode code points (if {@link #UNICODE_AWARE} option is set to true). */ public static final String TRANSPOSITIONS = "transpositions"; /** * Length of common (non-fuzzy) prefix for the suggestions * in bytes or Unicode code points (if {@link #UNICODE_AWARE} option is set to true). */ public static final String NON_FUZZY_PREFIX = "nonFuzzyPrefix"; /** * Minimum length of lookup key before any edits are allowed for the suggestions * in bytes or Unicode code points (if {@link #UNICODE_AWARE} option is set to true). */ public static final String MIN_FUZZY_LENGTH = "minFuzzyLength"; /** * File name for the automaton. */ private static final String FILENAME = "fwfsta.bin"; @Override public Lookup create(NamedList params, SolrCore core) { // mandatory parameter Object fieldTypeName = params.get(AnalyzingLookupFactory.QUERY_ANALYZER); if (fieldTypeName == null) { throw new IllegalArgumentException("Error in configuration: " + AnalyzingLookupFactory.QUERY_ANALYZER + " parameter is mandatory"); } // retrieve index and query analyzers for the field FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString()); if (ft == null) { throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema"); } Analyzer indexAnalyzer = ft.getIndexAnalyzer(); Analyzer queryAnalyzer = ft.getQueryAnalyzer(); // optional parameters boolean exactMatchFirst = (params.get(AnalyzingLookupFactory.EXACT_MATCH_FIRST) != null) ? Boolean.valueOf(params.get(AnalyzingLookupFactory.EXACT_MATCH_FIRST).toString()) : true; boolean preserveSep = (params.get(AnalyzingLookupFactory.PRESERVE_SEP) != null) ? Boolean.valueOf(params.get(AnalyzingLookupFactory.PRESERVE_SEP).toString()) : true; int options = 0; if (exactMatchFirst) { options |= FuzzySuggester.EXACT_FIRST; } if (preserveSep) { options |= FuzzySuggester.PRESERVE_SEP; } int maxSurfaceFormsPerAnalyzedForm = (params.get(AnalyzingLookupFactory.MAX_SURFACE_FORMS) != null) ? Integer.parseInt(params.get(AnalyzingLookupFactory.MAX_SURFACE_FORMS).toString()) : 256; int maxGraphExpansions = (params.get(AnalyzingLookupFactory.MAX_EXPANSIONS) != null) ? Integer.parseInt(params.get(AnalyzingLookupFactory.MAX_EXPANSIONS).toString()) : -1; boolean preservePositionIncrements = params.get(AnalyzingLookupFactory.PRESERVE_POSITION_INCREMENTS) != null ? Boolean.valueOf(params.get(AnalyzingLookupFactory.PRESERVE_POSITION_INCREMENTS).toString()) : false; int maxEdits = (params.get(MAX_EDITS) != null) ? Integer.parseInt(params.get(MAX_EDITS).toString()) : FuzzySuggester.DEFAULT_MAX_EDITS; boolean transpositions = (params.get(TRANSPOSITIONS) != null) ? Boolean.parseBoolean(params.get(TRANSPOSITIONS).toString()) : FuzzySuggester.DEFAULT_TRANSPOSITIONS; int nonFuzzyPrefix = (params.get(NON_FUZZY_PREFIX) != null) ? Integer.parseInt(params.get(NON_FUZZY_PREFIX).toString()) :FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX; int minFuzzyLength = (params.get(MIN_FUZZY_LENGTH) != null) ? Integer.parseInt(params.get(MIN_FUZZY_LENGTH).toString()) :FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH; boolean unicodeAware = (params.get(UNICODE_AWARE) != null) ? Boolean.valueOf(params.get(UNICODE_AWARE).toString()) : FuzzySuggester.DEFAULT_UNICODE_AWARE; return new FuzzySuggester(getTempDir(), "suggester", indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, maxEdits, transpositions, nonFuzzyPrefix, minFuzzyLength, unicodeAware); } @Override public String storeFileName() { return FILENAME; } }