/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.spelling.suggest.fst; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.schema.FieldType; import org.apache.solr.spelling.suggest.LookupFactory; /** * Factory for {@link AnalyzingSuggester} * @lucene.experimental */ public class AnalyzingLookupFactory extends LookupFactory { /** * If <code>true</code>, exact suggestions are returned first, even if they are prefixes * of other strings in the automaton (possibly with larger weights). */ public static final String EXACT_MATCH_FIRST = "exactMatchFirst"; /** * If <code>true</code>, then a separator between tokens is preserved. This means that * suggestions are sensitive to tokenization (e.g. baseball is different from base ball). */ public static final String PRESERVE_SEP = "preserveSep"; /** * When multiple suggestions collide to the same analyzed form, this is the limit of * how many unique surface forms we keep. */ public static final String MAX_SURFACE_FORMS = "maxSurfaceFormsPerAnalyzedForm"; /** * When building the FST ("index-time"), we add each path through the tokenstream graph * as an individual entry. This places an upper-bound on how many expansions will be added * for a single suggestion. */ public static final String MAX_EXPANSIONS = "maxGraphExpansions"; // confusingly: the queryAnalyzerFieldType parameter is something totally different, this // is solr's "analysis" of the queries before they even reach the suggester (really makes // little sense for suggest at all, only for spellcheck). So we pick different names. /** * The analyzer used at "query-time" and "build-time" to analyze suggestions. */ public static final String QUERY_ANALYZER = "suggestAnalyzerFieldType"; /** * Whether position holes should appear in the automaton. */ public static final String PRESERVE_POSITION_INCREMENTS = "preservePositionIncrements"; /** * File name for the automaton. * */ private static final String FILENAME = "wfsta.bin"; @Override public Lookup create(NamedList params, SolrCore core) { // mandatory parameter Object fieldTypeName = params.get(QUERY_ANALYZER); if (fieldTypeName == null) { throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory"); } FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString()); if (ft == null) { throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema"); } Analyzer indexAnalyzer = ft.getIndexAnalyzer(); Analyzer queryAnalyzer = ft.getQueryAnalyzer(); // optional parameters boolean exactMatchFirst = params.get(EXACT_MATCH_FIRST) != null ? Boolean.valueOf(params.get(EXACT_MATCH_FIRST).toString()) : true; boolean preserveSep = params.get(PRESERVE_SEP) != null ? Boolean.valueOf(params.get(PRESERVE_SEP).toString()) : true; int flags = 0; if (exactMatchFirst) { flags |= AnalyzingSuggester.EXACT_FIRST; } if (preserveSep) { flags |= AnalyzingSuggester.PRESERVE_SEP; } int maxSurfaceFormsPerAnalyzedForm = params.get(MAX_SURFACE_FORMS) != null ? Integer.parseInt(params.get(MAX_SURFACE_FORMS).toString()) : 256; int maxGraphExpansions = params.get(MAX_EXPANSIONS) != null ? Integer.parseInt(params.get(MAX_EXPANSIONS).toString()) : -1; boolean preservePositionIncrements = params.get(PRESERVE_POSITION_INCREMENTS) != null ? Boolean.valueOf(params.get(PRESERVE_POSITION_INCREMENTS).toString()) : false; return new AnalyzingSuggester(getTempDir(), "suggester", indexAnalyzer, queryAnalyzer, flags, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements); } @Override public String storeFileName() { return FILENAME; } }