/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.search.suggest.document; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.AnalyzerWrapper; import org.apache.lucene.analysis.TokenStreamToAutomaton; import org.apache.lucene.util.automaton.Operations; /** * Wraps an {@link org.apache.lucene.analysis.Analyzer} * to provide additional completion-only tuning * (e.g. preserving token separators, preserving position increments while converting * a token stream to an automaton) * <p> * Can be used to index {@link SuggestField} and {@link ContextSuggestField} * and as a query analyzer to {@link PrefixCompletionQuery} amd {@link FuzzyCompletionQuery} * <p> * NOTE: In most cases, index and query analyzer should have same values for {@link #preservePositionIncrements()} * and {@link #preserveSep()} * * @lucene.experimental */ public final class CompletionAnalyzer extends AnalyzerWrapper { /** * Represents the separation between tokens, if * <code>preserveSep</code> is <code>true</code> * <p> * Same label is used as a delimiter in the {@link org.apache.lucene.search.suggest.document.CompletionTokenStream} * payload */ final static int SEP_LABEL = NRTSuggesterBuilder.PAYLOAD_SEP; /** * Represent a hole character, inserted by {@link org.apache.lucene.analysis.TokenStreamToAutomaton} */ final static int HOLE_CHARACTER = TokenStreamToAutomaton.HOLE; final static int DEFAULT_MAX_GRAPH_EXPANSIONS = Operations.DEFAULT_MAX_DETERMINIZED_STATES; final static boolean DEFAULT_PRESERVE_SEP = true; final static boolean DEFAULT_PRESERVE_POSITION_INCREMENTS = true; private final Analyzer analyzer; /** * Preserve separation between tokens * when converting to an automaton * <p> * Defaults to <code>true</code> */ private final boolean preserveSep; /** * Preserve position increments for tokens * when converting to an automaton * <p> * Defaults to <code>true</code> */ private final boolean preservePositionIncrements; /** * Sets the maximum number of graph expansions of a completion automaton * <p> * Defaults to <code>-1</code> (no limit) */ private final int maxGraphExpansions; /** * Wraps an analyzer to convert its output token stream to an automaton * * @param analyzer token stream to be converted to an automaton * @param preserveSep Preserve separation between tokens when converting to an automaton * @param preservePositionIncrements Preserve position increments for tokens when converting to an automaton * @param maxGraphExpansions Sets the maximum number of graph expansions of a completion automaton */ public CompletionAnalyzer(Analyzer analyzer, boolean preserveSep, boolean preservePositionIncrements, int maxGraphExpansions) { super(PER_FIELD_REUSE_STRATEGY); this.analyzer = analyzer; this.preserveSep = preserveSep; this.preservePositionIncrements = preservePositionIncrements; this.maxGraphExpansions = maxGraphExpansions; } /** * Calls {@link #CompletionAnalyzer(org.apache.lucene.analysis.Analyzer, boolean, boolean, int)} * preserving token separation, position increments and no limit on graph expansions */ public CompletionAnalyzer(Analyzer analyzer) { this(analyzer, DEFAULT_PRESERVE_SEP, DEFAULT_PRESERVE_POSITION_INCREMENTS, DEFAULT_MAX_GRAPH_EXPANSIONS); } /** * Calls {@link #CompletionAnalyzer(org.apache.lucene.analysis.Analyzer, boolean, boolean, int)} * with no limit on graph expansions */ public CompletionAnalyzer(Analyzer analyzer, boolean preserveSep, boolean preservePositionIncrements) { this(analyzer, preserveSep, preservePositionIncrements, DEFAULT_MAX_GRAPH_EXPANSIONS); } /** * Calls {@link #CompletionAnalyzer(org.apache.lucene.analysis.Analyzer, boolean, boolean, int)} * preserving token separation and position increments */ public CompletionAnalyzer(Analyzer analyzer, int maxGraphExpansions) { this(analyzer, DEFAULT_PRESERVE_SEP, DEFAULT_PRESERVE_POSITION_INCREMENTS, maxGraphExpansions); } /** * Returns true if separation between tokens are preserved when converting * the token stream to an automaton */ public boolean preserveSep() { return preserveSep; } /** * Returns true if position increments are preserved when converting * the token stream to an automaton */ public boolean preservePositionIncrements() { return preservePositionIncrements; } @Override protected Analyzer getWrappedAnalyzer(String fieldName) { return analyzer; } @Override protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { CompletionTokenStream tokenStream = new CompletionTokenStream(components.getTokenStream(), preserveSep, preservePositionIncrements, maxGraphExpansions); return new TokenStreamComponents(components.getTokenizer(), tokenStream); } }