package org.apache.lucene.queryparser.spans;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.QueryBuilder;

/**
 * Enables setting different analyzers for whole terms vs.
 * multiterm subcomponents (wildcard, fuzzy, prefix).
 * <p>
 * To set different analyzers per field, use PerFieldAnalyzerWrapper.
 * This class also has hooks that allow subclasses to implement different
 * strategies of per-field analyzer handling (see the illustrative sketch
 * at the end of this file).
 * <p>
 * This needs to be public (vs. package private) for Solr integration.
 * </p>
 */
public abstract class AnalyzingQueryParserBase extends QueryBuilder {

  private final Analyzer multiTermAnalyzer;

  /**
   * Default initialization. The analyzer is used for both whole terms and multiterms.
   *
   * @param analyzer analyzer to use for both whole terms and multiterms
   */
  public AnalyzingQueryParserBase(Analyzer analyzer) {
    super(analyzer);
    this.multiTermAnalyzer = analyzer;
  }

  /**
   * Expert: set a different analyzer for whole terms vs. multiterm subcomponents.
   *
   * @param analyzer analyzer for full terms
   * @param multiTermAnalyzer analyzer for multiterms
   */
  AnalyzingQueryParserBase(Analyzer analyzer, Analyzer multiTermAnalyzer) {
    super(analyzer);
    this.multiTermAnalyzer = multiTermAnalyzer;
  }

  //TODO: make this protected in QueryParserBase and then override it;
  //modify it to throw only ParseException

  /**
   * Notionally overrides the functionality of analyzeMultitermTerm. The differences
   * are that this consumes the full token stream, and it throws a ParseException
   * if it encounters no content terms or more than one.
   * <p>
   * The full token stream must be consumed even when an exception is thrown;
   * otherwise the analyzer could be left in a bad state.
   * <p>
   * If multiTermAnalyzer is null, this returns part unaltered (wrapped in a BytesRef).
   *
   * @param multiTermAnalyzer analyzer for multiterms
   * @param field field to analyze
   * @param part term part to analyze
   * @return BytesRef of the analyzed term part
   * @throws ParseException if there is a failure while parsing
   */
  BytesRef analyzeMultitermTermParseEx(Analyzer multiTermAnalyzer, String field, String part)
      throws ParseException {
    //TODO: modify QueryParserBase; analyzeMultitermTerm doesn't currently consume all tokens,
    //and it throws RuntimeExceptions and IllegalArgumentExceptions instead of ParseExceptions.
    //Otherwise this is copied verbatim.
    TokenStream source;

    if (multiTermAnalyzer == null) {
      return new BytesRef(part);
    }

    try {
      source = multiTermAnalyzer.tokenStream(field, part);
      source.reset();
    } catch (IOException e) {
      throw new ParseException("Unable to initialize TokenStream to analyze multiterm term: " + part);
    }

    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();

    int partCount = 0;
    try {
      if (source.incrementToken()) {
        partCount++;
        bytes = termAtt.getBytesRef();
        //consume the rest of the stream so the analyzer is not left in a bad state
        while (source.incrementToken()) {
          partCount++;
        }
      }
    } catch (IOException e1) {
      throw new RuntimeException("IO error analyzing multiterm: " + part);
    }

    try {
      source.end();
      source.close();
    } catch (IOException e) {
      throw new RuntimeException("Unable to end & close TokenStream after analyzing multiterm term: " + part);
    }

    if (partCount < 1) {
      throw new ParseException("Couldn't find any content in >" + part + "<");
    } else if (partCount > 1) {
      throw new ParseException("Found more than one component in a multiterm: " + part);
    }
    return BytesRef.deepCopyOf(bytes);
  }

  /**
   * In this base class, this simply returns the {@link #multiTermAnalyzer},
   * no matter the value of fieldName. This is useful as a hook for overriding.
   *
   * @param fieldName which field's analyzer to use for multiterms
   * @return analyzer to use for multiterms
   */
  protected Analyzer getMultiTermAnalyzer(String fieldName) {
    return multiTermAnalyzer;
  }

  /**
   * In this base class, this simply returns the full-term analyzer
   * ({@link #getAnalyzer()}), no matter the value of fieldName.
   * This is useful as a hook for overriding.
   *
   * @param fieldName which field's analyzer to use for full terms
   * @return analyzer to use for full terms
   */
  protected Analyzer getAnalyzer(String fieldName) {
    return getAnalyzer();
  }
}
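
/**
 * Illustrative sketch only, not part of the original class or the Lucene API: one way a
 * subclass might use the {@link AnalyzingQueryParserBase#getMultiTermAnalyzer(String)} hook
 * to pick a different multiterm analyzer per field. The class name, the map-based strategy,
 * and the helper method are assumptions made for this example.
 */
abstract class PerFieldMultiTermAnalyzingQueryParserBase extends AnalyzingQueryParserBase {

  //per-field overrides; fields not in the map fall back to the default multiterm analyzer
  private final java.util.Map<String, Analyzer> multiTermAnalyzerPerField;

  PerFieldMultiTermAnalyzingQueryParserBase(Analyzer analyzer,
      Analyzer defaultMultiTermAnalyzer,
      java.util.Map<String, Analyzer> multiTermAnalyzerPerField) {
    super(analyzer, defaultMultiTermAnalyzer);
    this.multiTermAnalyzerPerField = multiTermAnalyzerPerField;
  }

  @Override
  protected Analyzer getMultiTermAnalyzer(String fieldName) {
    Analyzer perField = multiTermAnalyzerPerField.get(fieldName);
    return perField != null ? perField : super.getMultiTermAnalyzer(fieldName);
  }

  /**
   * Hypothetical helper showing how a subclass could analyze the text part of a
   * wildcard/prefix/fuzzy subcomponent with the per-field multiterm analyzer chosen above.
   */
  BytesRef analyzeMultiTermPart(String fieldName, String part) throws ParseException {
    return analyzeMultitermTermParseEx(getMultiTermAnalyzer(fieldName), fieldName, part);
  }
}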