/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.schema; import java.io.IOException; import java.util.Map; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.index.IndexableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; import org.apache.lucene.search.*; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.QueryBuilder; import org.apache.solr.common.SolrException; import org.apache.solr.query.SolrRangeQuery; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; import org.apache.solr.search.Sorting; import org.apache.solr.uninverting.UninvertingReader.Type; /** <code>TextField</code> is the basic type for configurable text analysis. * Analyzers for field types using this implementation should be defined in the schema. * */ public class TextField extends FieldType { protected boolean autoGeneratePhraseQueries; protected boolean enableGraphQueries; /** * Analyzer set by schema for text types to use when searching fields * of this type, subclasses can set analyzer themselves or override * getIndexAnalyzer() * This analyzer is used to process wildcard, prefix, regex and other multiterm queries. It * assembles a list of tokenizer +filters that "make sense" for this, primarily accent folding and * lowercasing filters, and charfilters. * * @see #getMultiTermAnalyzer * @see #setMultiTermAnalyzer */ protected Analyzer multiTermAnalyzer=null; private boolean isExplicitMultiTermAnalyzer = false; @Override protected void init(IndexSchema schema, Map<String,String> args) { properties |= TOKENIZED; if (schema.getVersion() > 1.1F && // only override if it's not explicitly true 0 == (trueProperties & OMIT_TF_POSITIONS)) { properties &= ~OMIT_TF_POSITIONS; } if (schema.getVersion() > 1.3F) { autoGeneratePhraseQueries = false; } else { autoGeneratePhraseQueries = true; } String autoGeneratePhraseQueriesStr = args.remove(AUTO_GENERATE_PHRASE_QUERIES); if (autoGeneratePhraseQueriesStr != null) autoGeneratePhraseQueries = Boolean.parseBoolean(autoGeneratePhraseQueriesStr); enableGraphQueries = true; String enableGraphQueriesStr = args.remove(ENABLE_GRAPH_QUERIES); if (enableGraphQueriesStr != null) enableGraphQueries = Boolean.parseBoolean(enableGraphQueriesStr); super.init(schema, args); } /** * Returns the Analyzer to be used when searching fields of this type when mult-term queries are specified. * <p> * This method may be called many times, at any time. * </p> * @see #getIndexAnalyzer */ public Analyzer getMultiTermAnalyzer() { return multiTermAnalyzer; } public void setMultiTermAnalyzer(Analyzer analyzer) { this.multiTermAnalyzer = analyzer; } public boolean getAutoGeneratePhraseQueries() { return autoGeneratePhraseQueries; } public boolean getEnableGraphQueries() { return enableGraphQueries; } @Override public SortField getSortField(SchemaField field, boolean reverse) { /* :TODO: maybe warn if isTokenized(), but doesn't use LimitTokenCountFilter in its chain? */ field.checkSortability(); return Sorting.getTextSortField(field.getName(), reverse, field.sortMissingLast(), field.sortMissingFirst()); } @Override public ValueSource getValueSource(SchemaField field, QParser parser) { return new SortedSetFieldSource(field.getName()); } @Override public Type getUninversionType(SchemaField sf) { return Type.SORTED_SET_BINARY; } @Override public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { writer.writeStr(name, f.stringValue(), true); } @Override public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) { return parseFieldQuery(parser, getQueryAnalyzer(), field.getName(), externalVal); } @Override public Object toObject(SchemaField sf, BytesRef term) { return term.utf8ToString(); } @Override protected boolean supportsAnalyzers() { return true; } @Override public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { Analyzer multiAnalyzer = getMultiTermAnalyzer(); BytesRef lower = analyzeMultiTerm(field.getName(), part1, multiAnalyzer); BytesRef upper = analyzeMultiTerm(field.getName(), part2, multiAnalyzer); return new SolrRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive); } public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) { if (part == null || analyzerIn == null) return null; try (TokenStream source = analyzerIn.tokenStream(field, part)){ source.reset(); TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); if (!source.incrementToken()) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part); BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef()); if (source.incrementToken()) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part); source.end(); return bytes; } catch (IOException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"error analyzing range part: " + part, e); } } static Query parseFieldQuery(QParser parser, Analyzer analyzer, String field, String queryText) { // note, this method always worked this way (but nothing calls it?) because it has no idea of quotes... return new QueryBuilder(analyzer).createPhraseQuery(field, queryText); } public void setIsExplicitMultiTermAnalyzer(boolean isExplicitMultiTermAnalyzer) { this.isExplicitMultiTermAnalyzer = isExplicitMultiTermAnalyzer; } public boolean isExplicitMultiTermAnalyzer() { return isExplicitMultiTermAnalyzer; } @Override public Object marshalSortValue(Object value) { return marshalStringSortValue(value); } @Override public Object unmarshalSortValue(Object value) { return unmarshalStringSortValue(value); } }