/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.SpecialOperations;
import org.apache.lucene.analysis.Analyzer;
import org.apache.solr.analysis.ReversedWildcardFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TextField;
/**
 * A variation on the Lucene QueryParser which knows about the field
 * types and query time analyzers configured in Solr's schema.xml.
 *
 * <p>
 * This class also deviates from the Lucene QueryParser by using
 * ConstantScore versions of RangeQuery and PrefixQuery to prevent
 * TooManyClauses exceptions.
 * </p>
 *
 * <p>
 * If one of the magic field names listed in {@link MagicFieldName}
 * (currently "<code>_val_</code>" and "<code>_query_</code>") is used in
 * a term or phrase query, the value is handed to a nested
 * {@link QParser} instead of being treated as a plain term; for
 * "<code>_val_</code>" the value is parsed as a function.
 * </p>
 */
public class SolrQueryParser extends QueryParser {

  /** Schema of the request this parser was created for. */
  protected final IndexSchema schema;

  /** The QParser that owns this parser; used to create nested sub-queries. */
  protected final QParser parser;

  /** Field used when the query text names none; may be null. */
  protected final String defaultField;

  /**
   * Identifies the list of all known "magic fields" that trigger
   * special parsing behavior
   */
  public enum MagicFieldName {
    VAL("_val_", "func"), QUERY("_query_", null);

    /** The field name as it appears in query text. */
    public final String field;

    /** Name of the sub-parser passed to {@link QParser#subQuery}; may be null. */
    public final String subParser;

    MagicFieldName(final String field, final String subParser) {
      this.field = field;
      this.subParser = subParser;
    }

    /** Returns the magic field name itself, e.g. <code>_val_</code>. */
    @Override
    public String toString() {
      return field;
    }

    // field name -> constant, filled once when the enum is initialized
    private static final Map<String,MagicFieldName> lookup
        = new HashMap<String,MagicFieldName>();

    static {
      for (MagicFieldName s : EnumSet.allOf(MagicFieldName.class)) {
        lookup.put(s.toString(), s);
      }
    }

    /**
     * Looks up a magic field by name.
     *
     * @param field the field name used in the query
     * @return the matching constant, or null if the name is not a magic field
     */
    public static MagicFieldName get(final String field) {
      return lookup.get(field);
    }
  }

  // implementation detail - caching ReversedWildcardFilterFactory based on type.
  // A null value is a valid cache entry meaning "this type has no such filter".
  private Map<FieldType, ReversedWildcardFilterFactory> leadingWildcards;

  /**
   * Creates a parser that uses the query analyzer of the request's schema.
   *
   * @param parser the owning QParser; its request supplies schema and config
   * @param defaultField field to use when the query names none, may be null
   */
  public SolrQueryParser(QParser parser, String defaultField) {
    this(parser, defaultField, parser.getReq().getSchema().getQueryAnalyzer());
  }

  /**
   * Creates a parser with an explicit analyzer. Position increments and
   * leading wildcards are enabled, lowercasing of expanded terms is disabled.
   *
   * @param parser the owning QParser; its request supplies schema and config
   * @param defaultField field to use when the query names none, may be null
   * @param analyzer analyzer handed to the underlying Lucene QueryParser
   */
  public SolrQueryParser(QParser parser, String defaultField, Analyzer analyzer) {
    super(parser.getReq().getCore().getSolrConfig().luceneMatchVersion, defaultField, analyzer);
    this.schema = parser.getReq().getSchema();
    this.parser = parser;
    this.defaultField = defaultField;
    setEnablePositionIncrements(true);
    setLowercaseExpandedTerms(false);
    setAllowLeadingWildcard(true);
  }

  /**
   * Finds the {@link ReversedWildcardFilterFactory} in the index-time
   * analysis chain of the given field type, caching the answer per type.
   *
   * @param fieldType the field type to inspect
   * @return the first ReversedWildcardFilterFactory of the type's
   *         {@link TokenizerChain}, or null if the analyzer is not a
   *         TokenizerChain or contains no such factory
   */
  protected ReversedWildcardFilterFactory getReversedWildcardFilterFactory(FieldType fieldType) {
    if (leadingWildcards == null) {
      leadingWildcards = new HashMap<FieldType, ReversedWildcardFilterFactory>();
    }
    ReversedWildcardFilterFactory fac = leadingWildcards.get(fieldType);
    // Cache hit: either a factory was stored, or a null was stored on purpose.
    // containsKey must be asked about the field type (the key), not the value.
    if (fac != null || leadingWildcards.containsKey(fieldType)) {
      return fac;
    }
    Analyzer a = fieldType.getAnalyzer();
    if (a instanceof TokenizerChain) {
      // examine the indexing analysis chain if it supports leading wildcards
      TokenizerChain tc = (TokenizerChain)a;
      TokenFilterFactory[] factories = tc.getTokenFilterFactories();
      for (TokenFilterFactory factory : factories) {
        if (factory instanceof ReversedWildcardFilterFactory) {
          fac = (ReversedWildcardFilterFactory)factory;
          break;
        }
      }
    }
    // remember the result, including "not found" (null)
    leadingWildcards.put(fieldType, fac);
    return fac;
  }

  /**
   * Rejects queries that have neither an explicit nor a default field.
   *
   * @param field the field named in the query, may be null
   * @throws SolrException (BAD_REQUEST) if both field and defaultField are null
   */
  private void checkNullField(String field) throws SolrException {
    if (field == null && defaultField == null) {
      throw new SolrException
        (SolrException.ErrorCode.BAD_REQUEST,
         "no field name specified in query and no default specified via 'df' param");
    }
  }

  /**
   * Runs the text of a multi-term query (prefix, wildcard, regexp) through
   * the multi-term analyzer of a {@link TextField}.
   *
   * @param field name of the field being queried
   * @param part the raw term text, may be null
   * @param fieldType the type of the field
   * @return the analyzed text, or part unchanged when it is null, the field
   *         is not in the schema, or the type is not a TextField
   */
  protected String analyzeIfMultitermTermText(String field, String part, FieldType fieldType) {
    if (part == null) {
      return part;
    }
    SchemaField sf = schema.getFieldOrNull(field);
    if (sf == null || !(fieldType instanceof TextField)) {
      return part;
    }
    return TextField.analyzeMultiTerm(field, part, ((TextField)fieldType).getMultiTermAnalyzer()).utf8ToString();
  }

  /**
   * Builds the query for a term or phrase. Magic field names are delegated
   * to a nested QParser, schema fields to their field type (tokenized fields
   * go through the Lucene implementation), everything else to the Lucene
   * implementation.
   *
   * @throws SolrException (BAD_REQUEST) if no field and no default field is given
   */
  @Override
  protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
    checkNullField(field);
    // intercept magic field name of "_" to use as a hook for our
    // own functions. startsWith (rather than charAt(0)) keeps an empty
    // field name from failing with an index error here.
    if (field.startsWith("_") && parser != null) {
      MagicFieldName magic = MagicFieldName.get(field);
      if (null != magic) {
        QParser nested = parser.subQuery(queryText, magic.subParser);
        return nested.getQuery();
      }
    }
    SchemaField sf = schema.getFieldOrNull(field);
    if (sf != null) {
      FieldType ft = sf.getType();
      // delegate to type for everything except tokenized fields
      if (ft.isTokenized()) {
        // a TextField may ask for phrase queries even when the text was not quoted
        boolean autoPhrase = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
        return super.getFieldQuery(field, queryText, quoted || autoPhrase);
      } else {
        return ft.getFieldQuery(parser, sf, queryText);
      }
    }
    // default to a normal field query
    return super.getFieldQuery(field, queryText, quoted);
  }

  /**
   * Builds a range query by delegating to the field's type.
   *
   * @throws SolrException (BAD_REQUEST) if no field and no default field is given
   */
  @Override
  protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
    checkNullField(field);
    SchemaField sf = schema.getField(field);
    return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
  }

  /**
   * Builds a prefix query after multi-term analysis of the prefix text.
   *
   * @throws SolrException (BAD_REQUEST) if no field and no default field is given
   */
  @Override
  protected Query getPrefixQuery(String field, String termStr) throws ParseException {
    checkNullField(field);
    termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field));
    // Solr has always used constant scoring for prefix queries. This should return constant scoring by default.
    return newPrefixQuery(new Term(field, termStr));
  }

  /**
   * Builds a wildcard query. <code>*:*</code> becomes a match-all query.
   * For field types whose index analyzer contains a
   * {@link ReversedWildcardFilterFactory}, an {@link AutomatonQuery} is
   * built instead of a plain wildcard query: it either matches the reversed,
   * marker-terminated form of the pattern, or matches the pattern while
   * excluding terms that start with the marker character.
   *
   * @throws SolrException (BAD_REQUEST) if no field and no default field is given
   */
  @Override
  protected Query getWildcardQuery(String field, String termStr) throws ParseException {
    checkNullField(field);
    // *:* -> MatchAllDocsQuery
    if ("*".equals(field) && "*".equals(termStr)) {
      return newMatchAllDocsQuery();
    }
    FieldType fieldType = schema.getFieldType(field);
    termStr = analyzeIfMultitermTermText(field, termStr, fieldType);
    // can we use reversed wildcards in this field?
    ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(fieldType);
    if (factory != null) {
      Term term = new Term(field, termStr);
      // fsa representing the query
      Automaton automaton = WildcardQuery.toAutomaton(term);
      // TODO: we should likely use the automaton to calculate shouldReverse, too.
      if (factory.shouldReverse(termStr)) {
        automaton = BasicOperations.concatenate(automaton, BasicAutomata.makeChar(factory.getMarkerChar()));
        // NOTE(review): the return value is ignored, so this relies on
        // reverse() modifying the automaton it is given - confirm against
        // the Lucene version in use.
        SpecialOperations.reverse(automaton);
      } else {
        // reverse wildcardfilter is active: remove false positives
        // fsa representing false positives (markerChar*)
        Automaton falsePositives = BasicOperations.concatenate(
            BasicAutomata.makeChar(factory.getMarkerChar()),
            BasicAutomata.makeAnyString());
        // subtract these away
        automaton = BasicOperations.minus(automaton, falsePositives);
      }
      return new AutomatonQuery(term, automaton) {
        // override toString so its completely transparent
        @Override
        public String toString(String field) {
          StringBuilder buffer = new StringBuilder();
          if (!getField().equals(field)) {
            buffer.append(getField());
            buffer.append(":");
          }
          buffer.append(term.text());
          buffer.append(ToStringUtils.boost(getBoost()));
          return buffer.toString();
        }
      };
    }
    // Solr has always used constant scoring for wildcard queries. This should return constant scoring by default.
    return newWildcardQuery(new Term(field, termStr));
  }

  /**
   * Builds a regular-expression query after multi-term analysis of the
   * expression text.
   *
   * @throws SolrException (BAD_REQUEST) if no field and no default field is given
   */
  @Override
  protected Query getRegexpQuery(String field, String termStr) throws ParseException {
    // same up-front check as the other query factories, so a missing field
    // yields the descriptive BAD_REQUEST error rather than a schema failure
    checkNullField(field);
    termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field));
    return newRegexpQuery(new Term(field, termStr));
  }
}