/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.index.Term;
import org.apache.solr.search.function.FunctionQuery;
import org.apache.solr.search.function.valuesource.QueryValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.parser.QueryParser;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import java.io.IOException;
import java.util.Collections;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
/**
* Collection of static utilities useful for query parsing.
*
*
*/
public class QueryParsing {
public static final String OP = "q.op"; // the SolrParam used to override the QueryParser "default operator"
public static final String V = "v"; // value of this parameter
public static final String F = "f"; // field that a query or command pertains to
public static final String TYPE = "type";// parser for this query or command
public static final String DEFTYPE = "defType"; // default parser for any direct subqueries
public static final String LOCALPARAM_START = "{!";
public static final char LOCALPARAM_END = '}';
public static final String DOCID = "_docid_";
public static final String SCORE = "score";
// true if the value was specified by the "v" param (i.e. v=myval, or v=$param)
public static final String VAL_EXPLICIT = "__VAL_EXPLICIT__";
/**
* Returns the "preferred" default operator for use by Query Parsers,
* based on the settings in the IndexSchema which may be overridden using
* an optional String override value.
*
* @see IndexSchema#getQueryParserDefaultOperator()
* @see #OP
*/
public static QueryParser.Operator getQueryParserDefaultOperator(final IndexSchema sch,
final String override) {
String val = override;
if (null == val) val = sch.getQueryParserDefaultOperator();
return "AND".equals(val) ? QueryParser.Operator.AND : QueryParser.Operator.OR;
}
/**
* Returns the effective default field based on the 'df' param or
* hardcoded schema default. May be null if either exists specified.
* @see org.apache.solr.common.params.CommonParams#DF
* @see org.apache.solr.schema.IndexSchema#getDefaultSearchFieldName
*/
public static String getDefaultField(final IndexSchema s, final String df) {
return df != null ? df : s.getDefaultSearchFieldName();
}
// note to self: something needs to detect infinite recursion when parsing queries
public static int parseLocalParams(String txt, int start, Map<String, String> target, SolrParams params) throws SyntaxError {
return parseLocalParams(txt, start, target, params, LOCALPARAM_START, LOCALPARAM_END);
}
public static int parseLocalParams(String txt, int start, Map<String, String> target, SolrParams params, String startString, char endChar) throws SyntaxError {
int off = start;
if (!txt.startsWith(startString, off)) return start;
StrParser p = new StrParser(txt, start, txt.length());
p.pos += startString.length(); // skip over "{!"
for (; ;) {
/*
if (p.pos>=txt.length()) {
throw new SyntaxError("Missing '}' parsing local params '" + txt + '"');
}
*/
char ch = p.peek();
if (ch == endChar) {
return p.pos + 1;
}
String id = p.getId();
if (id.length() == 0) {
throw new SyntaxError("Expected ending character '" + endChar + "' parsing local params '" + txt + '"');
}
String val = null;
ch = p.peek();
if (ch != '=') {
// single word... treat {!func} as type=func for easy lookup
val = id;
id = TYPE;
} else {
// saw equals, so read value
p.pos++;
ch = p.peek();
boolean deref = false;
if (ch == '$') {
p.pos++;
ch = p.peek();
deref = true; // dereference whatever value is read by treating it as a variable name
}
if (ch == '\"' || ch == '\'') {
val = p.getQuotedString();
} else {
// read unquoted literal ended by whitespace or endChar (normally '}')
// there is no escaping.
int valStart = p.pos;
for (; ;) {
if (p.pos >= p.end) {
throw new SyntaxError("Missing end to unquoted value starting at " + valStart + " str='" + txt + "'");
}
char c = p.val.charAt(p.pos);
if (c == endChar || Character.isWhitespace(c)) {
val = p.val.substring(valStart, p.pos);
break;
}
p.pos++;
}
}
if (deref) { // dereference parameter
if (params != null) {
val = params.get(val);
}
}
}
if (target != null) target.put(id, val);
}
}
public static String encodeLocalParamVal(String val) {
int len = val.length();
int i = 0;
if (len > 0 && val.charAt(0) != '$') {
for (;i<len; i++) {
char ch = val.charAt(i);
if (Character.isWhitespace(ch) || ch=='}') break;
}
}
if (i>=len) return val;
// We need to enclose in quotes... but now we need to escape
StringBuilder sb = new StringBuilder(val.length() + 4);
sb.append('\'');
for (i=0; i<len; i++) {
char ch = val.charAt(i);
if (ch=='\'') {
sb.append('\\');
}
sb.append(ch);
}
sb.append('\'');
return sb.toString();
}
/**
* "foo" returns null
* "{!prefix f=myfield}yes" returns type="prefix",f="myfield",v="yes"
* "{!prefix f=myfield v=$p}" returns type="prefix",f="myfield",v=params.get("p")
*/
public static SolrParams getLocalParams(String txt, SolrParams params) throws SyntaxError {
if (txt == null || !txt.startsWith(LOCALPARAM_START)) {
return null;
}
Map<String, String> localParams = new HashMap<>();
int start = QueryParsing.parseLocalParams(txt, 0, localParams, params);
String val = localParams.get(V);
if (val == null) {
val = txt.substring(start);
localParams.put(V, val);
} else {
// localParams.put(VAL_EXPLICIT, "true");
}
return new MapSolrParams(localParams);
}
/**
* Returns the Sort object represented by the string, or null if default sort
* by score descending should be used.
* @see #parseSortSpec
* @deprecated use {@link #parseSortSpec}
*/
@Deprecated
public static Sort parseSort(String sortSpec, SolrQueryRequest req) {
return parseSortSpec(sortSpec, req).getSort();
}
/**
* <p>
* The form of the sort specification string currently parsed is:
* </p>
* <pre>
* SortSpec ::= SingleSort [, SingleSort]*
* SingleSort ::= <fieldname|function> SortDirection
* SortDirection ::= top | desc | bottom | asc
* </pre>
* Examples:
* <pre>
* score desc #normal sort by score (will return null)
* weight bottom #sort by weight ascending
* weight desc #sort by weight descending
* height desc,weight desc #sort by height descending, and use weight descending to break any ties
* height desc,weight asc #sort by height descending, using weight ascending as a tiebreaker
* </pre>
* @return a SortSpec object populated with the appropriate Sort (which may be null if
* default score sort is used) and SchemaFields (where applicable) using
* hardcoded default count & offset values.
*/
public static SortSpec parseSortSpec(String sortSpec, SolrQueryRequest req) {
if (sortSpec == null || sortSpec.length() == 0) return newEmptySortSpec();
List<SortField> sorts = new ArrayList<>(4);
List<SchemaField> fields = new ArrayList<>(4);
try {
StrParser sp = new StrParser(sortSpec);
while (sp.pos < sp.end) {
sp.eatws();
final int start = sp.pos;
// short circuit test for a really simple field name
String field = sp.getId(null);
Exception qParserException = null;
if (field == null || !Character.isWhitespace(sp.peekChar())) {
// let's try it as a function instead
field = null;
String funcStr = sp.val.substring(start);
QParser parser = QParser.getParser(funcStr, FunctionQParserPlugin.NAME, req);
Query q = null;
try {
if (parser instanceof FunctionQParser) {
FunctionQParser fparser = (FunctionQParser)parser;
fparser.setParseMultipleSources(false);
fparser.setParseToEnd(false);
q = fparser.getQuery();
if (fparser.localParams != null) {
if (fparser.valFollowedParams) {
// need to find the end of the function query via the string parser
int leftOver = fparser.sp.end - fparser.sp.pos;
sp.pos = sp.end - leftOver; // reset our parser to the same amount of leftover
} else {
// the value was via the "v" param in localParams, so we need to find
// the end of the local params themselves to pick up where we left off
sp.pos = start + fparser.localParamsEnd;
}
} else {
// need to find the end of the function query via the string parser
int leftOver = fparser.sp.end - fparser.sp.pos;
sp.pos = sp.end - leftOver; // reset our parser to the same amount of leftover
}
} else {
// A QParser that's not for function queries.
// It must have been specified via local params.
q = parser.getQuery();
assert parser.getLocalParams() != null;
sp.pos = start + parser.localParamsEnd;
}
Boolean top = sp.getSortDirection();
if (null != top) {
// we have a Query and a valid direction
if (q instanceof FunctionQuery) {
sorts.add(((FunctionQuery)q).getValueSource().getSortField(top));
} else {
sorts.add((new QueryValueSource(q, 0.0f)).getSortField(top));
}
fields.add(null);
continue;
}
} catch (Exception e) {
// hang onto this in case the string isn't a full field name either
qParserException = e;
}
}
// if we made it here, we either have a "simple" field name,
// or there was a problem parsing the string as a complex func/quer
if (field == null) {
// try again, simple rules for a field name with no whitespace
sp.pos = start;
field = sp.getSimpleString();
}
Boolean top = sp.getSortDirection();
if (null == top) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Can't determine a Sort Order (asc or desc) in sort spec " + sp);
}
if (SCORE.equals(field)) {
if (top) {
sorts.add(SortField.FIELD_SCORE);
} else {
sorts.add(new SortField(null, SortField.Type.SCORE, true));
}
fields.add(null);
} else if (DOCID.equals(field)) {
sorts.add(new SortField(null, SortField.Type.DOC, top));
fields.add(null);
} else {
// try to find the field
SchemaField sf = req.getSchema().getFieldOrNull(field);
if (null == sf) {
if (null != qParserException) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"sort param could not be parsed as a query, and is not a "+
"field that exists in the index: " + field,
qParserException);
}
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"sort param field can't be found: " + field);
}
sorts.add(sf.getSortField(top));
fields.add(sf);
}
}
} catch (SyntaxError e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "error in sort: " + sortSpec, e);
}
// normalize a sort on score desc to null
if (sorts.size()==1 && sorts.get(0) == SortField.FIELD_SCORE) {
return newEmptySortSpec();
}
Sort s = new Sort(sorts.toArray(new SortField[sorts.size()]));
return new SortSpec(s, fields);
}
private static SortSpec newEmptySortSpec() {
return new SortSpec(null, Collections.<SchemaField>emptyList());
}
///////////////////////////
///////////////////////////
///////////////////////////
static FieldType writeFieldName(String name, IndexSchema schema, Appendable out, int flags) throws IOException {
FieldType ft = null;
ft = schema.getFieldTypeNoEx(name);
out.append(name);
if (ft == null) {
out.append("(UNKNOWN FIELD " + name + ')');
}
out.append(':');
return ft;
}
static void writeFieldVal(String val, FieldType ft, Appendable out, int flags) throws IOException {
if (ft != null) {
try {
out.append(ft.indexedToReadable(val));
} catch (Exception e) {
out.append("EXCEPTION(val=");
out.append(val);
out.append(")");
}
} else {
out.append(val);
}
}
static void writeFieldVal(BytesRef val, FieldType ft, Appendable out, int flags) throws IOException {
if (ft != null) {
try {
CharsRef readable = new CharsRef();
ft.indexedToReadable(val, readable);
out.append(readable);
} catch (Exception e) {
out.append("EXCEPTION(val=");
out.append(val.utf8ToString());
out.append(")");
}
} else {
out.append(val.utf8ToString());
}
}
/**
* @see #toString(Query,IndexSchema)
*/
public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException {
boolean writeBoost = true;
if (query instanceof TermQuery) {
TermQuery q = (TermQuery) query;
Term t = q.getTerm();
FieldType ft = writeFieldName(t.field(), schema, out, flags);
writeFieldVal(t.bytes(), ft, out, flags);
} else if (query instanceof TermRangeQuery) {
TermRangeQuery q = (TermRangeQuery) query;
String fname = q.getField();
FieldType ft = writeFieldName(fname, schema, out, flags);
out.append(q.includesLower() ? '[' : '{');
BytesRef lt = q.getLowerTerm();
BytesRef ut = q.getUpperTerm();
if (lt == null) {
out.append('*');
} else {
writeFieldVal(lt, ft, out, flags);
}
out.append(" TO ");
if (ut == null) {
out.append('*');
} else {
writeFieldVal(ut, ft, out, flags);
}
out.append(q.includesUpper() ? ']' : '}');
} else if (query instanceof NumericRangeQuery) {
NumericRangeQuery q = (NumericRangeQuery) query;
String fname = q.getField();
FieldType ft = writeFieldName(fname, schema, out, flags);
out.append(q.includesMin() ? '[' : '{');
Number lt = q.getMin();
Number ut = q.getMax();
if (lt == null) {
out.append('*');
} else {
out.append(lt.toString());
}
out.append(" TO ");
if (ut == null) {
out.append('*');
} else {
out.append(ut.toString());
}
out.append(q.includesMax() ? ']' : '}');
} else if (query instanceof BooleanQuery) {
BooleanQuery q = (BooleanQuery) query;
boolean needParens = false;
if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0 || q.isCoordDisabled()) {
needParens = true;
}
if (needParens) {
out.append('(');
}
boolean first = true;
for (BooleanClause c : q.clauses()) {
if (!first) {
out.append(' ');
} else {
first = false;
}
if (c.isProhibited()) {
out.append('-');
} else if (c.isRequired()) {
out.append('+');
}
Query subQuery = c.getQuery();
boolean wrapQuery = false;
// TODO: may need to put parens around other types
// of queries too, depending on future syntax.
if (subQuery instanceof BooleanQuery) {
wrapQuery = true;
}
if (wrapQuery) {
out.append('(');
}
toString(subQuery, schema, out, flags);
if (wrapQuery) {
out.append(')');
}
}
if (needParens) {
out.append(')');
}
if (q.getMinimumNumberShouldMatch() > 0) {
out.append('~');
out.append(Integer.toString(q.getMinimumNumberShouldMatch()));
}
if (q.isCoordDisabled()) {
out.append("/no_coord");
}
} else if (query instanceof PrefixQuery) {
PrefixQuery q = (PrefixQuery) query;
Term prefix = q.getPrefix();
FieldType ft = writeFieldName(prefix.field(), schema, out, flags);
out.append(prefix.text());
out.append('*');
} else if (query instanceof WildcardQuery) {
out.append(query.toString());
writeBoost = false;
} else if (query instanceof FuzzyQuery) {
out.append(query.toString());
writeBoost = false;
} else if (query instanceof ConstantScoreQuery) {
out.append(query.toString());
writeBoost = false;
} else if (query instanceof WrappedQuery) {
WrappedQuery q = (WrappedQuery)query;
out.append(q.getOptions());
toString(q.getWrappedQuery(), schema, out, flags);
writeBoost = false; // we don't use the boost on wrapped queries
} else {
out.append(query.getClass().getSimpleName()
+ '(' + query.toString() + ')');
writeBoost = false;
}
if (writeBoost && query.getBoost() != 1.0f) {
out.append("^");
out.append(Float.toString(query.getBoost()));
}
}
/**
* Formats a Query for debugging, using the IndexSchema to make
* complex field types readable.
* <p/>
* <p>
* The benefit of using this method instead of calling
* <code>Query.toString</code> directly is that it knows about the data
* types of each field, so any field which is encoded in a particularly
* complex way is still readable. The downside is that it only knows
* about built in Query types, and will not be able to format custom
* Query classes.
* </p>
*/
public static String toString(Query query, IndexSchema schema) {
try {
StringBuilder sb = new StringBuilder();
toString(query, schema, sb, 0);
return sb.toString();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
* Simple class to help with parsing a string.
* <b>Note: This API is experimental and may change in non backward-compatible ways in the future</b>
*/
public static class StrParser {
String val;
int pos;
int end;
public StrParser(String val) {
this(val, 0, val.length());
}
public StrParser(String val, int start, int end) {
this.val = val;
this.pos = start;
this.end = end;
}
void eatws() {
while (pos < end && Character.isWhitespace(val.charAt(pos))) pos++;
}
char ch() {
return pos < end ? val.charAt(pos) : 0;
}
void skip(int nChars) {
pos = Math.max(pos + nChars, end);
}
boolean opt(String s) {
eatws();
int slen = s.length();
if (val.regionMatches(pos, s, 0, slen)) {
pos += slen;
return true;
}
return false;
}
boolean opt(char ch) {
eatws();
if (pos < end && val.charAt(pos) == ch) {
pos++;
return true;
}
return false;
}
void expect(String s) throws SyntaxError {
eatws();
int slen = s.length();
if (val.regionMatches(pos, s, 0, slen)) {
pos += slen;
} else {
throw new SyntaxError("Expected '" + s + "' at position " + pos + " in '" + val + "'");
}
}
float getFloat() {
eatws();
char[] arr = new char[end - pos];
int i;
for (i = 0; i < arr.length; i++) {
char ch = val.charAt(pos);
if ((ch >= '0' && ch <= '9')
|| ch == '+' || ch == '-'
|| ch == '.' || ch == 'e' || ch == 'E'
) {
pos++;
arr[i] = ch;
} else {
break;
}
}
return Float.parseFloat(new String(arr, 0, i));
}
Number getNumber() {
eatws();
int start = pos;
boolean flt = false;
while (pos < end) {
char ch = val.charAt(pos);
if ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-') {
pos++;
} else if (ch == '.' || ch =='e' || ch=='E') {
flt = true;
pos++;
} else {
break;
}
}
String v = val.substring(start,pos);
if (flt) {
return Double.parseDouble(v);
} else {
return Long.parseLong(v);
}
}
double getDouble() {
eatws();
char[] arr = new char[end - pos];
int i;
for (i = 0; i < arr.length; i++) {
char ch = val.charAt(pos);
if ((ch >= '0' && ch <= '9')
|| ch == '+' || ch == '-'
|| ch == '.' || ch == 'e' || ch == 'E'
) {
pos++;
arr[i] = ch;
} else {
break;
}
}
return Double.parseDouble(new String(arr, 0, i));
}
int getInt() {
eatws();
char[] arr = new char[end - pos];
int i;
for (i = 0; i < arr.length; i++) {
char ch = val.charAt(pos);
if ((ch >= '0' && ch <= '9')
|| ch == '+' || ch == '-'
) {
pos++;
arr[i] = ch;
} else {
break;
}
}
return Integer.parseInt(new String(arr, 0, i));
}
String getId() throws SyntaxError {
return getId("Expected identifier");
}
String getId(String errMessage) throws SyntaxError {
eatws();
int id_start = pos;
char ch;
if (pos < end && (ch = val.charAt(pos)) != '$' && Character.isJavaIdentifierStart(ch)) {
pos++;
while (pos < end) {
ch = val.charAt(pos);
// if (!Character.isJavaIdentifierPart(ch) && ch != '.' && ch != ':') {
if (!Character.isJavaIdentifierPart(ch) && ch != '.') {
break;
}
pos++;
}
return val.substring(id_start, pos);
}
if (errMessage != null) {
throw new SyntaxError(errMessage + " at pos " + pos + " str='" + val + "'");
}
return null;
}
public String getGlobbedId(String errMessage) throws SyntaxError {
eatws();
int id_start = pos;
char ch;
if (pos < end && (ch = val.charAt(pos)) != '$' && (Character.isJavaIdentifierStart(ch) || ch=='?' || ch=='*')) {
pos++;
while (pos < end) {
ch = val.charAt(pos);
if (!(Character.isJavaIdentifierPart(ch) || ch=='?' || ch=='*') && ch != '.') {
break;
}
pos++;
}
return val.substring(id_start, pos);
}
if (errMessage != null) {
throw new SyntaxError(errMessage + " at pos " + pos + " str='" + val + "'");
}
return null;
}
/**
* Skips leading whitespace and returns whatever sequence of non
* whitespace it can find (or hte empty string)
*/
String getSimpleString() {
eatws();
int startPos = pos;
char ch;
while (pos < end) {
ch = val.charAt(pos);
if (Character.isWhitespace(ch)) break;
pos++;
}
return val.substring(startPos, pos);
}
/**
* Sort direction or null if current position does not indicate a
* sort direction. (True is desc, False is asc).
* Position is advanced to after the comma (or end) when result is non null
*/
Boolean getSortDirection() throws SyntaxError {
final int startPos = pos;
final String order = getId(null);
Boolean top = null;
if (null != order) {
final String orderLowerCase = order.toLowerCase(Locale.ROOT);
if ("desc".equals(orderLowerCase) || "top".equals(orderLowerCase)) {
top = true;
} else if ("asc".equals(orderLowerCase) || "bottom".equals(orderLowerCase)) {
top = false;
}
// it's not a legal direction if more stuff comes after it
eatws();
final char c = ch();
if (0 == c) {
// :NOOP
} else if (',' == c) {
pos++;
} else {
top = null;
}
}
if (null == top) pos = startPos; // no direction, reset
return top;
}
// return null if not a string
String getQuotedString() throws SyntaxError {
eatws();
char delim = peekChar();
if (!(delim == '\"' || delim == '\'')) {
return null;
}
int val_start = ++pos;
StringBuilder sb = new StringBuilder(); // needed for escaping
for (; ;) {
if (pos >= end) {
throw new SyntaxError("Missing end quote for string at pos " + (val_start - 1) + " str='" + val + "'");
}
char ch = val.charAt(pos);
if (ch == '\\') {
pos++;
if (pos >= end) break;
ch = val.charAt(pos);
switch (ch) {
case 'n':
ch = '\n';
break;
case 't':
ch = '\t';
break;
case 'r':
ch = '\r';
break;
case 'b':
ch = '\b';
break;
case 'f':
ch = '\f';
break;
case 'u':
if (pos + 4 >= end) {
throw new SyntaxError("bad unicode escape \\uxxxx at pos" + (val_start - 1) + " str='" + val + "'");
}
ch = (char) Integer.parseInt(val.substring(pos + 1, pos + 5), 16);
pos += 4;
break;
}
} else if (ch == delim) {
pos++; // skip over the quote
break;
}
sb.append(ch);
pos++;
}
return sb.toString();
}
// next non-whitespace char
char peek() {
eatws();
return pos < end ? val.charAt(pos) : 0;
}
// next char
char peekChar() {
return pos < end ? val.charAt(pos) : 0;
}
@Override
public String toString() {
return "'" + val + "'" + ", pos=" + pos;
}
}
/**
* Builds a list of String which are stringified versions of a list of Queries
*/
public static List<String> toString(List<Query> queries, IndexSchema schema) {
List<String> out = new ArrayList<>(queries.size());
for (Query q : queries) {
out.add(QueryParsing.toString(q, schema));
}
return out;
}
}