/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.search; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryParser.QueryParser.Operator; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.core.SolrCore; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.function.FunctionQuery; import org.apache.solr.search.function.ValueSource; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; /** * Collection of static utilities useful for query parsing. * * @version $Id: QueryParsing.java 950853 2010-06-03 01:46:44Z hossman $ */ public class QueryParsing { public static final String OP = "q.op"; // the SolrParam used to override the QueryParser "default operator" public static final String V = "v"; // value of this parameter public static final String F = "f"; // field that a query or command pertains to public static final String TYPE = "type";// type of this query or command public static final String DEFTYPE = "defType"; // default type for any direct subqueries public static final String LOCALPARAM_START = "{!"; public static final char LOCALPARAM_END = '}'; public static final String DOCID = "_docid_"; /** * Returns the "prefered" default operator for use by Query Parsers, * based on the settings in the IndexSchema which may be overridden using * an optional String override value. * * @see IndexSchema#getQueryParserDefaultOperator() * @see #OP */ public static Operator getQueryParserDefaultOperator(final IndexSchema sch, final String override) { String val = override; if (null == val) val = sch.getQueryParserDefaultOperator(); return "AND".equals(val) ? Operator.AND : Operator.OR; } /** * Helper utility for parsing a query using the Lucene QueryParser syntax. * * @param qs query expression in standard Lucene syntax * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information */ public static Query parseQuery(String qs, IndexSchema schema) { return parseQuery(qs, null, schema); } /** * Helper utility for parsing a query using the Lucene QueryParser syntax. * * @param qs query expression in standard Lucene syntax * @param defaultField default field used for unqualified search terms in the query expression * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information */ public static Query parseQuery(String qs, String defaultField, IndexSchema schema) { try { Query query = schema.getSolrQueryParser(defaultField).parse(qs); if (SolrCore.log.isTraceEnabled()) { SolrCore.log.trace("After QueryParser:" + query); } return query; } catch (ParseException e) { SolrCore.log(e); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing Lucene query", e); } } /** * Helper utility for parsing a query using the Lucene QueryParser syntax. * * @param qs query expression in standard Lucene syntax * @param defaultField default field used for unqualified search terms in the query expression * @param params used to determine the default operator, overriding the schema specified operator * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information */ public static Query parseQuery(String qs, String defaultField, SolrParams params, IndexSchema schema) { try { SolrQueryParser parser = schema.getSolrQueryParser(defaultField); parser.setDefaultOperator(getQueryParserDefaultOperator (schema, params.get(QueryParsing.OP))); Query query = parser.parse(qs); if (SolrCore.log.isTraceEnabled()) { SolrCore.log.trace("After QueryParser:" + query); } return query; } catch (ParseException e) { SolrCore.log(e); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Query parsing error: " + e.getMessage(), e); } } // note to self: something needs to detect infinite recursion when parsing queries static int parseLocalParams(String txt, int start, Map<String, String> target, SolrParams params) throws ParseException { int off = start; if (!txt.startsWith(LOCALPARAM_START, off)) return start; StrParser p = new StrParser(txt, start, txt.length()); p.pos += 2; // skip over "{!" for (; ;) { /* if (p.pos>=txt.length()) { throw new ParseException("Missing '}' parsing local params '" + txt + '"'); } */ char ch = p.peek(); if (ch == LOCALPARAM_END) { return p.pos + 1; } String id = p.getId(); if (id.length() == 0) { throw new ParseException("Expected identifier '}' parsing local params '" + txt + '"'); } String val = null; ch = p.peek(); if (ch != '=') { // single word... treat {!func} as type=func for easy lookup val = id; id = TYPE; } else { // saw equals, so read value p.pos++; ch = p.peek(); if (ch == '\"' || ch == '\'') { val = p.getQuotedString(); } else if (ch == '$') { p.pos++; // dereference parameter String pname = p.getId(); if (params != null) { val = params.get(pname); } } else { // read unquoted literal ended by whitespace or '}' // there is no escaping. int valStart = p.pos; for (; ;) { if (p.pos >= p.end) { throw new ParseException("Missing end to unquoted value starting at " + valStart + " str='" + txt + "'"); } char c = p.val.charAt(p.pos); if (c == LOCALPARAM_END || Character.isWhitespace(c)) { val = p.val.substring(valStart, p.pos); break; } p.pos++; } } } if (target != null) target.put(id, val); } } /** * "foo" returns null * "{!prefix f=myfield}yes" returns type="prefix",f="myfield",v="yes" * "{!prefix f=myfield v=$p}" returns type="prefix",f="myfield",v=params.get("p") */ public static SolrParams getLocalParams(String txt, SolrParams params) throws ParseException { if (txt == null || !txt.startsWith(LOCALPARAM_START)) { return null; } Map<String, String> localParams = new HashMap<String, String>(); int start = QueryParsing.parseLocalParams(txt, 0, localParams, params); String val; if (start >= txt.length()) { // if the rest of the string is empty, check for "v" to provide the value val = localParams.get(V); val = val == null ? "" : val; } else { val = txt.substring(start); } localParams.put(V, val); return new MapSolrParams(localParams); } /** * Returns null if the sortSpec is the standard sort desc. * <p/> * <p> * The form of the sort specification string currently parsed is: * </p> * <pre>> * SortSpec ::= SingleSort [, SingleSort]* * SingleSort ::= <fieldname> SortDirection * SortDirection ::= top | desc | bottom | asc * </pre> * Examples: * <pre> * score desc #normal sort by score (will return null) * weight bottom #sort by weight ascending * weight desc #sort by weight descending * height desc,weight desc #sort by height descending, and use weight descending to break any ties * height desc,weight asc #sort by height descending, using weight ascending as a tiebreaker * </pre> */ public static Sort parseSort(String sortSpec, IndexSchema schema) { if (sortSpec == null || sortSpec.length() == 0) return null; char[] chars = sortSpec.toCharArray(); int i = 0; StringBuilder buffer = new StringBuilder(sortSpec.length()); String sort = null; String order = null; int functionDepth = 0; boolean score = true; List<SortField> lst = new ArrayList<SortField>(5); boolean needOrder = false; while (i < chars.length) { if (Character.isWhitespace(chars[i]) && functionDepth == 0) { if (buffer.length() == 0) { //do nothing } else { if (needOrder == false) { sort = buffer.toString().trim(); buffer.setLength(0); needOrder = true; } else { order = buffer.toString().trim(); buffer.setLength(0); needOrder = false; } } } else if (chars[i] == '(' && functionDepth >= 0) { buffer.append(chars[i]); functionDepth++; } else if (chars[i] == ')' && functionDepth > 0) { buffer.append(chars[i]); functionDepth--;//close up one layer } else if (chars[i] == ',' && functionDepth == 0) {//can either be a separator of sort declarations, or a separator in a function //we have a separator between sort declarations, // We may need an order still, but then evaluate it, as we should have everything we need if (needOrder == true && buffer.length() > 0){ order = buffer.toString().trim(); buffer.setLength(0); needOrder = false; } score = processSort(schema, sort, order, lst); sort = null; order = null; buffer.setLength(0);//get ready for the next one, if there is one } else if (chars[i] == ',' && functionDepth > 0) { //we are in a function buffer.append(chars[i]); } else { //just a regular old char, add it to the buffer buffer.append(chars[i]); } i++; } if (buffer.length() > 0 && needOrder){//see if we have anything left, at most it should be an order order = buffer.toString().trim(); buffer.setLength(0); needOrder = false; } //do some sanity checks if (functionDepth != 0){ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to parse sort spec, mismatched parentheses: " + sortSpec); } if (buffer.length() > 0){//there's something wrong, as everything should have been parsed by now throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to parse sort spec: " + sortSpec); } if (needOrder == false && sort != null && sort.equals("") == false && order != null && order.equals("") == false){//handle the last declaration score = processSort(schema, sort, order, lst); } //If the normal case (by score desc) do nothing if (lst.size() == 1 && score == true && lst.get(0).getReverse() == false) { return null; // do normal scoring... } return new Sort((SortField[]) lst.toArray(new SortField[lst.size()])); } private static boolean processSort(IndexSchema schema, String sort, String order, List<SortField> lst) { boolean score = false; if (sort != null && order != null) { boolean top = true; if ("desc".equals(order) || "top".equals(order)) { top = true; } else if ("asc".equals(order) || "bottom".equals(order)) { top = false; } else { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown sort order: " + order); } //we got the order, now deal with the sort if ("score".equals(sort)) { score = true; if (top) { lst.add(SortField.FIELD_SCORE); } else { lst.add(new SortField(null, SortField.SCORE, true)); } } else if (DOCID.equals(sort)) { lst.add(new SortField(null, SortField.DOC, top)); } else { //See if we have a Field first, then see if it is a function, then throw an exception // getField could throw an exception if the name isn't found SchemaField f = null; try { f = schema.getField(sort); } catch (SolrException e) { //Not an error just yet } if (f != null) { if (f == null || !f.indexed()) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "can not sort on unindexed field: " + sort); } lst.add(f.getType().getSortField(f, top)); } else { //See if we have a function: FunctionQuery query = null; try { query = parseFunction(sort, schema); if (query != null) { ValueSource valueSource = query.getValueSource(); //We have a function query try { lst.add(valueSource.getSortField(top)); } catch (IOException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "error getting the sort for this function: " + sort, e); } } else { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "can not sort on undefined function: " + sort); } } catch (ParseException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "can not sort on undefined field or function: " + sort, e); } } } } else if (sort == null) {//no sort value throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Must declare sort field or function"); } else if (order == null) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Missing sort order: "); } return score; } /////////////////////////// /////////////////////////// /////////////////////////// static FieldType writeFieldName(String name, IndexSchema schema, Appendable out, int flags) throws IOException { FieldType ft = null; ft = schema.getFieldTypeNoEx(name); out.append(name); if (ft == null) { out.append("(UNKNOWN FIELD " + name + ')'); } out.append(':'); return ft; } static void writeFieldVal(String val, FieldType ft, Appendable out, int flags) throws IOException { if (ft != null) { try { out.append(ft.indexedToReadable(val)); } catch (Exception e) { out.append("EXCEPTION(val="); out.append(val); out.append(")"); } } else { out.append(val); } } /** * @see #toString(Query,IndexSchema) */ public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException { boolean writeBoost = true; if (query instanceof TermQuery) { TermQuery q = (TermQuery) query; Term t = q.getTerm(); FieldType ft = writeFieldName(t.field(), schema, out, flags); writeFieldVal(t.text(), ft, out, flags); } else if (query instanceof TermRangeQuery) { TermRangeQuery q = (TermRangeQuery) query; String fname = q.getField(); FieldType ft = writeFieldName(fname, schema, out, flags); out.append(q.includesLower() ? '[' : '{'); String lt = q.getLowerTerm(); String ut = q.getUpperTerm(); if (lt == null) { out.append('*'); } else { writeFieldVal(lt, ft, out, flags); } out.append(" TO "); if (ut == null) { out.append('*'); } else { writeFieldVal(ut, ft, out, flags); } out.append(q.includesUpper() ? ']' : '}'); } else if (query instanceof NumericRangeQuery) { NumericRangeQuery q = (NumericRangeQuery) query; String fname = q.getField(); FieldType ft = writeFieldName(fname, schema, out, flags); out.append(q.includesMin() ? '[' : '{'); Number lt = q.getMin(); Number ut = q.getMax(); if (lt == null) { out.append('*'); } else { out.append(lt.toString()); } out.append(" TO "); if (ut == null) { out.append('*'); } else { out.append(ut.toString()); } out.append(q.includesMax() ? ']' : '}'); } else if (query instanceof BooleanQuery) { BooleanQuery q = (BooleanQuery) query; boolean needParens = false; if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0) { needParens = true; } if (needParens) { out.append('('); } boolean first = true; for (BooleanClause c : (List<BooleanClause>) q.clauses()) { if (!first) { out.append(' '); } else { first = false; } if (c.isProhibited()) { out.append('-'); } else if (c.isRequired()) { out.append('+'); } Query subQuery = c.getQuery(); boolean wrapQuery = false; // TODO: may need to put parens around other types // of queries too, depending on future syntax. if (subQuery instanceof BooleanQuery) { wrapQuery = true; } if (wrapQuery) { out.append('('); } toString(subQuery, schema, out, flags); if (wrapQuery) { out.append(')'); } } if (needParens) { out.append(')'); } if (q.getMinimumNumberShouldMatch() > 0) { out.append('~'); out.append(Integer.toString(q.getMinimumNumberShouldMatch())); } } else if (query instanceof PrefixQuery) { PrefixQuery q = (PrefixQuery) query; Term prefix = q.getPrefix(); FieldType ft = writeFieldName(prefix.field(), schema, out, flags); out.append(prefix.text()); out.append('*'); } else if (query instanceof WildcardQuery) { out.append(query.toString()); writeBoost = false; } else if (query instanceof FuzzyQuery) { out.append(query.toString()); writeBoost = false; } else if (query instanceof ConstantScoreQuery) { out.append(query.toString()); writeBoost = false; } else { out.append(query.getClass().getSimpleName() + '(' + query.toString() + ')'); writeBoost = false; } if (writeBoost && query.getBoost() != 1.0f) { out.append("^"); out.append(Float.toString(query.getBoost())); } } /** * Formats a Query for debugging, using the IndexSchema to make * complex field types readable. * <p/> * <p> * The benefit of using this method instead of calling * <code>Query.toString</code> directly is that it knows about the data * types of each field, so any field which is encoded in a particularly * complex way is still readable. The downside is that it only knows * about built in Query types, and will not be able to format custom * Query classes. * </p> */ public static String toString(Query query, IndexSchema schema) { try { StringBuilder sb = new StringBuilder(); toString(query, schema, sb, 0); return sb.toString(); } catch (Exception e) { throw new RuntimeException(e); } } /** * Simple class to help with parsing a string * <b>Note: This API is experimental and may change in non backward-compatible ways in the future</b> */ public static class StrParser { String val; int pos; int end; public StrParser(String val) { this(val, 0, val.length()); } public StrParser(String val, int start, int end) { this.val = val; this.pos = start; this.end = end; } void eatws() { while (pos < end && Character.isWhitespace(val.charAt(pos))) pos++; } void skip(int nChars) { pos = Math.max(pos + nChars, end); } boolean opt(String s) { eatws(); int slen = s.length(); if (val.regionMatches(pos, s, 0, slen)) { pos += slen; return true; } return false; } boolean opt(char ch) { eatws(); if (val.charAt(pos) == ch) { pos++; return true; } return false; } void expect(String s) throws ParseException { eatws(); int slen = s.length(); if (val.regionMatches(pos, s, 0, slen)) { pos += slen; } else { throw new ParseException("Expected '" + s + "' at position " + pos + " in '" + val + "'"); } } float getFloat() throws ParseException { eatws(); char[] arr = new char[end - pos]; int i; for (i = 0; i < arr.length; i++) { char ch = val.charAt(pos); if ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.' || ch == 'e' || ch == 'E' ) { pos++; arr[i] = ch; } else { break; } } return Float.parseFloat(new String(arr, 0, i)); } double getDouble() throws ParseException { eatws(); char[] arr = new char[end - pos]; int i; for (i = 0; i < arr.length; i++) { char ch = val.charAt(pos); if ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.' || ch == 'e' || ch == 'E' ) { pos++; arr[i] = ch; } else { break; } } return Double.parseDouble(new String(arr, 0, i)); } int getInt() throws ParseException { eatws(); char[] arr = new char[end - pos]; int i; for (i = 0; i < arr.length; i++) { char ch = val.charAt(pos); if ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-' ) { pos++; arr[i] = ch; } else { break; } } return Integer.parseInt(new String(arr, 0, i)); } String getId() throws ParseException { eatws(); int id_start = pos; if (pos < end && Character.isJavaIdentifierStart(val.charAt(pos))) { pos++; while (pos < end) { char ch = val.charAt(pos); if (!Character.isJavaIdentifierPart(ch) && ch != '.') { break; } pos++; } return val.substring(id_start, pos); } throw new ParseException("Expected identifier at pos " + pos + " str='" + val + "'"); } // return null if not a string String getQuotedString() throws ParseException { eatws(); char delim = peekChar(); if (!(delim == '\"' || delim == '\'')) { return null; } int val_start = ++pos; StringBuilder sb = new StringBuilder(); // needed for escaping for (; ;) { if (pos >= end) { throw new ParseException("Missing end quote for string at pos " + (val_start - 1) + " str='" + val + "'"); } char ch = val.charAt(pos); if (ch == '\\') { pos++; if (pos >= end) break; ch = val.charAt(pos); switch (ch) { case 'n': ch = '\n'; break; case 't': ch = '\t'; break; case 'r': ch = '\r'; break; case 'b': ch = '\b'; break; case 'f': ch = '\f'; break; case 'u': if (pos + 4 >= end) { throw new ParseException("bad unicode escape \\uxxxx at pos" + (val_start - 1) + " str='" + val + "'"); } ch = (char) Integer.parseInt(val.substring(pos + 1, pos + 5), 16); pos += 4; break; } } else if (ch == delim) { pos++; // skip over the quote break; } sb.append(ch); pos++; } return sb.toString(); } // next non-whitespace char char peek() { eatws(); return pos < end ? val.charAt(pos) : 0; } // next char char peekChar() { return pos < end ? val.charAt(pos) : 0; } public String toString() { return "'" + val + "'" + ", pos=" + pos; } } /** * Builds a list of String which are stringified versions of a list of Queries */ public static List<String> toString(List<Query> queries, IndexSchema schema) { List<String> out = new ArrayList<String>(queries.size()); for (Query q : queries) { out.add(QueryParsing.toString(q, schema)); } return out; } /** * Parse a function, returning a FunctionQuery * <p/> * <p> * Syntax Examples.... * </p> * <p/> * <pre> * // Numeric fields default to correct type * // (ie: IntFieldSource or FloatFieldSource) * // Others use explicit ord(...) to generate numeric field value * myfield * <p/> * // OrdFieldSource * ord(myfield) * <p/> * // ReverseOrdFieldSource * rord(myfield) * <p/> * // LinearFloatFunction on numeric field value * linear(myfield,1,2) * <p/> * // MaxFloatFunction of LinearFloatFunction on numeric field value or constant * max(linear(myfield,1,2),100) * <p/> * // ReciprocalFloatFunction on numeric field value * recip(myfield,1,2,3) * <p/> * // ReciprocalFloatFunction on ReverseOrdFieldSource * recip(rord(myfield),1,2,3) * <p/> * // ReciprocalFloatFunction on LinearFloatFunction on ReverseOrdFieldSource * recip(linear(rord(myfield),1,2),3,4,5) * </pre> */ public static FunctionQuery parseFunction(String func, IndexSchema schema) throws ParseException { SolrCore core = SolrCore.getSolrCore(); return (FunctionQuery) (QParser.getParser(func, "func", new LocalSolrQueryRequest(core, new HashMap())).parse()); // return new FunctionQuery(parseValSource(new StrParser(func), schema)); } }