/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * This parser was originally derived from DismaxQParser from Solr. * All changes are Copyright 2008, Lucid Imagination, Inc. */ package org.apache.solr.search; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.*; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.DefaultSolrParams; import org.apache.solr.common.params.DisMaxParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.FieldType; import org.apache.solr.search.QueryUtils; import org.apache.solr.search.function.BoostedQuery; import org.apache.solr.search.function.FunctionQuery; import org.apache.solr.search.function.ProductFloatFunction; import org.apache.solr.search.function.QueryValueSource; import org.apache.solr.search.function.ValueSource; import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.analysis.*; import java.util.*; import java.io.Reader; import java.io.IOException; /** * An advanced multi-field query parser. */ public class ExtendedDismaxQParserPlugin extends QParserPlugin { public static final String NAME = "edismax"; public void init(NamedList args) { } public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { return new ExtendedDismaxQParser(qstr, localParams, params, req); } } class ExtendedDismaxQParser extends QParser { /** * A field we can't ever find in any schema, so we can safely tell * DisjunctionMaxQueryParser to use it as our defaultField, and * map aliases from it to any field in our schema. */ private static String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC"; /** shorten the class references for utilities */ private static class U extends SolrPluginUtils { /* :NOOP */ } /** shorten the class references for utilities */ private static interface DMP extends DisMaxParams { /* :NOOP */ } public ExtendedDismaxQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { super(qstr, localParams, params, req); } Map<String,Float> queryFields; Query parsedUserQuery; private String[] boostParams; private String[] multBoosts; private List<Query> boostQueries; private Query altUserQuery; private QParser altQParser; public Query parse() throws ParseException { SolrParams localParams = getLocalParams(); SolrParams params = getParams(); SolrParams solrParams = localParams == null ? params : new DefaultSolrParams(localParams, params); final String minShouldMatch = DisMaxQParser.parseMinShouldMatch(req.getSchema(), solrParams); queryFields = U.parseFieldBoosts(solrParams.getParams(DMP.QF)); if (0 == queryFields.size()) { queryFields.put(req.getSchema().getDefaultSearchFieldName(), 1.0f); } // Boosted phrase of the full query string Map<String,Float> phraseFields = U.parseFieldBoosts(solrParams.getParams(DMP.PF)); // Boosted Bi-Term Shingles from the query string Map<String,Float> phraseFields2 = U.parseFieldBoosts(solrParams.getParams("pf2")); // Boosted Tri-Term Shingles from the query string Map<String,Float> phraseFields3 = U.parseFieldBoosts(solrParams.getParams("pf3")); float tiebreaker = solrParams.getFloat(DMP.TIE, 0.0f); int pslop = solrParams.getInt(DMP.PS, 0); int qslop = solrParams.getInt(DMP.QS, 0); // remove stopwords from mandatory "matching" component? boolean stopwords = solrParams.getBool("stopwords", true); /* the main query we will execute. we disable the coord because * this query is an artificial construct */ BooleanQuery query = new BooleanQuery(true); /* * * Main User Query * * */ parsedUserQuery = null; String userQuery = getString(); altUserQuery = null; if( userQuery == null || userQuery.length() < 1 ) { // If no query is specified, we may have an alternate String altQ = solrParams.get( DMP.ALTQ ); if (altQ != null) { altQParser = subQuery(altQ, null); altUserQuery = altQParser.getQuery(); query.add( altUserQuery , BooleanClause.Occur.MUST ); } else { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "missing query string" ); } } else { // There is a valid query string // userQuery = partialEscape(U.stripUnbalancedQuotes(userQuery)).toString(); boolean lowercaseOperators = solrParams.getBool("lowercaseOperators", true); String mainUserQuery = userQuery; ExtendedSolrQueryParser up = new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME); up.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, queryFields); up.setPhraseSlop(qslop); // slop for explicit user phrase queries up.setAllowLeadingWildcard(true); // defer escaping and only do if lucene parsing fails, or we need phrases // parsing fails. Need to sloppy phrase queries anyway though. List<Clause> clauses = null; boolean specialSyntax = false; int numPluses = 0; int numMinuses = 0; int numOptional = 0; int numAND = 0; int numOR = 0; int numNOT = 0; boolean sawLowerAnd=false; boolean sawLowerOr=false; clauses = splitIntoClauses(userQuery, false); for (Clause clause : clauses) { if (!clause.isPhrase && clause.hasSpecialSyntax) { specialSyntax = true; } if (clause.must == '+') numPluses++; if (clause.must == '-') numMinuses++; if (clause.isBareWord()) { String s = clause.val; if ("AND".equals(s)) { numAND++; } else if ("OR".equals(s)) { numOR++; } else if ("NOT".equals(s)) { numNOT++; } else if (lowercaseOperators) { if ("and".equals(s)) { numAND++; sawLowerAnd=true; } else if ("or".equals(s)) { numOR++; sawLowerOr=true; } } } } numOptional = clauses.size() - (numPluses + numMinuses); // convert lower or mixed case operators to uppercase if we saw them. // only do this for the lucene query part and not for phrase query boosting // since some fields might not be case insensitive. // We don't use a regex for this because it might change and AND or OR in // a phrase query in a case sensitive field. if (sawLowerAnd || sawLowerOr) { StringBuilder sb = new StringBuilder(); for (int i=0; i<clauses.size(); i++) { Clause clause = clauses.get(i); String s = clause.raw; // and and or won't be operators at the start or end if (i>0 && i+1<clauses.size()) { if ("AND".equalsIgnoreCase(s)) { s="AND"; } else if ("OR".equalsIgnoreCase(s)) { s="OR"; } } sb.append(s); sb.append(' '); } mainUserQuery = sb.toString(); } // For correct lucene queries, turn off mm processing if there // were explicit operators (except for AND). boolean doMinMatched = (numOR + numNOT + numPluses + numMinuses) == 0; try { up.setRemoveStopFilter(!stopwords); parsedUserQuery = up.parse(mainUserQuery); if (stopwords && isEmpty(parsedUserQuery)) { // if the query was all stop words, remove none of them up.setRemoveStopFilter(true); parsedUserQuery = up.parse(mainUserQuery); } } catch (Exception e) { // ignore failure and reparse later after escaping reserved chars } if (parsedUserQuery != null && doMinMatched) { if (parsedUserQuery instanceof BooleanQuery) { U.setMinShouldMatch((BooleanQuery)parsedUserQuery, minShouldMatch); } } if (parsedUserQuery == null) { StringBuilder sb = new StringBuilder(); for (Clause clause : clauses) { boolean doQuote = clause.isPhrase; String s=clause.val; if (!clause.isPhrase && ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s))) { doQuote=true; } if (clause.must != 0) { sb.append(clause.must); } if (clause.field != null) { sb.append(clause.field); sb.append(':'); } if (doQuote) { sb.append('"'); } sb.append(clause.val); if (doQuote) { sb.append('"'); } sb.append(' '); } String escapedUserQuery = sb.toString(); parsedUserQuery = up.parse(escapedUserQuery); if (parsedUserQuery instanceof BooleanQuery) { BooleanQuery t = new BooleanQuery(); U.flattenBooleanQuery(t, (BooleanQuery)parsedUserQuery); U.setMinShouldMatch(t, minShouldMatch); parsedUserQuery = t; } } query.add(parsedUserQuery, BooleanClause.Occur.MUST); // sloppy phrase queries for proximity if (phraseFields.size() > 0 || phraseFields2.size() > 0 || phraseFields3.size() > 0) { // find non-field clauses List<Clause> normalClauses = new ArrayList<Clause>(clauses.size()); for (Clause clause : clauses) { if (clause.field != null || clause.isPhrase) continue; // check for keywords "AND,OR,TO" if (clause.isBareWord()) { String s = clause.val.toString(); // avoid putting explict operators in the phrase query if ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s) || "TO".equals(s)) continue; } normalClauses.add(clause); } // full phrase... addShingledPhraseQueries(query, normalClauses, phraseFields, 0, tiebreaker, pslop); // shingles... addShingledPhraseQueries(query, normalClauses, phraseFields2, 2, tiebreaker, pslop); addShingledPhraseQueries(query, normalClauses, phraseFields3, 3, tiebreaker, pslop); } } /* * * Boosting Query * * */ boostParams = solrParams.getParams(DMP.BQ); //List<Query> boostQueries = U.parseQueryStrings(req, boostParams); boostQueries=null; if (boostParams!=null && boostParams.length>0) { boostQueries = new ArrayList<Query>(); for (String qs : boostParams) { if (qs.trim().length()==0) continue; Query q = subQuery(qs, null).getQuery(); boostQueries.add(q); } } if (null != boostQueries) { for(Query f : boostQueries) { query.add(f, BooleanClause.Occur.SHOULD); } } /* * * Boosting Functions * * */ String[] boostFuncs = solrParams.getParams(DMP.BF); if (null != boostFuncs && 0 != boostFuncs.length) { for (String boostFunc : boostFuncs) { if(null == boostFunc || "".equals(boostFunc)) continue; Map<String,Float> ff = SolrPluginUtils.parseFieldBoosts(boostFunc); for (String f : ff.keySet()) { Query fq = subQuery(f, FunctionQParserPlugin.NAME).getQuery(); Float b = ff.get(f); if (null != b) { fq.setBoost(b); } query.add(fq, BooleanClause.Occur.SHOULD); } } } // // create a boosted query (scores multiplied by boosts) // Query topQuery = query; multBoosts = solrParams.getParams("boost"); if (multBoosts!=null && multBoosts.length>0) { List<ValueSource> boosts = new ArrayList<ValueSource>(); for (String boostStr : multBoosts) { if (boostStr==null || boostStr.length()==0) continue; Query boost = subQuery(boostStr, FunctionQParserPlugin.NAME).getQuery(); ValueSource vs; if (boost instanceof FunctionQuery) { vs = ((FunctionQuery)boost).getValueSource(); } else { vs = new QueryValueSource(boost, 1.0f); } boosts.add(vs); } if (boosts.size()>1) { ValueSource prod = new ProductFloatFunction(boosts.toArray(new ValueSource[boosts.size()])); topQuery = new BoostedQuery(query, prod); } else if (boosts.size() == 1) { topQuery = new BoostedQuery(query, boosts.get(0)); } } return topQuery; } /** * Modifies the main query by adding a new optional Query consisting * of shingled phrase queries across the specified clauses using the * specified field => boost mappings. * * @param mainQuery Where the phrase boosting queries will be added * @param clauses Clauses that will be used to construct the phrases * @param fields Field => boost mappings for the phrase queries * @param shingleSize how big the phrases should be, 0 means a single phrase * @param tiebreaker tie breker value for the DisjunctionMaxQueries * @param slop slop value for the constructed phrases */ private void addShingledPhraseQueries(final BooleanQuery mainQuery, final List<Clause> clauses, final Map<String,Float> fields, int shingleSize, final float tiebreaker, final int slop) throws ParseException { if (null == fields || fields.isEmpty() || null == clauses || clauses.size() <= shingleSize ) return; if (0 == shingleSize) shingleSize = clauses.size(); final int goat = shingleSize-1; // :TODO: better name for var? StringBuilder userPhraseQuery = new StringBuilder(); for (int i=0; i < clauses.size() - goat; i++) { userPhraseQuery.append('"'); for (int j=0; j <= goat; j++) { userPhraseQuery.append(clauses.get(i + j).val); userPhraseQuery.append(' '); } userPhraseQuery.append('"'); userPhraseQuery.append(' '); } /* for parsing sloppy phrases using DisjunctionMaxQueries */ ExtendedSolrQueryParser pp = new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME); pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, fields); pp.setPhraseSlop(slop); pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords /* :TODO: reevaluate using makeDismax=true vs false... * * The DismaxQueryParser always used DisjunctionMaxQueries for the * pf boost, for the same reasons it used them for the qf fields. * When Yonik first wrote the ExtendedDismaxQParserPlugin, he added * the "makeDismax=false" property to use BooleanQueries instead, but * when asked why his response was "I honestly don't recall" ... * * https://issues.apache.org/jira/browse/SOLR-1553?focusedCommentId=12793813#action_12793813 * * so for now, we continue to use dismax style queries becuse it * seems the most logical and is back compatible, but we should * try to figure out what Yonik was thinking at the time (because he * rarely does things for no reason) */ pp.makeDismax = true; // minClauseSize is independent of the shingleSize because of stop words // (if they are removed from the middle, so be it, but we need at least // two or there shouldn't be a boost) pp.minClauseSize = 2; // TODO: perhaps we shouldn't use synonyms either... Query phrase = pp.parse(userPhraseQuery.toString()); if (phrase != null) { mainQuery.add(phrase, BooleanClause.Occur.SHOULD); } } @Override public String[] getDefaultHighlightFields() { String[] highFields = queryFields.keySet().toArray(new String[0]); return highFields; } @Override public Query getHighlightQuery() throws ParseException { return parsedUserQuery; } public void addDebugInfo(NamedList<Object> debugInfo) { super.addDebugInfo(debugInfo); debugInfo.add("altquerystring", altUserQuery); if (null != boostQueries) { debugInfo.add("boost_queries", boostParams); debugInfo.add("parsed_boost_queries", QueryParsing.toString(boostQueries, getReq().getSchema())); } debugInfo.add("boostfuncs", getReq().getParams().getParams(DisMaxParams.BF)); } public static CharSequence partialEscape(CharSequence s) { StringBuilder sb = new StringBuilder(); int len = s.length(); for (int i = 0; i < len; i++) { char c = s.charAt(i); if (c == ':') { // look forward to make sure it's something that won't // cause a parse exception (something that won't be escaped... like // +,-,:, whitespace if (i+1<len && i>0) { char ch = s.charAt(i+1); if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) { // OK, at this point the chars after the ':' will be fine. // now look back and try to determine if this is a fieldname // [+,-]? [letter,_] [letter digit,_,-,.]* // This won't cover *all* possible lucene fieldnames, but we should // only pick nice names to begin with int start, pos; for (start=i-1; start>=0; start--) { ch = s.charAt(start); if (Character.isWhitespace(ch)) break; } // skip whitespace pos = start+1; // skip leading + or - ch = s.charAt(pos); if (ch=='+' || ch=='-') { pos++; } // we don't need to explicitly check for end of string // since ':' will act as our sentinal // first char can't be '-' or '.' ch = s.charAt(pos++); if (Character.isJavaIdentifierPart(ch)) { for(;;) { ch = s.charAt(pos++); if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) { break; } } if (pos<=i) { // OK, we got to the ':' and everything looked like a valid fieldname, so // don't escape the ':' sb.append(':'); continue; // jump back to start of outer-most loop } } } } // we fell through to here, so we should escape this like other reserved chars. sb.append('\\'); } else if (c == '\\' || c == '!' || c == '(' || c == ')' || c == '^' || c == '[' || c == ']' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' ) { sb.append('\\'); } sb.append(c); } return sb; } static class Clause { boolean isBareWord() { return must==0 && !isPhrase; } String field; boolean isPhrase; boolean hasWhitespace; boolean hasSpecialSyntax; boolean syntaxError; char must; // + or - String val; // the field value (minus the field name, +/-, quotes) String raw; // the raw clause w/o leading/trailing whitespace } public List<Clause> splitIntoClauses(String s, boolean ignoreQuote) { ArrayList<Clause> lst = new ArrayList<Clause>(4); Clause clause = new Clause(); int pos=0; int end=s.length(); char ch=0; int start; outer: while (pos < end) { ch = s.charAt(pos); while (Character.isWhitespace(ch)) { if (++pos >= end) break; ch = s.charAt(pos); } start = pos; if (ch=='+' || ch=='-') { clause.must = ch; pos++; } clause.field = getFieldName(s, pos, end); if (clause.field != null) { pos += clause.field.length(); // skip the field name pos++; // skip the ':' } if (pos>=end) break; char inString=0; ch = s.charAt(pos); if (!ignoreQuote && ch=='"') { clause.isPhrase = true; inString = '"'; pos++; } StringBuilder sb = new StringBuilder(); while (pos < end) { ch = s.charAt(pos++); if (ch=='\\') { // skip escaped chars, but leave escaped sb.append(ch); if (pos >= end) { sb.append(ch); // double backslash if we are at the end of the string break; } ch = s.charAt(pos++); sb.append(ch); continue; } else if (inString != 0 && ch == inString) { inString=0; break; } else if (Character.isWhitespace(ch)) { clause.hasWhitespace=true; if (inString == 0) { // end of the token if we aren't in a string, backing // up the position. pos--; break; } } if (inString == 0) { switch (ch) { case '!': case '(': case ')': case ':': case '^': case '[': case ']': case '{': case '}': case '~': case '*': case '?': case '"': case '+': case '-': clause.hasSpecialSyntax = true; sb.append('\\'); } } else if (ch=='"') { // only char we need to escape in a string is double quote sb.append('\\'); } sb.append(ch); } clause.val = sb.toString(); if (clause.isPhrase) { if (inString != 0) { // detected bad quote balancing... retry // parsing with quotes like any other char return splitIntoClauses(s, true); } // special syntax in a string isn't special clause.hasSpecialSyntax = false; } else { // an empty clause... must be just a + or - on it's own if (clause.val.length() == 0) { clause.syntaxError = true; if (clause.must != 0) { clause.val="\\"+clause.must; clause.must = 0; clause.hasSpecialSyntax = true; } else { // uh.. this shouldn't happen. clause=null; } } } if (clause != null) { clause.raw = s.substring(start, pos); lst.add(clause); } clause = new Clause(); } return lst; } public String getFieldName(String s, int pos, int end) { if (pos >= end) return null; int p=pos; int colon = s.indexOf(':',pos); // make sure there is space after the colon, but not whitespace if (colon<=pos || colon+1>=end || Character.isWhitespace(s.charAt(colon+1))) return null; char ch = s.charAt(p++); if (!Character.isJavaIdentifierPart(ch)) return null; while (p<colon) { ch = s.charAt(p++); if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) return null; } String fname = s.substring(pos, p); return getReq().getSchema().getFieldTypeNoEx(fname) == null ? null : fname; } public static List<String> split(String s, boolean ignoreQuote) { ArrayList<String> lst = new ArrayList<String>(4); int pos=0, start=0, end=s.length(); char inString=0; char ch=0; while (pos < end) { char prevChar=ch; ch = s.charAt(pos++); if (ch=='\\') { // skip escaped chars pos++; } else if (inString != 0 && ch==inString) { inString=0; } else if (!ignoreQuote && ch=='"') { // If char is directly preceeded by a number or letter // then don't treat it as the start of a string. if (!Character.isLetterOrDigit(prevChar)) { inString=ch; } } else if (Character.isWhitespace(ch) && inString==0) { lst.add(s.substring(start,pos-1)); start=pos; } } if (start < end) { lst.add(s.substring(start,end)); } if (inString != 0) { // unbalanced quote... ignore them return split(s, true); } return lst; } enum QType { FIELD, PHRASE, PREFIX, WILDCARD, FUZZY, RANGE } /** * A subclass of SolrQueryParser that supports aliasing fields for * constructing DisjunctionMaxQueries. */ class ExtendedSolrQueryParser extends SolrQueryParser { /** A simple container for storing alias info */ protected class Alias { public float tie; public Map<String,Float> fields; } boolean makeDismax=true; boolean disableCoord=true; boolean allowWildcard=true; int minClauseSize = 0; // minimum number of clauses per phrase query... // used when constructing boosting part of query via sloppy phrases ExtendedAnalyzer analyzer; /** * Where we store a map from field name we expect to see in our query * string, to Alias object containing the fields to use in our * DisjunctionMaxQuery and the tiebreaker to use. */ protected Map<String,Alias> aliases = new HashMap<String,Alias>(3); public ExtendedSolrQueryParser(QParser parser, String defaultField) { super(parser, defaultField, new ExtendedAnalyzer(parser)); analyzer = (ExtendedAnalyzer)getAnalyzer(); // don't trust that our parent class won't ever change it's default setDefaultOperator(QueryParser.Operator.OR); } public void setRemoveStopFilter(boolean remove) { analyzer.removeStopFilter = remove; } protected Query getBooleanQuery(List clauses, boolean disableCoord) throws ParseException { Query q = super.getBooleanQuery(clauses, disableCoord); if (q != null) { q = QueryUtils.makeQueryable(q); } return q; } //////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// protected void addClause(List clauses, int conj, int mods, Query q) { //System.out.println("addClause:clauses="+clauses+" conj="+conj+" mods="+mods+" q="+q); super.addClause(clauses, conj, mods, q); } /** * Add an alias to this query parser. * * @param field the field name that should trigger alias mapping * @param fieldBoosts the mapping from fieldname to boost value that * should be used to build up the clauses of the * DisjunctionMaxQuery. * @param tiebreaker to the tiebreaker to be used in the * DisjunctionMaxQuery * @see SolrPluginUtils#parseFieldBoosts */ public void addAlias(String field, float tiebreaker, Map<String,Float> fieldBoosts) { Alias a = new Alias(); a.tie = tiebreaker; a.fields = fieldBoosts; aliases.put(field, a); } QType type; String field; String val; String val2; boolean bool; float flt; int slop; @Override protected Query getFieldQuery(String field, String val, boolean quoted) throws ParseException { //System.out.println("getFieldQuery: val="+val); this.type = QType.FIELD; this.field = field; this.val = val; this.slop = getPhraseSlop(); // unspecified return getAliasedQuery(); } @Override protected Query getFieldQuery(String field, String val, int slop) throws ParseException { //System.out.println("getFieldQuery: val="+val+" slop="+slop); this.type = QType.PHRASE; this.field = field; this.val = val; this.slop = slop; return getAliasedQuery(); } @Override protected Query getPrefixQuery(String field, String val) throws ParseException { //System.out.println("getPrefixQuery: val="+val); if (val.equals("") && field.equals("*")) { return new MatchAllDocsQuery(); } this.type = QType.PREFIX; this.field = field; this.val = val; return getAliasedQuery(); } @Override protected Query getRangeQuery(String field, String a, String b, boolean inclusive) throws ParseException { //System.out.println("getRangeQuery:"); this.type = QType.RANGE; this.field = field; this.val = a; this.val2 = b; this.bool = inclusive; return getAliasedQuery(); } @Override protected Query getWildcardQuery(String field, String val) throws ParseException { //System.out.println("getWildcardQuery: val="+val); if (val.equals("*")) { if (field.equals("*")) { return new MatchAllDocsQuery(); } else{ return getPrefixQuery(field,""); } } this.type = QType.WILDCARD; this.field = field; this.val = val; return getAliasedQuery(); } @Override protected Query getFuzzyQuery(String field, String val, float minSimilarity) throws ParseException { //System.out.println("getFuzzyQuery: val="+val); this.type = QType.FUZZY; this.field = field; this.val = val; this.flt = minSimilarity; return getAliasedQuery(); } /** * Delegates to the super class unless the field has been specified * as an alias -- in which case we recurse on each of * the aliased fields, and the results are composed into a * DisjunctionMaxQuery. (so yes: aliases which point at other * aliases should work) */ protected Query getAliasedQuery() throws ParseException { Alias a = aliases.get(field); if (a != null) { List<Query> lst = getQueries(a); if (lst == null || lst.size()==0) return getQuery(); // make a DisjunctionMaxQuery in this case too... it will stop // the "mm" processing from making everything required in the case // that the query expanded to multiple clauses. // DisMaxQuery.rewrite() removes itself if there is just a single clause anyway. // if (lst.size()==1) return lst.get(0); if (makeDismax) { DisjunctionMaxQuery q = new DisjunctionMaxQuery(lst, a.tie); return q; } else { // should we disable coord? BooleanQuery q = new BooleanQuery(disableCoord); for (Query sub : lst) { q.add(sub, BooleanClause.Occur.SHOULD); } return q; } } else { return getQuery(); } } protected List<Query> getQueries(Alias a) throws ParseException { if (a == null) return null; if (a.fields.size()==0) return null; List<Query> lst= new ArrayList<Query>(4); for (String f : a.fields.keySet()) { this.field = f; Query sub = getQuery(); if (sub != null) { Float boost = a.fields.get(f); if (boost != null) { sub.setBoost(boost); } lst.add(sub); } } return lst; } private Query getQuery() throws ParseException { try { switch (type) { case FIELD: // fallthrough case PHRASE: Query query = super.getFieldQuery(field, val, type == QType.PHRASE); if (query instanceof PhraseQuery) { PhraseQuery pq = (PhraseQuery)query; if (minClauseSize > 1 && pq.getTerms().length < minClauseSize) return null; ((PhraseQuery)query).setSlop(slop); } else if (query instanceof MultiPhraseQuery) { MultiPhraseQuery pq = (MultiPhraseQuery)query; if (minClauseSize > 1 && pq.getTermArrays().size() < minClauseSize) return null; ((MultiPhraseQuery)query).setSlop(slop); } else if (minClauseSize > 1) { // if it's not a type of phrase query, it doesn't meet the minClauseSize requirements return null; } return query; case PREFIX: return super.getPrefixQuery(field, val); case WILDCARD: return super.getWildcardQuery(field, val); case FUZZY: return super.getFuzzyQuery(field, val, flt); case RANGE: return super.getRangeQuery(field, val, val2, bool); } return null; } catch (Exception e) { // an exception here is due to the field query not being compatible with the input text // for example, passing a string to a numeric field. return null; } } } static boolean isEmpty(Query q) { if (q==null) return true; if (q instanceof BooleanQuery && ((BooleanQuery)q).clauses().size()==0) return true; return false; } } final class ExtendedAnalyzer extends Analyzer { final Map<String, Analyzer> map = new HashMap<String, Analyzer>(); final QParser parser; final Analyzer queryAnalyzer; public boolean removeStopFilter = false; public static TokenizerChain getQueryTokenizerChain(QParser parser, String fieldName) { FieldType ft = parser.getReq().getSchema().getFieldType(fieldName); Analyzer qa = ft.getQueryAnalyzer(); return qa instanceof TokenizerChain ? (TokenizerChain)qa : null; } public static StopFilterFactory getQueryStopFilter(QParser parser, String fieldName) { TokenizerChain tcq = getQueryTokenizerChain(parser, fieldName); if (tcq == null) return null; TokenFilterFactory[] facs = tcq.getTokenFilterFactories(); for (int i=0; i<facs.length; i++) { TokenFilterFactory tf = facs[i]; if (tf instanceof StopFilterFactory) { return (StopFilterFactory)tf; } } return null; } public ExtendedAnalyzer(QParser parser) { this.parser = parser; this.queryAnalyzer = parser.getReq().getSchema().getQueryAnalyzer(); } public TokenStream tokenStream(String fieldName, Reader reader) { if (!removeStopFilter) { return queryAnalyzer.tokenStream(fieldName, reader); } Analyzer a = map.get(fieldName); if (a != null) { return a.tokenStream(fieldName, reader); } FieldType ft = parser.getReq().getSchema().getFieldType(fieldName); Analyzer qa = ft.getQueryAnalyzer(); if (!(qa instanceof TokenizerChain)) { map.put(fieldName, qa); return qa.tokenStream(fieldName, reader); } TokenizerChain tcq = (TokenizerChain)qa; Analyzer ia = ft.getAnalyzer(); if (ia == qa || !(ia instanceof TokenizerChain)) { map.put(fieldName, qa); return qa.tokenStream(fieldName, reader); } TokenizerChain tci = (TokenizerChain)ia; // make sure that there isn't a stop filter in the indexer for (TokenFilterFactory tf : tci.getTokenFilterFactories()) { if (tf instanceof StopFilterFactory) { map.put(fieldName, qa); return qa.tokenStream(fieldName, reader); } } // now if there is a stop filter in the query analyzer, remove it int stopIdx = -1; TokenFilterFactory[] facs = tcq.getTokenFilterFactories(); for (int i=0; i<facs.length; i++) { TokenFilterFactory tf = facs[i]; if (tf instanceof StopFilterFactory) { stopIdx = i; break; } } if (stopIdx == -1) { // no stop filter exists map.put(fieldName, qa); return qa.tokenStream(fieldName, reader); } TokenFilterFactory[] newtf = new TokenFilterFactory[facs.length-1]; for (int i=0,j=0; i<facs.length; i++) { if (i==stopIdx) continue; newtf[j++] = facs[i]; } TokenizerChain newa = new TokenizerChain(tcq.getTokenizerFactory(), newtf); newa.setPositionIncrementGap(tcq.getPositionIncrementGap(fieldName)); map.put(fieldName, newa); return newa.tokenStream(fieldName, reader); } public int getPositionIncrementGap(String fieldName) { return queryAnalyzer.getPositionIncrementGap(fieldName); } public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { if (!removeStopFilter) { return queryAnalyzer.reusableTokenStream(fieldName, reader); } // TODO: done to fix stop word removal bug - could be done while still using resusable? return tokenStream(fieldName, reader); } }