/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

/**
 * The GraphTermsQuery builds a disjunction query from a list of terms. The terms
 * are first filtered by the maxDocFreq parameter, which allows graph traversals to
 * skip high-frequency nodes; this is often desirable for performance.
 * <p>
 * Syntax: {!graphTerms f=field maxDocFreq=10000}term1,term2,term3
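 * <p>
 * For example, a request might look like the following (a sketch only; the field
 * name and term values here are hypothetical):
 * <pre>q={!graphTerms f=node_id maxDocFreq=10000}node1,node2,node3</pre>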
 */
public class GraphTermsQParserPlugin extends QParserPlugin {
  public static final String NAME = "graphTerms";

  @Override
  public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    return new QParser(qstr, localParams, params, req) {
      @Override
      public Query parse() throws SyntaxError {
        String fname = localParams.get(QueryParsing.F);
        FieldType ft = req.getSchema().getFieldTypeNoEx(fname);
        int maxDocFreq = localParams.getInt("maxDocFreq", Integer.MAX_VALUE);
        String qstr = localParams.get(QueryParsing.V); // never null
        if (qstr.length() == 0) {
          return new MatchNoDocsQuery();
        }

        final String[] splitVals = qstr.split(",");
        Term[] terms = new Term[splitVals.length];
        BytesRefBuilder term = new BytesRefBuilder();
        for (int i = 0; i < splitVals.length; i++) {
          String stringVal = splitVals[i].trim();
          if (ft != null) {
            ft.readableToIndexed(stringVal, term);
          } else {
            term.copyChars(stringVal);
          }
          BytesRef ref = term.toBytesRef();
          terms[i] = new Term(fname, ref);
        }

        ArrayUtil.timSort(terms);
        return new ConstantScoreQuery(new GraphTermsQuery(fname, terms, maxDocFreq));
      }
    };
  }

  private class GraphTermsQuery extends Query implements ExtendedQuery {

    private Term[] queryTerms;
    private String field;
    private int maxDocFreq;
    private Object id;

    public GraphTermsQuery(String field, Term[] terms, int maxDocFreq) {
      this.maxDocFreq = maxDocFreq;
      this.field = field;
      this.queryTerms = terms;
      this.id = new Object();
    }

    // Just for cloning
    private GraphTermsQuery(String field, Term[] terms, int maxDocFreq, Object id) {
      this.field = field;
      this.queryTerms = terms;
      this.maxDocFreq = maxDocFreq;
      this.id = id;
    }
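    // Query caching is intentionally disabled below: equals() and hashCode() are
    // identity-based via the 'id' object, so structurally equal instances would
    // never match in a cache lookup anyway.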
    public boolean getCache() {
      return false;
    }

    public boolean getCacheSep() {
      return false;
    }

    public void setCacheSep(boolean sep) {
    }

    public void setCache(boolean cache) {
    }

    public int getCost() {
      return 1; // Not a post filter. The GraphTermsQuery will typically be used as the main query.
    }

    public void setCost(int cost) {
    }

    @Override
    public Query rewrite(IndexReader reader) throws IOException {
      return this;
    }

    public int hashCode() {
      return 31 * classHash() + id.hashCode();
    }

    public boolean equals(Object other) {
      return sameClassAs(other) && id == ((GraphTermsQuery) other).id;
    }

    public GraphTermsQuery clone() {
      GraphTermsQuery clone = new GraphTermsQuery(this.field, this.queryTerms, this.maxDocFreq, this.id);
      return clone;
    }

    @Override
    public String toString(String defaultField) {
      StringBuilder builder = new StringBuilder();
      boolean first = true;
      for (Term term : this.queryTerms) {
        if (!first) {
          builder.append(',');
        }
        first = false;
        builder.append(term.toString());
      }
      return builder.toString();
    }

    private class WeightOrDocIdSet {
      final Weight weight;
      final DocIdSet set;

      WeightOrDocIdSet(DocIdSet bitset) {
        this.set = bitset;
        this.weight = null;
      }
    }

    @Override
    public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
      List<TermContext> finalContexts = new ArrayList<>();
      List<Term> finalTerms = new ArrayList<>();
      List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
      TermContext[] termContexts = new TermContext[this.queryTerms.length];
      collectTermContext(searcher.getIndexReader(), contexts, termContexts, this.queryTerms);
      // Keep only the terms whose document frequency is within maxDocFreq.
      for (int i = 0; i < termContexts.length; i++) {
        TermContext termContext = termContexts[i];
        if (termContext != null && termContext.docFreq() <= this.maxDocFreq) {
          finalContexts.add(termContext);
          finalTerms.add(queryTerms[i]);
        }
      }

      return new ConstantScoreWeight(this, boost) {

        @Override
        public void extractTerms(Set<Term> terms) {
          // no-op
          // This query is for abuse cases when the number of terms is too high to
          // run efficiently as a BooleanQuery. So likewise we hide its terms in
          // order to protect highlighters.
        }

        private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
          final LeafReader reader = context.reader();
          final Fields fields = reader.fields();
          Terms terms = fields.terms(field);
          if (terms == null) {
            return new WeightOrDocIdSet(new BitDocIdSet(new FixedBitSet(reader.maxDoc()), 0));
          }
          TermsEnum termsEnum = terms.iterator();
          PostingsEnum docs = null;
          DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc(), terms);
          for (int i = 0; i < finalContexts.size(); i++) {
            TermContext termContext = finalContexts.get(i);
            TermState termState = termContext.get(context.ord);
            if (termState != null) {
              Term term = finalTerms.get(i);
              termsEnum.seekExact(term.bytes(), termContext.get(context.ord));
              docs = termsEnum.postings(docs, PostingsEnum.NONE);
              builder.add(docs);
            }
          }
          return new WeightOrDocIdSet(builder.build());
        }

        private Scorer scorer(DocIdSet set) throws IOException {
          if (set == null) {
            return null;
          }
          final DocIdSetIterator disi = set.iterator();
          if (disi == null) {
            return null;
          }
          return new ConstantScoreScorer(this, score(), disi);
        }

        @Override
        public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
          final WeightOrDocIdSet weightOrBitSet = rewrite(context);
          if (weightOrBitSet.weight != null) {
            return weightOrBitSet.weight.bulkScorer(context);
          } else {
            final Scorer scorer = scorer(weightOrBitSet.set);
            if (scorer == null) {
              return null;
            }
            return new DefaultBulkScorer(scorer);
          }
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
          final WeightOrDocIdSet weightOrBitSet = rewrite(context);
          if (weightOrBitSet.weight != null) {
            return weightOrBitSet.weight.scorer(context);
          } else {
            return scorer(weightOrBitSet.set);
          }
        }
      };
    }
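    // Walks every leaf once and records a TermContext for each query term, so
    // that docFreq can be checked against maxDocFreq (and the per-segment
    // TermStates reused) without a second pass over the terms dictionary.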
    private void collectTermContext(IndexReader reader, List<LeafReaderContext> leaves,
                                    TermContext[] contextArray, Term[] queryTerms) throws IOException {
      TermsEnum termsEnum = null;
      for (LeafReaderContext context : leaves) {
        Terms terms = context.reader().terms(this.field);
        if (terms == null) {
          // field does not exist
          continue;
        }
        termsEnum = terms.iterator();
        if (termsEnum == TermsEnum.EMPTY) continue;
        for (int i = 0; i < queryTerms.length; i++) {
          Term term = queryTerms[i];
          TermContext termContext = contextArray[i];
          if (termsEnum.seekExact(term.bytes())) {
            if (termContext == null) {
              contextArray[i] = new TermContext(reader.getContext(),
                  termsEnum.termState(), context.ord, termsEnum.docFreq(),
                  termsEnum.totalTermFreq());
            } else {
              termContext.register(termsEnum.termState(), context.ord,
                  termsEnum.docFreq(), termsEnum.totalTermFreq());
            }
          }
        }
      }
    }
  }
}