/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.facet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryContext;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.util.RTimer;

public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
  SimpleOrderedMap<Object> response;
  FacetContext fcontext;
  FacetRequestT freq;

  DocSet filter;  // additional filters specified by "filter"
  // TODO: do these need to be on the context to support recomputing during multi-select?
  LinkedHashMap<String,SlotAcc> accMap;
  SlotAcc[] accs;
  CountSlotAcc countAcc;

  /**
   * Factory method for invoking the JSON facet framework as a whole.
   * Note: this is currently only used from SimpleFacets, not from the JSON Facet API itself.
   */
  public static FacetProcessor<?> createProcessor(SolrQueryRequest req,
                                                  Map<String, Object> params, DocSet docs) {
    FacetParser parser = new FacetTopParser(req);
    FacetRequest facetRequest = null;
    try {
      facetRequest = parser.parse(params);
    } catch (SyntaxError syntaxError) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
    }

    FacetContext fcontext = new FacetContext();
    fcontext.base = docs;
    fcontext.req = req;
    fcontext.searcher = req.getSearcher();
    fcontext.qcontext = QueryContext.newContext(fcontext.searcher);

    return facetRequest.createFacetProcessor(fcontext);
  }

  FacetProcessor(FacetContext fcontext, FacetRequestT freq) {
    this.fcontext = fcontext;
    this.freq = freq;
  }

  public Object getResponse() {
    return response;
  }

  public void process() throws IOException {
    handleDomainChanges();
  }

  private void evalFilters() throws IOException {
    if (freq.domain.filters == null || freq.domain.filters.isEmpty()) return;

    List<Query> qlist = new ArrayList<>(freq.domain.filters.size());
    // TODO: prevent parsing filters each time!
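    // Each entry in "filter" is either a query string, or a {"param":"tag"} map that
    // pulls one or more query strings from a request parameter. For illustration, a
    // domain using both forms (the parameter name here is hypothetical) might look like:
    //   "domain": { "filter": [ "inStock:true", { "param": "myFilterParam" } ] }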
    for (Object rawFilter : freq.domain.filters) {
      if (rawFilter instanceof String) {
        QParser parser = null;
        try {
          parser = QParser.getParser((String)rawFilter, fcontext.req);
          parser.setIsFilter(true);
          Query symbolicFilter = parser.getQuery();
          qlist.add(symbolicFilter);
        } catch (SyntaxError syntaxError) {
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
        }
      } else if (rawFilter instanceof Map) {
        Map<String,Object> m = (Map<String,Object>) rawFilter;
        String type;
        Object args;

        if (m.size() == 1) {
          Map.Entry<String,Object> entry = m.entrySet().iterator().next();
          type = entry.getKey();
          args = entry.getValue();
        } else {
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
              "Can't convert map to query: " + rawFilter);
        }

        if (!"param".equals(type)) {
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
              "Unknown type. Can't convert map to query: " + rawFilter);
        }

        String tag;
        if (!(args instanceof String)) {
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
              "Can't retrieve non-string param: " + args);
        }
        tag = (String)args;

        String[] qstrings = fcontext.req.getParams().getParams(tag);
        if (qstrings != null) {
          for (String qstring : qstrings) {
            QParser parser = null;
            try {
              parser = QParser.getParser(qstring, fcontext.req);
              parser.setIsFilter(true);
              Query symbolicFilter = parser.getQuery();
              qlist.add(symbolicFilter);
            } catch (SyntaxError syntaxError) {
              throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
            }
          }
        }

      } else {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
            "Bad query (expected a string or map): " + rawFilter);
      }
    }

    this.filter = fcontext.searcher.getDocSet(qlist);
  }

  private void handleDomainChanges() throws IOException {
    if (freq.domain == null) return;

    handleFilterExclusions();

    // Check filters... if we do have filters they apply after domain changes.
    // We still calculate them first because we can use them in a parent->child domain change.
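    // The order of operations is: tag exclusions (above), then "filter", then the join
    // field, then block join. The parsed filter DocSet is intersected into the base at
    // the end, unless handleBlockJoin() already consumed it as acceptDocs.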
    evalFilters();

    handleJoinField();

    boolean appliedFilters = handleBlockJoin();

    if (this.filter != null && !appliedFilters) {
      fcontext.base = fcontext.base.intersection( filter );
    }
  }

  private void handleFilterExclusions() throws IOException {
    List<String> excludeTags = freq.domain.excludeTags;

    if (excludeTags == null || excludeTags.size() == 0) {
      return;
    }

    // TODO: somehow remove responsebuilder dependency
    ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
    Map tagMap = (Map) rb.req.getContext().get("tags");
    if (tagMap == null) {
      // no filters were tagged
      return;
    }

    IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
    for (String excludeTag : excludeTags) {
      Object olst = tagMap.get(excludeTag);
      // tagMap has entries of Map<String,List<QParser>>, but subject to change in the future
      if (!(olst instanceof Collection)) continue;
      for (Object o : (Collection<?>)olst) {
        if (!(o instanceof QParser)) continue;
        QParser qp = (QParser)o;
        try {
          excludeSet.put(qp.getQuery(), Boolean.TRUE);
        } catch (SyntaxError syntaxError) {
          // This should not happen since we should only be retrieving a previously parsed query
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
        }
      }
    }
    if (excludeSet.size() == 0) return;

    List<Query> qlist = new ArrayList<>();

    // add the base query
    if (!excludeSet.containsKey(rb.getQuery())) {
      qlist.add(rb.getQuery());
    }

    // add the filters
    if (rb.getFilters() != null) {
      for (Query q : rb.getFilters()) {
        if (!excludeSet.containsKey(q)) {
          qlist.add(q);
        }
      }
    }

    // now walk back up the context tree
    // TODO: we lose parent exclusions...
    for (FacetContext curr = fcontext; curr != null; curr = curr.parent) {
      if (curr.filter != null) {
        qlist.add( curr.filter );
      }
    }

    // recompute the base domain
    fcontext.base = fcontext.searcher.getDocSet(qlist);
  }

  /** modifies the context base if there is a join field domain change */
  private void handleJoinField() throws IOException {
    if (null == freq.domain.joinField) return;

    final Query domainQuery = freq.domain.joinField.createDomainQuery(fcontext);
    fcontext.base = fcontext.searcher.getDocSet(domainQuery);
  }

  // returns "true" if filters were applied to fcontext.base already
  private boolean handleBlockJoin() throws IOException {
    boolean appliedFilters = false;
    if (!(freq.domain.toChildren || freq.domain.toParent)) return appliedFilters;

    // TODO: avoid query parsing per-bucket somehow...
    String parentStr = freq.domain.parents;
    Query parentQuery;
    try {
      QParser parser = QParser.getParser(parentStr, fcontext.req);
      parser.setIsFilter(true);
      parentQuery = parser.getQuery();
    } catch (SyntaxError err) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          "Error parsing block join parent specification: " + parentStr, err);
    }

    BitDocSet parents = fcontext.searcher.getDocSetBits(parentQuery);
    DocSet input = fcontext.base;
    DocSet result;

    if (freq.domain.toChildren) {
      // If there are filters on this facet, then use them as acceptDocs when executing toChildren.
      // We need to remember to not redundantly re-apply these filters after.
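      // (In the JSON Facet API, a domain such as {"blockChildren":"doc_type:parent"}
      // reaches this branch. Passing the filters as acceptDocs prunes non-matching
      // children during the block-join traversal itself, avoiding a second
      // intersection pass afterwards.)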
      DocSet acceptDocs = this.filter;
      if (acceptDocs == null) {
        acceptDocs = fcontext.searcher.getLiveDocs();
      } else {
        appliedFilters = true;
      }
      result = BlockJoin.toChildren(input, parents, acceptDocs, fcontext.qcontext);
    } else {
      result = BlockJoin.toParents(input, parents, fcontext.qcontext);
    }

    fcontext.base = result;
    return appliedFilters;
  }

  protected void processStats(SimpleOrderedMap<Object> bucket, DocSet docs, int docCount) throws IOException {
    if ((docCount == 0 && !freq.processEmpty) || freq.getFacetStats().size() == 0) {
      bucket.add("count", docCount);
      return;
    }
    createAccs(docCount, 1);
    int collected = collect(docs, 0);
    countAcc.incrementCount(0, collected);
    assert collected == docCount;
    addStats(bucket, 0);
  }

  protected void createAccs(int docCount, int slotCount) throws IOException {
    accMap = new LinkedHashMap<>();

    // allow a custom count acc to be used
    if (countAcc == null) {
      countAcc = new CountSlotArrAcc(fcontext, slotCount);
      countAcc.key = "count";
    }

    for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
      SlotAcc acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
      acc.key = entry.getKey();
      accMap.put(acc.key, acc);
    }

    accs = new SlotAcc[accMap.size()];
    int i = 0;
    for (SlotAcc acc : accMap.values()) {
      accs[i++] = acc;
    }
  }

  // note: only called by enum/stream prior to collect
  void resetStats() throws IOException {
    countAcc.reset();
    for (SlotAcc acc : accs) {
      acc.reset();
    }
  }

  int collect(DocSet docs, int slot) throws IOException {
    int count = 0;
    SolrIndexSearcher searcher = fcontext.searcher;

    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int segMax;
    int adjustedMax = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
      final int doc = docsIt.nextDoc();
      if (doc >= adjustedMax) {
        do {
          ctx = ctxIt.next();
          if (ctx == null) {
            // should be impossible
            throw new RuntimeException("INTERNAL FACET ERROR");
          }
          segBase = ctx.docBase;
          segMax = ctx.reader().maxDoc();
          adjustedMax = segBase + segMax;
        } while (doc >= adjustedMax);
        assert doc >= ctx.docBase;
        setNextReader(ctx);
      }
      count++;
      collect(doc - segBase, slot);  // per-seg collectors
    }
    return count;
  }

  void collect(int segDoc, int slot) throws IOException {
    if (accs != null) {
      for (SlotAcc acc : accs) {
        acc.collect(segDoc, slot);
      }
    }
  }

  void setNextReader(LeafReaderContext ctx) throws IOException {
    // countAcc.setNextReader is a no-op
    for (SlotAcc acc : accs) {
      acc.setNextReader(ctx);
    }
  }

  void addStats(SimpleOrderedMap<Object> target, int slotNum) throws IOException {
    int count = countAcc.getCount(slotNum);
    target.add("count", count);
    if (count > 0 || freq.processEmpty) {
      for (SlotAcc acc : accs) {
        acc.setValues(target, slotNum);
      }
    }
  }

  void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result, boolean skip, Map<String,Object> facetInfo) throws IOException {
    boolean needDocSet = (!skip && freq.getFacetStats().size() > 0) || freq.getSubFacets().size() > 0;

    int count;

    if (result != null) {
      count = result.size();
    } else if (needDocSet) {
      if (q == null) {
        result = fcontext.base;
        // result.incref(); // OFF-HEAP
      } else {
        result = fcontext.searcher.getDocSet(q, fcontext.base);
      }
      count = result.size();  // don't really need this if we are skipping, but it's free.
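      // (When skip==true we only reach this branch because sub-facets exist; the DocSet
      // is computed for their benefit, and processStats() below is bypassed.)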
    } else {
      if (q == null) {
        count = fcontext.base.size();
      } else {
        count = fcontext.searcher.numDocs(q, fcontext.base);
      }
    }

    try {
      if (!skip) {
        processStats(bucket, result, count);
      }
      processSubs(bucket, q, result, skip, facetInfo);
    } finally {
      if (result != null) {
        // result.decref(); // OFF-HEAP
        result = null;
      }
    }
  }

  void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain, boolean skip, Map<String,Object> facetInfo) throws IOException {

    boolean emptyDomain = domain == null || domain.size() == 0;

    for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
      FacetRequest subRequest = sub.getValue();

      // This includes a static check if a sub-facet can possibly produce something from
      // an empty domain.  Should this be changed to a dynamic check as well?  That would
      // probably require actually executing the facet anyway, and dropping it at the
      // end if it was unproductive.
      if (emptyDomain && !freq.processEmpty && !subRequest.canProduceFromEmpty()) {
        continue;
      }

      Map<String,Object> facetInfoSub = null;
      if (facetInfo != null) {
        facetInfoSub = (Map<String,Object>) facetInfo.get(sub.getKey());
      }

      // If we're skipping this node, then we only need to process sub-facets that have facet info specified.
      if (skip && facetInfoSub == null) continue;

      // make a new context for each sub-facet since they can change the domain
      FacetContext subContext = fcontext.sub(filter, domain);
      subContext.facetInfo = facetInfoSub;
      if (!skip) subContext.flags &= ~FacetContext.SKIP_FACET;  // turn off the skip flag if we're not skipping this bucket

      FacetProcessor subProcessor = subRequest.createFacetProcessor(subContext);

      if (fcontext.getDebugInfo() != null) {  // if fcontext.debugInfo != null, it means rb.debug() == true
        FacetDebugInfo fdebug = new FacetDebugInfo();
        subContext.setDebugInfo(fdebug);
        fcontext.getDebugInfo().addChild(fdebug);

        fdebug.setReqDescription(subRequest.getFacetDescription());
        fdebug.setProcessor(subProcessor.getClass().getSimpleName());
        if (subContext.filter != null) fdebug.setFilter(subContext.filter.toString());

        final RTimer timer = new RTimer();
        subProcessor.process();
        long timeElapsed = (long) timer.getTime();
        fdebug.setElapse(timeElapsed);
        fdebug.putInfoItem("domainSize", (long) subContext.base.size());
      } else {
        subProcessor.process();
      }

      response.add(sub.getKey(), subProcessor.getResponse());
    }
  }

  @SuppressWarnings("unused")
  static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
    SchemaField sf = searcher.getSchema().getField(fieldName);
    DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
    DocSet answer = docs.andNot(hasVal);
    // hasVal.decref(); // OFF-HEAP
    return answer;
  }

  static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName) throws IOException {
    SchemaField sf = searcher.getSchema().getField(fieldName);
    Query hasVal = sf.getType().getRangeQuery(null, sf, null, null, false, false);
    BooleanQuery.Builder noVal = new BooleanQuery.Builder();
    noVal.add(hasVal, BooleanClause.Occur.MUST_NOT);
    return noVal.build();
  }
}