/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.RequiredSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.handler.component.PivotFacetHelper.PivotLimitInfo;
import org.apache.solr.request.FacetPercentiles;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.schema.FieldType;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.util.NamedListHelper;
import org.apache.solr.util.PivotListEntry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URL;
import java.util.*;
/**
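* Search component that computes facet counts for a request and, for
* distributed requests, merges and refines the facet counts returned by
* the individual shards.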
*
*
* @since solr 1.3
*/
public class FacetComponent extends SearchComponent
{
/** Logger for this component. */
public static Logger log = LoggerFactory.getLogger(FacetComponent.class);
/** Name constant for this component; its value is "facet". */
public static final String COMPONENT_NAME = "facet";
/** Key under which pivot facet results are added to the facet counts in process(). */
static final String PIVOT_KEY = "facet_pivot";
/** Helper that computes and converts pivot facets; instantiated in init(). */
PivotFacetHelper pivotHelper;
/** Helper used to read individual entries (value, count, statistics, ...) out of pivot lists. */
protected NamedListHelper namedListHelper = NamedListHelper.INSTANCE;
/**
 * Initializes the component by creating the pivot facet helper.
 *
 * @param args initialization arguments; not consulted by this implementation
 */
@Override
public void init( NamedList args )
{
  // The helper is hard-wired for now; it could be made configurable later.
  this.pivotHelper = new PivotFacetHelper();
}
/**
 * Enables faceting on the response builder when the request asks for it.
 * When the facet parameter is absent or false nothing on the builder is touched.
 *
 * @param rb the response builder for the current request
 */
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
  final SolrParams requestParams = rb.req.getParams();
  if (!requestParams.getBool(FacetParams.FACET, false)) {
    return;
  }
  rb.setNeedDocSet(true);
  rb.doFacets = true;
  rb.doPercentiles = requestParams.getBool(FacetParams.PERCENTILE, false);
}
/**
 * Computes the facet counts for the current result set and stores them in the
 * response under "facet_counts". Pivot facets, when requested, are computed by
 * the pivot helper and attached to the counts under {@code PIVOT_KEY}.
 *
 * @param rb The response builder {@link ResponseBuilder}
 */
@Override
public void process(ResponseBuilder rb) throws IOException
{
  if (!rb.doFacets) {
    return;
  }
  final SolrParams requestParams = rb.req.getParams();
  final SimpleFacets simpleFacets =
      new SimpleFacets(rb.req, rb.getResults().docSet, requestParams, rb);
  final NamedList<Object> facetCounts = simpleFacets.getFacetCounts();

  final String[] pivotFields = requestParams.getParams(FacetParams.FACET_PIVOT);
  final boolean pivotsRequested = pivotFields != null && pivotFields.length > 0;
  if (pivotsRequested) {
    final NamedList pivotResult = pivotHelper.process(rb, requestParams, pivotFields);
    if (pivotResult != null) {
      facetCounts.add(PIVOT_KEY, pivotResult);
    }
  }

  // TODO ???? add this directly to the response, or to the builder?
  rb.rsp.add("facet_counts", facetCounts);
}
// Prefix of the refinement facet.field command built in distributedProcess():
// it opens the local params and starts a parameter reference ("$...") for the
// CommonParams.TERMS key.
private static final String commandPrefix = "{!" + CommonParams.TERMS + "=$";
/**
 * During STAGE_GET_FIELDS, issues the facet refinement requests: for every shard
 * that still owes counts for particular terms, a facet.field command restricted
 * to those terms is added either to an outgoing single-shard "get fields"
 * request for that shard or, if none exists, to a newly created request.
 *
 * @param rb the response builder for the current request
 * @return always {@link ResponseBuilder#STAGE_DONE}
 */
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
  if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) {
    return ResponseBuilder.STAGE_DONE;
  }

  // Refinement requests are overlapped, where possible, with the requests that
  // fetch fields (the only other required phase). Doing this here lets us look
  // at every request in the outgoing queue at once.
  for (int shardIdx = 0; shardIdx < rb.shards.length; shardIdx++) {
    // Flat list of (facetCommand, termsKey, termsVal) triples for this shard.
    List<String> triples = null;

    for (DistribFieldFacet dff : rb._facetInfo.facets.values()) {
      if (!dff.needRefinements) {
        continue;
      }
      final List<String> termsToRefine = dff._toRefine[shardIdx];
      if (termsToRefine == null || termsToRefine.size() == 0) {
        continue;
      }

      // reuse the same key that was used for the main facet
      final String termsKey = dff.getKey() + "__terms";
      final String termsVal = StrUtils.join(termsToRefine, ',');

      // The terms are injected into the original facet.field command through a
      // parameter reference, which avoids another layer of encoding.
      final String encodedKey = QueryParsing.encodeLocalParamVal(termsKey);
      final String facetCommand = dff.localParams != null
          ? commandPrefix + encodedKey + " " + dff.facetStr.substring(2)
          : commandPrefix + encodedKey + '}' + dff.field;

      if (triples == null) {
        triples = new ArrayList<String>();
      }
      triples.add(facetCommand);
      triples.add(termsKey);
      triples.add(termsVal);
    }

    if (triples == null) {
      continue;
    }

    final String shard = rb.shards[shardIdx];

    // Try to find a request that is already going out to that shard.
    // If the number of shards becomes too great, we may want to move to
    // hashing for better scalability.
    ShardRequest refine = null;
    for (ShardRequest candidate : rb.outgoing) {
      if ((candidate.purpose & ShardRequest.PURPOSE_GET_FIELDS) == 0) {
        continue;
      }
      if (candidate.shards == null || candidate.shards.length != 1) {
        continue;
      }
      if (!candidate.shards[0].equals(shard)) {
        continue;
      }
      refine = candidate;
      break;
    }

    final boolean createdHere = (refine == null);
    if (createdHere) {
      // No suitable request is going out to that shard, so create one ourselves.
      refine = new ShardRequest();
      refine.shards = new String[] {shard};
      refine.params = new ModifiableSolrParams(rb.req.getParams());
      // don't request any documents
      refine.params.remove(CommonParams.START);
      refine.params.set(CommonParams.ROWS, "0");
    }

    refine.purpose |= ShardRequest.PURPOSE_REFINE_FACETS;
    refine.params.set(FacetParams.FACET, "true");
    refine.params.remove(FacetParams.FACET_FIELD);
    refine.params.remove(FacetParams.FACET_QUERY);

    for (int base = 0; base < triples.size(); base += 3) {
      refine.params.add(FacetParams.FACET_FIELD, triples.get(base));
      refine.params.set(triples.get(base + 1), triples.get(base + 2));
    }

    if (createdHere) {
      rb.addRequest(this, refine);
    }
  }

  return ResponseBuilder.STAGE_DONE;
}
/**
 * Adjusts an outgoing shard request for faceting. Requests that fetch the top
 * ids also collect facets: global mincount/offset/limit are stripped and
 * per-field initial limit and mincount values are computed and sent instead.
 * Every other request has faceting switched off.
 *
 * @param rb   the response builder for the current request
 * @param who  the component that created the request
 * @param sreq the shard request to modify
 */
@Override
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
  if (!rb.doFacets) {
    return;
  }

  if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) == 0) {
    // turn off faceting on other requests
    // (we could optionally remove faceting params as well)
    sreq.params.set(FacetParams.FACET, "false");
    return;
  }

  sreq.purpose |= ShardRequest.PURPOSE_GET_FACETS;
  if (rb.isDistrib && rb.doPercentiles) {
    sreq.params.set(FacetParams.PERCENTILE_DISTRIBUTED, "true");
  }

  FacetInfo fi = rb._facetInfo;
  if (fi == null) {
    fi = new FacetInfo();
    rb._facetInfo = fi;
    fi.parse(rb.req.getParams(), rb);
    // FacetParams.FACET should already be true on the request at this point.
  }

  sreq.params.remove(FacetParams.FACET_MINCOUNT);
  sreq.params.remove(FacetParams.FACET_OFFSET);
  sreq.params.remove(FacetParams.FACET_LIMIT);

  for (DistribFieldFacet dff : fi.facets.values()) {
    final String fieldPrefix = "f." + dff.field + '.';
    sreq.params.remove(fieldPrefix + FacetParams.FACET_MINCOUNT);
    sreq.params.remove(fieldPrefix + FacetParams.FACET_OFFSET);

    dff.initialLimit = dff.limit <= 0 ? dff.limit : dff.offset + dff.limit;

    final boolean sortByCount = dff.sort.equals(FacetParams.FACET_SORT_COUNT);
    if (sortByCount && dff.limit > 0) {
      // set the initial limit higher to increase accuracy
      dff.initialLimit = (int) (dff.initialLimit * 1.5) + 10;
      // TODO: we could change this to 1, but would then need more refinement
      // for small facet result sets?
      dff.initialMincount = 0;
    } else if (sortByCount) {
      // if limit==-1, then no need to artificially lower mincount to 0 if it's 1
      dff.initialMincount = Math.min(dff.minCount, 1);
    } else if (dff.minCount <= 1) {
      // Index order:
      // minCount==0: accurate results without over-requesting or refining.
      // minCount==1: accurate results without over-requesting, but refinement
      //              is still needed.
      dff.initialMincount = dff.minCount;
    } else {
      // Index order with minCount==n (>1): use minCount/nShards rounded up.
      // E.g. with minCount=10 and 3 shards, at least one shard must have a
      // count of 4 for the term. Unless limit==-1 this can produce too short a
      // list (terms at the end may be missed): each shard could return its top
      // 10 while some of those fail to make the combined list. Over-requesting
      // helps a little here, but not as much as when sorting by count.
      dff.initialMincount = (int) Math.ceil((double) dff.minCount / rb.slices.length);
    }

    if (dff.initialMincount != 0) {
      sreq.params.set(fieldPrefix + FacetParams.FACET_MINCOUNT, dff.initialMincount);
    }

    // Currently this is for testing only and allows overriding of the
    // facet.limit sent to the shards.
    dff.initialLimit = rb.req.getParams().getInt("facet.shard.limit", dff.initialLimit);
    sreq.params.set(fieldPrefix + FacetParams.FACET_LIMIT, dff.initialLimit);
  }
}
/**
 * Dispatches shard responses: facet-collecting requests are merged via
 * countFacets, otherwise refinement requests are applied via refineFacets.
 *
 * @param rb   the response builder for the current request
 * @param sreq the shard request whose responses have arrived
 */
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
  if (!rb.doFacets) {
    return;
  }
  final int purpose = sreq.purpose;
  if ((purpose & ShardRequest.PURPOSE_GET_FACETS) != 0) {
    countFacets(rb, sreq);
    return;
  }
  if ((purpose & ShardRequest.PURPOSE_REFINE_FACETS) != 0) {
    refineFacets(rb, sreq);
  }
}
/**
 * Merges the initial facet responses of all shards into the request's FacetInfo
 * and then decides, per field facet, which terms must be refined on which shards.
 *
 * For each shard response this accumulates facet_queries counts, adds
 * facet_fields counts to the matching DistribFieldFacet, merges facet_dates and
 * facet_ranges (using the first shard seen as the basis) and merges facet_pivot
 * values into per-pivot maps. Afterwards the pivot maps are converted back to
 * lists (applying the combined pivot limit when requested) and the refinement
 * lists ({@code dff._toRefine}) are populated.
 *
 * @param rb   the response builder for the current request
 * @param sreq the shard request whose responses carry the facet counts
 * @throws SolrException if a shard response cannot be read and shards.tolerant is not set
 */
private void countFacets(ResponseBuilder rb, ShardRequest sreq) {
  FacetInfo fi = rb._facetInfo;
  SimpleOrderedMap<Map<Object,NamedList<Object>>> pivotFacetsMap = null;
  SimpleOrderedMap<Map<Integer,Map<Object,Integer>>> fieldCountsMap = null;
  boolean pivoting = false;
  PivotLimitInfo limitInfo = null;
  List<String> facetLimitIgnoreFieldList = null;
  SimpleOrderedMap<List<Integer>> facetLimitIgnoreIndexsMap = null;
  boolean sortPivotsByCount = true;
  for (ShardResponse srsp: sreq.responses) {
    int shardNum = rb.getShardNum(srsp.getShard());
    NamedList facet_counts = null;
    try {
      facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
    }
    catch(Exception ex) {
      if(rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false)) {
        continue; // looks like a shard did not return anything
      }
      throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to read facet info for shard: "+srsp.getShard(), ex);
    }
    // handle facet queries
    NamedList facet_queries = (NamedList) facet_counts.get("facet_queries");
    if (facet_queries != null) {
      for (int i = 0; i < facet_queries.size(); i++) {
        String returnedKey = facet_queries.getName(i);
        long count = ((Number) facet_queries.getVal(i)).longValue();
        QueryFacet qf = fi.queryFacets.get(returnedKey);
        qf.count += count;
      }
    }
    // step through each facet.field, adding results from this shard
    NamedList facet_fields = (NamedList) facet_counts.get("facet_fields");
    if (facet_fields != null) {
      for (DistribFieldFacet dff : fi.facets.values()) {
        dff.add(shardNum, (NamedList)facet_fields.get(dff.getKey()), dff.initialLimit);
      }
    }
    // Distributed facet_dates
    //
    // The implementation below uses the first encountered shard's
    // facet_dates as the basis for subsequent shards' data to be merged.
    // (the "NOW" param should ensure consistency)
    @SuppressWarnings("unchecked")
    SimpleOrderedMap<SimpleOrderedMap<Object>> facet_dates =
        (SimpleOrderedMap<SimpleOrderedMap<Object>>)
        facet_counts.get("facet_dates");
    if (facet_dates != null) {
      // go through each facet_date
      for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_dates) {
        final String field = entry.getKey();
        if (fi.dateFacets.get(field) == null) {
          // first time we've seen this field, no merging
          fi.dateFacets.add(field, entry.getValue());
        } else {
          // not the first time, merge current field
          SimpleOrderedMap<Object> shardFieldValues
              = entry.getValue();
          SimpleOrderedMap<Object> existFieldValues
              = fi.dateFacets.get(field);
          for (Map.Entry<String,Object> existPair : existFieldValues) {
            final String key = existPair.getKey();
            if (key.equals("gap") ||
                key.equals("end") ||
                key.equals("start")) {
              // we can skip these, must all be the same across shards
              continue;
            }
            // can be null if inconsistencies in shards responses
            Integer newValue = (Integer) shardFieldValues.get(key);
            if (null != newValue) {
              Integer oldValue = ((Integer) existPair.getValue());
              existPair.setValue(oldValue + newValue);
            }
          }
        }
      }
    }
    // Distributed facet_ranges
    //
    // The implementation below uses the first encountered shard's
    // facet_ranges as the basis for subsequent shards' data to be merged.
    @SuppressWarnings("unchecked")
    SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges =
        (SimpleOrderedMap<SimpleOrderedMap<Object>>)
        facet_counts.get("facet_ranges");
    if (facet_ranges != null) {
      // go through each facet_range
      for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_ranges) {
        final String field = entry.getKey();
        if (fi.rangeFacets.get(field) == null) {
          // first time we've seen this field, no merging
          fi.rangeFacets.add(field, entry.getValue());
        } else {
          // not the first time, merge current field counts
          @SuppressWarnings("unchecked")
          NamedList<Integer> shardFieldValues
              = (NamedList<Integer>) entry.getValue().get("counts");
          @SuppressWarnings("unchecked")
          NamedList<Integer> existFieldValues
              = (NamedList<Integer>) fi.rangeFacets.get(field).get("counts");
          for (Map.Entry<String,Integer> existPair : existFieldValues) {
            final String key = existPair.getKey();
            // can be null if inconsistencies in shards responses
            Integer newValue = shardFieldValues.get(key);
            if (null != newValue) {
              Integer oldValue = existPair.getValue();
              existPair.setValue(oldValue + newValue);
            }
          }
        }
      }
    }
    // Distributed facet_pivots
    //
    // The implementation below uses the first encountered shard's
    // facet_pivots as the basis for subsequent shards' data to be merged.
    @SuppressWarnings("unchecked")
    SimpleOrderedMap<List<NamedList<Object>>> facet_pivot = (SimpleOrderedMap<List<NamedList<Object>>>) facet_counts
        .get("facet_pivot");
    if (facet_pivot != null) {
      if (pivotFacetsMap == null) {
        // first shard with pivots: read the pivot-related request parameters once
        pivoting = true;
        SolrParams params = rb.req.getParams();
        String facetSort = params.get(FacetParams.FACET_SORT);
        if (facetSort != null
            && facetSort.equals(FacetParams.FACET_SORT_INDEX)) {
          sortPivotsByCount = false;
        }
        limitInfo = new PivotLimitInfo();
        limitInfo.limit = params.getInt(FacetParams.FACET_LIMIT, 0);
        String facetLimitMethod = params
            .get(FacetParams.FACET_PIVOT_LIMIT_METHOD);
        if (facetLimitMethod != null
            && facetLimitMethod
                .equals(FacetParams.COMBINED_PIVOT_FACET_LIMIT)) {
          limitInfo.combinedPivotLimit = true;
          fieldCountsMap = new SimpleOrderedMap<Map<Integer,Map<Object,Integer>>>();
          facetLimitIgnoreIndexsMap = new SimpleOrderedMap<List<Integer>>();
        }
        String facetLimitIgnores = params
            .get(FacetParams.FACET_PIVOT_LIMIT_IGNORE);
        if (facetLimitIgnores != null) {
          facetLimitIgnoreFieldList = Arrays.asList(facetLimitIgnores
              .split(","));
        }
        pivotFacetsMap = new SimpleOrderedMap<Map<Object,NamedList<Object>>>();
      }
      // go through each facet_pivot
      for (Map.Entry<String,List<NamedList<Object>>> pivot : facet_pivot) {
        final String pivotName = pivot.getKey();
        final Integer numberOfPivots;
        if (facetLimitIgnoreFieldList != null) {
          List<Integer> facetLimitIgnoreIndexList = new ArrayList<Integer>();
          List<String> pivotFields = Arrays.asList(pivotName.split(","));
          for (String facetLimitIgnore : facetLimitIgnoreFieldList) {
            int thisIndex = pivotFields.indexOf(facetLimitIgnore);
            if (thisIndex > -1) {
              // Add one here because pivots start from 1, whereas list indexes
              // start from 0
              facetLimitIgnoreIndexList.add(thisIndex + 1);
            }
          }
          // The index map only exists (and is only read) when the combined pivot
          // limit is active; without this guard an ignore list combined with any
          // other limit method caused a NullPointerException. One entry per pivot
          // is enough because lookups return the first match.
          if (facetLimitIgnoreIndexsMap != null
              && facetLimitIgnoreIndexsMap.get(pivotName) == null) {
            facetLimitIgnoreIndexsMap.add(pivotName, facetLimitIgnoreIndexList);
          }
          numberOfPivots = pivotFields.size();
        } else {
          numberOfPivots = 1 + StringUtils.countMatches(pivotName, ",");
        }
        Map<Integer,Map<Object,Integer>> fieldCounts = null;
        if (limitInfo.combinedPivotLimit) {
          fieldCounts = fieldCountsMap.get(pivotName);
          if (fieldCounts == null) {
            fieldCounts = new HashMap<Integer,Map<Object,Integer>>();
            fieldCountsMap.add(pivotName, fieldCounts);
          }
        }
        Map<Object,NamedList<Object>> pivotValues = pivotFacetsMap
            .get(pivotName);
        if (pivotValues == null) {
          // first time we've seen this pivot, no merging
          pivotFacetsMap.add(pivotName, pivotHelper.convertPivotsToMaps(
              pivot.getValue(), 1, numberOfPivots, fieldCounts));
        } else {
          // not the first time, merge
          @SuppressWarnings("unchecked")
          List<NamedList<Object>> shardPivotValues = (List<NamedList<Object>>) pivot
              .getValue();
          mergePivotFacet(pivotValues, shardPivotValues, 1, numberOfPivots,
              fieldCounts);
        }
      }
    }
  }
  // set pivot facets from map
  if (pivoting) {
    if (limitInfo.combinedPivotLimit) {
      // For every pivot level keep only the `limit` values with the highest
      // combined counts, unless that level is listed in the ignore indexes.
      Comparator<Entry<Object,Integer>> entryCountComparator = new EntryCountComparator();
      limitInfo.fieldLimitsMap = new SimpleOrderedMap<List<List<Object>>>();
      for (Entry<String,Map<Integer,Map<Object,Integer>>> fieldCountsEntry : fieldCountsMap) {
        List<Integer> facetLimitIgnoreIndexs = facetLimitIgnoreIndexsMap
            .get(fieldCountsEntry.getKey());
        List<List<Object>> limitedValuesForPivot = new ArrayList<List<Object>>();
        Integer pivot = 1;
        Map<Object,Integer> fieldCountsForPivot = fieldCountsEntry.getValue()
            .get(pivot);
        while (fieldCountsForPivot != null) {
          // null means "no limit applied at this level"
          List<Object> limitedValuesForField = null;
          if ((facetLimitIgnoreIndexs == null || !facetLimitIgnoreIndexs
              .contains(pivot))
              && fieldCountsForPivot.size() > limitInfo.limit) {
            limitedValuesForField = new ArrayList<Object>();
            List<Entry<Object,Integer>> fieldCountsForPivotList = new ArrayList<Map.Entry<Object,Integer>>(
                fieldCountsForPivot.entrySet());
            Collections.sort(fieldCountsForPivotList, entryCountComparator);
            for (int valueIndex = 0; valueIndex < limitInfo.limit; valueIndex++) {
              limitedValuesForField.add(fieldCountsForPivotList.get(
                  valueIndex).getKey());
            }
          }
          limitedValuesForPivot.add(limitedValuesForField);
          fieldCountsForPivot = fieldCountsEntry.getValue().get(++pivot);
        }
        limitInfo.fieldLimitsMap.add(fieldCountsEntry.getKey(),
            limitedValuesForPivot);
      }
    }
    fi.pivotFacets = pivotHelper.convertPivotMapsToList(pivotFacetsMap,
        limitInfo, sortPivotsByCount);
  }
  //
  // This code currently assumes that there will be only a single
  // request (with responses from all shards) sent out to get facets...
  // otherwise we would need to wait until all facet responses were received.
  //
  for (DistribFieldFacet dff : fi.facets.values()) {
    // no need to check these facets for refinement
    if (dff.initialLimit <= 0 && dff.initialMincount == 0) continue;
    // only other case where index-sort doesn't need refinement is if minCount==0
    if (dff.minCount == 0 && dff.sort.equals(FacetParams.FACET_SORT_INDEX)) continue;
    @SuppressWarnings("unchecked") // generic arrays are annoying
    List<String>[] tmp = (List<String>[]) new List[rb.shards.length];
    dff._toRefine = tmp;
    ShardFacetCount[] counts = dff.getCountSorted();
    int ntop = Math.min(counts.length, dff.limit >= 0 ? dff.offset + dff.limit : Integer.MAX_VALUE);
    // Nothing falls inside the requested window (no terms at all, or
    // offset + limit == 0), so there is nothing to refine. This also avoids
    // reading counts[-1] when ntop is 0 but counts is non-empty.
    if (ntop <= 0) continue;
    long smallestCount = counts[ntop - 1].count;
    for (int i = 0; i < counts.length; i++) {
      ShardFacetCount sfc = counts[i];
      boolean needRefinement = false;
      if (i < ntop) {
        // automatically flag the top values for refinement
        // this should always be true for facet.sort=index
        needRefinement = true;
      } else {
        // this logic should only be invoked for facet.sort=index (for now)
        // calculate the maximum value that this term may have
        // and if it is >= smallestCount, then flag for refinement
        long maxCount = sfc.count;
        for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
          OpenBitSet obs = dff.counted[shardNum];
          if (obs!=null && !obs.get(sfc.termNum)) { // obs can be null if a shard request failed
            // if missing from this shard, add the max it could be
            maxCount += dff.maxPossible(sfc, shardNum);
          }
        }
        if (maxCount >= smallestCount) {
          // TODO: on a tie, we could check the term values
          needRefinement = true;
        }
      }
      if (needRefinement) {
        // add a query for each shard missing the term that needs refinement
        for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
          OpenBitSet obs = dff.counted[shardNum];
          if(obs!=null && !obs.get(sfc.termNum) && dff.maxPossible(sfc,shardNum)>0) {
            dff.needRefinements = true;
            List<String> lst = dff._toRefine[shardNum];
            if (lst == null) {
              lst = dff._toRefine[shardNum] = new ArrayList<String>();
            }
            lst.add(sfc.name);
          }
        }
      }
    }
  }
}
/**
 * Merges one shard's pivot values for a single pivot level into the values
 * already collected, recursing into nested pivot levels.
 *
 * Values not seen before are inserted as-is (with their nested pivot list
 * converted to a map). For values already present the counts are summed, the
 * statistics are merged and the nested pivots are merged recursively; such
 * values are then removed from {@code shardPivotValues}.
 *
 * @param pivotValues      values merged so far, keyed by pivot value; updated in place
 * @param shardPivotValues the shard's pivot values for this level; merged entries are removed
 * @param currentPivot     1-based level currently being merged
 * @param numberOfPivots   total number of levels in this pivot
 * @param fieldCounts      per-level value counts to maintain, or null when not counting
 */
private void mergePivotFacet(Map<Object,NamedList<Object>> pivotValues, List<NamedList<Object>> shardPivotValues, int currentPivot, int numberOfPivots, Map<Integer,Map<Object,Integer>> fieldCounts) {
  Iterator<NamedList<Object>> shardPivotValuesIterator = shardPivotValues.iterator();
  boolean countFields = (fieldCounts != null);
  Map<Object,Integer> thisFieldCountMap = null;
  if (countFields) {
    thisFieldCountMap = pivotHelper.getFieldCountMap(fieldCounts,currentPivot);
  }
  while (shardPivotValuesIterator.hasNext()) {
    NamedList<Object> shardPivotValue = shardPivotValuesIterator.next();
    Object valueObj = namedListHelper.getFromPivotList(PivotListEntry.VALUE,shardPivotValue);
    Object shardCountObj = namedListHelper.getFromPivotList(PivotListEntry.COUNT, shardPivotValue);
    // a missing or non-Integer shard count contributes nothing
    int shardCount = 0;
    if (shardCountObj instanceof Integer) {
      shardCount = (Integer) shardCountObj;
    }
    if (countFields) {
      pivotHelper.addFieldCounts(valueObj, shardCount, thisFieldCountMap);
    }
    NamedList<Object> pivotValue = pivotValues.get(valueObj);
    if (pivotValue == null) {
      // pivot value not found, add to existing values
      pivotValues.put(valueObj, shardPivotValue);
      if (currentPivot < numberOfPivots) {
        int pivotIdx = shardPivotValue.indexOf(PivotListEntry.PIVOT.getName(), 0);
        // indexOf returns -1 when the value has no nested pivot entry; reading
        // index -1 would throw, so only convert when the entry exists
        if (pivotIdx > -1) {
          Object shardPivotObj = shardPivotValue.getVal(pivotIdx);
          if (shardPivotObj instanceof List) {
            shardPivotValue.setVal(pivotIdx, pivotHelper.convertPivotsToMaps((List) shardPivotObj, currentPivot + 1, numberOfPivots,fieldCounts));
          }
        }
      }
    } else {
      Object existingCountObj = namedListHelper.getFromPivotList(PivotListEntry.COUNT, pivotValue);
      if (existingCountObj instanceof Integer) {
        int countIdx = pivotValue.indexOf(PivotListEntry.COUNT.getName(), 0);
        pivotValue.setVal(countIdx, ((Integer) existingCountObj) + shardCount);
      } else {
        // existingCountObj may be null here, so the type name is resolved null-safely
        String countType = existingCountObj == null ? "null" : existingCountObj.getClass().getCanonicalName();
        StringBuilder errMsg = new StringBuilder("Count value for pivot field: ");
        errMsg.append(namedListHelper.getFromPivotList(PivotListEntry.FIELD,pivotValue));
        errMsg.append(" with value: ");
        errMsg.append(namedListHelper.getFromPivotList(PivotListEntry.VALUE,pivotValue));
        errMsg.append(" is not of type Integer. Cannot increment count for this pivot. Count is of type: ");
        errMsg.append(countType);
        errMsg.append(" and value: ").append(existingCountObj);
        log.error(errMsg.toString());
      }
      NamedList<NamedList<Object>> existingStatistics = (NamedList<NamedList<Object>>)namedListHelper.getFromPivotList(PivotListEntry.STATISTICS, pivotValue);
      NamedList<NamedList<Object>> shardStatistics = (NamedList<NamedList<Object>>)namedListHelper.getFromPivotList(PivotListEntry.STATISTICS, shardPivotValue);
      Object mergedStatistics = mergePivotStatistics(existingStatistics, shardStatistics);
      int statsIndex = pivotValue.indexOf(PivotListEntry.STATISTICS.getName(), 0);
      if (statsIndex > -1) {
        pivotValue.setVal(statsIndex, mergedStatistics);
      } else if (mergedStatistics != null) {
        // the existing value had no statistics entry (setVal(-1, ...) would
        // throw); append the shard's statistics instead
        pivotValue.add(PivotListEntry.STATISTICS.getName(), mergedStatistics);
      }
      if (currentPivot < numberOfPivots) {
        Object shardPivotObj = shardPivotValue.get("pivot");
        Object pivotObj = pivotValue.get("pivot");
        if (shardPivotObj instanceof List) {
          if (pivotObj instanceof Map) {
            mergePivotFacet((Map) pivotObj, (List) shardPivotObj,currentPivot + 1, numberOfPivots, fieldCounts);
          } else {
            pivotValue.add("pivot", pivotHelper.convertPivotsToMaps((List) shardPivotObj, currentPivot + 1, numberOfPivots,fieldCounts));
          }
        }
      }
      // this shard value has been folded into the existing one
      shardPivotValuesIterator.remove();
    }
  }
}
/**
 * Merges a shard's per-field pivot statistics into the existing ones.
 *
 * When only one side is present that side is returned unchanged (null when
 * both are absent). Otherwise every shard field with a non-zero total count is
 * either appended to {@code existingFields} or merged into the entry already
 * there, and {@code existingFields} is returned.
 *
 * @param existingFields statistics collected so far, keyed by field name; may be null
 * @param shardFields    statistics reported by the shard, keyed by field name; may be null
 * @return the merged statistics, or null when neither side has any
 */
private Object mergePivotStatistics(NamedList<NamedList<Object>> existingFields, NamedList<NamedList<Object>> shardFields) {
  if (existingFields == null) {
    // nothing collected yet: the shard's statistics (possibly null) are the result
    return shardFields;
  }
  if (shardFields == null) {
    return existingFields;
  }

  // stats->fields->{buckets,totalCount}
  for (Entry<String,NamedList<Object>> shardEntry : shardFields) {
    final String fieldName = shardEntry.getKey();
    final NamedList<Object> shardData = shardEntry.getValue();

    final int shardTotal = (Integer) shardData.get(FacetParams.PERCENTILE_SHARD_TOTAL_COUNT);
    if (shardTotal == 0) {
      continue;
    }

    final NamedList<Object> existingData = existingFields.get(fieldName);
    if (existingData == null) {
      existingFields.add(fieldName, shardData);
    } else {
      final int slot = existingFields.indexOf(fieldName, 0);
      existingFields.setVal(slot, mergeFieldStatistics(existingData, shardData));
    }
  }
  return existingFields;
}
/**
 * Merges the percentile statistics of one field from a shard with the
 * statistics already collected for that field.
 *
 * Buckets are merged by name; both input bucket lists are consumed (emptied)
 * in the process. The total counts are summed. When the shard reports an
 * average, the merged average is the count-weighted mean of both averages and
 * the merged sum and count are the sums of both sides.
 *
 * @param existingSingleFieldStatistics statistics collected so far for the field
 * @param shardSingleFieldStatistics    statistics reported by the shard for the field
 * @return a new list holding the merged buckets, total count and, if present, average data
 */
private NamedList<Object> mergeFieldStatistics(NamedList<Object> existingSingleFieldStatistics,NamedList<Object> shardSingleFieldStatistics) {
  NamedList<Integer> existingBuckets = (NamedList<Integer>)existingSingleFieldStatistics.get(FacetParams.PERCENTILE_BUCKETS);
  NamedList<Integer> shardBuckets = (NamedList<Integer>)shardSingleFieldStatistics.get(FacetParams.PERCENTILE_BUCKETS);
  NamedList<Object> mergedBuckets = new NamedList<Object>();
  NamedList<Object> mergedStatistics = new NamedList<Object>();

  // Numeric bucket names are left-padded with 0's to the size of the upper
  // fence and the Solr datetime format is lexically sortable, so plain string
  // comparison is enough to merge the two bucket lists in order.
  while(existingBuckets.size() > 0 && shardBuckets.size() > 0) {
    int comparison = shardBuckets.getName(0).compareTo(existingBuckets.getName(0));
    if(comparison > 0) {
      mergedBuckets.add(existingBuckets.getName(0), existingBuckets.remove(0));
    }
    else if(comparison < 0) {
      mergedBuckets.add(shardBuckets.getName(0), shardBuckets.remove(0));
    }
    else {
      // same bucket on both sides: add the counts
      mergedBuckets.add(shardBuckets.getName(0), shardBuckets.remove(0) + existingBuckets.remove(0));
    }
  }
  // at most one of the lists still has entries; append the remainder
  while(existingBuckets.size() > 0) {
    mergedBuckets.add(existingBuckets.getName(0), existingBuckets.remove(0));
  }
  while(shardBuckets.size() > 0) {
    mergedBuckets.add(shardBuckets.getName(0), shardBuckets.remove(0));
  }
  mergedStatistics.add(FacetParams.PERCENTILE_BUCKETS, mergedBuckets);

  // don't forget the counts
  int shardFieldBucketTotal = (Integer)shardSingleFieldStatistics.get(FacetParams.PERCENTILE_SHARD_TOTAL_COUNT);
  int existingFieldBucketTotal = (Integer)existingSingleFieldStatistics.get(FacetParams.PERCENTILE_SHARD_TOTAL_COUNT);
  mergedStatistics.add(FacetParams.PERCENTILE_SHARD_TOTAL_COUNT, shardFieldBucketTotal + existingFieldBucketTotal);

  // check for and include averages; a null shard average means averages were not requested
  Double shardAverage = (Double)shardSingleFieldStatistics.get("percentiles_average");
  if(shardAverage != null) {
    double shardSum = (Double)shardSingleFieldStatistics.get("percentiles_sum");
    int shardPercentilesCount = (Integer)shardSingleFieldStatistics.get("percentiles_count");
    double existingAverage = (Double)existingSingleFieldStatistics.get("percentiles_average");
    int existingPercentilesCount = (Integer)existingSingleFieldStatistics.get("percentiles_count");
    Double existingSum = (Double)existingSingleFieldStatistics.get("percentiles_sum");

    int mergedCount = existingPercentilesCount + shardPercentilesCount;
    // Weighted mean computed in floating point: the previous int/int weights
    // truncated to 0 (or 1), and a merged count of 0 divided by zero.
    double mergedAverage = mergedCount == 0
        ? 0D
        : ((shardPercentilesCount * shardAverage) + (existingPercentilesCount * existingAverage)) / mergedCount;
    // The merged sum must carry the sum accumulated so far, not just this shard's.
    double mergedSum = shardSum + (existingSum == null ? 0D : existingSum);

    mergedStatistics.add("percentiles_average", mergedAverage);
    mergedStatistics.add("percentiles_count", mergedCount);
    mergedStatistics.add("percentiles_sum", mergedSum);
  }
  return mergedStatistics;
}
/**
 * Applies the responses of facet refinement requests: for every term returned
 * by a shard, the count is added to the matching ShardFacetCount of the field
 * facet. Terms that were never collected for the facet are logged and skipped.
 *
 * @param rb   the response builder for the current request
 * @param sreq the refinement shard request whose responses have arrived
 * @throws SolrException if a shard response cannot be read and shards.tolerant is not set
 */
private void refineFacets(ResponseBuilder rb, ShardRequest sreq) {
  FacetInfo fi = rb._facetInfo;
  for (ShardResponse srsp : sreq.responses) {
    // int shardNum = rb.getShardNum(srsp.shard);
    NamedList facet_counts = null;
    try {
      facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
    }
    catch(Exception ex) {
      // same handling as countFacets: a shard that returned nothing is only
      // acceptable when the request tolerates shard failures
      if(rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false)) {
        continue;
      }
      throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to read facet info for shard: "+srsp.getShard(), ex);
    }
    if (facet_counts == null) continue; // response carried no facet section
    NamedList facet_fields = (NamedList) facet_counts.get("facet_fields");
    if (facet_fields == null) continue; // this can happen when there's an exception
    for (int i = 0; i < facet_fields.size(); i++) {
      String key = facet_fields.getName(i);
      DistribFieldFacet dff = fi.facets.get(key);
      if (dff == null) continue;
      NamedList shardCounts = (NamedList) facet_fields.getVal(i);
      for (int j = 0; j < shardCounts.size(); j++) {
        String name = shardCounts.getName(j);
        long count = ((Number) shardCounts.getVal(j)).longValue();
        ShardFacetCount sfc = dff.counts.get(name);
        if (sfc == null) {
          // we got back a term we didn't ask for?
          // (_toRefine is an array, so format its contents rather than its identity)
          log.error("Unexpected term returned for facet refining. key=" + key + " term='" + name + "'"
              + "\n\trequest params=" + sreq.params
              + "\n\ttoRefine=" + Arrays.toString(dff._toRefine)
              + "\n\tresponse=" + shardCounts
          );
          continue;
        }
        sfc.count += count;
      }
    }
  }
}
/**
 * Builds the final "facet_counts" section of a distributed response from the
 * merged FacetInfo once STAGE_GET_FIELDS has finished, then releases the
 * FacetInfo held by the response builder.
 *
 * @param rb the response builder for the current request
 */
@Override
public void finishStage(ResponseBuilder rb) {
  // Wait until STAGE_GET_FIELDS so that "result" is already stored in the
  // response (for aesthetics).
  if (!rb.doFacets) {
    return;
  }
  if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) {
    return;
  }

  final FacetInfo info = rb._facetInfo;
  final NamedList<Object> response = new SimpleOrderedMap<Object>();

  // facet queries
  final NamedList<Number> queryCounts = new SimpleOrderedMap<Number>();
  response.add("facet_queries", queryCounts);
  for (QueryFacet queryFacet : info.queryFacets.values()) {
    queryCounts.add(queryFacet.getKey(), num(queryFacet.count));
  }

  // facet fields
  final NamedList<Object> fieldFacets = new SimpleOrderedMap<Object>();
  response.add("facet_fields", fieldFacets);
  for (DistribFieldFacet dff : info.facets.values()) {
    // order is more important for facets
    final NamedList<Object> termCounts = new NamedList<Object>();
    fieldFacets.add(dff.getKey(), termCounts);

    final boolean byCount = dff.sort.equals(FacetParams.FACET_SORT_COUNT);
    ShardFacetCount[] sorted;
    if (byCount) {
      sorted = dff.countSorted;
      if (sorted == null || dff.needRefinements) {
        sorted = dff.getCountSorted();
      }
    } else {
      // Index sort; any unrecognized sort value is treated the same way.
      // TODO: log error or throw exception for an unrecognized sort?
      sorted = dff.getLexSorted();
    }

    if (byCount) {
      final int end = dff.limit < 0
          ? sorted.length
          : Math.min(dff.offset + dff.limit, sorted.length);
      // counts are descending, so stop at the first one below minCount
      for (int i = dff.offset; i < end && !(sorted[i].count < dff.minCount); i++) {
        termCounts.add(sorted[i].name, num(sorted[i].count));
      }
    } else {
      // index order: skip `offset` qualifying terms, then emit up to `limit`
      int toSkip = dff.offset;
      int remaining = dff.limit >= 0 ? dff.limit : Integer.MAX_VALUE;
      for (ShardFacetCount entry : sorted) {
        long count = entry.count;
        if (count < dff.minCount) {
          continue;
        }
        if (toSkip > 0) {
          toSkip--;
          continue;
        }
        if (remaining <= 0) {
          break;
        }
        remaining--;
        termCounts.add(entry.name, num(count));
      }
    }

    if (dff.missing) {
      termCounts.add(null, num(dff.missingCount));
    }
  }

  response.add("facet_dates", info.dateFacets);
  response.add("facet_ranges", info.rangeFacets);

  if (info.pivotFacets.size() > 0) {
    if (rb.doPercentiles) {
      final SolrParams requestParams = rb.req.getParams();
      info.pivotFacets = convertPivotStatisticsBucketsToPercentiles(info.pivotFacets, requestParams);
    }
    response.add("facet_pivot", info.pivotFacets);
  }

  rb.rsp.add("facet_counts", response);
  rb._facetInfo = null; // could be big, so release asap
}
/**
 * Rewrites, in place, every pivot list stored in {@code pivotFacets} by passing it
 * through {@code convertPivotList}.
 *
 * @param pivotFacets the pivot results to convert; the map itself is mutated
 * @param required request parameters forwarded unchanged to the list conversion
 * @return the same {@code pivotFacets} instance that was passed in
 */
private SimpleOrderedMap<List<NamedList<Object>>> convertPivotStatisticsBucketsToPercentiles(
    SimpleOrderedMap<List<NamedList<Object>>> pivotFacets, SolrParams required) {
  int slot = 0;
  while (slot < pivotFacets.size()) {
    List<NamedList<Object>> pivotList = pivotFacets.getVal(slot);
    List<NamedList<Object>> converted = convertPivotList(pivotList, required);
    pivotFacets.setVal(slot, converted);
    slot++;
  }
  return pivotFacets;
}
/**
 * Replaces each element of {@code val} with the result of
 * {@code convertPivotStatistics} for that element.
 *
 * @param val the pivot entries to convert; the list is updated in place
 * @param required request parameters forwarded unchanged to the entry conversion
 * @return the same {@code val} list that was passed in
 */
private List<NamedList<Object>> convertPivotList(List<NamedList<Object>> val, SolrParams required) {
  int position = 0;
  while (position < val.size()) {
    NamedList<Object> pivotEntry = val.get(position);
    val.set(position, convertPivotStatistics(pivotEntry, required));
    position++;
  }
  return val;
}
/**
 * Converts one pivot entry in place: first recurses into the nested pivots stored
 * under {@code PivotListEntry.PIVOT} (if present), then replaces the value stored
 * under {@code PivotListEntry.STATISTICS} (if present) with the result of
 * {@code convertPivotStatisticsFields}.
 *
 * @param thisPivot the pivot entry to convert; it is mutated
 * @param required request parameters forwarded unchanged to the field conversion
 * @return the same {@code thisPivot} instance that was passed in
 */
private NamedList<Object> convertPivotStatistics(NamedList<Object> thisPivot, SolrParams required) {
  // Nested pivots are handled before this entry's own statistics.
  final int nestedAt = thisPivot.indexOf(PivotListEntry.PIVOT.getName(), 0);
  if (nestedAt >= 0) {
    ArrayList<Object> nestedPivots = (ArrayList<Object>) thisPivot.getVal(nestedAt);
    ArrayList<Object> convertedNested = new ArrayList<Object>();
    for (Object child : nestedPivots) {
      convertedNested.add(convertPivotStatistics((NamedList<Object>) child, required));
    }
    thisPivot.setVal(nestedAt, convertedNested);
  }

  final int statsAt = thisPivot.indexOf(PivotListEntry.STATISTICS.getName(), 0);
  if (statsAt >= 0) {
    NamedList<Object> rawStats = (NamedList<Object>) thisPivot.getVal(statsAt);
    thisPivot.setVal(statsAt, convertPivotStatisticsFields(rawStats, required));
  }
  return thisPivot;
}
/**
 * Replaces, in place, the value of every entry in {@code listOfFields} with the
 * result of {@code convertOnePivotStatisticsField}, using the entry's name as the
 * field name.
 *
 * @param listOfFields per-field statistics, keyed by field name; it is mutated
 * @param required request parameters forwarded unchanged to the per-field conversion
 * @return the same {@code listOfFields} instance that was passed in
 */
private Object convertPivotStatisticsFields(NamedList<Object> listOfFields, SolrParams required) {
  int fieldPos = 0;
  while (fieldPos < listOfFields.size()) {
    final String fieldName = listOfFields.getName(fieldPos);
    final NamedList<Object> rawFieldStats = (NamedList<Object>) listOfFields.getVal(fieldPos);
    listOfFields.setVal(fieldPos, convertOnePivotStatisticsField(rawFieldStats, required, fieldName));
    fieldPos++;
  }
  return listOfFields;
}
/**
 * Builds the percentile output for one statistics field from its bucket counts.
 *
 * <p>If {@code statsData} has no value under
 * {@code FacetParams.PERCENTILE_SHARD_TOTAL_COUNT}, an empty list is returned.
 * Otherwise the buckets found under {@code FacetParams.PERCENTILE_BUCKETS} (if any)
 * are fed to a {@code FacetPercentiles} instance, and a new list is returned that
 * holds the percentiles and, when {@code FacetParams.PERCENTILE_AVERAGES} is
 * enabled, the average, count and sum as well.
 *
 * @param statsData the statistics for a single field; it is read but not modified
 * @param solrParams request parameters; the requested percentiles for
 *        {@code fieldName} are read through {@code RequiredSolrParams}
 * @param fieldName the field whose statistics are being converted
 * @return a newly created list with the converted statistics (possibly empty)
 */
private NamedList<Object> convertOnePivotStatisticsField(NamedList<Object> statsData, SolrParams solrParams, String fieldName) {
  final Integer totalCount = (Integer) statsData.get(FacetParams.PERCENTILE_SHARD_TOTAL_COUNT);
  if (totalCount == null) {
    // No total count recorded: the output for this field is empty.
    return new NamedList<Object>();
  }

  final RequiredSolrParams requiredParams = new RequiredSolrParams(solrParams);
  final String[] requestedPercentiles =
      requiredParams.getFieldParams(fieldName, FacetParams.PERCENTILE_REQUESTED_PERCENTILES);
  final boolean calculateAverages = solrParams.getBool(FacetParams.PERCENTILE_AVERAGES, false);
  final int bucketsAt = statsData.indexOf(FacetParams.PERCENTILE_BUCKETS, 0);
  final FacetPercentiles percentiles = new FacetPercentiles(requestedPercentiles, totalCount);

  if (bucketsAt >= 0) {
    NamedList<Integer> buckets = (NamedList<Integer>) statsData.getVal(bucketsAt);
    int b = 0;
    // Keep scanning while percentiles are still outstanding, or to the end when averaging.
    while (b < buckets.size() && (percentiles.stillLookingForPercentiles() || calculateAverages)) {
      percentiles.processFacetCount(buckets.getName(b), buckets.getVal(b));
      if (calculateAverages) {
        percentiles.accumulateAverage(buckets.getName(b), buckets.getVal(b));
      }
      b++;
    }
  }

  final NamedList<Object> converted = new NamedList<Object>();
  converted.add(FacetParams.PERCENTILE, percentiles.getPercentiles());
  if (calculateAverages) {
    converted.add("percentiles_average", percentiles.getAverage());
    converted.add("percentiles_count", percentiles.getTotalCount());
    converted.add("percentiles_sum", percentiles.getTotal());
  }
  return converted;
}
// use <int> tags for smaller facet counts (better back compatibility)
/**
 * Boxes {@code val} as an {@code Integer} when it can be represented as an int and is
 * below {@code Integer.MAX_VALUE}; otherwise boxes it as a {@code Long}.
 *
 * <p>The lower bound is checked explicitly so that values below
 * {@code Integer.MIN_VALUE} are returned unchanged as a {@code Long} instead of being
 * truncated by the narrowing cast. {@code Integer.MAX_VALUE} itself is still returned
 * as a {@code Long}, as before.
 *
 * @param val the count to box
 * @return an {@code Integer} or a {@code Long} holding exactly {@code val}
 */
private Number num(long val) {
  // Separate return statements (not a ternary) so the two boxed types stay distinct.
  if (val >= Integer.MIN_VALUE && val < Integer.MAX_VALUE) {
    return (int) val;
  }
  return val;
}
/**
 * Returns {@code val} as an {@code Integer} when it can be represented as an int and
 * is below {@code Integer.MAX_VALUE}; otherwise returns the original {@code Long}.
 *
 * <p>The lower bound is checked explicitly so that values below
 * {@code Integer.MIN_VALUE} are returned unchanged instead of being truncated by
 * {@code intValue()}. {@code Integer.MAX_VALUE} itself is still returned as a
 * {@code Long}, as before.
 *
 * @param val the count to convert; must not be {@code null}
 * @return an {@code Integer} or a {@code Long} holding exactly the value of {@code val}
 */
private Number num(Long val) {
  final long raw = val.longValue();
  // Separate return statements (not a ternary) so the two boxed types stay distinct.
  if (raw >= Integer.MIN_VALUE && raw < Integer.MAX_VALUE) {
    return val.intValue();
  }
  return val;
}
/////////////////////////////////////////////
/// SolrInfoMBean
////////////////////////////////////////////
/**
 * Returns the fixed, human-readable description of this component.
 *
 * @return the literal description string
 */
@Override
public String getDescription() {
  final String description = "Handle Faceting";
  return description;
}
/**
 * Returns the version-control keyword string that identifies this source file.
 *
 * @return the literal {@code $URL$} keyword string
 */
@Override
public String getSource() {
  final String sourceUrl = "$URL: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene_solr_4_0/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java $";
  return sourceUrl;
}
/**
 * Returns the documentation links for this component; none are provided.
 *
 * @return always {@code null}
 */
@Override
public URL[] getDocs() {
  final URL[] noDocs = null;
  return noDocs;
}
/**
 * Holder for the facet state of one request: query facets, field facets, and the
 * date, range and pivot facet results.
 *
 * <b>This API is experimental and subject to change</b>
 */
public static class FacetInfo {
  /** Query facets keyed by response key; created by {@link #parse}. */
  public LinkedHashMap<String,QueryFacet> queryFacets;
  /** Field facets keyed by response key; created by {@link #parse}. */
  public LinkedHashMap<String,DistribFieldFacet> facets;
  public SimpleOrderedMap<SimpleOrderedMap<Object>> dateFacets
      = new SimpleOrderedMap<SimpleOrderedMap<Object>>();
  public SimpleOrderedMap<SimpleOrderedMap<Object>> rangeFacets
      = new SimpleOrderedMap<SimpleOrderedMap<Object>>();
  public SimpleOrderedMap<List<NamedList<Object>>> pivotFacets
      = new SimpleOrderedMap<List<NamedList<Object>>>();

  /**
   * Resets {@link #queryFacets} and {@link #facets} to fresh maps and fills them from
   * the {@code FacetParams.FACET_QUERY} and {@code FacetParams.FACET_FIELD} values
   * in {@code params}. Entries are stored under each facet's key, so a later facet
   * with the same key replaces an earlier one.
   *
   * @param params the parameters to read the facet definitions from
   * @param rb the response builder handed to each facet's constructor
   */
  void parse(SolrParams params, ResponseBuilder rb) {
    queryFacets = new LinkedHashMap<String,QueryFacet>();
    facets = new LinkedHashMap<String,DistribFieldFacet>();

    final String[] queryDefs = params.getParams(FacetParams.FACET_QUERY);
    final int queryDefCount = (queryDefs == null) ? 0 : queryDefs.length;
    for (int q = 0; q < queryDefCount; q++) {
      QueryFacet parsedQuery = new QueryFacet(rb, queryDefs[q]);
      queryFacets.put(parsedQuery.getKey(), parsedQuery);
    }

    final String[] fieldDefs = params.getParams(FacetParams.FACET_FIELD);
    final int fieldDefCount = (fieldDefs == null) ? 0 : fieldDefs.length;
    for (int f = 0; f < fieldDefCount; f++) {
      DistribFieldFacet parsedField = new DistribFieldFacet(rb, fieldDefs[f]);
      facets.put(parsedField.getKey(), parsedField);
    }
  }
}
/**
 * Common state for a single facet parameter: its type, its raw value, the parsed
 * local params, what is being faceted on, and the key used in the response.
 *
 * <b>This API is experimental and subject to change</b>
 */
public static class FacetBase {
  String facetType; // facet.field, facet.query, etc (make enum?)
  String facetStr; // the parameter value exactly as it was supplied
  String facetOn; // the field or query, without localParams where appropriate
  private String key; // response label... "foo" for {!key=foo}myfield
  SolrParams localParams; // local params parsed from facetStr, or null

  /**
   * Parses the local params of {@code facetStr} and derives what to facet on and
   * the response key.
   *
   * @param rb the response builder whose request parameters are used while parsing
   * @param facetType the facet parameter name, e.g. {@code FacetParams.FACET_QUERY}
   * @param facetStr the raw facet parameter value
   * @throws SolrException with {@code BAD_REQUEST} if the local params cannot be parsed
   */
  public FacetBase(ResponseBuilder rb, String facetType, String facetStr) {
    this.facetType = facetType;
    this.facetStr = facetStr;
    try {
      this.localParams = QueryParsing.getLocalParams(facetStr, rb.req.getParams());
    } catch (ParseException e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }

    String target = facetStr;
    String label = facetStr;
    if (localParams != null) {
      final boolean isQueryFacet = facetType.equals(FacetParams.FACET_QUERY);
      if (!isQueryFacet) {
        // For anything but a query, the local-params prefix is stripped off.
        target = localParams.get(CommonParams.VALUE);
        label = target;
      }
      // An explicit output key, when given, overrides the default label.
      label = localParams.get(CommonParams.OUTPUT_KEY, label);
    }
    this.facetOn = target;
    this.key = label;
  }

  /** returns the key in the response that this facet will be under */
  public String getKey() {
    return key;
  }

  /** returns the facet type this instance was constructed with */
  public String getType() {
    return facetType;
  }
}
/**
 * A facet of type {@code FacetParams.FACET_QUERY} together with its count.
 *
 * <b>This API is experimental and subject to change</b>
 */
public static class QueryFacet extends FacetBase {

  /** The count recorded for this query facet. */
  public long count;

  /**
   * @param rb the response builder passed on to {@link FacetBase}
   * @param facetStr the raw facet query parameter value
   */
  public QueryFacet(ResponseBuilder rb, String facetStr) {
    super(rb, FacetParams.FACET_QUERY, facetStr);
  }
}
/**
 * A facet of type {@code FacetParams.FACET_FIELD} with its per-field settings
 * (offset, limit, mincount, sort, missing, prefix) resolved from the request.
 *
 * <b>This API is experimental and subject to change</b>
 */
public static class FieldFacet extends FacetBase {
  public String field; // the field to facet on... "myfield" for {!key=foo}myfield
  public FieldType ftype;
  public int offset;
  public int limit;
  public int minCount;
  public String sort;
  public boolean missing;
  public String prefix;
  public long missingCount;

  /**
   * @param rb the response builder; its request supplies the schema and parameters
   * @param facetStr the raw facet field parameter value
   */
  public FieldFacet(ResponseBuilder rb, String facetStr) {
    super(rb, FacetParams.FACET_FIELD, facetStr);
    final SolrParams requestParams = rb.req.getParams();
    fillParams(rb, requestParams, facetOn);
  }

  /**
   * Resolves every per-field facet setting for {@code field} from {@code params}.
   *
   * @param rb the response builder; its request schema is used to look up the field type
   * @param params the parameters to read the settings from
   * @param field the name of the field being faceted on
   */
  private void fillParams(ResponseBuilder rb, SolrParams params, String field) {
    this.field = field;
    this.ftype = rb.req.getSchema().getFieldTypeNoEx(this.field);
    this.offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
    this.limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);

    final Integer explicitMin = params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
    if (explicitMin != null) {
      this.minCount = explicitMin;
    } else {
      // Without an explicit mincount, zeros are included unless facet.zeros is false.
      final Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
      this.minCount = (zeros == null || zeros) ? 0 : 1;
    }

    this.missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);

    // default to sorting by count if there is a limit.
    final String fallbackSort =
        limit > 0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX;
    final String requestedSort = params.getFieldParam(field, FacetParams.FACET_SORT, fallbackSort);
    // Map the legacy sort values onto their current equivalents.
    if (requestedSort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
      this.sort = FacetParams.FACET_SORT_COUNT;
    } else if (requestedSort.equals(FacetParams.FACET_SORT_INDEX_LEGACY)) {
      this.sort = FacetParams.FACET_SORT_INDEX;
    } else {
      this.sort = requestedSort;
    }

    this.prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
  }
}
/**
 * A field facet that accumulates per-term counts reported by individual shards.
 *
 * <b>This API is experimental and subject to change</b>
 */
public static class DistribFieldFacet extends FieldFacet {
  public List<String>[] _toRefine; // a List<String> of refinements needed, one for each shard.
  // the max possible count for a term appearing on no list
  public long missingMaxPossible;
  // the max possible count for a missing term for each shard (indexed by shardNum)
  public long[] missingMax;
  public OpenBitSet[] counted; // a bitset for each shard, keeping track of which terms seen
  public HashMap<String,ShardFacetCount> counts = new HashMap<String,ShardFacetCount>(128);
  public int termNum;
  public int initialLimit; // how many terms requested in first phase
  public int initialMincount; // mincount param sent to each shard
  public boolean needRefinements;
  public ShardFacetCount[] countSorted;

  /**
   * @param rb the response builder; {@code rb.shards.length} sizes the per-shard arrays
   * @param facetStr the raw facet field parameter value
   */
  DistribFieldFacet(ResponseBuilder rb, String facetStr) {
    super(rb, facetStr);
    final int shardCount = rb.shards.length;
    missingMax = new long[shardCount];
    counted = new OpenBitSet[shardCount];
  }

  /**
   * Merges the counts reported by one shard into {@link #counts}.
   *
   * <p>An entry with a {@code null} name is added to {@code missingCount} and is not
   * counted as a received term. Every named entry is summed into its
   * {@link ShardFacetCount} (created on first sight with the next term number) and
   * marked in this shard's bitset.
   *
   * @param shardNum index of the shard within the per-shard arrays
   * @param shardCounts the shard's term counts; may be {@code null} if there was an exception
   * @param numRequested how many terms were requested from the shard
   */
  void add(int shardNum, NamedList shardCounts, int numRequested) {
    final int entryCount = (shardCounts != null) ? shardCounts.size() : 0;
    int namedReceived = entryCount;
    final OpenBitSet seenTerms = new OpenBitSet(termNum + entryCount);
    long lastCount = 0;

    for (int idx = 0; idx < entryCount; idx++) {
      final String term = shardCounts.getName(idx);
      final long shardCount = ((Number) shardCounts.getVal(idx)).longValue();
      if (term == null) {
        missingCount += shardCount;
        namedReceived--;
        continue;
      }
      ShardFacetCount entry = counts.get(term);
      if (entry == null) {
        entry = new ShardFacetCount();
        entry.name = term;
        entry.indexed = (ftype != null) ? ftype.toInternal(entry.name) : entry.name;
        entry.termNum = termNum++;
        counts.put(term, entry);
      }
      entry.count += shardCount;
      seenTerms.fastSet(entry.termNum);
      lastCount = shardCount;
    }

    // the largest possible missing term is initialMincount if we received less
    // than the number requested.
    final boolean fewerThanRequested =
        numRequested < 0 || (numRequested != 0 && namedReceived < numRequested);
    if (fewerThanRequested) {
      lastCount = initialMincount;
    }

    missingMaxPossible += lastCount;
    missingMax[shardNum] = lastCount;
    counted[shardNum] = seenTerms;
  }

  /**
   * Returns all accumulated terms ordered by their indexed form. The result is also
   * stored in {@link #countSorted}.
   *
   * @return a newly allocated array of the accumulated counts
   */
  public ShardFacetCount[] getLexSorted() {
    final ShardFacetCount[] sorted = counts.values().toArray(new ShardFacetCount[counts.size()]);
    Arrays.sort(sorted, new Comparator<ShardFacetCount>() {
      public int compare(ShardFacetCount left, ShardFacetCount right) {
        return left.indexed.compareTo(right.indexed);
      }
    });
    countSorted = sorted;
    return sorted;
  }

  /**
   * Returns all accumulated terms ordered by descending count, with ties broken by
   * indexed form. The result is also stored in {@link #countSorted}.
   *
   * @return a newly allocated array of the accumulated counts
   */
  public ShardFacetCount[] getCountSorted() {
    final ShardFacetCount[] sorted = counts.values().toArray(new ShardFacetCount[counts.size()]);
    Arrays.sort(sorted, new Comparator<ShardFacetCount>() {
      public int compare(ShardFacetCount left, ShardFacetCount right) {
        if (left.count != right.count) {
          // larger counts sort first
          return left.count > right.count ? -1 : 1;
        }
        return left.indexed.compareTo(right.indexed);
      }
    });
    countSorted = sorted;
    return sorted;
  }

  // returns the max possible value this ShardFacetCount could have for this shard
  // (assumes the shard did not report a count for this value)
  // TODO: could store the last term in the shard to tell if this term
  // comes before or after it. If it comes before, we could subtract 1
  long maxPossible(ShardFacetCount sfc, int shardNum) {
    final long shardBound = missingMax[shardNum];
    return shardBound;
  }
}
/**
 * The accumulated count for a single facet term.
 *
 * <b>This API is experimental and subject to change</b>
 */
public static class ShardFacetCount {
  public String name;
  public String indexed; // the indexed form of the name... used for comparisons.
  public long count;
  public int termNum; // term number starting at 0 (used in bit arrays)

  /** Renders the term name, term number and count; the indexed form is not included. */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append("{term=").append(name);
    text.append(",termNum=").append(termNum);
    text.append(",count=").append(count);
    text.append("}");
    return text.toString();
  }
}
}