/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Deque;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.NamedListHelper;
import org.apache.solr.util.PivotListEntry;
/**
* This is thread safe
* @since solr 4.0
*/
public class PivotFacetHelper
{
// Shared accessor used throughout this class to read the field/value/count/pivot
// entries out of a pivot NamedList (see the getFromPivotList calls below).
protected NamedListHelper namedListHelper = NamedListHelper.INSTANCE;
// Ordering applied to each level of pivots when a caller requests sortByCount.
protected Comparator<NamedList<Object>> namedListCountComparator = new PivotNamedListCountComparator();
/**
 * Factory hook for the faceting engine used while pivoting; subclasses may
 * override it to plug in a different faceting implementation.
 * TODO: This currently returns a {@link SimpleFacets} instance, but the
 * capabilities relied on here would be better expressed as an extracted
 * abstract class or interface.
 *
 * @param req    the request being served
 * @param docs   the documents to facet over
 * @param params the parameters handed to the faceting implementation
 * @return a new faceting object bound to the given request, documents and parameters
 */
protected SimpleFacets getFacetImplementation(SolrQueryRequest req, DocSet docs, SolrParams params) {
  final SimpleFacets facets = new SimpleFacets(req, docs, params);
  return facets;
}
/**
 * Computes the pivot facets for every requested pivot specification.
 *
 * <p>Each element of {@code pivots} is a comma separated list of at least two
 * field names. The first field is faceted over the current result set and the
 * remaining fields are pivoted recursively through {@link #doPivots}.
 *
 * <p>When {@code facet.pivot.distinct} is enabled, a copy of the request
 * parameters with {@code facet.mincount=1} and {@code facet.limit=-1} is
 * installed on the request (and used for faceting), because a distinct count is
 * meaningless if terms are filtered out or truncated.
 *
 * @param rb     the response builder of the current request
 * @param params the parameters to read the pivot options from
 * @param pivots the pivot specifications, may be {@code null}
 * @return one list of pivot entries per specification, keyed by the
 *         specification string, or {@code null} when faceting is disabled or no
 *         pivots were requested
 * @throws SolrException (BAD_REQUEST) if a specification names fewer than two fields
 * @throws IOException   propagated from the underlying facet computation
 */
public SimpleOrderedMap<List<NamedList<Object>>> process(ResponseBuilder rb, SolrParams params, String[] pivots) throws IOException {
  if (!rb.doFacets || pivots == null) {
    return null;
  }

  int minMatch = params.getInt(FacetParams.FACET_PIVOT_MINCOUNT, 1);
  // Read the flag once: it both switches doPivots into distinct mode and
  // triggers the parameter override below.
  boolean distinct = params.getBool(FacetParams.FACET_PIVOT_DISTINCT, false);

  if (distinct) {
    // force values in facet query to default values when facet.pivot.distinct = true
    // facet.mincount = 1 ---- distinct count makes no sense if we filter out valid terms
    // facet.limit = -1 ---- distinct count makes no sense if we limit terms
    ModifiableSolrParams v = new ModifiableSolrParams(rb.req.getParams());
    v.set("facet.mincount", 1);
    v.set("facet.limit", -1);
    params = v;
    rb.req.setParams(params);
  }

  SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<List<NamedList<Object>>>();
  for (String pivot : pivots) {
    String[] fields = pivot.split(",");
    if (fields.length < 2) {
      throw new SolrException( ErrorCode.BAD_REQUEST,
          "Pivot Facet needs at least two fields: "+pivot );
    }
    int depth = fields.length;

    DocSet docs = rb.getResults().docSet;
    String field = fields[0];
    // Guaranteed to exist by the length check above.
    String subField = fields[1];

    // Fields beyond the first two, kept in declaration order so that the
    // recursion can poll them off one level at a time.
    Deque<String> fnames = new LinkedList<String>();
    for (int i = fields.length - 1; i > 1; i--) {
      fnames.push(fields[i]);
    }

    SimpleFacets sf = getFacetImplementation(rb.req, docs, params);
    NamedList<Integer> superFacets = sf.getTermCounts(field);

    pivotResponse.add(pivot,
        doPivots(superFacets, field, subField, fnames, rb, docs, minMatch, distinct, depth, depth));
  }
  return pivotResponse;
}
/**
 * Recursive function to do all the pivots.
 *
 * <p>For every value of {@code field} in {@code superFacets} whose count is at
 * least {@code minMatch}, an entry with the keys {@code field}, {@code value}
 * and {@code count} is built. Unless this is the top level of a pivot without
 * a sub field, the documents are narrowed to that value, percentile
 * statistics are attached under {@code statistics}, and, if there is a sub
 * field, the sub field is faceted over the narrowed set and pivoted
 * recursively under {@code pivot}.
 *
 * <p>{@code fnames} is left exactly as it was found: the field polled for the
 * next level is pushed back before returning, and nothing is pushed when the
 * list was already exhausted.
 *
 * @param superFacets term counts of {@code field} over {@code docs}
 * @param field       the field whose values are being expanded at this level
 * @param subField    the field to pivot on below this level, or {@code null} at the last level
 * @param fnames      the fields that follow {@code subField}, in pivot order
 * @param rb          the response builder of the current request
 * @param docs        the documents each value's term query is intersected with
 * @param minMatch    minimum count for a value to be expanded; in non-distinct
 *                    mode also the minimum number of sub field terms for the
 *                    value to be reported
 * @param distinct    whether distinct counts are reported; in this mode the
 *                    last level (no sub field) yields no entries
 * @param maxDepth    the depth value of the top-level call
 * @param depth       the depth value of this call, decremented on each recursion
 * @return the pivot entries of this level, possibly empty
 * @throws IOException propagated from the searcher or the facet computation
 */
protected List<NamedList<Object>> doPivots( NamedList<Integer> superFacets, String field, String subField, Deque<String> fnames, ResponseBuilder rb, DocSet docs, int minMatch, boolean distinct, int maxDepth, int depth) throws IOException
{
  SolrIndexSearcher searcher = rb.req.getSearcher();
  // TODO: optimize to avoid converting to an external string and then having to convert back to internal below
  SchemaField sfield = searcher.getSchema().getField(field);
  FieldType ftype = sfield.getType();

  // null when there are no further fields to pivot on
  String nextField = fnames.poll();

  // when distinct and no subs, dont bother
  if (subField == null && distinct) {
    if (nextField != null) {
      fnames.push(nextField);
    }
    return new ArrayList<NamedList<Object>>();
  }

  List<NamedList<Object>> values = new ArrayList<NamedList<Object>>( superFacets.size() );
  for (Map.Entry<String, Integer> kv : superFacets) {
    // Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though
    if (kv.getValue() < minMatch) {
      continue;
    }

    // don't reuse the same BytesRef each time since we will be constructing Term
    // objects that will most likely be cached.
    BytesRef termval = new BytesRef();
    ftype.readableToIndexed(kv.getKey(), termval);

    SimpleOrderedMap<Object> pivot = new SimpleOrderedMap<Object>();
    pivot.add("field", field );
    pivot.add("value", ftype.toObject(sfield, termval) );
    pivot.add("count", kv.getValue() );

    // The narrowed doc set is needed for the statistics of every nested level
    // and whenever a sub field has to be faceted; only a top-level call
    // without a sub field can skip it.
    DocSet subset = null;
    SimpleFacets sf = null;
    if (maxDepth != depth || subField != null) {
      Query query = new TermQuery(new Term(field, termval));
      subset = searcher.getDocSet(query, docs);
      sf = getFacetImplementation(rb.req, subset, rb.req.getParams());
      // attached even when empty
      pivot.add( "statistics", sf.getFacetPercentileCounts());
    }

    if (subField == null) {
      // Last level. Distinct mode already returned above, so the entry is
      // always reported here.
      values.add( pivot );
      continue;
    }

    NamedList<Integer> nl = sf.getTermCounts(subField);
    if (distinct) {
      pivot.add("distinct", nl.size());
      if (depth > 1) {
        List<NamedList<Object>> list = doPivots( nl, subField, nextField, fnames, rb, subset, minMatch, distinct, maxDepth, depth-1 );
        // an empty sub-pivot list is still attached
        pivot.add( "pivot", list);
        values.add( pivot );
      }
    } else if (nl.size() >= minMatch) {
      List<NamedList<Object>> list = doPivots( nl, subField, nextField, fnames, rb, subset, minMatch, distinct, maxDepth, depth-1 );
      // an empty sub-pivot list is still attached
      pivot.add( "pivot", list);
      values.add( pivot );
    }
  }

  // put the field back on the list; never push the null that poll() returns
  // for an exhausted list, so the deque does not accumulate null elements
  if (nextField != null) {
    fnames.push( nextField );
  }
  return values;
}
/**
 * Folds one pivot value into {@code polecatCounts}.
 *
 * <p>If the map has no entry for {@code value}, a new entry holding the
 * field, value and count is created (with {@code subPivot} converted to a
 * map when present). Otherwise the count and sub-pivots are merged into the
 * entry already stored for that value.
 */
private void mergeValueToMap(Map<Object,NamedList<Object>> polecatCounts,
    String field, Object value, Integer count,
    List<NamedList<Object>> subPivot, int pivotsDone, int numberOfPivots) {
  if (!polecatCounts.containsKey(value)) {
    // First time this value is seen: build its entry from scratch.
    SimpleOrderedMap<Object> fresh = new SimpleOrderedMap<Object>();
    fresh.add(PivotListEntry.FIELD.getName(), field);
    fresh.add(PivotListEntry.VALUE.getName(), value);
    fresh.add(PivotListEntry.COUNT.getName(), count);
    if (subPivot != null) {
      fresh.add(PivotListEntry.PIVOT.getName(),
          convertPivotsToMaps(subPivot, pivotsDone, numberOfPivots));
    }
    polecatCounts.put(value, fresh);
    return;
  }

  // Value already known: merge into the stored entry and store the result.
  NamedList<Object> stored = polecatCounts.get(value);
  NamedList<Object> merged = mergePivots(stored, count, subPivot, pivotsDone,
      numberOfPivots);
  polecatCounts.put(value, merged);
}
/**
 * Merges a count and a list of sub-pivots into an existing pivot entry.
 *
 * <p>The count, when given, is added to the entry's stored count. The
 * sub-pivots, when given, are merged value by value into the entry's existing
 * sub-pivot map; if the entry has no sub-pivot map yet, the list is converted
 * to a map and added to the entry.
 *
 * @param existingNamedList the entry to merge into; it is modified in place
 * @param countToMerge      count to add, or {@code null} to leave the count untouched
 * @param pivotToMergeList  sub-pivots to merge, or {@code null} if there are none
 * @param pivotsDone        depth of {@code existingNamedList}
 * @param numberOfPivots    total number of pivot levels
 * @return {@code existingNamedList}
 */
private NamedList<Object> mergePivots(NamedList<Object> existingNamedList,
    Integer countToMerge, List<NamedList<Object>> pivotToMergeList,
    int pivotsDone, int numberOfPivots) {
  if (countToMerge != null) {
    // Cast here, but as we're only putting Integers in above it should be
    // fine
    existingNamedList.setVal(
        PivotListEntry.COUNT.getIndex(),
        ((Integer) namedListHelper.getFromPivotList(PivotListEntry.COUNT,
            existingNamedList)) + countToMerge);
  }
  if (pivotToMergeList != null) {
    Object existingPivotObj = namedListHelper.getFromPivotList(
        PivotListEntry.PIVOT, existingNamedList);
    if (existingPivotObj instanceof Map) {
      // Every entry of pivotToMergeList sits one level below the existing
      // entry, so all siblings are merged at the same depth. The depth must
      // not change from one sibling to the next.
      final int childDepth = pivotsDone + 1;
      for (NamedList<Object> pivotToMerge : pivotToMergeList) {
        String nextFieldToMerge = (String) namedListHelper.getFromPivotList(
            PivotListEntry.FIELD, pivotToMerge);
        Object nextValueToMerge = namedListHelper.getFromPivotList(
            PivotListEntry.VALUE, pivotToMerge);
        Integer nextCountToMerge = (Integer) namedListHelper
            .getFromPivotList(PivotListEntry.COUNT, pivotToMerge);
        Object nextPivotToMergeListObj = namedListHelper.getFromPivotList(
            PivotListEntry.PIVOT, pivotToMerge);
        List nextPivotToMergeList = null;
        if (nextPivotToMergeListObj instanceof List) {
          nextPivotToMergeList = (List) nextPivotToMergeListObj;
        }
        mergeValueToMap((Map) existingPivotObj, nextFieldToMerge,
            nextValueToMerge, nextCountToMerge, nextPivotToMergeList,
            childDepth, numberOfPivots);
      }
    } else {
      // No sub-pivot map on the existing entry yet: adopt the incoming list.
      existingNamedList.add(
          PivotListEntry.PIVOT.getName(),
          convertPivotsToMaps(pivotToMergeList, pivotsDone + 1,
              numberOfPivots));
    }
  }
  return existingNamedList;
}
/**
 * Converts a list of pivot entries (and their nested pivot lists) into maps
 * keyed by pivot value, without collecting per-field counts.
 */
public Map<Object,NamedList<Object>> convertPivotsToMaps(
    List<NamedList<Object>> pivots, int pivotsDone, int numberOfPivots) {
  final Map<Integer,Map<Object,Integer>> noFieldCounts = null;
  return convertPivotsToMaps(pivots, pivotsDone, numberOfPivots, noFieldCounts);
}
/**
 * Converts a list of pivot entries into a map keyed by each entry's value.
 *
 * <p>While {@code pivotsDone} is below {@code numberOfPivots}, a nested pivot
 * list found on an entry is converted the same way and replaces the list on
 * that entry. When {@code fieldCounts} is not {@code null}, the count of
 * every visited entry is accumulated into the count map registered for the
 * current depth.
 *
 * @param pivots         the entries of the current level
 * @param pivotsDone     the depth of {@code pivots}; also the key into {@code fieldCounts}
 * @param numberOfPivots the depth at which nested lists are no longer converted
 * @param fieldCounts    per-depth value counts to update, or {@code null} to skip counting
 * @return the entries of {@code pivots}, keyed by value
 */
public Map<Object,NamedList<Object>> convertPivotsToMaps(
    List<NamedList<Object>> pivots, int pivotsDone, int numberOfPivots,
    Map<Integer,Map<Object,Integer>> fieldCounts) {
  final Map<Object,NamedList<Object>> byValue = new HashMap<Object,NamedList<Object>>();
  final boolean tally = fieldCounts != null;
  final Map<Object,Integer> levelCounts =
      tally ? getFieldCountMap(fieldCounts, pivotsDone) : null;
  final boolean descend = pivotsDone < numberOfPivots;

  for (NamedList<Object> entry : pivots) {
    final Object key = namedListHelper.getFromPivotList(PivotListEntry.VALUE,
        entry);
    byValue.put(key, entry);

    if (tally) {
      final Object rawCount = namedListHelper.getFromPivotList(
          PivotListEntry.COUNT, entry);
      // a missing or non-Integer count contributes zero
      final int count = (rawCount instanceof Integer) ? (Integer) rawCount : 0;
      addFieldCounts(key, count, levelCounts);
    }

    if (!descend) {
      continue;
    }
    final int nestedIdx = entry.indexOf(PivotListEntry.PIVOT.getName(), 0);
    if (nestedIdx < 0) {
      continue;
    }
    final Object nested = entry.getVal(nestedIdx);
    if (nested instanceof List) {
      // swap the nested list for its map form, in place
      entry.setVal(
          nestedIdx,
          convertPivotsToMaps((List) nested, pivotsDone + 1,
              numberOfPivots, fieldCounts));
    }
  }
  return byValue;
}
/**
 * Converts a value-keyed pivot map (and its nested maps) back into lists,
 * using default limit settings and without sorting by count.
 */
public List<NamedList<Object>> convertPivotMapToList(
    Map<Object,NamedList<Object>> pivotMap, int numberOfPivots) {
  final InternalPivotLimitInfo defaultLimits = new InternalPivotLimitInfo();
  final int startingPivot = 0;
  final boolean sortByCount = false;
  return convertPivotMapToList(pivotMap, defaultLimits, startingPivot,
      numberOfPivots, sortByCount);
}
/**
 * Converts one level of a value-keyed pivot map into a list, recursing into
 * nested pivot maps.
 *
 * <p>With a combined pivot limit and per-level allowed values available, only
 * entries whose key is among the allowed values of this level are kept, and
 * the remaining levels' allowed values are handed down. Without a combined
 * limit, the list is truncated to {@code limit} entries when {@code limit} is
 * positive. Sorting, when requested, happens before truncation.
 *
 * @param pivotMap       the entries of this level, keyed by value
 * @param pivotLimitInfo limit settings for this level
 * @param currentPivot   the depth of the parent level (0 for the top level)
 * @param numberOfPivots total number of pivot levels
 * @param sortByCount    whether each level is sorted with the count comparator
 * @return the kept entries of this level
 */
private List<NamedList<Object>> convertPivotMapToList(
    Map<Object,NamedList<Object>> pivotMap,
    InternalPivotLimitInfo pivotLimitInfo, int currentPivot,
    int numberOfPivots, boolean sortByCount) {
  List<NamedList<Object>> pivots = new ArrayList<NamedList<Object>>();
  currentPivot++;
  List<Object> fieldLimits = null;
  InternalPivotLimitInfo nextPivotLimitInfo = new InternalPivotLimitInfo(
      pivotLimitInfo);
  // fieldLimitsList is null when no limits were supplied for this pivot;
  // treat that like an empty list instead of failing.
  if (pivotLimitInfo.combinedPivotLimit
      && pivotLimitInfo.fieldLimitsList != null
      && !pivotLimitInfo.fieldLimitsList.isEmpty()) {
    fieldLimits = pivotLimitInfo.fieldLimitsList.get(0);
    nextPivotLimitInfo.fieldLimitsList = pivotLimitInfo.fieldLimitsList
        .subList(1, pivotLimitInfo.fieldLimitsList.size());
  }

  // Loop-invariant: true when no per-value filtering applies at this level.
  final boolean keepAll = pivotLimitInfo.limit == 0
      || !pivotLimitInfo.combinedPivotLimit || fieldLimits == null;
  for (Entry<Object,NamedList<Object>> pivot : pivotMap.entrySet()) {
    if (keepAll || fieldLimits.contains(pivot.getKey())) {
      pivots.add(pivot.getValue());
      convertPivotEntryToListType(pivot.getValue(), nextPivotLimitInfo,
          currentPivot, numberOfPivots, sortByCount);
    }
  }

  if (sortByCount) {
    Collections.sort(pivots, namedListCountComparator);
  }
  if (!pivotLimitInfo.combinedPivotLimit && pivotLimitInfo.limit > 0
      && pivots.size() > pivotLimitInfo.limit) {
    // copy so the result does not keep the longer backing list alive
    pivots = new ArrayList<NamedList<Object>>(pivots.subList(0,
        pivotLimitInfo.limit));
  }
  return pivots;
}
/**
 * Converts every named pivot map in {@code pivotValues} back into list form.
 *
 * <p>The number of pivot levels of each entry is derived from its name: one
 * more than the number of commas it contains.
 *
 * @param pivotValues    pivot maps keyed by pivot name
 * @param pivotLimitInfo limit settings shared by all pivots
 * @param sortByCount    whether each level is sorted with the count comparator
 * @return the converted pivots, under the same names and in the same order
 */
public SimpleOrderedMap<List<NamedList<Object>>> convertPivotMapsToList(
    SimpleOrderedMap<Map<Object,NamedList<Object>>> pivotValues,
    PivotLimitInfo pivotLimitInfo, boolean sortByCount) {
  final SimpleOrderedMap<List<NamedList<Object>>> converted = new SimpleOrderedMap<List<NamedList<Object>>>();
  for (Entry<String,Map<Object,NamedList<Object>>> named : pivotValues) {
    final String pivotName = named.getKey();
    final int levels = StringUtils.countMatches(pivotName, ",") + 1;
    final InternalPivotLimitInfo limits = new InternalPivotLimitInfo(
        pivotLimitInfo, pivotName);
    final List<NamedList<Object>> asList = convertPivotMapToList(
        named.getValue(), limits, 0, levels, sortByCount);
    converted.add(pivotName, asList);
  }
  return converted;
}
/**
 * Replaces the nested pivot map of {@code pivotEntry}, if it has one, with
 * its list form. Nothing happens once {@code pivotsDone} has reached
 * {@code numberOfPivots}, or when the entry has no nested pivot stored as a map.
 */
private void convertPivotEntryToListType(NamedList<Object> pivotEntry,
    InternalPivotLimitInfo pivotLimitInfo, int pivotsDone,
    int numberOfPivots, boolean sortByCount) {
  if (pivotsDone >= numberOfPivots) {
    return;
  }
  final int nestedIdx = pivotEntry.indexOf(PivotListEntry.PIVOT.getName(), 0);
  if (nestedIdx < 0) {
    return;
  }
  final Object nested = pivotEntry.getVal(nestedIdx);
  if (!(nested instanceof Map)) {
    return;
  }
  Map<Object,NamedList<Object>> nestedMap = (Map) nested;
  final List<NamedList<Object>> nestedList = convertPivotMapToList(nestedMap,
      pivotLimitInfo, pivotsDone, numberOfPivots, sortByCount);
  pivotEntry.setVal(nestedIdx, nestedList);
}
/**
 * Returns the count map registered for {@code pivotNumber}, creating and
 * registering an empty one in {@code fieldCounts} if none exists yet.
 *
 * @param fieldCounts count maps keyed by pivot number; may be modified
 * @param pivotNumber the pivot number to look up
 * @return the count map for {@code pivotNumber}, never {@code null}
 */
public Map<Object,Integer> getFieldCountMap(
    Map<Integer,Map<Object,Integer>> fieldCounts, int pivotNumber) {
  final Map<Object,Integer> registered = fieldCounts.get(pivotNumber);
  if (registered != null) {
    return registered;
  }
  final Map<Object,Integer> created = new HashMap<Object,Integer>();
  fieldCounts.put(pivotNumber, created);
  return created;
}
/**
 * Adds {@code count} to the running total stored for {@code name}, starting
 * the total at {@code count} when {@code name} has no total yet.
 *
 * @param name              the key whose total is updated
 * @param count             the amount to add
 * @param thisFieldCountMap the totals to update in place
 */
public void addFieldCounts(Object name, int count,
    Map<Object,Integer> thisFieldCountMap) {
  final Integer previous = thisFieldCountMap.get(name);
  final int updated = (previous == null) ? count : previous + count;
  thisFieldCountMap.put(name, updated);
}
/**
 * Limit settings supplied by callers of
 * {@link #convertPivotMapsToList(SimpleOrderedMap, PivotLimitInfo, boolean)}.
 */
public static class PivotLimitInfo {
  /**
   * Allowed values, looked up by pivot name; each inner list holds the values
   * allowed at one pivot level. Unset ({@code null}) by default.
   */
  public SimpleOrderedMap<List<List<Object>>> fieldLimitsMap;

  /** Maximum number of entries per level; {@code 0} (the default) disables limiting. */
  public int limit;

  /**
   * When {@code true}, entries are filtered by the allowed values instead of
   * each level being truncated to {@link #limit}. Defaults to {@code false}.
   */
  public boolean combinedPivotLimit;
}
/**
 * Limit settings for a single pivot while it is converted back to list
 * form; {@code fieldLimitsList} holds the allowed values of the levels
 * still to be converted.
 */
private static class InternalPivotLimitInfo {
  public List<List<Object>> fieldLimitsList;
  public int limit;
  public boolean combinedPivotLimit;

  /** Creates settings with no allowed-value lists, no limit and no combined limit. */
  private InternalPivotLimitInfo() {}

  /**
   * Takes the limit settings from {@code source} and, when it carries a
   * limits map, the allowed-value lists stored under {@code pivotName}.
   */
  private InternalPivotLimitInfo(PivotLimitInfo source,
      String pivotName) {
    this.combinedPivotLimit = source.combinedPivotLimit;
    this.limit = source.limit;
    this.fieldLimitsList = (source.fieldLimitsMap == null)
        ? null
        : source.fieldLimitsMap.get(pivotName);
  }

  /** Copies all settings of {@code source}; the allowed-value lists are shared, not cloned. */
  private InternalPivotLimitInfo(InternalPivotLimitInfo source) {
    this.combinedPivotLimit = source.combinedPivotLimit;
    this.limit = source.limit;
    this.fieldLimitsList = source.fieldLimitsList;
  }
}
// TODO: This is code from various patches to support distributed search.
// Some parts may be helpful for whoever implements distributed search.
//
// @Override
// public int distributedProcess(ResponseBuilder rb) throws IOException {
// if (!rb.doFacets) {
// return ResponseBuilder.STAGE_DONE;
// }
//
// if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
// SolrParams params = rb.req.getParams();
// String[] pivots = params.getParams(FacetParams.FACET_PIVOT);
// for ( ShardRequest sreq : rb.outgoing ) {
// if (( sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS ) != 0
// && sreq.shards != null && sreq.shards.length == 1 ) {
// sreq.params.set( FacetParams.FACET, "true" );
// sreq.params.set( FacetParams.FACET_PIVOT, pivots );
// sreq.params.set( FacetParams.FACET_PIVOT_MINCOUNT, 1 ); // keep this at 1 regardless so that it accumulates everything
// }
// }
// }
// return ResponseBuilder.STAGE_DONE;
// }
//
// @Override
// public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
// if (!rb.doFacets) return;
//
//
// if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) {
// SimpleOrderedMap<List<NamedList<Object>>> tf = rb._pivots;
// if ( null == tf ) {
// tf = new SimpleOrderedMap<List<NamedList<Object>>>();
// rb._pivots = tf;
// }
// for (ShardResponse srsp: sreq.responses) {
// int shardNum = rb.getShardNum(srsp.getShard());
//
// NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
//
// // handle facet trees from shards
// SimpleOrderedMap<List<NamedList<Object>>> shard_pivots =
// (SimpleOrderedMap<List<NamedList<Object>>>)facet_counts.get( PIVOT_KEY );
//
// if ( shard_pivots != null ) {
// for (int j=0; j< shard_pivots.size(); j++) {
// // TODO -- accumulate the results from each shard
// // The following code worked to accumulate facets for an previous
// // two level patch... it is here for reference till someone can upgrade
// /**
// String shard_tree_name = (String) shard_pivots.getName( j );
// SimpleOrderedMap<NamedList> shard_tree = (SimpleOrderedMap<NamedList>)shard_pivots.getVal( j );
// SimpleOrderedMap<NamedList> facet_tree = tf.get( shard_tree_name );
// if ( null == facet_tree) {
// facet_tree = new SimpleOrderedMap<NamedList>();
// tf.add( shard_tree_name, facet_tree );
// }
//
// for( int o = 0; o < shard_tree.size() ; o++ ) {
// String shard_outer = (String) shard_tree.getName( o );
// NamedList shard_innerList = (NamedList) shard_tree.getVal( o );
// NamedList tree_innerList = (NamedList) facet_tree.get( shard_outer );
// if ( null == tree_innerList ) {
// tree_innerList = new NamedList();
// facet_tree.add( shard_outer, tree_innerList );
// }
//
// for ( int i = 0 ; i < shard_innerList.size() ; i++ ) {
// String shard_term = (String) shard_innerList.getName( i );
// long shard_count = ((Number) shard_innerList.getVal(i)).longValue();
// int tree_idx = tree_innerList.indexOf( shard_term, 0 );
//
// if ( -1 == tree_idx ) {
// tree_innerList.add( shard_term, shard_count );
// } else {
// long tree_count = ((Number) tree_innerList.getVal( tree_idx )).longValue();
// tree_innerList.setVal( tree_idx, shard_count + tree_count );
// }
// } // innerList loop
// } // outer loop
// **/
// } // each tree loop
// }
// }
// }
// return ;
// }
//
// @Override
// public void finishStage(ResponseBuilder rb) {
// if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
// // wait until STAGE_GET_FIELDS
// // so that "result" is already stored in the response (for aesthetics)
//
// SimpleOrderedMap<List<NamedList<Object>>> tf = rb._pivots;
//
// // get 'facet_counts' from the response
// NamedList facetCounts = (NamedList) rb.rsp.getValues().get("facet_counts");
// if (facetCounts == null) {
// facetCounts = new NamedList();
// rb.rsp.add("facet_counts", facetCounts);
// }
// facetCounts.add( PIVOT_KEY, tf );
// rb._pivots = null;
// }
//
// public String getDescription() {
// return "Handle Pivot (multi-level) Faceting";
// }
//
// public String getSource() {
// return "$URL: http://svn.apache.org/repos/asf/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/component/PivotFacetHelper.java $";
// }
}