/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request.mdrill;
import java.io.IOException;
import java.net.URL;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.compare.ColumnKey;
import org.apache.solr.request.compare.GroupbyAgent;
import org.apache.solr.request.compare.GroupbyItem;
import org.apache.solr.request.compare.GroupbyRow;
import org.apache.solr.request.compare.MergerDetailSelectDetailRowCompare;
import org.apache.solr.request.compare.MergerGroupByGroupbyRowCompare;
import org.apache.solr.request.compare.SelectDetailRow;
import org.apache.solr.request.compare.UniqTypeNum;
import org.apache.solr.request.join.HigoJoinSort;
import org.apache.solr.request.join.HigoJoinUtils;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexReader;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.log4j.Logger;
import org.apache.lucene.index.IndexReader;
import com.alimama.mdrill.distinct.DistinctCount.DistinctCountAutoAjuest;
import com.alimama.mdrill.utils.UniqConfig;
public class FacetComponent extends SearchComponent
{
private static Logger LOG = Logger.getLogger(FacetComponent.class);
public static final String COMPONENT_NAME = "facet";
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
if (rb.req.getParams().getBool(FacetParams.FACET,false)) {
rb.setNeedDocSet( true );
rb.doFacets = true;
}
}
public static ThreadPoolExecutor SUBMIT_POOL = new ThreadPoolExecutor(Math.max(UniqConfig.getFacetThreads()/2, 1),UniqConfig.getFacetThreads(), 100L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
@Override
public void process(ResponseBuilder rb1) throws IOException
{
if (!rb1.doFacets) {
return ;
}
final ResponseBuilder rb=rb1;
final long t1=System.currentTimeMillis();
ExecutorCompletionService<String> submit=new ExecutorCompletionService<String>(SUBMIT_POOL);
Callable<String> task = new Callable<String>() {
public String call() throws Exception {
long t2=System.currentTimeMillis();
SolrParams params = rb.req.getParams();
String[] facetFs = params.getParams(FacetParams.FACET_FIELD);
try{
if (null != facetFs) {
boolean isdetail = params.getBool(FacetParams.FACET_CROSS_DETAIL,false);
Object res= FacetComponent.this.getResult(isdetail,rb.req.getSearcher(), params,rb.req,facetFs,rb.getResults().docSet);
rb.rsp.add( "mdrill_data", res);
}else{
throw new Exception("null != facetFs");
}
}catch(Throwable e)
{
LOG.error("getFacetCounts",e);
throw new SolrException(ErrorCode.SERVER_ERROR,e);
}
long t3=System.currentTimeMillis();
LOG.info("####task####"+(t3-t2)+","+(t2-t1));
return "";
}
};
submit.submit(task);
try {
submit.take().get();
} catch (Throwable e) {
throw new IOException(e);
}
}
private Object getResult(boolean isdetail,SolrIndexSearcher searcher,SolrParams params,SolrQueryRequest req,String[] fields, DocSet base)throws Exception
{
String crcget=params.get("mdrill.crc.key.get",null);
if(crcget!=null&&(params.getBool("fetchfdt", false)||isdetail))
{
SolrIndexReader reader=searcher.getReader();
IndexReader.InvertParams invparam=new IndexReader.InvertParams();
invparam.searcher=searcher;
invparam.params=params;
invparam.fields=fields;
invparam.base=base;
invparam.req=req;
invparam.isdetail=isdetail;
IndexReader.InvertResult result=reader.invertScan(searcher.getSchema(), invparam);
ArrayList<NamedList> resultlist=result.getResult();
Map<Long,String> crcvalue=new HashMap<Long,String>();
for(NamedList nl:resultlist)
{
crcvalue.putAll((Map<? extends Long, ? extends String>) nl.get("fdtcre"));
}
return crcvalue;
}
if(crcget!=null)
{
ConcurrentHashMap<Long,String> cache=MdrillUtils.CRC_CACHE_SIZE.remove(crcget);
Map<Long,String> crcvalue=new HashMap<Long,String>();
if(cache==null)
{
return crcvalue;
}
String crcliststr=params.get("mdrill.crc.key.get.crclist");
if(crcliststr!=null)
{
String[] crclist=crcliststr.split(",");
for(String s:crclist)
{
Long crc=Long.parseLong(s);
String v=cache.get(crc);
if(v!=null)
{
crcvalue.put(crc, v);
}
}
}
return crcvalue;
}
SolrIndexReader reader=searcher.getReader();
IndexReader.InvertParams invparam=new IndexReader.InvertParams();
invparam.searcher=searcher;
invparam.params=params;
invparam.fields=fields;
invparam.base=base;
invparam.req=req;
invparam.isdetail=isdetail;
IndexReader.InvertResult result=reader.invertScan(searcher.getSchema(), invparam);
ArrayList<NamedList> resultlist=result.getResult();
if(resultlist.size()==1)
{
return resultlist.get(0);
}
FacetComponent.FacetInfo fi = new FacetComponent.FacetInfo(params);
DistribFieldFacet dff = fi.cross;
dff.isdetail=isdetail;
for (NamedList nl: resultlist) {
dff.add(nl);
}
int offset = params.getInt(FacetParams.FACET_CROSS_OFFSET, 0);
int limit = params.getInt(FacetParams.FACET_CROSS_LIMIT, 100);
int limit_offset = offset + limit;
NamedList fieldCounts = new NamedList();
GroupbyItem[] counts = dff.getPairSorted(limit_offset);
if(dff.recordcount!=null)
{
GroupbyItem recordcount=dff.recordcount;
fieldCounts.add("count", recordcount.toNamedList());
}
ArrayList<Object> list=new ArrayList<Object>();
int end = limit_offset> counts.length ?counts.length:limit_offset;
for (int i=offset; i<end; i++) {
list.add(counts[i].toNamedList());
}
fieldCounts.add("list", list);
return fieldCounts;
}
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
return ResponseBuilder.STAGE_DONE;
}
@Override
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
if (!rb.doFacets)
{return;
}
FacetInfo fi = rb._facetInfo;
if (fi == null) {
rb._facetInfo = fi = new FacetInfo(rb.req.getParams());
}
sreq.params.remove(FacetParams.FACET_MINCOUNT);
sreq.params.remove(FacetParams.FACET_OFFSET);
sreq.params.remove(FacetParams.FACET_LIMIT);
if(sreq.params.getBool("fetchfdt", false))
{
int offset=sreq.params.getInt(FacetParams.FACET_CROSS_OFFSET,0);
int limit=sreq.params.getInt(FacetParams.FACET_CROSS_LIMIT,0);
sreq.params.remove(FacetParams.FACET_CROSS_OFFSET);
sreq.params.remove(FacetParams.FACET_CROSS_LIMIT);
sreq.params.set(FacetParams.FACET_CROSS_OFFSET, 0);
sreq.params.set(FacetParams.FACET_CROSS_LIMIT, offset+limit);
}else{
sreq.params.remove(FacetParams.FACET_CROSS_OFFSET);
sreq.params.remove(FacetParams.FACET_CROSS_LIMIT);
int maxlimit=MdrillGroupBy.MAX_CROSS_ROWS;
sreq.params.set(FacetParams.FACET_CROSS_OFFSET, 0);
sreq.params.set(FacetParams.FACET_CROSS_LIMIT, maxlimit);
}
}
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
if (!rb.doFacets){
return;
}
long t1=System.currentTimeMillis();
FacetInfo fi = rb._facetInfo;
for (ShardResponse srsp: sreq.responses) {
NamedList<Object> rspq=srsp.getSolrResponse().getResponse();
Map<String,String> shardtime=(Map<String,String>)rspq.get("mdrill_shard_time");
if(shardtime!=null)
{
rb.timetaken.putAll(shardtime);
}
Object facet_counts = rspq.get("mdrill_data");
if(facet_counts==null)
{
SolrCore.log.error("mdrill_data is null "+srsp.getShard(),new Exception());
continue;
}
if(rb.req.getParams().get("mdrill.crc.key.get",null)!=null)
{
rb.crcvalue.putAll((Map<Long,String>)facet_counts);
}else{
fi.cross.add((NamedList)facet_counts);
}
}
long t2=System.currentTimeMillis();
LOG.info("##countFacets## time taken "+(t2-t1)+",responses.size="+sreq.responses.size());
}
@Override
public void finishStage(ResponseBuilder rb) {
if (!rb.doFacets){
return;
}
long t1=System.currentTimeMillis();
FacetInfo fi = rb._facetInfo;
if(rb.req.getParams().get("mdrill.crc.key.get",null)!=null)
{
rb.rsp.add("mdrill_data", rb.crcvalue);
}else{
NamedList fieldCounts = new SimpleOrderedMap();
DistribFieldFacet dff=fi.cross;
int saverecords=dff.offset + dff.limit;
GroupbyItem[] counts = dff.getPairSorted(saverecords);
if(dff.recordcount!=null)
{
GroupbyItem recordcount=dff.recordcount;
fieldCounts.add("count", recordcount.toNamedList());
}
int end = dff.limit < 0 ? counts.length : Math.min(dff.offset + dff.limit, counts.length);
ArrayList<Object> list=new ArrayList<Object>();
for (int i=dff.offset; i<end; i++) {
GroupbyItem item=counts[i];
list.add(item.toNamedList());
}
fieldCounts.add("list", list);
rb.rsp.add("mdrill_data", fieldCounts);
}
rb.rsp.add("mdrill_shard_time", rb.timetaken);
rb.crcvalue=new HashMap<Long,String>() ;
rb.timetaken=new LinkedHashMap<String,String>();
rb._facetInfo = null; // could be big, so release asap
long t2=System.currentTimeMillis();
LOG.info("##finishStage## time taken "+(t2-t1));
}
/////////////////////////////////////////////
/// SolrInfoMBean
////////////////////////////////////////////
@Override
public String getDescription() {
return "Handle Faceting";
}
@Override
public String getVersion() {
return "$Revision: 1152531 $";
}
@Override
public String getSourceId() {
return "$Id: FacetComponent.java 1152531 2011-07-31 00:43:33Z koji $";
}
@Override
public String getSource() {
return "$URL: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene_solr_3_5/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java $";
}
@Override
public URL[] getDocs() {
return null;
}
/**
* <b>This API is experimental and subject to change</b>
*/
public static class FacetInfo {
DistribFieldFacet cross;
public FacetInfo(SolrParams params) {
this.cross = new DistribFieldFacet(params,"solrCorssFields_s");
}
}
public static class FieldFacet {
private String key;
public int offset;
public int limit;
// public boolean isFinalResult = true;
public String sort_fl = null;
public String sort_type = null;
public boolean isdesc = true;
public String[] facetFs = null;
public HigoJoinSort[] joinSort={};
public String[] crossFs ;
public String[] distFS ;
public boolean isdetail;
public String sort_column_type;
public FieldFacet(SolrParams params, String facetStr) {
this.key = facetStr;
this.offset = params.getInt(FacetParams.FACET_CROSS_OFFSET, 0);
this.limit = params.getInt(FacetParams.FACET_CROSS_LIMIT, 100);
// boolean issub = params.getBool(FacetParams.IS_SUB_SHARDS, false);
// if (issub) {
// isFinalResult = false;
// } else {
// isFinalResult = true;
// }
this.isdetail = params.getBool(FacetParams.FACET_CROSS_DETAIL,false);
if (this.isdetail) {
this.sort_type = "detailMerge";
} else {
this.sort_type = params.get(FacetParams.FACET_CROSS_SORT_TYPE,"index");
}
this.sort_fl = params.get(FacetParams.FACET_CROSS_SORT_FL, null);
this.isdesc = params.getBool(FacetParams.FACET_CROSS_SORT_ISDESC,true);
this.crossFs = params.getParams(FacetParams.FACET_CROSS_FL);
this.distFS = params.getParams(FacetParams.FACET_CROSSDIST_FL);
this.facetFs = params.getParams(FacetParams.FACET_FIELD);
String[] joinList = params.getParams(HigoJoinUtils.getTables());
if (joinList == null) {
joinList = new String[0];
}
this.joinSort = new HigoJoinSort[joinList.length];
for (int i = 0; i < joinList.length; i++) {
this.joinSort[i] = new HigoJoinSort(joinList[i], params);
}
if (UniqTypeNum.parseSelectDetailType(this.facetFs, joinSort) != null) {
this.sort_column_type = "string";
} else {
this.sort_column_type = params.get("facet.cross.sort.cp");
}
}
public MergerGroupByGroupbyRowCompare createMergerGroupCmp()
{
return new MergerGroupByGroupbyRowCompare(this.sort_column_type,this.facetFs, this.crossFs,this.distFS, this.joinSort, this.sort_fl, this.sort_type, this.isdesc);
}
public MergerDetailSelectDetailRowCompare createMergerDetailCmp()
{
return new MergerDetailSelectDetailRowCompare(this.sort_column_type,this.isdesc);
}
public String getKey() { return key; }
}
public static class DistribFieldFacet extends FieldFacet {
public HashMap<ColumnKey,GroupbyRow> counts = new HashMap<ColumnKey,GroupbyRow>(128);
public ArrayList<SelectDetailRow> countsDetail = new ArrayList<SelectDetailRow>(128);
DistinctCountAutoAjuest autoDist=new DistinctCountAutoAjuest(UniqConfig.DistinctCountSize());
public GroupbyItem recordcount=null;
DistribFieldFacet(SolrParams localParams, String facetStr) {
super(localParams, facetStr);
}
long add(NamedList shardCounts) {
long t1 = System.currentTimeMillis();
ArrayList<Object> count=(ArrayList<Object>) shardCounts.get("count");
if(count!=null)
{
GroupbyAgent p = new GroupbyAgent(count);
p.setCross(this.crossFs, this.distFS);
if (recordcount == null) {
recordcount = p;
} else {
recordcount.shardsMerge(p);
}
}
ArrayList<Object> list=(ArrayList<Object>) shardCounts.get("list");
int sz = list == null ? 0 : list.size();
for (int i = 0; i < sz; i++) {
ArrayList<Object> obj=(ArrayList<Object>)list.get(i);
GroupbyAgent p = new GroupbyAgent(obj);
p.setCross(this.crossFs, this.distFS);
if (this.isdetail) {
countsDetail.add((SelectDetailRow) p.getRaw());
} else {
GroupbyRow row=(GroupbyRow) p.getRaw();
GroupbyRow sfc = counts.get(row.getKey());
if (sfc == null) {
row.setDist(autoDist);
counts.put(row.getKey(), row);
} else {
sfc.shardsMerge(row);
}
}
}
long t2 = System.currentTimeMillis();
return t2 - t1;
}
public GroupbyItem[] getPairSortedDetail(int saverecords) {
long t1=System.currentTimeMillis();
int sz=countsDetail.size();
final MergerDetailSelectDetailRowCompare cmp=this.createMergerDetailCmp();
if(sz<=(saverecords*2))
{
SelectDetailRow[] arr = new SelectDetailRow[sz];
int index=0;
for(SelectDetailRow f:countsDetail)
{
arr[index]=f;
index++;
}
Arrays.sort(arr, cmp);
long t2=System.currentTimeMillis();
LOG.info("####merger sort#### by array sort size="+sz+",timetaken="+(t2-t1));
return arr;
}
PriorityQueue<SelectDetailRow> res = new PriorityQueue<SelectDetailRow>(saverecords, Collections.reverseOrder(cmp));
for(SelectDetailRow f:countsDetail)
{
SelectDetailRow mrow=f;
if (res.size() < saverecords) {
res.add(mrow);
} else if (cmp.compare(res.peek(), mrow) > 0) {
res.add(mrow);
res.poll();
}
}
SelectDetailRow[] rtn=new SelectDetailRow[res.size()];
res.toArray(rtn);
Arrays.sort(rtn, cmp);
long t2=System.currentTimeMillis();
LOG.info("####merger sort#### by PriorityQueue size="+rtn.length+"@"+sz+",timetaken="+(t2-t1));
return rtn;
}
public GroupbyItem[] getPairSortedGroup(int saverecords) {
long t1=System.currentTimeMillis();
Collection<GroupbyRow> collections=counts.values();
int sz=counts.size();
final MergerGroupByGroupbyRowCompare cmp=this.createMergerGroupCmp();
if(sz<=(saverecords*2))
{
GroupbyRow[] arr = new GroupbyRow[sz];
int index=0;
for(GroupbyRow f:collections)
{
arr[index]=f;
index++;
}
Arrays.sort(arr, cmp);
long t2=System.currentTimeMillis();
LOG.info("####merger sort#### by array sort size="+sz+",timetaken="+(t2-t1));
return arr;
}
PriorityQueue<GroupbyRow> res = new PriorityQueue<GroupbyRow>(saverecords, Collections.reverseOrder(cmp));
for(GroupbyRow f:collections)
{
GroupbyRow mrow=f;
if (res.size() < saverecords) {
res.add(mrow);
} else if (cmp.compare(res.peek(), mrow) > 0) {
res.add(mrow);
res.poll();
}
}
GroupbyRow[] rtn=new GroupbyRow[res.size()];
res.toArray(rtn);
Arrays.sort(rtn, cmp);
long t2=System.currentTimeMillis();
LOG.info("####merger sort#### by PriorityQueue size="+rtn.length+"@"+sz+",timetaken="+(t2-t1));
return rtn;
}
public GroupbyItem[] getPairSorted(int saverecords) {
if(this.isdetail)
{
return getPairSortedDetail(saverecords);
}else{
return getPairSortedGroup( saverecords);
}
}
}
}