package org.apache.solr.request.mdrill;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.PriorityQueue;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.log4j.Logger;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;

import com.alimama.mdrill.utils.EncodeUtils;
import com.alimama.mdrill.utils.UniqConfig;

import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.compare.ColumnKey;
import org.apache.solr.request.compare.GroupbyRow;
import org.apache.solr.request.compare.MergerGroupByGroupbyRowCompare;
import org.apache.solr.request.compare.RecordCount;
import org.apache.solr.request.compare.ShardGroupByTermNum;
import org.apache.solr.request.join.HigoJoinInvert;
import org.apache.solr.request.mdrill.MdrillUtils.*;

/**
 * Implementation of multi-column "group by" classification and aggregation.
 *
 * <p>Usage, as far as this file shows: construct an instance for one segment reader and call
 * {@link #get(String[], DocSet)}. That call builds a container from the parsed parameters,
 * accumulates one {@code RefRow} per group key (keys are arrays of term numbers), prunes the
 * group map to the requested size, converts the term numbers to strings and returns the result
 * as a {@code NamedList}.
 *
 * <p>The instance keeps mutable per-call state ({@code container}, {@code smallestShardGroup},
 * {@code mergercount}); nothing in this file synchronizes access to it.
 *
 * @author yannian.mu
 */
public class MdrillGroupBy {
    private static final Logger LOG = Logger.getLogger(MdrillGroupBy.class);

    public static Integer MAX_CROSS_ROWS = UniqConfig.defaultCrossMaxLimit();

    private SolrIndexSearcher searcher;
    private SolrQueryRequest req;

    /** Number of times a new group arrived while the group map was already at its size limit. */
    public int mergercount = 0;

    private RecordCount recordCount;
    private SegmentReader reader;
    private MdrillParseGroupby parse;

    /**
     * Head of the pruning heap after the most recent prune in {@link #TopMaps}, i.e. the retained
     * entry that {@code container.cmpTermNum} ranks last. {@code null} until a prune has happened.
     * Once set, new groups are only admitted to the map if they beat this entry
     * (see {@link #delayPut}).
     */
    private ShardGroupByTermNum smallestShardGroup = null;

    private MdrillParseGroupby.fetchContaioner container = null;

    /**
     * @param _searcher searcher handed to the container when it is created
     * @param reader    segment reader handed to the container when it is created
     * @param _params   request parameters, parsed by {@code MdrillParseGroupby}
     * @param req       the request, handed to the container when it is created
     */
    public MdrillGroupBy(SolrIndexSearcher _searcher, SegmentReader reader, SolrParams _params, SolrQueryRequest req) {
        this.reader = reader;
        this.searcher = _searcher;
        this.req = req;
        this.parse = new MdrillParseGroupby(_params);
        this.recordCount = new RecordCount();
        this.recordCount.setFinalResult(false);
        this.recordCount.setMaxUniqSize(this.parse.maxlimit);
    }

    /**
     * Runs the group-by over {@code baseDocs} and returns the result.
     *
     * @param fields   group-by fields, passed to {@code createContainer}
     * @param baseDocs documents to aggregate
     * @return a list with a "count" entry (record count) and a "list" entry (one item per group)
     * @throws IOException    propagated from the aggregation or translation steps
     * @throws ParseException propagated from the translation step
     */
    public NamedList get(String[] fields, DocSet baseDocs) throws IOException, ParseException {
        long t1 = System.currentTimeMillis();
        this.container = this.parse.createContainer(fields, baseDocs, this.reader, this.searcher, this.req);
        long t2 = System.currentTimeMillis();

        QuickHashMap<GroupListCache.GroupList, RefRow> groups = null;
        NamedList rtn;
        long t3;
        long t4;
        try {
            groups = this.makeTopGroups(fields);
            t3 = System.currentTimeMillis();
            this.transGroupValue(groups);
            t4 = System.currentTimeMillis();
            rtn = this.toNameList();
        } finally {
            // Release the container's per-group resources even when a later step throws;
            // previously an exception in transGroupValue/toNameList skipped this call.
            // If makeTopGroups itself failed there is no map to hand over.
            if (groups != null) {
                container.free(groups);
            }
        }
        long t5 = System.currentTimeMillis();
        LOG.info("##FacetCross## time taken " + ",total:" + (t5 - t1) + ",init:" + (t2 - t1) + ",makeGroups:" + (t3 - t2) + ",transGroupValue:" + (t4 - t3) + ",groups.size:" + groups.size());
        return rtn;
    }

    /**
     * Walks the container's base documents, accumulates a row per group key and finally prunes
     * the map with {@link #TopMaps}.
     *
     * @param fields not read by this method; kept for interface compatibility
     * @return map from group key to accumulated row
     * @throws IOException propagated from the container's per-document updates
     */
    public QuickHashMap<GroupListCache.GroupList, RefRow> makeTopGroups(String[] fields) throws IOException {
        // A single scratch key that toGroupsByJoin fills per document; it is copied before
        // being stored in the map (see makeOrGetGroup / delayPut).
        GroupListCache.GroupList group = GroupListCache.GroupList.INSTANCE(container.groupListCache, container.groupbySize);
        QuickHashMap<GroupListCache.GroupList, RefRow> groups = new QuickHashMap<GroupListCache.GroupList, RefRow>(this.parse.limit_offset_maxgroups + 1);
        boolean issetDist = this.parse.isMustSetDistResult();

        if (container.groupNonEmptySize == 0) {
            this.aggregateIntoSingleGroup(groups, group, issetDist);
        } else {
            this.aggregatePerGroup(groups, group, issetDist);
        }

        TopMaps(groups);
        return groups;
    }

    /**
     * Case {@code container.groupNonEmptySize == 0}: every document is accumulated into one row
     * keyed by the reset scratch key. The loop is specialized per mode (count only / no distinct /
     * no stat / both) so the mode checks run once rather than per document.
     */
    private void aggregateIntoSingleGroup(QuickHashMap<GroupListCache.GroupList, RefRow> groups, GroupListCache.GroupList group, boolean issetDist) throws IOException {
        group.reset();
        RefRow cnt = this.makeOrGetGroup(groups, group);
        if (container.countOnly()) {
            cnt.val += container.baseDocs.size();
            return;
        }

        DocIterator iter = container.baseDocs.iterator();
        if (container.noDist()) {
            while (iter.hasNext()) {
                int doc = iter.nextDoc();
                cnt.val++;
                container.updateStat(cnt, doc);
            }
        } else if (container.noStat()) {
            while (iter.hasNext()) {
                int doc = iter.nextDoc();
                cnt.val++;
                if (issetDist) {
                    container.updateDist(cnt, doc);
                }
            }
        } else {
            while (iter.hasNext()) {
                int doc = iter.nextDoc();
                cnt.val++;
                container.updateStat(cnt, doc);
                if (issetDist) {
                    container.updateDist(cnt, doc);
                }
            }
        }
    }

    /**
     * Case {@code container.groupNonEmptySize != 0}: each document is mapped to a group key by
     * {@code toGroupsByJoin}; documents for which that call returns false, or whose key is
     * rejected by {@code container.pre}, are skipped. The loop is specialized per mode in the
     * same way as {@link #aggregateIntoSingleGroup}.
     */
    private void aggregatePerGroup(QuickHashMap<GroupListCache.GroupList, RefRow> groups, GroupListCache.GroupList group, boolean issetDist) throws IOException {
        DocIterator iter = container.baseDocs.iterator();
        if (container.countOnly()) {
            while (iter.hasNext()) {
                int doc = iter.nextDoc();
                if (container.toGroupsByJoin(doc, group) && container.pre.contains(group)) {
                    RefRow cnt = this.makeOrGetGroup(groups, group);
                    cnt.val++;
                    this.delayPut(groups, cnt, group);
                }
            }
        } else if (container.noDist()) {
            while (iter.hasNext()) {
                int doc = iter.nextDoc();
                if (container.toGroupsByJoin(doc, group) && container.pre.contains(group)) {
                    RefRow cnt = this.makeOrGetGroup(groups, group);
                    cnt.val++;
                    container.updateStat(cnt, doc);
                    this.delayPut(groups, cnt, group);
                }
            }
        } else if (container.noStat()) {
            while (iter.hasNext()) {
                int doc = iter.nextDoc();
                if (container.toGroupsByJoin(doc, group) && container.pre.contains(group)) {
                    RefRow cnt = this.makeOrGetGroup(groups, group);
                    cnt.val++;
                    if (issetDist) {
                        container.updateDist(cnt, doc);
                    }
                    this.delayPut(groups, cnt, group);
                }
            }
        } else {
            while (iter.hasNext()) {
                int doc = iter.nextDoc();
                if (container.toGroupsByJoin(doc, group) && container.pre.contains(group)) {
                    RefRow cnt = this.makeOrGetGroup(groups, group);
                    cnt.val++;
                    container.updateStat(cnt, doc);
                    if (issetDist) {
                        container.updateDist(cnt, doc);
                    }
                    this.delayPut(groups, cnt, group);
                }
            }
        }
    }

    /**
     * Prunes {@code groups} down to at most {@code parse.limit_offset} entries, keeping the
     * entries that {@code container.cmpTermNum} orders first, and records the last retained
     * entry in {@link #smallestShardGroup}. Does nothing when the map is already small enough.
     */
    private void TopMaps(QuickHashMap<GroupListCache.GroupList, RefRow> groups) {
        long t1 = System.currentTimeMillis();
        int groupsize = groups.size();
        if (groupsize <= this.parse.limit_offset) {
            return;
        }

        // Reverse-ordered heap: its head is the retained entry that cmpTermNum ranks last,
        // i.e. the next one to evict.
        PriorityQueue<ShardGroupByTermNum> res = new PriorityQueue<ShardGroupByTermNum>(this.parse.limit_offset, Collections.reverseOrder(this.container.cmpTermNum));
        // Evictions are collected first and applied after the loop so the map is not modified
        // while it is being iterated.
        ArrayList<GroupListCache.GroupList> toremove = new ArrayList<GroupListCache.GroupList>();

        // Number of entries seen during iteration; reported in the log next to groups.size().
        // (Replaces a throw-away copy of the whole map that existed only to log its size.)
        int visited = 0;
        for (Entry<GroupListCache.GroupList, RefRow> e : groups.entrySet()) {
            visited++;
            ShardGroupByTermNum mrow = new ShardGroupByTermNum(e.getKey(), e.getValue());
            if (res.size() < this.parse.limit_offset) {
                res.add(mrow);
            } else if (this.container.cmpTermNum.compare(res.peek(), mrow) > 0) {
                // mrow beats the current head: admit it and evict the head.
                res.add(mrow);
                ShardGroupByTermNum free = res.poll();
                toremove.add(free.key);
            } else {
                toremove.add(mrow.key);
            }
        }

        for (GroupListCache.GroupList torm : toremove) {
            groups.remove(torm);
            this.container.freeRow(torm);
            // Hand the evicted key object back to the container's key cache.
            this.container.groupListCache.add(torm);
        }
        int cnt1 = toremove.size();

        smallestShardGroup = res.peek();
        long t2 = System.currentTimeMillis();
        LOG.info("TopMaps groups.size=" + groupsize + "@" + visited + " to " + groups.size() + "@" + this.parse.limit_offset + ",res.size=" + res.size() + ",remove=" + cnt1 + ",timetaken=" + (t2 - t1) + ",mergercount=" + this.mergercount);
    }

    /**
     * Builds the response from the rows collected in {@code container.res}: rows are sorted with
     * {@code container.cmpString}, the first {@code parse.offset} rows are skipped, and the rest
     * are emitted under "list". The record count is emitted under "count".
     */
    private NamedList toNameList() {
        ArrayList<GroupbyRow> recommendations = new ArrayList<GroupbyRow>(this.container.res.size());
        recommendations.addAll(this.container.res);
        Collections.sort(recommendations, this.container.cmpString);

        NamedList res = new NamedList();
        res.add("count", recordCount.toNamedList());

        ConcurrentHashMap<Long, String> cache = null;
        boolean issetCrc = this.parse.crcOutputSet != null;
        MergerGroupByGroupbyRowCompare mergerCmp = null;
        if (issetCrc) {
            // Get-or-create the shared cache for this crcOutputSet under the registry's lock.
            synchronized (MdrillUtils.CRC_CACHE_SIZE) {
                cache = MdrillUtils.CRC_CACHE_SIZE.get(this.parse.crcOutputSet);
                if (cache == null) {
                    cache = new ConcurrentHashMap<Long, String>();
                    MdrillUtils.CRC_CACHE_SIZE.put(this.parse.crcOutputSet, cache);
                }
            }
            FacetComponent.FieldFacet facet = new FacetComponent.FieldFacet(this.parse.params, "solrCorssFields_s");
            mergerCmp = facet.createMergerGroupCmp();
        }

        ArrayList<Object> list = new ArrayList<Object>();
        int index = 0;
        for (GroupbyRow kv : recommendations) {
            if (index >= this.parse.offset) {
                if (issetCrc) {
                    kv.ToCrcSet(mergerCmp, cache);
                }
                list.add(kv.toNamedList());
            }
            index++;
        }
        res.add("list", list);
        return res;
    }

    /**
     * Converts one accumulated row into a {@code GroupbyRow} named {@code groupname} and offers
     * it to {@code container.res} through {@code QueuePutUtils.put2Queue}.
     *
     * @param ref       accumulated counters for the group
     * @param groupname encoded, joined group key
     */
    private void setCrossRow(RefRow ref, String groupname) throws ParseException, IOException {
        this.recordCount.setCrcRecord(groupname);
        GroupbyRow row = new GroupbyRow(new ColumnKey(groupname), ref.val);
        row.setCross(this.parse.crossFs, this.parse.distFS);

        if (this.parse.hasStat()) {
            for (int i = 0; i < this.parse.crossFs.length; i++) {
                RefRowStat s = ref.stat[i];
                // Stat slots as written here: 1 = sum, 2 = max, 3 = min, 4 = count.
                if (s.issetup) {
                    row.addStat(i, 1, s.sum);
                    row.addStat(i, 2, s.max);
                    row.addStat(i, 3, s.min);
                    row.addStat(i, 4, (double) s.cnt);
                } else {
                    row.addStat(i, 1, 0d);
                    row.addStat(i, 2, 0d);
                    row.addStat(i, 3, 0d);
                    row.addStat(i, 4, 0d);
                }
            }
        }

        if (this.parse.hasDist()) {
            for (int i = 0; i < this.parse.distFS.length; i++) {
                row.setDistinct(i, ref.dist[i]);
            }
        }

        QueuePutUtils.put2Queue(row, this.container.res, this.parse.limit_offset, this.container.cmpString);
    }

    /** Filter deciding whether a group key may be aggregated. */
    public static interface Iprecontains {
        public boolean contains(GroupListCache.GroupList g);
    }

    /** Accepts only the group keys present in a given set. */
    public static class PreContains implements Iprecontains {
        HashSet<GroupListCache.GroupList> preSet;

        public PreContains(HashSet<GroupListCache.GroupList> preSet) {
            this.preSet = preSet;
        }

        public boolean contains(GroupListCache.GroupList g) {
            return this.preSet.contains(g);
        }
    }

    /** Accepts every group key. */
    public static class EmptyPrecontains implements Iprecontains {
        public boolean contains(GroupListCache.GroupList g) {
            return true;
        }
    }

    /**
     * Translates every group key from term numbers to its encoded string form and emits the
     * corresponding row through {@link #setCrossRow}.
     *
     * <p>The name is built from the {@code container.ufs} columns first ("-" for a column whose
     * entry in {@code ufs.cols} is null), followed by the fields of each join in
     * {@code container.joinInvert}; parts are separated by {@code UniqConfig.GroupJoinString()}.
     *
     * @param groups map from group key to accumulated row
     */
    public void transGroupValue(QuickHashMap<GroupListCache.GroupList, RefRow> groups) throws ParseException, IOException {
        TermNumToString[] tm = this.container.prefetch(groups);
        for (Entry<GroupListCache.GroupList, RefRow> e : groups.entrySet()) {
            int[] group = e.getKey().list;
            // Local, single-threaded buffer: StringBuilder avoids StringBuffer's locking.
            StringBuilder buff = new StringBuilder();
            // Separator is empty before the first part and the join string afterwards.
            String j = "";
            for (int i = 0; i < container.ufs.length; i++) {
                Integer termNum = group[i];
                buff.append(j);
                if (container.ufs.cols[i] != null) {
                    buff.append(EncodeUtils.encode(tm[i].getTermValue(termNum)));
                } else {
                    buff.append("-");
                }
                j = UniqConfig.GroupJoinString();
            }

            // Join columns are stored in the key array after the ufs columns.
            int joinoffset = container.ufs.length;
            for (HigoJoinInvert inv : this.container.joinInvert) {
                int fc = inv.fieldCount();
                for (int i = 0; i < fc; i++) {
                    buff.append(j);
                    buff.append(EncodeUtils.encode(inv.getTermNumValue(group[joinoffset + i], i)));
                    j = UniqConfig.GroupJoinString();
                }
                joinoffset += inv.fieldCount();
            }

            String groupname = buff.toString();
            this.setCrossRow(e.getValue(), groupname);
        }
    }

    /**
     * Completes the deferred insertion of a row created after a prune. Rows not flagged
     * {@code delayPut} are ignored. A flagged row is stored under a copy of the key if
     * {@code cmpTermNum} ranks it ahead of {@link #smallestShardGroup}; otherwise
     * {@code container.freeRow(group)} is called and the row is not stored.
     */
    private void delayPut(QuickHashMap<GroupListCache.GroupList, RefRow> groups, RefRow cnt, GroupListCache.GroupList group) {
        if (cnt.delayPut) {
            if (this.container.cmpTermNum.compare(smallestShardGroup, new ShardGroupByTermNum(group, cnt)) > 0) {
                cnt.delayPut = false;
                groups.put(group.copy(this.container.groupListCache), cnt);
            } else {
                this.container.freeRow(group);
            }
        }
    }

    /**
     * Returns the row for {@code group}, creating one if the map has none.
     *
     * <p>When the group is new and the map already holds {@code parse.limit_offset_maxgroups}
     * entries, {@link #mergercount} is incremented; once it reaches
     * {@code parse.limit_offset_maxgroups_merger} the container's empty row is returned instead
     * of a new one, otherwise the result is flagged as oversize and the map is pruned with
     * {@link #TopMaps}. A newly created row is stored immediately only while no prune has
     * happened yet; afterwards it is flagged {@code delayPut} and left for {@link #delayPut}.
     */
    private RefRow makeOrGetGroup(QuickHashMap<GroupListCache.GroupList, RefRow> groups, GroupListCache.GroupList group) {
        RefRow cnt = groups.get(group);
        if (cnt == null) {
            if (groups.size() >= this.parse.limit_offset_maxgroups) {
                mergercount++;
                if (mergercount >= this.parse.limit_offset_maxgroups_merger) {
                    return this.container.getEmptyRow();
                }
                this.recordCount.setCrcRecord("-");
                this.recordCount.setIsoversize(true);
                TopMaps(groups);
            }

            cnt = this.container.createRow(group);
            if (smallestShardGroup == null) {
                // The scratch key is reused per document, so the map gets its own copy.
                groups.put(group.copy(this.container.groupListCache), cnt);
            } else {
                cnt.delayPut = true;
            }
        }
        return cnt;
    }
}