/** * */ package com.taobao.top.analysis.statistics.reduce.group; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.taobao.top.analysis.statistics.data.DistinctCountEntryValue; import com.taobao.top.analysis.statistics.data.ReportEntry; import com.taobao.top.analysis.statistics.reduce.IReducer.ReduceType; import com.taobao.top.analysis.util.AnalysisConstants; import com.taobao.top.analysis.util.bloom.ByteBloomFilter; /** * @author fangweng * email: fangweng@taobao.com * 下午4:51:14 * */ public class DistinctCountFunction implements GroupFunction{ /** * */ private static final long serialVersionUID = -7255142962029484084L; private static final Log logger = LogFactory.getLog(DistinctCountFunction.class); @Override public void group(ReportEntry entry,String key, Object value, Map<String, Object> result,ReduceType rs) { if (value == null) return; //浅层合并的时候,不做bloom过滤器 if (rs == ReduceType.SHALLOW_MERGE) { String nkey; if (key.startsWith(AnalysisConstants.MAGIC_NUM)) nkey = key; else nkey = new StringBuilder(AnalysisConstants.MAGIC_NUM).append(key).append(value).toString(); if (!result.containsKey(nkey)) result.put(nkey, value); return; } else { //一种情况是result里面还是原生态的数据,不是bloom过滤器,则需要构建bloom过滤器 String nkey = key.substring(AnalysisConstants.MAGIC_NUM.length(), key.length() - value.toString().length()); DistinctCountEntryValue distinctEntry = (DistinctCountEntryValue)result.get(nkey); if (distinctEntry == null) { distinctEntry = createDCEntryValue(entry); result.put(nkey, distinctEntry); } try { //存在一定危险性,key与nkey冲突 if (result.get(key) != null && !(result.get(key) instanceof DistinctCountEntryValue)) { distinctEntry.add(result.get(key).toString()); result.remove(key); } distinctEntry.add(value.toString()); } catch(Exception ex) { logger.error(ex); } } } DistinctCountEntryValue createDCEntryValue(ReportEntry entry) { DistinctCountEntryValue distinctEntryValue = new DistinctCountEntryValue(); ByteBloomFilter bloomFilter; //240k int maxKeys = 100000; float errorRate = 0.0001F; if (entry.getAdditions().get(AnalysisConstants.ANALYSIS_BLOOM_MAXKEYS) != null) maxKeys = (Integer)entry.getAdditions().get(AnalysisConstants.ANALYSIS_BLOOM_MAXKEYS); if (entry.getAdditions().get(AnalysisConstants.ANALYSIS_BLOOM_ERRORRATE) != null) errorRate = (Float)entry.getAdditions().get(AnalysisConstants.ANALYSIS_BLOOM_ERRORRATE); bloomFilter = new ByteBloomFilter(maxKeys,errorRate,1); distinctEntryValue.setBloomFilter(bloomFilter); return distinctEntryValue; } }