/** * */ package com.taobao.top.analysis.node.component; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.taobao.top.analysis.config.MasterConfig; import com.taobao.top.analysis.exception.AnalysisException; import com.taobao.top.analysis.node.IJobResultMerger; import com.taobao.top.analysis.node.job.Job; import com.taobao.top.analysis.node.job.JobMergedResult; import com.taobao.top.analysis.node.job.JobTask; import com.taobao.top.analysis.node.job.JobTaskResult; import com.taobao.top.analysis.node.operation.JobDataOperation; import com.taobao.top.analysis.node.operation.MergeJobOperation; import com.taobao.top.analysis.statistics.reduce.IReducer.ReduceType; import com.taobao.top.analysis.util.AnalysisConstants; import com.taobao.top.analysis.util.NamedThreadFactory; import com.taobao.top.analysis.util.ReportUtil; /** * 任务合并接口的实现 * @author fangweng * @Email fangweng@taobao.com * 2011-11-30 * */ public class JobResultMerger implements IJobResultMerger { private static final Log logger = LogFactory.getLog(JobResultMerger.class); MasterConfig config; /** * 用于合并结果集的线程池 */ private ThreadPoolExecutor mergeJobResultThreadPool; int maxMergeJobWorker = 2; public int getMaxMergeJobWorker() { return maxMergeJobWorker; } public void setMaxMergeJobWorker(int maxMergeJobWorker) { this.maxMergeJobWorker = maxMergeJobWorker; } @Override public void init() throws AnalysisException { if (config != null) maxMergeJobWorker = config.getMaxMergeJobWorker(); mergeJobResultThreadPool = new ThreadPoolExecutor( maxMergeJobWorker, maxMergeJobWorker, 0, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("mergeJobResult_worker")); if (logger.isInfoEnabled()) logger.info("JobResultMerger init end. maxMergeJobWorker size : " + maxMergeJobWorker); } @Override public void releaseResource() { if (mergeJobResultThreadPool != null) mergeJobResultThreadPool.shutdown(); } @Override public MasterConfig getConfig() { return config; } @Override public void setConfig(MasterConfig config) { this.config = config; } @Override public void merge(Job job,BlockingQueue<JobMergedResult> branchResultQueue ,BlockingQueue<JobTaskResult> jobTaskResultsQueue,boolean needMergeLazy) { if (logger.isInfoEnabled()) logger.info("start merge check jobName : " + job.getJobName()); // 检查job列表 List<Map<String, Map<String, Object>>> mergeResults = new ArrayList<Map<String, Map<String, Object>>>(); int mergeResultCount = 0; long collectJobTime = System.currentTimeMillis(); // 小于批量操作的数目,实际数目 while (mergeResults.size() < config .getMinMergeJobCount()) { JobTaskResult jt = jobTaskResultsQueue.poll(); while (jt != null) { mergeResults.add(jt.getResults()); mergeResultCount += jt.getTaskIds().size(); jt = jobTaskResultsQueue.poll(); } JobMergedResult jr = branchResultQueue.poll(); // 将未何并到主干的结果也继续交给线程去做合并 while (jr != null) { mergeResults.add(jr.getMergedResult()); mergeResultCount += jr.getMergeCount(); jr = branchResultQueue.poll(); } // 最后一拨需要合并的数据,不需要再等待批量去做 if (job.getMergedTaskCount().get() + mergeResultCount >= job.getTaskCount()) break; if (System.currentTimeMillis() - collectJobTime > config.getMaxJobResultBundleWaitTime()) break; // 放缓一些节奏 if (mergeResultCount == 0 && (this.config.getDispatchMaster() || mergeResults.size() == 0)) { try { Thread.sleep(50); } catch (InterruptedException e) {} } } if (logger.isInfoEnabled()) logger.info("jobName : " + job.getJobName() + ", got " + mergeResultCount + " need to merge"); //判断是否可以开始载入外部磁盘换存储的文件,大于AsynLoadDiskFilePrecent的时候开始载入数据等待分析 if (config.getSaveTmpResultToFile()) if (job.getMergedTaskCount().get() * 100 / job.getTaskCount() >= job.getJobConfig().getAsynLoadDiskFilePrecent()) { if (logger.isInfoEnabled()) logger.info("start asyn load " + job.getJobName() + " trunkData from disk"); if (job.getNeedLoadResultFile().compareAndSet(true, false)) { new Thread(new JobDataOperation(job,AnalysisConstants.JOBMANAGER_EVENT_LOADDATA_TO_TMP,this.config)).start(); } } if (mergeResultCount > 0 || (!this.config.getDispatchMaster() && mergeResults.size() > 0)) { mergeJobResultThreadPool .execute(new MergeJobOperation(job, mergeResultCount, mergeResults,config,branchResultQueue)); } else { // 放缓一点节奏 try { Thread.sleep(1000); } catch (InterruptedException e) { } } } @Override public JobTaskResult merge(JobTask jobTask, List<JobTaskResult> jobTaskResults,boolean needMergeLazy,boolean needDeepMerge) { if (jobTaskResults == null || (jobTaskResults != null && jobTaskResults.size() == 0)) return null; if (jobTaskResults.size() == 1) return jobTaskResults.get(0); if (logger.isInfoEnabled()) { StringBuilder info = new StringBuilder("start merge check jobTask : "); for(JobTaskResult taskResult : jobTaskResults) for(String id : taskResult.getTaskIds()) info.append(id).append(" , "); logger.info(info.toString()); } JobTaskResult base = jobTaskResults.get(0); @SuppressWarnings("unchecked") Map<String, Map<String, Object>>[] taskResultContents = new Map[jobTaskResults.size()]; taskResultContents[0] = base.getResults(); for(int i = 1 ; i < jobTaskResults.size(); i++) { JobTaskResult mergeResult = jobTaskResults.get(i); taskResultContents[i] = mergeResult.getResults(); base.addTaskIds(mergeResult.getTaskIds()); base.addTaskExecuteInfos(mergeResult.getTaskExecuteInfos()); } if (needDeepMerge) base.setResults(ReportUtil.mergeEntryResult(taskResultContents, jobTask.getStatisticsRule().getEntryPool(), needMergeLazy,ReduceType.DEEP_MERGE)); else base.setResults(ReportUtil.mergeEntryResult(taskResultContents, jobTask.getStatisticsRule().getEntryPool(), needMergeLazy,ReduceType.SHALLOW_MERGE)); return base; } }