/** * */ package com.taobao.top.analysis.statistics; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Calendar; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.taobao.top.analysis.config.SlaveConfig; import com.taobao.top.analysis.exception.AnalysisException; import com.taobao.top.analysis.node.io.IInputAdaptor; import com.taobao.top.analysis.node.io.IOutputAdaptor; import com.taobao.top.analysis.node.job.JobTask; import com.taobao.top.analysis.node.job.JobTaskExecuteInfo; import com.taobao.top.analysis.node.job.JobTaskResult; import com.taobao.top.analysis.statistics.data.ReportEntry; import com.taobao.top.analysis.util.AnalysisConstants; import com.taobao.top.analysis.util.AnalyzerUtil; import com.taobao.top.analysis.statistics.reduce.IReducer.ReduceType; import com.taobao.top.analysis.util.Threshold; /** * 默认计算引擎实现,用于分析任务 * * @author fangweng * @Email fangweng@taobao.com * 2011-11-24 * */ public class StatisticsEngine implements IStatisticsEngine{ private static final Log logger = LogFactory.getLog(StatisticsEngine.class); private Threshold threshold; SlaveConfig config; /** * 输入的适配器,用于支持任务执行时数据来源的扩展 */ List<IInputAdaptor> inputAdaptors; /** * 输出的适配器,用于支持任务执行完毕以后数据输出的扩展 */ List<IOutputAdaptor> outputAdaptors; public StatisticsEngine() { inputAdaptors = new ArrayList<IInputAdaptor>(); outputAdaptors = new ArrayList<IOutputAdaptor>(); threshold = new Threshold(1000); } @Override public void init() throws AnalysisException { } @Override public void releaseResource() { } @Override public SlaveConfig getConfig() { return config; } @Override public void setConfig(SlaveConfig config) { this.config = config; } @Override public void addInputAdaptor(IInputAdaptor inputAdaptor) { inputAdaptors.add(inputAdaptor); } @Override public void removeInputAdaptor(IInputAdaptor inputAdaptor) { inputAdaptors.remove(inputAdaptor); } @Override public void addOutputAdaptor(IOutputAdaptor outputAdaptor) { outputAdaptors.add(outputAdaptor); } @Override public void removeOutputAdaptor(IOutputAdaptor outputAdaptor) { outputAdaptors.remove(outputAdaptor); } @Override public void doExport(JobTask jobTask,JobTaskResult jobTaskResult) { for(IOutputAdaptor outputAdaptor : outputAdaptors) { if (outputAdaptor.ignore(jobTask.getOutput())) continue; outputAdaptor.sendResultToOutput(jobTask,jobTaskResult); } } @Override public JobTaskResult doAnalysis(JobTask jobTask) throws UnsupportedEncodingException, IOException { InputStream in = null; JobTaskExecuteInfo taskExecuteInfo = new JobTaskExecuteInfo(); try { // 寻找输入适配器 for(IInputAdaptor inputAdaptor : inputAdaptors) { if (inputAdaptor.ignore(jobTask.getInput())) continue; in = inputAdaptor.getInputFormJob(jobTask, taskExecuteInfo); if (in != null) break; } if (in == null) { if(config.isEnableAlert()) { AnalyzerUtil.sendOutAlert(Calendar.getInstance(), config.getAlertUrl(), config.getAlertFrom(), config.getAlertModel(), config.getAlertWangWang(), "Can't connect resource:" + jobTask.getInput()); } JobTaskResult jobTaskResult = new JobTaskResult(); jobTaskResult.setJobName(jobTask.getJobName()); jobTaskResult.addTaskId(jobTask.getTaskId()); jobTaskResult.setJobEpoch(jobTask.getJobEpoch()); taskExecuteInfo.setAnalysisConsume(0); taskExecuteInfo.setEmptyLine(0); taskExecuteInfo.setErrorLine(0); taskExecuteInfo.setJobDataSize(0); taskExecuteInfo.setTotalLine(0); taskExecuteInfo.setTaskId(jobTask.getTaskId()); taskExecuteInfo.setSuccess(false); jobTaskResult.addTaskExecuteInfo(taskExecuteInfo); logger.error("Input not found! input : " + jobTask.getInput()); return jobTaskResult; } return analysis(in,jobTask, taskExecuteInfo); } finally { if (in != null) try { in.close(); } catch (IOException e) { logger.error(e,e); } } } // 分析数据 JobTaskResult analysis(InputStream in,JobTask jobtask, JobTaskExecuteInfo taskExecuteInfo) throws UnsupportedEncodingException { String encoding = jobtask.getInputEncoding(); String splitRegex = jobtask.getSplitRegex(); JobTaskResult jobTaskResult = new JobTaskResult(); jobTaskResult.setJobName(jobtask.getJobName()); jobTaskResult.addTaskId(jobtask.getTaskId()); jobTaskResult.setJobEpoch(jobtask.getJobEpoch()); Map<String, ReportEntry> entryPool = jobtask.getStatisticsRule().getEntryPool(); BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding)); int normalLine = 0;//正常的行数 int emptyLine=0;//拉取空行的次数 int exceptionLine=0;//一行中,只要entry有异常,该行就是存在异常的行。 int size = 0; String record; ReportEntry entry = null; long beg = System.currentTimeMillis(); try { //逐行处理 while ((record = reader.readLine()) != null) { boolean failure=false; try { if (record == null || "".equals(record)) { emptyLine++; continue; } size += record.getBytes().length; String[] contents = StringUtils.splitByWholeSeparatorPreserveAllTokens(record, splitRegex); Iterator<String> keys = entryPool.keySet().iterator(); while (keys.hasNext()) { try { String key = keys.next(); entry = entryPool.get(key); if(!entry.isLazy()){ processSingleLine(entry, contents,jobtask,jobTaskResult, taskExecuteInfo); // if(!processSingleLine(entry, contents,jobtask,jobTaskResult, taskExecuteInfo)) { // if(entry.getReports().contains("appAuthReport")) // logger.error("key null, record:" + record); // } } } catch (Throwable e) { if (!failure) exceptionLine++; failure = true; if (!threshold.sholdBlock()) logger.error( new StringBuilder().append("Entry :").append(entry.getId()).append(", job : ") .append(jobtask.getJobName()).append(", entry:").append(entry.getName()) .append("\r\n record: ").append(record).toString(), e); } } if(!failure) normalLine++; } catch(Throwable t) { if(!failure) exceptionLine++; if (!threshold.sholdBlock()) logger.error( new StringBuilder() .append("\r\n record: ").append(record) .toString(), t); } } } catch (Throwable ex) { taskExecuteInfo.setSuccess(false); logger.error(ex,ex); } finally { if (reader != null) { try { reader.close(); reader = null; } catch (Throwable ex) { logger.error(ex,ex); } } taskExecuteInfo.setAnalysisConsume(System.currentTimeMillis() - beg); taskExecuteInfo.setEmptyLine(emptyLine); taskExecuteInfo.setErrorLine(exceptionLine); taskExecuteInfo.setJobDataSize(size); taskExecuteInfo.setTotalLine(normalLine+exceptionLine+emptyLine); taskExecuteInfo.setTaskId(jobtask.getTaskId()); taskExecuteInfo.setSuccess(true); jobTaskResult.addTaskExecuteInfo(taskExecuteInfo); if (logger.isWarnEnabled()) logger.warn(new StringBuilder("jobtask ").append(jobtask.getTaskId()) .append(",analysis consume time: ").append(taskExecuteInfo.getAnalysisConsume()) .append(",normal line count: ").append(normalLine) .append(",exception line count:").append(exceptionLine) .append(",empty line:").append(emptyLine).toString()); } return jobTaskResult; } //处理单行数据 public void processSingleLine(ReportEntry entry,String[] contents,JobTask jobtask,JobTaskResult jobTaskResult, JobTaskExecuteInfo taskExecuteInfo){ Map<String, Map<String, Object>> entryResult = jobTaskResult.getResults(); String key = entry.getMapClass().mapperKey(entry,contents, jobtask); // if(key == null) // return false; if(key != null && !AnalysisConstants.IGNORE_PROCESS.equals(key)){ // taskExecuteInfo.incKeyCount(1); Object value = entry.getMapClass().mapperValue(entry, contents, jobtask); // taskExecuteInfo.incValueCount(1); Map<String,Object> result = entryResult.get(entry.getId()); if(result == null){ result = new HashMap<String, Object>(); jobTaskResult.getResults().put(entry.getId(), result); } entry.getReduceClass().reducer(entry,key,value,result,ReduceType.SHALLOW_MERGE); // return true; } // return true; } }