package com.taobao.zeus.jobs.sub.tool;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.springframework.context.ApplicationContext;

import com.taobao.zeus.jobs.AbstractJob;
import com.taobao.zeus.jobs.JobContext;
import com.taobao.zeus.jobs.sub.conf.ConfUtil;
import com.taobao.zeus.model.processer.OutputCheckProcesser;

/**
 * Post-run check that compares the size of a job's output directory against the
 * average size of its sibling directories under the same parent path, and flags
 * the run when the deviation exceeds the configured percentage.
 */
public class OutputCheckJob extends AbstractJob {

    private OutputCheckProcesser ocp;
    private String path;

    public OutputCheckJob(JobContext jobContext, OutputCheckProcesser p,
            ApplicationContext applicationContext) {
        super(jobContext);
        this.ocp = p;
        path = ocp.getPath();
    }

    @Override
    public Integer run() throws Exception {
        if (jobContext.getCoreExitCode() != 0) {
            jobContext.getJobHistory().getLog()
                    .appendZeus("Job failed; skipping output data size check");
            return 0;
        }
        jobContext.getJobHistory().getLog()
                .appendZeus("OutputCheck: starting output data size check");

        // Derive the parent directory of the output path.
        String upperPath = path;
        if (upperPath.endsWith("/")) {
            upperPath = upperPath.substring(0, upperPath.length() - 1);
        }
        upperPath = upperPath.substring(0, upperPath.lastIndexOf("/"));

        Path hdfsPath = new Path(upperPath);
        FileSystem fs = FileSystem.get(ConfUtil.getDefaultCoreSite());
        FileStatus[] files = fs.listStatus(hdfsPath);

        // Collect the sizes of all non-empty sibling directories as reference samples.
        double total = 0;
        List<ContentSummary> dirFiles = new ArrayList<ContentSummary>();
        for (FileStatus f : files) {
            if (f.isDir()) {
                ContentSummary cs = fs.getContentSummary(f.getPath());
                if (cs.getLength() > 0) {
                    dirFiles.add(cs);
                    total += cs.getLength();
                }
            }
        }
        if (dirFiles.isEmpty()) {
            jobContext.getJobHistory().getLog()
                    .appendZeus("No reference directories found; skipping output data size check");
            return 0;
        }

        // Discard outliers: keep only directories within 50%-150% of the raw average.
        double ava = total / dirFiles.size();
        double upper = ava * 1.5;
        double lower = ava * 0.5;
        List<ContentSummary> valid = new ArrayList<ContentSummary>();
        for (ContentSummary cs : dirFiles) {
            if (cs.getLength() < upper && cs.getLength() > lower) {
                valid.add(cs);
            }
        }
        if (valid.isEmpty()) {
            jobContext.getJobHistory().getLog()
                    .appendZeus("No valid reference directories; skipping output data size check");
            return 0;
        }

        // Recompute the average over the filtered reference set.
        total = 0d;
        for (ContentSummary cs : valid) {
            total += cs.getLength();
        }
        ava = total / valid.size();

        jobContext.getJobHistory().getLog()
                .appendZeus("Parent path of output data: " + upperPath);
        jobContext.getJobHistory().getLog()
                .appendZeus("Number of valid reference directories: " + valid.size());
        jobContext.getJobHistory().getLog()
                .appendZeus("Average output data size: " + ava);
        jobContext.getJobHistory().getLog()
                .appendZeus("Configured size fluctuation threshold: " + ocp.getPercent() + "%");
        jobContext.getJobHistory().getLog()
                .appendZeus("Output data path of current job: " + path);

        ContentSummary current = null;
        try {
            current = fs.getContentSummary(new Path(path));
        } catch (Exception e) {
            log("The output directory of this job could not be read");
            log(e);
        }
        if (current != null) {
            jobContext.getJobHistory().getLog()
                    .appendZeus("Output data size of this job: " + current.getLength());
        } else {
            return -1;
        }

        // Compare the current output size against the filtered average.
        double rate = Math.abs(current.getLength() - ava) / ava;
        if (rate > (ocp.getPercent() / 100.0)) {
            // Deviation exceeds the configured range: record an alarm.
            jobContext.getJobHistory().getLog()
                    .appendZeus("Deviation exceeds the configured threshold; raising an alarm");
            String jobId = jobContext.getJobHistory().getJobId();
            // Assemble the alarm message body.
            StringBuffer sb = new StringBuffer("jobid=" + jobId
                    + " output data size exceeds the fluctuation threshold " + ocp.getPercent() + "%");
            sb.append("\nAverage output data size: " + ava);
            sb.append("\nOutput data size of this job: " + current.getLength());
        } else {
            jobContext.getJobHistory().getLog().appendZeus("Output data size check OK");
        }
        return 0;
    }

    @Override
    public void cancel() {
        canceled = true;
    }
}