package org.openstack.atlas.scheduler.execution;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.openstack.atlas.exception.ExecutionException;
import org.openstack.atlas.exception.SchedulingException;
import org.openstack.atlas.logs.hadoop.jobs.HadoopJob;
import org.openstack.atlas.logs.hadoop.jobs.HadoopLogSplitterJob;
import org.openstack.atlas.scheduler.JobScheduler;
import org.openstack.atlas.scheduler.SplitLoadBalancerLogsJob;
import org.openstack.atlas.service.domain.entities.JobName;
import org.openstack.atlas.service.domain.entities.JobState;
import org.openstack.atlas.tools.QuartzSchedulerConfigs;
import org.openstack.atlas.config.HadoopLogsConfigs;
import org.openstack.atlas.service.domain.entities.JobStateVal;
import org.openstack.atlas.logs.hadoop.util.HdfsUtils;
import org.openstack.atlas.util.staticutils.StaticFileUtils;
import org.openstack.atlas.util.common.VerboseLogger;
import org.springframework.beans.factory.annotation.Required;

/**
 * Quartz execution that kicks off the Hadoop log-splitting job for a single
 * file hour and, once the run completes, schedules SplitLoadBalancerLogsJob
 * to read the split output back out of HDFS.
 */
public class MapReduceAggregateLogsJobExecution extends LoggableJobExecution implements QuartzExecutable {

    private static final Log LOG = LogFactory.getLog(MapReduceAggregateLogsJobExecution.class);
    private static final VerboseLogger vlog = new VerboseLogger(MapReduceAggregateLogsJobExecution.class);

    @Override
    public void execute(JobScheduler scheduler, QuartzSchedulerConfigs schedulerConfigs) throws ExecutionException {
        JobState state = createJob(JobName.MAPREDUCE, schedulerConfigs.getInputString());
        int hadoopErrorCode = -1;
        //tool.setupHadoopRun(schedulerConfigs);
        try {
            String dstJarPath = HadoopLogsConfigs.getHdfsJobsJarPath();

            // fileHour = 2013021517
            String fileHour = schedulerConfigs.getInputString();

            // inDir = /user/lbaas_prod/input/logs/2013021517
            List<String> inDirComps = new ArrayList<String>();
            inDirComps.add(HadoopLogsConfigs.getMapreduceInputPrefix());
            inDirComps.add(fileHour);
            String inDir = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(inDirComps));

            // outDir = /user/lbaas_prod/output/logs/lb_logs_split/2013021517
            List<String> outDirComps = new ArrayList<String>();
            outDirComps.add(HadoopLogsConfigs.getMapreduceOutputPrefix());
            outDirComps.add("lb_logs_split");
            outDirComps.add(fileHour);
            String outDir = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(outDirComps));

            // histDir = /user/lbaas_prod/output/logs/lb_logs_split/_logs/2013021517
            List<String> histDirComps = new ArrayList<String>();
            histDirComps.add(HadoopLogsConfigs.getMapreduceOutputPrefix());
            histDirComps.add("lb_logs_split");
            histDirComps.add("_logs");
            histDirComps.add(fileHour);
            String histDir = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(histDirComps));

            String numReducers = HadoopLogsConfigs.getNumReducers();
            String userName = HadoopLogsConfigs.getHdfsUserName();

            // Collect only the .lzo files from the input directory; anything else is skipped.
            List<String> lzoFiles = new ArrayList<String>();
            for (Path filePath : hdfsUtils.listPaths(inDir, false)) {
                String fileName = HdfsUtils.pathUriString(filePath);
                if (!fileName.endsWith(".lzo")) {
                    continue;
                }
                lzoFiles.add(fileName);
            }
            if (lzoFiles.isEmpty()) {
                throw new Exception("Cannot start hadoop job as there are no input files");
            }

            List<String> argsList = new ArrayList<String>();
            argsList.add(dstJarPath);
            argsList.add(outDir);
            argsList.add(histDir);
            argsList.add(fileHour);
            argsList.add(numReducers);
            argsList.add(userName);
            for (String lzoFileName : lzoFiles) {
                argsList.add(lzoFileName);
            }
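
            // Positional layout assumed above (HadoopLogSplitterJob is expected to parse the
            // list in the same order): jar path, output dir, history dir, file hour, reducer
            // count, HDFS user name, then one trailing entry per .lzo input file.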
            HadoopJob hadoopClient = new HadoopLogSplitterJob();
            hadoopClient.setConfiguration(HadoopLogsConfigs.getHadoopConfiguration());
            hadoopErrorCode = hadoopClient.run(argsList); // Actually runs the Hadoop job
            if (hadoopErrorCode < 0) {
                LOG.error(String.format("Hadoop run FAILED with error code %d", hadoopErrorCode));
            } else {
                vlog.log(String.format("Hadoop run SUCCEEDED with code %d", hadoopErrorCode));
            }

            // Note that the SplitLoadBalancerLogsJob scheduled below is the Quartz job that
            // reads the zip files from HDFS. It does not invoke Hadoop; the Hadoop job was
            // actually run above via hadoopClient.run(argsList).
            scheduler.scheduleJob(SplitLoadBalancerLogsJob.class, schedulerConfigs);
        } catch (Exception e) {
            LOG.error(e);
            failJob(state);
            throw new ExecutionException(e);
        }
        // The catch block rethrows, so this point is only reached when no exception occurred;
        // the final job state then reflects the Hadoop exit code.
        if (hadoopErrorCode < 0) {
            failJob(state);
        } else {
            finishJob(state);
        }
    }
}