package org.openstack.atlas.scheduler.execution;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.openstack.atlas.exception.ExecutionException;
import org.openstack.atlas.exception.SchedulingException;
import org.openstack.atlas.scheduler.JobScheduler;
import org.openstack.atlas.scheduler.MapReduceAggregateLogsJob;
import org.openstack.atlas.service.domain.entities.JobName;
import org.openstack.atlas.service.domain.entities.JobState;
import org.openstack.atlas.service.domain.entities.JobStateVal;
import org.openstack.atlas.tools.QuartzSchedulerConfigs;
import org.springframework.beans.factory.annotation.Required;

import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.openstack.atlas.config.HadoopLogsConfigs;
import org.openstack.atlas.util.staticutils.StaticFileUtils;
import org.openstack.atlas.util.staticutils.StaticStringUtils;
import org.openstack.atlas.util.common.VerboseLogger;

/**
 * Copies locally collected log files for a given hour onto HDFS (compressing
 * and indexing them as LZO), tracks a FILECOPY job state per file, deletes
 * local copies that finished, and then schedules the MapReduceAggregateLogsJob.
 */
public class FileMoveJobExecution extends LoggableJobExecution implements QuartzExecutable {

    private static final Log LOG = LogFactory.getLog(FileMoveJobExecution.class);
    private static final VerboseLogger vlog = new VerboseLogger(FileMoveJobExecution.class);

    protected String fileHour;
    protected JobScheduler jobScheduler;

    @Override
    public void execute(JobScheduler scheduler, QuartzSchedulerConfigs schedulerConfigs) throws ExecutionException {
        // Manual assignment is required here: there is a circular dependency because of how
        // Quartz initializes its scheduler factory. Currently a bean cannot depend on a bean
        // that lives in the schedulerFactoryBean#schedulerContextAsMap.
        jobScheduler = scheduler;

        fileHour = schedulerConfigs.getInputString();

        //hadoopTool.setupHadoopRun(runTime);
        //vlog.log(String.format("hadoopTool = %s", hadoopTool.toString()));
        try {
            List<String> localInputFiles = getLocalInputFiles(schedulerConfigs);
            vlog.log(String.format("calling createStateForMovingFiles(%s,%s)", fileHour, StaticStringUtils.collectionToString(localInputFiles, ",")));
            Map<String, JobState> fileNameStateMap = createStateForMovingFiles(fileHour, localInputFiles);
            for (String filename : localInputFiles) {
                if (filename.endsWith(".lzo")) {
                    schedulerConfigs.setLzoInput(true);
                }
            }
            vlog.log(String.format("about to move files onto DFS: schedulerConfigs = %s fastValues = %s", schedulerConfigs.toString(), StaticStringUtils.mapToString(fileNameStateMap)));
            moveFilesOntoDFS(fileNameStateMap);
            deleteIfFinished(fileNameStateMap);
            scheduleMapReduceAggregateLogsJob(schedulerConfigs);
        } catch (Exception e) {
            LOG.error(e);
            throw new ExecutionException(e);
        }
    }

    @Required
    private void scheduleMapReduceAggregateLogsJob(QuartzSchedulerConfigs schedulerConfigs) throws SchedulingException {
        jobScheduler.scheduleJob(MapReduceAggregateLogsJob.class, schedulerConfigs);
    }

    private Map<String, JobState> createStateForMovingFiles(String inputString, List<String> localInputFiles) {
        Map<String, JobState> fileNameStateMap = new HashMap<String, JobState>();
        for (String inputFile : localInputFiles) {
            String jobInput = inputString + ":" + inputFile;
            vlog.log(String.format("calling createJob(FILECOPY,%s);", jobInput));
            JobState state = createJob(JobName.FILECOPY, jobInput);
            fileNameStateMap.put(inputFile, state);
            vlog.log(String.format("calling fastValues.put(%s,%s)", inputFile, state.toString()));
        }
        return fileNameStateMap;
    }
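
    /**
     * Resolves the local input files for this run from the scheduler configs:
     * the single file-move input when present, otherwise the multi-path input list.
     */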
    private List<String> getLocalInputFiles(QuartzSchedulerConfigs schedulerConfigs) throws Exception {
        List<String> localInputFiles = new ArrayList<String>();
        if (schedulerConfigs.getFileMoveInput() != null) {
            localInputFiles.add(schedulerConfigs.getFileMoveInput());
        } else if (schedulerConfigs.getInputForMultiPathJobs() != null) {
            localInputFiles = schedulerConfigs.getInputForMultiPathJobs();
        } else {
            throw new Exception("Could not find any files for the copy. This job was fired without an indicator as to which files to run.");
        }
        return localInputFiles;
    }

    /**
     * Deletes local input files whose FILECOPY job reached the FINISHED state,
     * along with any matching copies in the backup directory.
     */
    private void deleteIfFinished(Map<String, JobState> fastValues) throws ExecutionException {
        for (Entry<String, JobState> inputEntry : fastValues.entrySet()) {
            if (inputEntry.getValue().getState() == JobStateVal.FINISHED) {
                new File(inputEntry.getKey()).delete();
                try {
                    String filename = inputEntry.getKey().substring(inputEntry.getKey().lastIndexOf("/") + 1);
                    // Drop the seconds, because it sometimes takes a few seconds to write the logs.
                    // Only delete files from the backup dir if they share the same name (sans the seconds).
                    String smallerFileName = filename.substring(0, filename.length() - 2);
                    File backupDir = new File(HadoopLogsConfigs.getBackupDir());
                    if (backupDir.exists()) {
                        String[] files = backupDir.list();
                        for (String file : files) {
                            if (file.contains(smallerFileName)) {
                                // This is a backup file from the same hour as the regular file, so delete it.
                                LOG.info("deleting file " + HadoopLogsConfigs.getBackupDir() + file);
                                new File(HadoopLogsConfigs.getBackupDir() + file).delete();
                            }
                        }
                    }
                } catch (Exception e) {
                    LOG.error("could not delete file from backup", e);
                }
            }
        }
    }
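
    /**
     * Copies each local input file into the per-hour MapReduce input directory on
     * HDFS, compressing and indexing it to LZO (or recompressing and indexing it if
     * it is already LZO), and marks the corresponding FILECOPY job state finished
     * on success or failed on error.
     */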
    private void moveFilesOntoDFS(Map<String, JobState> fileNameStateMap) throws ExecutionException {
        //HadoopConfiguration conf = hadoopTool.getConfiguration();
        //String inputDir = hadoopTool.getInputDirectory();
        List<String> inputDirList = new ArrayList<String>();
        inputDirList.add(HadoopLogsConfigs.getMapreduceInputPrefix());
        inputDirList.add(fileHour);
        String inputDir = StaticFileUtils.splitPathToString(inputDirList);

        int offset = 0;
        for (Entry<String, JobState> inputEntry : fileNameStateMap.entrySet()) {
            String inputFile = inputEntry.getKey();
            JobState state = inputEntry.getValue();
            try {
                LOG.info("putting file on the DFS at " + inputDir);
                hdfsUtils.mkDirs(inputDir, false);
                // The file names may collide, so prefix each placed file with a numeric
                // offset to keep the HDFS names unique.
                String placedFile = inputDir + "/" + offset + "-" + StaticFileUtils.stripDirectoryFromFileName(inputFile);
                vlog.log(String.format("copying file %s to HDFS path %s", inputFile, placedFile));
                //utils.placeFileOnDFS(inputFile, placedFile);

                // If it is already an LZO file, recompress and index it.
                if (placedFile.endsWith(".lzo")) {
                    vlog.log(String.format("file %s is an LZO file: recompressing and indexing", inputFile));
                    FSDataInputStream lzoIS = hdfsUtils.openHdfsInputFile(inputFile, true);
                    FSDataOutputStream lzoOS = hdfsUtils.openHdfsOutputFile(placedFile, false, true);
                    FSDataOutputStream idxOS = hdfsUtils.openHdfsOutputFile(placedFile + ".index", false, true);
                    hdfsUtils.recompressAndIndexLzoStream(lzoIS, lzoOS, idxOS, null);
                    idxOS.close();
                    lzoOS.close();
                    lzoIS.close();
                } else {
                    vlog.log(String.format("file %s is not compressed: calling the compression and indexing functions", inputFile));
                    FSDataInputStream uncompressedIS = hdfsUtils.openHdfsInputFile(inputFile, true);
                    FSDataOutputStream lzoOS = hdfsUtils.openHdfsOutputFile(placedFile + ".lzo", false, true);
                    FSDataOutputStream idxOS = hdfsUtils.openHdfsOutputFile(placedFile + ".lzo.index", false, true);
                    // Compress the stream to LZO and write its index to the .lzo.index output.
                    hdfsUtils.compressAndIndexStreamToLzo(uncompressedIS, lzoOS, idxOS, hdfsUtils.getBufferSize(), null);
                    idxOS.close();
                    lzoOS.close();
                    uncompressedIS.close();
                }
                offset++;
                finishJob(state);
            } catch (Exception e) {
                LOG.error(e);
                failJob(state);
                throw new ExecutionException(e);
            }
        }
    }
}