package org.openstack.atlas.scheduler.execution;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.openstack.atlas.exception.ExecutionException;
import org.openstack.atlas.exception.SchedulingException;
import org.openstack.atlas.scheduler.JobScheduler;
import org.openstack.atlas.scheduler.MapReduceAggregateLogsJob;
import org.openstack.atlas.service.domain.entities.JobName;
import org.openstack.atlas.service.domain.entities.JobState;
import org.openstack.atlas.service.domain.entities.JobStateVal;
import org.openstack.atlas.tools.QuartzSchedulerConfigs;
import org.springframework.beans.factory.annotation.Required;

import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.openstack.atlas.config.HadoopLogsConfigs;
import org.openstack.atlas.util.staticutils.StaticFileUtils;
import org.openstack.atlas.util.staticutils.StaticStringUtils;
import org.openstack.atlas.util.common.VerboseLogger;

/**
 * Copies locally collected log files for a given hour onto HDFS (compressing
 * and indexing them as LZO), tracks a FILECOPY job state per file, deletes
 * local copies that finished, and then schedules the MapReduceAggregateLogsJob.
 */
public class FileMoveJobExecution extends LoggableJobExecution implements QuartzExecutable {

    private static final Log LOG = LogFactory.getLog(FileMoveJobExecution.class);
    private static final VerboseLogger vlog = new VerboseLogger(FileMoveJobExecution.class);

    protected String fileHour;
    protected JobScheduler jobScheduler;

    @Override
    public void execute(JobScheduler scheduler, QuartzSchedulerConfigs schedulerConfigs) throws ExecutionException {
        // Manual assignment is required here: there is a circular dependency because of how
        // Quartz initializes its scheduler factory. Currently a bean cannot depend on a bean
        // that lives in the schedulerFactoryBean#schedulerContextAsMap.
        jobScheduler = scheduler;

        fileHour = schedulerConfigs.getInputString();

        //hadoopTool.setupHadoopRun(runTime);
        //vlog.log(String.format("hadoopTool = %s", hadoopTool.toString()));
        try {
            List<String> localInputFiles = getLocalInputFiles(schedulerConfigs);
            vlog.log(String.format("calling createStateForMovingFiles(%s,%s)", fileHour, StaticStringUtils.collectionToString(localInputFiles, ",")));
            Map<String, JobState> fileNameStateMap = createStateForMovingFiles(fileHour, localInputFiles);
            for (String filename : localInputFiles) {
                if (filename.endsWith(".lzo")) {
                    schedulerConfigs.setLzoInput(true);
                }
            }
            vlog.log(String.format("about to move files onto DFS: schedulerConfigs = %s fastValues = %s", schedulerConfigs.toString(), StaticStringUtils.mapToString(fileNameStateMap)));
            moveFilesOntoDFS(fileNameStateMap);
            deleteIfFinished(fileNameStateMap);
            scheduleMapReduceAggregateLogsJob(schedulerConfigs);
        } catch (Exception e) {
            LOG.error(e);
            throw new ExecutionException(e);
        }
    }

    @Required
    private void scheduleMapReduceAggregateLogsJob(QuartzSchedulerConfigs schedulerConfigs) throws SchedulingException {
        jobScheduler.scheduleJob(MapReduceAggregateLogsJob.class, schedulerConfigs);
    }

    private Map<String, JobState> createStateForMovingFiles(String inputString, List<String> localInputFiles) {
        Map<String, JobState> fileNameStateMap = new HashMap<String, JobState>();
        for (String inputFile : localInputFiles) {
            String jobInput = inputString + ":" + inputFile;
            vlog.log(String.format("calling createJob(FILECOPY,%s);", jobInput));
            JobState state = createJob(JobName.FILECOPY, jobInput);
            fileNameStateMap.put(inputFile, state);
            vlog.log(String.format("calling fastValues.put(%s,%s)", inputFile, state.toString()));
        }
        return fileNameStateMap;
    }
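
    /**
     * Resolves the local input files for this run from the scheduler configs:
     * the single file-move input when present, otherwise the multi-path input list.
     */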
    private List<String> getLocalInputFiles(QuartzSchedulerConfigs schedulerConfigs) throws Exception {
        List<String> localInputFiles = new ArrayList<String>();
        if (schedulerConfigs.getFileMoveInput() != null) {
            localInputFiles.add(schedulerConfigs.getFileMoveInput());
        } else if (schedulerConfigs.getInputForMultiPathJobs() != null) {
            localInputFiles = schedulerConfigs.getInputForMultiPathJobs();
        } else {
            throw new Exception("Could not find any files for the copy. This job was fired without an indicator as to which files to run.");
        }
        return localInputFiles;
    }

    /**
     * Deletes local input files whose FILECOPY job reached the FINISHED state,
     * along with any matching copies in the backup directory.
     */
    private void deleteIfFinished(Map<String, JobState> fastValues) throws ExecutionException {
        for (Entry<String, JobState> inputEntry : fastValues.entrySet()) {
            if (inputEntry.getValue().getState() == JobStateVal.FINISHED) {
                new File(inputEntry.getKey()).delete();
                try {
                    String filename = inputEntry.getKey().substring(inputEntry.getKey().lastIndexOf("/") + 1);
                    // Drop the seconds, because it sometimes takes a few seconds to write the logs.
                    // Only delete files from the backup dir if they share the same name (sans the seconds).
                    String smallerFileName = filename.substring(0, filename.length() - 2);
                    File backupDir = new File(HadoopLogsConfigs.getBackupDir());
                    if (backupDir.exists()) {
                        String[] files = backupDir.list();
                        for (String file : files) {
                            if (file.contains(smallerFileName)) {
                                // This is a backup file from the same hour as the regular file, so delete it.
                                LOG.info("deleting file " + HadoopLogsConfigs.getBackupDir() + file);
                                new File(HadoopLogsConfigs.getBackupDir() + file).delete();
                            }
                        }
                    }
                } catch (Exception e) {
                    LOG.error("could not delete file from backup", e);
                }
            }
        }
    }
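
    /**
     * Copies each local input file into the per-hour MapReduce input directory on
     * HDFS, compressing and indexing it to LZO (or recompressing and indexing it if
     * it is already LZO), and marks the corresponding FILECOPY job state finished
     * on success or failed on error.
     */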
    private void moveFilesOntoDFS(Map<String, JobState> fileNameStateMap) throws ExecutionException {
        //HadoopConfiguration conf = hadoopTool.getConfiguration();
        //String inputDir = hadoopTool.getInputDirectory();
        List<String> inputDirList = new ArrayList<String>();
        inputDirList.add(HadoopLogsConfigs.getMapreduceInputPrefix());
        inputDirList.add(fileHour);
        String inputDir = StaticFileUtils.splitPathToString(inputDirList);

        int offset = 0;
        for (Entry<String, JobState> inputEntry : fileNameStateMap.entrySet()) {
            String inputFile = inputEntry.getKey();
            JobState state = inputEntry.getValue();
            try {
                LOG.info("putting file on the DFS at " + inputDir);
                hdfsUtils.mkDirs(inputDir, false);
                // The file names may collide, so prefix each placed file with a numeric
                // offset to keep the HDFS names unique.
                String placedFile = inputDir + "/" + offset + "-" + StaticFileUtils.stripDirectoryFromFileName(inputFile);
                vlog.log(String.format("copying file %s to HDFS path %s", inputFile, placedFile));
                //utils.placeFileOnDFS(inputFile, placedFile);

                // If it is already an LZO file, recompress and index it.
                if (placedFile.endsWith(".lzo")) {
                    vlog.log(String.format("file %s is an LZO file: recompressing and indexing", inputFile));
                    FSDataInputStream lzoIS = hdfsUtils.openHdfsInputFile(inputFile, true);
                    FSDataOutputStream lzoOS = hdfsUtils.openHdfsOutputFile(placedFile, false, true);
                    FSDataOutputStream idxOS = hdfsUtils.openHdfsOutputFile(placedFile + ".index", false, true);
                    hdfsUtils.recompressAndIndexLzoStream(lzoIS, lzoOS, idxOS, null);
                    idxOS.close();
                    lzoOS.close();
                    lzoIS.close();
                } else {
                    vlog.log(String.format("file %s is not compressed: calling the compression and indexing functions", inputFile));
                    FSDataInputStream uncompressedIS = hdfsUtils.openHdfsInputFile(inputFile, true);
                    FSDataOutputStream lzoOS = hdfsUtils.openHdfsOutputFile(placedFile + ".lzo", false, true);
                    FSDataOutputStream idxOS = hdfsUtils.openHdfsOutputFile(placedFile + ".lzo.index", false, true);
                    // Compress the stream to LZO and write its index to the .lzo.index output.
                    hdfsUtils.compressAndIndexStreamToLzo(uncompressedIS, lzoOS, idxOS, hdfsUtils.getBufferSize(), null);
                    idxOS.close();
                    lzoOS.close();
                    uncompressedIS.close();
                }
                offset++;
                finishJob(state);
            } catch (Exception e) {
                LOG.error(e);
                failJob(state);
                throw new ExecutionException(e);
            }
        }
    }
}