/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.hops.erasure_coding;

import io.hops.metadata.hdfs.entity.EncodingJob;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileRecordReader;
import org.apache.hadoop.mapred.TaskCompletionEvent;
import org.apache.hadoop.util.StringUtils;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;

/**
 * Submits and monitors the MapReduce job that encodes (raids) files.
 */
public class MapReduceEncoder {

  protected static final Log LOG = LogFactory.getLog(MapReduceEncoder.class);

  static final String NAME = "mapReduceEncoder";
  static final String JOB_DIR_LABEL = NAME + ".job.dir";
  static final String OP_LIST_LABEL = NAME + ".op.list";
  static final String OP_COUNT_LABEL = NAME + ".op.count";
  static final String SCHEDULER_OPTION_LABEL = NAME + ".scheduleroption";
  static final String IGNORE_FAILURES_OPTION_LABEL = NAME + ".ignore.failures";

  private static final long OP_PER_MAP = 100;
  private static final int MAX_MAPS_PER_NODE = 20;
  private static final SimpleDateFormat dateForm =
      new SimpleDateFormat("yyyy-MM-dd HH:mm");
  private static String jobName = NAME;

  public static enum Counter {
    FILES_SUCCEEDED,
    FILES_FAILED,
    PROCESSED_BLOCKS,
    PROCESSED_SIZE,
    META_BLOCKS,
    META_SIZE,
    SAVING_SIZE
  }

  protected JobConf jobconf;

  /**
   * Sets the configuration, wrapping it in a {@link JobConf} if necessary.
   */
  public void setConf(Configuration conf) {
    if (jobconf != conf) {
      jobconf = conf instanceof JobConf ? (JobConf) conf : new JobConf(conf);
    }
  }

  /**
   * Returns the job configuration.
   */
  public JobConf getConf() {
    return jobconf;
  }

  public MapReduceEncoder(Configuration conf) {
    setConf(createJobConf(conf));
  }
  /**
   * Recovery constructor for NameNode failures.
   *
   * The job start time is currently not recovered. This means that jobs
   * might need more time to time out after recovery.
   *
   * @param conf the configuration
   * @param job the job to recover
   */
  MapReduceEncoder(Configuration conf, EncodingJob job) throws IOException {
    jobconf = new JobConf(conf);
    jobconf.set(JOB_DIR_LABEL, job.getJobDir());
    JobID jobID = new JobID(job.getJtIdentifier(), job.getJobId());
    JobClient jobClient = new JobClient(jobconf);
    runningJob = jobClient.getJob(jobID);
    if (runningJob == null) {
      throw new IOException("Failed to recover job " + jobID);
    }
  }

  private static final Random RANDOM = new Random();

  protected static String getRandomId() {
    return Integer.toString(RANDOM.nextInt(Integer.MAX_VALUE), 36);
  }

  private JobClient jobClient;
  private RunningJob runningJob;
  private int jobEventCounter = 0;
  private String lastReport = null;
  private long startTime = System.currentTimeMillis();

  /**
   * Responsible for generating splits of the src file list.
   */
  static class DistRaidInputFormat implements InputFormat<Text, PolicyInfo> {

    /**
     * Do nothing.
     */
    public void validateInput(JobConf job) {
    }

    /**
     * Produce splits such that each is no greater than the quotient of the
     * total size and the number of splits requested.
     *
     * @param job the handle to the JobConf object
     * @param numSplits the number of splits requested
     */
    public InputSplit[] getSplits(JobConf job, int numSplits)
        throws IOException {
      final int srcCount = job.getInt(OP_COUNT_LABEL, -1);
      String srclist = job.get(OP_LIST_LABEL, "");
      if (srcCount < 0 || "".equals(srclist)) {
        throw new RuntimeException(
            "Invalid metadata: #files(" + srcCount + ") listuri(" + srclist +
                ")");
      }
      final int targetcount = srcCount / numSplits;
      Path srcs = new Path(srclist);
      FileSystem fs = srcs.getFileSystem(job);

      List<FileSplit> splits = new ArrayList<FileSplit>(numSplits);

      Text key = new Text();
      PolicyInfo value = new PolicyInfo();
      SequenceFile.Reader in = null;
      long prev = 0L;
      int count = 0; // number of src entries in the current split
      try {
        in = new SequenceFile.Reader(fs, srcs, job);
        while (in.next(key, value)) {
          long curr = in.getPosition();
          long delta = curr - prev;
          if (++count > targetcount) {
            count = 0;
            splits.add(new FileSplit(srcs, prev, delta, (String[]) null));
            prev = curr;
          }
        }
      } finally {
        if (in != null) { // the reader constructor may have thrown
          in.close();
        }
      }
      long remaining = fs.getFileStatus(srcs).getLen() - prev;
      if (remaining != 0) {
        splits.add(new FileSplit(srcs, prev, remaining, (String[]) null));
      }
      LOG.info("jobname= " + jobName + " numSplits=" + numSplits +
          ", splits.size()=" + splits.size());
      return splits.toArray(new FileSplit[splits.size()]);
    }

    /**
     * Returns a SequenceFile reader positioned on the given split.
     */
    public RecordReader<Text, PolicyInfo> getRecordReader(InputSplit split,
        JobConf job, Reporter reporter) throws IOException {
      return new SequenceFileRecordReader<Text, PolicyInfo>(job,
          (FileSplit) split);
    }
  }
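  // Worked example of the split arithmetic above (illustrative only, not
  // part of the original source): with 250 entries in the op list and
  // numSplits = 4, targetcount is 250 / 4 = 62, so a FileSplit is cut at
  // every 63rd record (the test is ++count > targetcount). That yields
  // three splits of 63 records each, and the trailing 61 records are
  // covered by the final "remaining" split. In this encoder, prepareJob()
  // always writes a single entry with OP_COUNT_LABEL = 1, so in practice
  // a single split is produced.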
  /**
   * The mapper for raiding files.
   */
  static class DistRaidMapper
      implements Mapper<Text, PolicyInfo, WritableComparable, Text> {
    private JobConf jobconf;
    private boolean ignoreFailures;

    private int failcount = 0;
    private int succeedcount = 0;

    private BaseEncodingManager.Statistics st = null;
    private Reporter reporter = null;

    private String getCountString() {
      return "Succeeded: " + succeedcount + " Failed: " + failcount;
    }

    /**
     * Reads the ignore-failures option and initializes the statistics.
     */
    public void configure(JobConf job) {
      this.jobconf = job;
      ignoreFailures = jobconf.getBoolean(IGNORE_FAILURES_OPTION_LABEL, true);
      st = new BaseEncodingManager.Statistics();
    }

    /**
     * Runs a file operation: raids the file and updates the job counters.
     */
    public void map(Text key, PolicyInfo policy,
        OutputCollector<WritableComparable, Text> out, Reporter reporter)
        throws IOException {
      this.reporter = reporter;
      try {
        Codec.initializeCodecs(jobconf);
        LOG.info("Raiding file=" + key.toString() + " policy=" + policy);
        Path p = new Path(key.toString());
        st.clear();
        BaseEncodingManager.doRaid(jobconf, policy, p, st, reporter);
        ++succeedcount;

        reporter.incrCounter(Counter.PROCESSED_BLOCKS, st.numProcessedBlocks);
        reporter.incrCounter(Counter.PROCESSED_SIZE, st.processedSize);
        reporter.incrCounter(Counter.META_BLOCKS, st.numMetaBlocks);
        reporter.incrCounter(Counter.META_SIZE, st.metaSize);
        reporter.incrCounter(Counter.SAVING_SIZE,
            st.processedSize - st.remainingSize - st.metaSize);
        reporter.incrCounter(Counter.FILES_SUCCEEDED, 1);
      } catch (IOException e) {
        ++failcount;
        reporter.incrCounter(Counter.FILES_FAILED, 1);

        String s = "FAIL: " + policy + ", " + key + " " +
            StringUtils.stringifyException(e);
        out.collect(null, new Text(s));
        LOG.info(s);
      } finally {
        reporter.setStatus(getCountString());
      }
    }

    /**
     * Fails the task if any file failed to raid, unless failures are ignored.
     */
    public void close() throws IOException {
      if (failcount == 0 || ignoreFailures) {
        return;
      }
      throw new IOException(getCountString());
    }
  }

  /**
   * Creates a new job configuration based on the configuration passed in.
   */
  private static JobConf createJobConf(Configuration conf) {
    JobConf jobconf = new JobConf(conf, MapReduceEncoder.class);
    jobName = NAME + " " + dateForm.format(new Date(BaseEncodingManager.now()));
    jobconf.setUser(BaseEncodingManager.JOBUSER);
    jobconf.setJobName(jobName);
    jobconf.setMapSpeculativeExecution(false);
    RaidUtils.parseAndSetOptions(jobconf, SCHEDULER_OPTION_LABEL);
    jobconf.setJarByClass(MapReduceEncoder.class);
    jobconf.setInputFormat(DistRaidInputFormat.class);
    jobconf.setOutputKeyClass(Text.class);
    jobconf.setOutputValueClass(Text.class);
    jobconf.setMapperClass(DistRaidMapper.class);
    jobconf.setNumReduceTasks(0);
    return jobconf;
  }

  /**
   * Calculates how many map tasks to run. Currently unused: prepareJob()
   * always schedules a single map task.
   */
  private static int getMapCount(int srcCount) {
    int numMaps = (int) (srcCount / OP_PER_MAP);
    return Math.max(numMaps, MAX_MAPS_PER_NODE);
  }

  /**
   * Invokes a map-reduce job to do the raiding in parallel.
   *
   * @return true if the job was started, false otherwise
   */
  public boolean startDistRaid(PolicyInfo info) throws IOException {
    if (prepareJob(info)) {
      this.jobClient = new JobClient(jobconf);
      this.runningJob = this.jobClient.submitJob(jobconf);
      LOG.info("Job Started: " + runningJob.getID());
      this.startTime = System.currentTimeMillis();
      return true;
    }
    return false;
  }
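  // Illustrative driver loop (a sketch, not part of the original source;
  // "info" and POLL_INTERVAL_MS are hypothetical):
  //
  //   MapReduceEncoder encoder = new MapReduceEncoder(conf);
  //   if (encoder.startDistRaid(info)) {
  //     while (!encoder.checkComplete()) { // checkComplete() is defined below
  //       Thread.sleep(POLL_INTERVAL_MS);
  //     }
  //     boolean ok = encoder.successful();
  //   }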
  /**
   * Checks if the map-reduce job has completed.
   *
   * @return true if the job completed, false otherwise
   * @throws java.io.IOException
   */
  public boolean checkComplete() throws IOException {
    JobID jobID = runningJob.getID();
    if (runningJob.isComplete()) {
      // delete job directory
      final String jobdir = jobconf.get(JOB_DIR_LABEL);
      if (jobdir != null) {
        final Path jobpath = new Path(jobdir);
        jobpath.getFileSystem(jobconf).delete(jobpath, true);
      }
      if (runningJob.isSuccessful()) {
        LOG.info("Job Complete(Succeeded): " + jobID);
      } else {
        LOG.info("Job Complete(Failed): " + jobID);
      }
      return true;
    } else {
      String report =
          (" job " + jobID + " map " +
              StringUtils.formatPercent(runningJob.mapProgress(), 0) +
              " reduce " +
              StringUtils.formatPercent(runningJob.reduceProgress(), 0));
      if (!report.equals(lastReport)) {
        LOG.info(report);
        lastReport = report;
      }
      TaskCompletionEvent[] events =
          runningJob.getTaskCompletionEvents(jobEventCounter);
      jobEventCounter += events.length;
      for (TaskCompletionEvent event : events) {
        if (event.getTaskStatus() == TaskCompletionEvent.Status.FAILED) {
          LOG.info(" Job " + jobID + " " + event.toString());
        }
      }
      return false;
    }
  }

  public void killJob() throws IOException {
    runningJob.killJob();
  }

  public boolean successful() throws IOException {
    return runningJob.isSuccessful();
  }

  /**
   * Sets up the input file which holds the list of input files.
   *
   * @return true if the job was prepared successfully
   * @throws java.io.IOException
   */
  private boolean prepareJob(PolicyInfo info) throws IOException {
    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobconf);
    Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);

    LOG.info(JOB_DIR_LABEL + "=" + jobdir);
    jobconf.set(JOB_DIR_LABEL, jobdir.toString());
    Path log = new Path(jobdir, "_logs");

    FileOutputFormat.setOutputPath(jobconf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobdir.getFileSystem(jobconf);
    Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
    jobconf.set(OP_LIST_LABEL, opList.toString());
    SequenceFile.Writer opWriter = null;

    try {
      opWriter = SequenceFile
          .createWriter(fs, jobconf, opList, Text.class, PolicyInfo.class,
              SequenceFile.CompressionType.NONE);
      opWriter.append(new Text(info.getSrcPath().toString()), info);
    } finally {
      if (opWriter != null) {
        opWriter.close();
      }
    }

    jobconf.setInt(OP_COUNT_LABEL, 1);
    jobconf.setNumMapTasks(1);
    LOG.info(
        "jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
    return true;
  }

  public long getStartTime() {
    return this.startTime;
  }

  public Counters getCounters() throws IOException {
    return this.runningJob.getCounters();
  }

  public JobID getJobID() {
    return this.runningJob.getID();
  }
}
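// Note on the operation list written by prepareJob() (inferred from the
// createWriter() call above, stated here as a sketch rather than a spec):
// the file "_" + OP_LIST_LABEL under the job directory is an uncompressed
// SequenceFile of <Text srcPath, PolicyInfo policy> records, and
// DistRaidInputFormat.getSplits() reads this same file back to cut splits.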