/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.raid;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.EOFException;
import java.io.FileDescriptor;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.SequenceFileRecordReader;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.StringUtils;

/**
 * Utility to check the parity data against source data.
 * It works by computing the parity of two 1KB chunks in each stripe and
 * comparing the result against the actual parity data.
 * The input to the utility is a file with the list of files to check.
 * DataFsck kicks off multiple map-only jobs to process the files,
 * waits for the jobs to finish, then prints the results to stdout.
 *
 * Each file is categorized into one of the following states:
 * DATA_MATCHING_PARITY     - file and parity match.
 * DATA_NOT_MATCHING_PARITY - file and parity do not match.
 * DATA_UNREADABLE          - there was an IOException while reading the file.
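 *
 * Example input file (hypothetical paths, one file per line):
 *   /user/foo/data/part-00000
 *   /user/foo/data/part-00001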
 *
 * Options:
 * -summary      : print only the count of files in each state instead of a full report.
 * -filesPerJob N : controls the number of files processed per job.
 *
 * Important options passed through key-value pairs:
 *  -Dmapred.map.tasks=N
 *      start the map-only job with N map tasks.
 *
 * Usage:
 * java DataFsck [options] [-summary] [-filesPerJob N] /path/to/input/file
 * The input file should contain the list of files to check. If the input file is "-",
 * the list of files is read from stdin.
 */
public class DataFsck extends Configured implements Tool {
  public static final int CHECKSIZE = 1024;
  private static final SimpleDateFormat dateForm =
      new SimpleDateFormat("yyyy-MM-dd HH:mm");
  protected static final Log LOG = LogFactory.getLog(DataFsck.class);
  static final String NAME = "datafsck";
  static final String JOB_DIR_LABEL = NAME + ".job.dir";
  static final String OP_LIST_LABEL = NAME + ".op.list";
  static final String OP_COUNT_LABEL = NAME + ".op.count";
  private static final int SYNC_FILE_MAX = 10;
  static final short OP_LIST_REPLICATION = 10; // replication factor of control file
  Configuration conf;

  public enum State {
    DATA_MATCHING_PARITY,
    DATA_NOT_MATCHING_PARITY,
    DATA_UNREADABLE
  }

  static void printUsage() {
    System.err.println(
        "java DataFsck [options] [-summary] [-filesPerJob N] /path/to/input/file\n" +
        "Utility to check the parity data against source data.\n" +
        "The input to the utility is a file with the list of files to check.\n");
    ToolRunner.printGenericCommandUsage(System.err);
  }

  public static void main(String[] args) throws Exception {
    org.apache.hadoop.hdfs.DnsMonitorSecurityManager.setTheManager();
    DataFsck dataFsck = new DataFsck(new Configuration());
    int res = ToolRunner.run(dataFsck, args);
    System.exit(res);
  }

  private JobConf createJobConf() {
    JobConf jobConf = new JobConf(getConf());
    String jobName = NAME + " " + dateForm.format(new Date(System.currentTimeMillis()));
    jobConf.setJobName(jobName);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setJarByClass(DataFsck.class);
    jobConf.setInputFormat(DataFsckInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setMapperClass(DataFsckMapper.class);
    jobConf.setNumReduceTasks(0);
    return jobConf;
  }

  private static class JobContext {
    public RunningJob runningJob;
    public JobConf jobConf;
    public JobContext(RunningJob runningJob, JobConf jobConf) {
      this.runningJob = runningJob;
      this.jobConf = jobConf;
    }
  }

  List<JobContext> submitJobs(BufferedReader inputReader, int filesPerJob)
      throws IOException {
    boolean done = false;
    JobClient jClient = new JobClient(createJobConf());
    List<JobContext> submitted = new ArrayList<JobContext>();
    Random rand = new Random();
    do {
      JobConf jobConf = createJobConf();
      final String randomId = Integer.toString(rand.nextInt(Integer.MAX_VALUE), 36);
      Path jobDir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
      jobConf.set(JOB_DIR_LABEL, jobDir.toString());
      Path log = new Path(jobDir, "_logs");
      FileOutputFormat.setOutputPath(jobConf, log);
      LOG.info("log=" + log);

      // create operation list
      FileSystem fs = jobDir.getFileSystem(jobConf);
      Path opList = new Path(jobDir, "_" + OP_LIST_LABEL);
      jobConf.set(OP_LIST_LABEL, opList.toString());
      int opCount = 0, synCount = 0;
      SequenceFile.Writer opWriter = null;
      try {
        opWriter = SequenceFile.createWriter(fs, jobConf, opList, Text.class, Text.class,
            SequenceFile.CompressionType.NONE);
        String f = null;
        do {
          f = inputReader.readLine();
          if (f == null) {
            done = true;
            break;
          }
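          // Each input line names one file to check; it is written to the
          // control file as both key and value, with periodic sync points so
          // the SequenceFile can later be split across map tasks.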
          opWriter.append(new Text(f), new Text(f));
          opCount++;
          if (++synCount > SYNC_FILE_MAX) {
            opWriter.sync();
            synCount = 0;
          }
        } while (opCount < filesPerJob);
      } finally {
        if (opWriter != null) {
          opWriter.close();
        }
        fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
      }
      jobConf.setInt(OP_COUNT_LABEL, opCount);
      RunningJob rJob = jClient.submitJob(jobConf);
      JobContext ctx = new JobContext(rJob, jobConf);
      submitted.add(ctx);
    } while (!done);
    return submitted;
  }

  void waitForJobs(List<JobContext> submitted) throws IOException, InterruptedException {
    JobClient jClient = new JobClient(createJobConf());
    List<JobContext> running = new ArrayList<JobContext>(submitted);
    while (!running.isEmpty()) {
      Thread.sleep(60000);
      LOG.info("Checking " + running.size() + " running jobs");
      for (Iterator<JobContext> it = running.iterator(); it.hasNext(); ) {
        Thread.sleep(2000);
        JobContext ctx = it.next();
        try {
          if (ctx.runningJob.isComplete()) {
            it.remove();
            LOG.info("Job " + ctx.runningJob.getID() + " complete. URL: " +
                ctx.runningJob.getTrackingURL());
          } else {
            LOG.info("Job " + ctx.runningJob.getID() + " still running. URL: " +
                ctx.runningJob.getTrackingURL());
          }
        } catch (IOException e) {
          LOG.warn("Error while checking job " + ctx.runningJob.getID() +
              ", killing it ", e);
          it.remove();
          try {
            ctx.runningJob.killJob();
          } catch (IOException e2) {
          }
        }
      }
    }
  }

  List<SequenceFile.Reader> getOutputs(List<JobContext> submitted) throws IOException {
    List<SequenceFile.Reader> outputs = new ArrayList<SequenceFile.Reader>();
    for (JobContext ctx: submitted) {
      SequenceFile.Reader[] jobOutputs = SequenceFileOutputFormat.getReaders(
          getConf(), SequenceFileOutputFormat.getOutputPath(ctx.jobConf));
      for (SequenceFile.Reader r: jobOutputs) {
        outputs.add(r);
      }
    }
    return outputs;
  }

  void printResult(List<SequenceFile.Reader> outputs, boolean summary) throws IOException {
    // Start reading output of job.
    Text key = new Text();
    Text val = new Text();
    Map<State, Integer> stateToCountMap = new HashMap<State, Integer>();
    for (State s: State.values()) {
      stateToCountMap.put(s, 0);
    }
    for (SequenceFile.Reader r: outputs) {
      while (r.next(key, val)) {
        State s = State.valueOf(val.toString());
        stateToCountMap.put(s, 1 + stateToCountMap.get(s));
        if (summary) {
          System.err.println(key + " " + val);
        } else {
          System.out.println(key + " " + val);
        }
      }
    }
    // Print stats.
    for (State s: State.values()) {
      String stat = s + " " + stateToCountMap.get(s);
      if (summary) {
        System.out.println(stat);
      } else {
        System.err.println(stat);
      }
    }
  }

  @Override
  public int run(String[] args) throws Exception {
    String inputFile = null;
    boolean summary = false;
    int filesPerJob = Integer.MAX_VALUE;
    for (int i = 0; i < args.length; i++) {
      String arg = args[i];
      if (arg.equalsIgnoreCase("-summary")) {
        summary = true;
      } else if (arg.equalsIgnoreCase("-h") || arg.equalsIgnoreCase("--help")) {
        printUsage();
        return -1;
      } else if (arg.equalsIgnoreCase("-filesPerJob")) {
        i++;
        if (i == args.length) {
          printUsage();
          return -1;
        }
        filesPerJob = Integer.parseInt(args[i]);
      } else {
        inputFile = arg;
      }
    }
    if (inputFile == null) {
      printUsage();
      return -1;
    }
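    // The file list comes from stdin when the argument is "-", otherwise from
    // the named local file; each line is handed to submitJobs() above.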
    InputStream in = inputFile.equals("-") ?
        System.in : new FileInputStream(inputFile);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    List<JobContext> submitted = submitJobs(reader, filesPerJob);
    waitForJobs(submitted);
    List<SequenceFile.Reader> outputs = getOutputs(submitted);
    printResult(outputs, summary);
    return 0;
  }

  public DataFsck(Configuration conf) {
    super(conf);
    getConf().set(
        "fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
  }

  static class DataFsckInputFormat implements InputFormat<Text, Text> {
    public void validateInput(JobConf job) {
    }

    public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
      final int srcCount = job.getInt(OP_COUNT_LABEL, -1);
      final int targetcount = srcCount / numSplits;
      String srclist = job.get(OP_LIST_LABEL, "");
      if (srcCount < 0 || "".equals(srclist)) {
        throw new RuntimeException("Invalid metadata: #files(" + srcCount +
            ") listuri(" + srclist + ")");
      }
      Path srcs = new Path(srclist);
      FileSystem fs = srcs.getFileSystem(job);
      List<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
      Text key = new Text();
      Text value = new Text();
      SequenceFile.Reader in = null;
      long prev = 0L;
      int count = 0; // count src
      try {
        for (in = new SequenceFile.Reader(fs, srcs, job); in.next(key, value); ) {
          long curr = in.getPosition();
          long delta = curr - prev;
          if (++count > targetcount) {
            count = 0;
            splits.add(new FileSplit(srcs, prev, delta, (String[]) null));
            prev = curr;
          }
        }
      } finally {
        if (in != null) {
          in.close();
        }
      }
      long remaining = fs.getFileStatus(srcs).getLen() - prev;
      if (remaining != 0) {
        splits.add(new FileSplit(srcs, prev, remaining, (String[]) null));
      }
      return splits.toArray(new FileSplit[splits.size()]);
    }

    /** {@inheritDoc} */
    public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
      return new SequenceFileRecordReader<Text, Text>(job, (FileSplit) split);
    }
  }

  static class DataFsckMapper implements Mapper<Text, Text, Text, Text> {
    private JobConf conf;
    private Reporter reporter = null;
    private int processedCount;

    /** {@inheritDoc} */
    public void configure(JobConf job) {
      this.conf = job;
    }

    /** {@inheritDoc} */
    public void close() throws IOException {
    }

    private String getCountString() {
      return "Processed " + processedCount + " files";
    }

    /** Run a FileOperation */
    public void map(Text key, Text val,
        OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
      String file = key.toString();
      State state = null;
      try {
        Path path = new Path(file);
        FileSystem fs = path.getFileSystem(conf);
        FileStatus stat = fs.getFileStatus(path);
        if (stat == null) {
          state = State.DATA_UNREADABLE;
        } else {
          boolean isCorrupt = checkAgainstParity(fs, stat);
          state = isCorrupt ? State.DATA_NOT_MATCHING_PARITY : State.DATA_MATCHING_PARITY;
        }
      } catch (IOException e) {
        LOG.error("Marking file as unreadable: " + file, e);
        state = State.DATA_UNREADABLE;
      }
      out.collect(key, new Text(state.toString()));
    }

    // Is corrupt?
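    // Sketch of the check implemented below: for each stripe, recompute 1KB of
    // parity at up to two offsets (the last KB of a full block, and the KB
    // ending at the shortest block's length) and compare it against the same
    // window of the stored parity file. Any mismatch reports the file as not
    // matching its parity.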
    boolean checkAgainstParity(FileSystem fs, FileStatus stat) throws IOException {
      Codec code = null;
      ParityFilePair ppair = null;
      for (Codec codec: Codec.getCodecs()) {
        ppair = ParityFilePair.getParityFile(codec, stat, conf);
        if (ppair != null) {
          code = codec;
          break;
        }
      }
      if (code == null) {
        LOG.info("No parity for " + stat.getPath());
        return false;
      }
      int parityLength = code.parityLength;
      LOG.info("Checking file parity " + stat.getPath() +
          " against parity " + ppair.getPath());
      final long blockSize = stat.getBlockSize();
      int stripeLength = code.stripeLength;
      long stripeBytes = stripeLength * blockSize;
      int numStripes = (int) Math.ceil(stat.getLen() * 1.0 / stripeBytes);
      // Look at all stripes.
      for (int stripeIndex = 0; stripeIndex < numStripes; stripeIndex++) {
        for (boolean lastKB : new boolean[]{true, false}) {
          long shortest = shortestBlockLength(stripeIndex, stat, stripeLength);
          // Optimization - if all blocks are the same size, one check is enough.
          if (!lastKB) {
            if (shortest == blockSize) {
              continue;
            }
          }
          long lastOffsetInBlock = lastKB ? blockSize : shortest;
          if (lastOffsetInBlock < CHECKSIZE) {
            lastOffsetInBlock = CHECKSIZE;
          }
          byte[][] stripeBufs = new byte[stripeLength][];
          for (int i = 0; i < stripeLength; i++) {
            stripeBufs[i] = new byte[CHECKSIZE];
          }
          byte[] parityBuf = new byte[CHECKSIZE];
          byte[] actualParityBuf = new byte[CHECKSIZE];
          // Read CHECKSIZE bytes from all blocks in a stripe and parity.
          computeParity(conf, fs, stat, code, stripeIndex, stripeBufs, parityBuf,
              lastOffsetInBlock);
          readActualParity(ppair, actualParityBuf, stripeIndex, parityLength,
              blockSize, lastOffsetInBlock);
          if (!Arrays.equals(parityBuf, actualParityBuf)) {
            return true;
          }
        }
      }
      // All stripes are good.
      LOG.info("Checking file parity " + stat.getPath() +
          " against parity " + ppair.getPath() + " was OK");
      return false;
    }

    long shortestBlockLength(int stripeIndex, FileStatus stat, int stripeLength) {
      final long blockSize = stat.getBlockSize();
      final long stripeBytes = stripeLength * blockSize;
      int numStripes = (int) Math.ceil(stat.getLen() * 1.0 / stripeBytes);
      if (stripeIndex == numStripes - 1) {
        long remainder = stat.getLen() % blockSize;
        return (remainder == 0) ? blockSize : remainder;
      } else {
        return blockSize;
      }
    }

    void computeParity(Configuration conf, FileSystem fs, FileStatus stat, Codec code,
        final int stripeIndex, byte[][] stripeBufs, byte[] parityBuf,
        long lastOffsetInBlock) throws IOException {
      final long blockSize = stat.getBlockSize();
      final long stripeBytes = stripeBufs.length * blockSize;
      final long stripeStartOffset = stripeIndex * stripeBytes;
      final long stripeEndOffset = stripeStartOffset + stripeBytes;
      LOG.info("Checking parity " + stat.getPath() +
          " with last offset " + lastOffsetInBlock);
      FSDataInputStream[] inputs = new FSDataInputStream[stripeBufs.length];
      try {
        int idx = 0;
        // Loop through the blocks in the stripe
        for (long blockStart = stripeStartOffset;
             blockStart < stripeEndOffset;
             blockStart += blockSize) {
          // First zero out the buffer.
          Arrays.fill(stripeBufs[idx], (byte) 0);
          if (blockStart < stat.getLen()) {
            // Block is real, read some bytes from it.
            long readEndOffset = blockStart + lastOffsetInBlock; // readEndOffset > blockStart.
            long readStartOffset = readEndOffset - CHECKSIZE; // readEndOffset > readStartOffset.
            // readStartOffset = blockStart + lastOffsetInBlock - CHECKSIZE, readStartOffset >= blockStartOffset
            // blockStartOffset <= readStartOffset < readEndOffset
            // Check for the case that the readEndOffset is beyond eof.
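            // Example with hypothetical numbers: if blockSize is 64MB and
            // lastOffsetInBlock equals blockSize, the window read below is the
            // final 1KB of the block; if the block ends before the window does,
            // only the bytes up to EOF are read and the rest stays zero.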
            long blockEndOffset = Math.min((blockStart + blockSize), stat.getLen());
            if (readStartOffset < blockEndOffset) {
              // blockStart <= readStartOffset < blockEndOffset
              inputs[idx] = fs.open(stat.getPath());
              inputs[idx].seek(readStartOffset);
              int bytesToRead = (int) Math.min(CHECKSIZE, blockEndOffset - readStartOffset);
              IOUtils.readFully(inputs[idx], stripeBufs[idx], 0, bytesToRead);
              // Rest is zeros
            }
          }
          idx++;
        }
        if (code.id.equals("xor")) {
          for (int i = 0; i < CHECKSIZE; i++) {
            parityBuf[i] = 0;
            // For XOR, each byte is XOR of all the stripe bytes.
            for (int j = 0; j < stripeBufs.length; j++) {
              parityBuf[i] = (byte) (parityBuf[i] ^ stripeBufs[j][i]);
            }
          }
        } else if (code.id.equals("rs")) {
          int parityLength = code.parityLength;
          int[] msgbuf = new int[stripeBufs.length];
          int[] codebuf = new int[parityLength];
          ErasureCode rsCode = new ReedSolomonCode(stripeBufs.length, parityLength);
          for (int i = 0; i < CHECKSIZE; i++) {
            for (int j = 0; j < stripeBufs.length; j++) {
              msgbuf[j] = stripeBufs[j][i] & 0x000000FF;
            }
            rsCode.encode(msgbuf, codebuf);
            // Take the first parity byte.
            parityBuf[i] = (byte) codebuf[0];
          }
        }
      } finally {
        for (InputStream stm: inputs) {
          if (stm != null) {
            stm.close();
          }
        }
      }
    }

    void readActualParity(ParityFilePair ppair, byte[] actualParityBuf,
        int stripeIndex, int parityLength, long blockSize,
        long lastOffsetInBlock) throws IOException {
      FSDataInputStream parityIn = ppair.getFileSystem().open(ppair.getPath());
      try {
        // Seek to the checked 1KB window inside the first parity block of this stripe.
        parityIn.seek(stripeIndex * parityLength * blockSize + lastOffsetInBlock - CHECKSIZE);
        // Parity blocks are always full, so we should be able to read CHECKSIZE bytes.
        IOUtils.readFully(parityIn, actualParityBuf, 0, CHECKSIZE);
      } finally {
        parityIn.close();
      }
    }
  }
}