FastFileCheck.java example

Explorer
hadoop-20-master
- src
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid.tools;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.UUID;
import java.util.zip.CRC32;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.raid.Codec;
import org.apache.hadoop.raid.Decoder;
import org.apache.hadoop.raid.LogUtils;
import org.apache.hadoop.raid.Decoder.DecoderInputStream;
import org.apache.hadoop.raid.LogUtils.LOGRESULTS;
import org.apache.hadoop.raid.ParallelStreamReader;
import org.apache.hadoop.raid.ParallelStreamReader.ReadResult;
import org.apache.hadoop.raid.ParityFilePair;
import org.apache.hadoop.raid.RaidUtils;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.SequenceFileRecordReader;
import org.apache.hadoop.mapred.Utils;

public class FastFileCheck {
  final static Log LOG = LogFactory.getLog(
      "org.apache.hadoop.raid.tools.FastFileCheck");
  
  private static final SimpleDateFormat dateForm = new SimpleDateFormat("yyyy-MM-dd-HH-mm");
  private static int DEFAULT_VERIFY_LEN = 64 * 1024; // 64k
  private static int BUFFER_LEN = DEFAULT_VERIFY_LEN;
  private static final String NAME = "fastfilecheck";
  private static final String JOB_DIR_LABEL = NAME + ".job.dir";
  private static final String OP_LIST_LABEL = NAME + ".op.list";
  private static final String OP_COUNT_LABEL = NAME + ".op.count";
  private static final String SOURCE_ONLY_CONF = "sourceonly";
  private static final short OP_LIST_RELICATION = 10;
  private static final int TASKS_PER_JOB = 50;
  private static long filesPerTask = 10;
  private static final int MAX_FILES_PER_TASK = 10000;
  private boolean sourceOnly = false;
  private Configuration conf;
  private static final Random rand = new Random();
  
  enum State{
    GOOD_FILE,
    BAD_FILE,
    NOT_RAIDED,
    UNREADABLE,
    NOT_FOUND
  }
  
  public FastFileCheck(Configuration conf) {
    this.conf = conf;
  }
  
  /**
   * Get the input splits from the operation file list.
   */
  static class FileCheckInputFormat implements InputFormat<Text, Text> {

    @Override
    public InputSplit[] getSplits(JobConf job, int numSplits)
        throws IOException {
      numSplits = TASKS_PER_JOB;
      // get how many records.
      final int totalRecords = job.getInt(OP_COUNT_LABEL, -1);
      final int targetCount = totalRecords / numSplits;
      
      String fileList = job.get(OP_LIST_LABEL, "");
      if (totalRecords < 0 || "".equals(fileList)) {
        throw new RuntimeException("Invalid metadata.");
      }
      
      Path srcs = new Path(fileList);
      FileSystem fs = srcs.getFileSystem(job);
      
      List<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
      Text key = new Text();
      Text value = new Text();
      SequenceFile.Reader in = null;
      long prev = 0L;
      int count = 0;
      
      // split the files to be checked.
      try {
        for (in = new SequenceFile.Reader(fs, srcs, job); in.next(key, value);) {
          long cur = in.getPosition();
          long delta = cur - prev;
          if (++count > targetCount) {
            count = 0;
            splits.add(new FileSplit(srcs, prev, delta, (String[])null));
            prev = cur;
          }
        }
      } finally {
        in.close();
      }
      
      long remaining = fs.getFileStatus(srcs).getLen() - prev;
      if (0 != remaining) {
        splits.add(new FileSplit(srcs, prev, remaining, (String[])null));
      }
      
      return splits.toArray(new FileSplit[splits.size()]);
    }

    @Override
    public RecordReader<Text, Text> getRecordReader(InputSplit split,
        JobConf job, Reporter reporter) throws IOException {
      return new SequenceFileRecordReader<Text, Text>(job, (FileSplit) split);
    }
    
  }
  
  static class FileCheckMapper implements Mapper<Text, Text, Text, Text> {
    private JobConf jobConf;
    private int failCount = 0;
    private int succeedCount = 0;
    private boolean sourceOnly = false;

    @Override
    public void configure(JobConf job) {
      this.jobConf = job;
      this.sourceOnly = job.getBoolean(SOURCE_ONLY_CONF, false);
    }

    @Override
    public void close() throws IOException {
      
    }
    
    private String getCountString() {
      return "Succeeded: " + succeedCount + " Failed: " + failCount;
    }

    @Override
    public void map(Text key, Text value, OutputCollector<Text, Text> output,
        Reporter reporter) throws IOException {
      // run a file operation
      Path p = new Path(key.toString());
      String v;
      try {
        if (sourceOnly) {
          v = processSourceFile(p, reporter, jobConf);
        } else {
          v = processFile(p, reporter, jobConf);
        }
        LOG.info("File: " + p + ", result: " + v);
        output.collect(key, new Text(v));
        reporter.progress();
        ++ succeedCount;
      } catch (InterruptedException e) {
        ++ failCount;
        LOG.warn("Interrupted when processing file: " + p);
        throw new IOException(e);
      } finally {
        reporter.setStatus(getCountString());
      }
    }
    
    /**
     * check a source file.
     */
    String processSourceFile(Path p, Progressable reporter, 
        Configuration conf) throws IOException, InterruptedException {
      LOG.info("Processing Source file: " + p);
      FileSystem fs = p.getFileSystem(conf);
      if (!fs.exists(p)) {
        return State.NOT_FOUND.toString();
      }
      
      Codec codec = Codec.getCodec("rs");
      boolean result = false;
      try {
        result = checkFile(conf, fs, fs, p, null, codec, reporter, true);
      } catch (IOException ex) {
        LOG.warn("Encounter exception when checking file: " + p + 
            ", " + ex.getMessage());
        return State.UNREADABLE.toString();
      }
      
      return result ? State.GOOD_FILE.toString() : State.BAD_FILE.toString();
    }
    
    /**
     * check a single file.
     */
    String processFile(Path p, Progressable reporter, Configuration conf) 
        throws IOException, InterruptedException {
      
      LOG.info("Processing file: " + p);
      FileSystem fs = p.getFileSystem(conf);
      if (!fs.exists(p)) {
        return State.NOT_FOUND.toString();
      }
      FileStatus srcStat = null;
      try {
        srcStat = fs.getFileStatus(p);
      } catch (Exception e) {
        return State.NOT_FOUND.toString();
      }
      boolean result = false;
      boolean raided = false;
      for (Codec codec : Codec.getCodecs()) {
        ParityFilePair pfPair = ParityFilePair.getParityFile(codec, srcStat, conf);
        if (pfPair != null) {
          raided = true;
          Path parityPath = pfPair.getPath();
          try {
            result = checkFile(conf, fs, fs, p, parityPath, codec, reporter
                , false);
          } catch (IOException ex) {
            LOG.warn("Encounter exception when checking the file: " + p, ex);
            LogUtils.logFileCheckMetrics(LOGRESULTS.FAILURE, codec, p, 
                fs, -1, -1, ex, reporter);
            return State.UNREADABLE.toString();
          }
          break;
        }
      }
      
      if (!raided) {
        return State.NOT_RAIDED.toString();
      }
      
      return result ? State.GOOD_FILE.toString() : State.BAD_FILE.toString();
    }
  }
  
  private JobConf createJobConf(Configuration conf) {
    JobConf jobConf = new JobConf(conf);
    String jobName = NAME + "_" + dateForm.format(new Date(System.currentTimeMillis()));
    jobConf.setJobName(jobName);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setJarByClass(FastFileCheck.class);
    jobConf.setInputFormat(FileCheckInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setMapperClass(FileCheckMapper.class);
    jobConf.setNumReduceTasks(0);
    jobConf.setBoolean(SOURCE_ONLY_CONF, sourceOnly);
    
    return jobConf;
  }
  
  /**
   * Check a file.
   */
  public static boolean checkFile(Configuration conf,
      FileSystem srcFs, FileSystem parityFs,
      Path srcPath, Path parityPath, Codec codec, 
      Progressable reporter,
      boolean sourceOnly) 
          throws IOException, InterruptedException {
    FileStatus stat = srcFs.getFileStatus(srcPath);
    long blockSize = stat.getBlockSize();
    long len = stat.getLen();
    
    List<Long> offsets = new ArrayList<Long>();
    // check a small part of each stripe.
    for (int i = 0; i * blockSize < len; i += codec.stripeLength) {
      offsets.add(i * blockSize);
    }
    
    for (long blockOffset : offsets) {
      if (sourceOnly) {
        if (!verifySourceFile(conf, srcFs,stat,
            codec, blockOffset, reporter)) {
          return false;
        }
      }
      else {
        if (!verifyFile(conf, srcFs, parityFs, stat,
            parityPath, codec, blockOffset, reporter)) {
          return false;
        }
      }
    }
    
    return true;
  }
  
  private static boolean verifySourceFile(Configuration conf,
      FileSystem srcFs, FileStatus stat, Codec codec,
      long blockOffset, Progressable reporter)
    throws IOException, InterruptedException {
      Path srcPath = stat.getPath();
      LOG.info("Verify file: " + srcPath + " at offset: " + blockOffset);
      int limit = (int) Math.min(stat.getBlockSize(), DEFAULT_VERIFY_LEN);
      if (reporter == null) {
        reporter = RaidUtils.NULL_PROGRESSABLE;
      }
      
      List<Long> errorOffsets = new ArrayList<Long>();
      // first limit bytes
      errorOffsets.add(blockOffset);
      long left = Math.min(stat.getBlockSize(), stat.getLen() - blockOffset);
      if (left > limit) {
        // last limit bytes
        errorOffsets.add(blockOffset + left - limit);
        // random limit bytes.
        errorOffsets.add(blockOffset + 
            rand.nextInt((int)(left - limit)));
      }
      
      long blockSize = stat.getBlockSize();
      long fileLen = stat.getLen();
      List<InputStream> streamList = new ArrayList<InputStream>();
      List<InputStream> tmpList = new ArrayList<InputStream>();
      try {
        for (long errorOffset : errorOffsets) {
          int k = 0;
          int len = streamList.size();
          tmpList.clear();
          for (int i = 0; i < codec.stripeLength; i++) {
            if (errorOffset + blockSize * i >= fileLen) {
              break;
            }
            
            FSDataInputStream is = null;
            if (k < len) {
              // resue the input stream
              is = (FSDataInputStream) streamList.get(k);
              k++;
            } else {
              is = srcFs.open(srcPath);
              streamList.add(is);
            }
            is.seek(errorOffset + blockSize * i);
            tmpList.add(is);
          }
          
          if (tmpList.size() == 0) {
            continue;
          }
          
          InputStream[] streams = tmpList.toArray(new InputStream[] {});
          ParallelStreamReader reader = null;
          try {
            reader = new ParallelStreamReader(
                reporter, streams, 
                limit, 4, 2, limit);
            reader.start();
            int readNum = 0;
            while (readNum < limit) {
              ReadResult result = reader.getReadResult();
              for (IOException ex : result.ioExceptions) {
                if (ex != null) {
                  LOG.warn("Encounter exception when checking file: " + srcPath + 
                      ", " + ex.getMessage());
                  return false;
                }
              }
              readNum += result.readBufs[0].length;
            }
          } finally {
            if (null != reader) {
              reader.shutdown();
            }
            reporter.progress();
          }
        } 
      } finally {
        if (streamList.size()> 0) {
          RaidUtils.closeStreams(streamList.toArray(new InputStream[]{}));
        }
      }
      return true;
  }
  
  /**
   * Verify the certain offset of a file.
   */
  private static boolean verifyFile(Configuration conf,
      FileSystem srcFs, FileSystem parityFs,
      FileStatus stat, Path parityPath, Codec codec,
      long blockOffset, Progressable reporter) 
    throws IOException, InterruptedException {
    Path srcPath = stat.getPath();
    LOG.info("Verify file: " + srcPath + " at offset: " + blockOffset);
    int limit = (int) Math.min(stat.getBlockSize(), DEFAULT_VERIFY_LEN);
    if (reporter == null) {
      reporter = RaidUtils.NULL_PROGRESSABLE;
    }
    
    // try to decode.
    Decoder decoder = new Decoder(conf, codec);
    if (codec.isDirRaid) {
      decoder.connectToStore(srcPath);
    }
    
    List<Long> errorOffsets = new ArrayList<Long>();
    // first limit bytes
    errorOffsets.add(blockOffset);
    long left = Math.min(stat.getBlockSize(), stat.getLen() - blockOffset);
    if (left > limit) {
      // last limit bytes
      errorOffsets.add(blockOffset + left - limit);
      // random limit bytes.
      errorOffsets.add(blockOffset + 
          rand.nextInt((int)(left - limit)));
    }
 
    byte[] buffer = new byte[limit];
    FSDataInputStream is = srcFs.open(srcPath);
    try {
      for (long errorOffset : errorOffsets) {
        is.seek(errorOffset);
        is.read(buffer);
        // calculate the oldCRC.
        CRC32 oldCrc = new CRC32();
        oldCrc.update(buffer);
        
        CRC32 newCrc = new CRC32();
        DecoderInputStream stream = decoder.new DecoderInputStream(
            RaidUtils.NULL_PROGRESSABLE, limit, stat.getBlockSize(), errorOffset, 
            srcFs, srcPath, parityFs, parityPath, null, null, false);
        try {
          stream.read(buffer);
          newCrc.update(buffer);
          if (oldCrc.getValue() != newCrc.getValue()) {
            LogUtils.logFileCheckMetrics(LOGRESULTS.FAILURE, codec, srcPath, 
                srcFs, errorOffset, limit, null, reporter);
            LOG.error("mismatch crc, old " + oldCrc.getValue() + 
                ", new " + newCrc.getValue() + ", for file: " + srcPath
                + " at offset " + errorOffset + ", read limit " + limit);
            return false;
          }
        } finally {
          reporter.progress();
          if (stream != null) {
            stream.close();
          }
        }
      }
      return true;
    } finally {
      is.close();
    }
  }
 
  private static class JobContext {
    public RunningJob job;
    public JobConf jobConf;
    
    public JobContext(RunningJob job, JobConf jobConf) {
      this.job = job;
      this.jobConf = jobConf;
    }
  }
  
  private List<JobContext> submitJobs(BufferedReader reader, 
          int filesPerJob, Configuration conf) 
      throws IOException {
    List<JobContext> submitted = new ArrayList<JobContext>();
    boolean done = false;
    Random rand = new Random(new Date().getTime());
    filesPerTask = (long) Math.ceil((double)filesPerJob / TASKS_PER_JOB);
    filesPerTask = Math.min(filesPerTask, MAX_FILES_PER_TASK);
    do {
      JobConf jobConf = createJobConf(conf);
      JobClient jobClient = new JobClient(jobConf);
      String randomId = Integer.toString(rand.nextInt(Integer.MAX_VALUE), 36);
      Path jobDir = new Path(jobClient.getSystemDir(), NAME + "_" + randomId);
      jobConf.set(JOB_DIR_LABEL, jobDir.toString());
      Path log = new Path(jobDir, "_logs");
      FileOutputFormat.setOutputPath(jobConf, log);
      LOG.info("log=" + log);
      
      // create operation list
      FileSystem fs = jobDir.getFileSystem(jobConf);
      Path opList = new Path(jobDir, "_" + OP_LIST_LABEL);
      jobConf.set(OP_LIST_LABEL, opList.toString());
      int opCount = 0;
      int synCount = 0;
      SequenceFile.Writer opWriter = null;
      
      try {
        opWriter = SequenceFile.createWriter(fs, jobConf, opList, Text.class, 
            Text.class, SequenceFile.CompressionType.NONE);
        String f = null;
        do {
          f = reader.readLine();
          if (f == null) {
            // no more file
            done = true;
            break;
          }
          opWriter.append(new Text(f), new Text(f));
          opCount ++;
          if (++synCount > filesPerTask) {
            opWriter.sync();
            synCount = 0;
          }
        } while (opCount < filesPerJob);
        
      } finally {
        if (opWriter != null) {
          opWriter.close();
        }
        fs.setReplication(opList, OP_LIST_RELICATION);
      }
      
      jobConf.setInt(OP_COUNT_LABEL, opCount);
      RunningJob job = jobClient.submitJob(jobConf);
      submitted.add(new JobContext(job, jobConf));
    } while (!done);
    
    return submitted;
  }
  
  private void waitForJobs(List<JobContext> submitted, Configuration conf) 
      throws IOException, InterruptedException {
    JobConf jobConf = createJobConf(conf);
    JobClient jobClient = new JobClient(jobConf);
    List<JobContext> running = new ArrayList<JobContext>(submitted);
    while (!running.isEmpty()) {
      Thread.sleep(60000);
      LOG.info("Checking " + running.size() + " running jobs");
      for (Iterator<JobContext> it = running.iterator(); it.hasNext();) {
        Thread.sleep(2000);
        JobContext context = it.next();
        try {
          if (context.job.isComplete()) {
            it.remove();
            LOG.info("Job " + context.job.getID() + " complete. URL: " + 
                    context.job.getTrackingURL());
          } else {
            LOG.info("Job " + context.job.getID() + " still running. URL: " +
                    context.job.getTrackingURL());
          }
        } catch (Exception ex) {
          LOG.error("Hit error while checking job status.", ex);
          it.remove();
          try {
            context.job.killJob();
          } catch (Exception ex2) {
            // ignore the exception.
          }
        }
      }
    }
  }
  
  private void printResult(List<JobContext> submitted, Configuration conf) 
      throws IOException {
    Text key = new Text();
    Text value = new Text();
    
    Map<State, Integer> stateToCountMap = new HashMap<State, Integer>();
    for (State s : State.values()) {
      stateToCountMap.put(s, 0);
    }
    
    for (JobContext context : submitted) {
      Path outputpath = SequenceFileOutputFormat.getOutputPath(context.jobConf);
      FileSystem fs = outputpath.getFileSystem(context.jobConf);
      
      Path dir = SequenceFileOutputFormat.getOutputPath(context.jobConf);
      Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));
      List<Path> resultPart = new ArrayList<Path>(); 
      for (Path name : names) {
        String fileName = name.toUri().getPath();
        int index = fileName.lastIndexOf('/');
        fileName = fileName.substring(index + 1);
        if (fileName.startsWith("part-")) {
          resultPart.add(name);
        }
      }
      names = resultPart.toArray(new Path[] {});
      
      // sort names, so that hash partitioning works
      Arrays.sort(names);
      
      SequenceFile.Reader[] jobOutputs = new SequenceFile.Reader[names.length];
      for (int i = 0; i < names.length; i++) {
        jobOutputs[i] = new SequenceFile.Reader(fs, names[i], conf);
      }
      
      // read ouput of job.
      try {
        for (SequenceFile.Reader r : jobOutputs) {
          while (r.next(key, value)) {
            State state = State.valueOf(value.toString());
            stateToCountMap.put(state, stateToCountMap.get(state) + 1);
            // print the file result to stdout.
            System.out.println(key + " " + value);
          }
        }
      } finally {
        for (SequenceFile.Reader r : jobOutputs) {
          r.close();
        }
      }
    }
    
    // print summary to std error.
    for (State s : State.values()) {
      String output = s + " " + stateToCountMap.get(s);
      System.err.println(output);
    }
  }
  
  private void printUsage() {
    System.err.println(
        "java FastFileCheck [options] [-filesPerJob N] [-sourceOnly] /path/to/inputfile\n");
    ToolRunner.printGenericCommandUsage(System.err);
  }

  public void startFileCheck(String[] args, int startIndex, Configuration conf) 
      throws IOException, InterruptedException {
    JobConf jobConf = createJobConf(conf);
    String inputFile = null;
    int filesPerJob = Integer.MAX_VALUE;
    sourceOnly = false;
    
    for (int i = startIndex; i < args.length; i++) {
      String arg = args[i];
      if (arg.equalsIgnoreCase("-filesPerJob")) {
        i ++;
        filesPerJob = Integer.parseInt(args[i]);
      } else if (arg.equalsIgnoreCase("-sourceOnly")) {
        sourceOnly = true;
      } else {
        inputFile = arg;
      }
    }
    
    InputStream in = 
        inputFile == null ? System.in : new FileInputStream(inputFile);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    List<JobContext> submitted = submitJobs(reader, filesPerJob, conf);
    waitForJobs(submitted, conf);
    printResult(submitted, conf);
  }
}