/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.raid.tools;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.UUID;
import java.util.zip.CRC32;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.raid.Codec;
import org.apache.hadoop.raid.Decoder;
import org.apache.hadoop.raid.LogUtils;
import org.apache.hadoop.raid.Decoder.DecoderInputStream;
import org.apache.hadoop.raid.LogUtils.LOGRESULTS;
import org.apache.hadoop.raid.ParallelStreamReader;
import org.apache.hadoop.raid.ParallelStreamReader.ReadResult;
import org.apache.hadoop.raid.ParityFilePair;
import org.apache.hadoop.raid.RaidUtils;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.SequenceFileRecordReader;
import org.apache.hadoop.mapred.Utils;

/**
 * Verifies RAIDed files by sampling a small region of each stripe. In the
 * default mode the sampled source bytes are CRC-compared against the same
 * bytes reconstructed from parity; with -sourceOnly only the source blocks
 * are read to confirm they are readable. The per-file checks are distributed
 * as a map-only MapReduce job.
 */
public class FastFileCheck {
  final static Log LOG = LogFactory.getLog(
      "org.apache.hadoop.raid.tools.FastFileCheck");
  private static final SimpleDateFormat dateForm =
      new SimpleDateFormat("yyyy-MM-dd-HH-mm");
  private static int DEFAULT_VERIFY_LEN = 64 * 1024; // 64k
  private static int BUFFER_LEN = DEFAULT_VERIFY_LEN;
  private static final String NAME = "fastfilecheck";
  private static final String JOB_DIR_LABEL = NAME
+ ".job.dir"; private static final String OP_LIST_LABEL = NAME + ".op.list"; private static final String OP_COUNT_LABEL = NAME + ".op.count"; private static final String SOURCE_ONLY_CONF = "sourceonly"; private static final short OP_LIST_RELICATION = 10; private static final int TASKS_PER_JOB = 50; private static long filesPerTask = 10; private static final int MAX_FILES_PER_TASK = 10000; private boolean sourceOnly = false; private Configuration conf; private static final Random rand = new Random(); enum State{ GOOD_FILE, BAD_FILE, NOT_RAIDED, UNREADABLE, NOT_FOUND } public FastFileCheck(Configuration conf) { this.conf = conf; } /** * Get the input splits from the operation file list. */ static class FileCheckInputFormat implements InputFormat<Text, Text> { @Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { numSplits = TASKS_PER_JOB; // get how many records. final int totalRecords = job.getInt(OP_COUNT_LABEL, -1); final int targetCount = totalRecords / numSplits; String fileList = job.get(OP_LIST_LABEL, ""); if (totalRecords < 0 || "".equals(fileList)) { throw new RuntimeException("Invalid metadata."); } Path srcs = new Path(fileList); FileSystem fs = srcs.getFileSystem(job); List<FileSplit> splits = new ArrayList<FileSplit>(numSplits); Text key = new Text(); Text value = new Text(); SequenceFile.Reader in = null; long prev = 0L; int count = 0; // split the files to be checked. try { for (in = new SequenceFile.Reader(fs, srcs, job); in.next(key, value);) { long cur = in.getPosition(); long delta = cur - prev; if (++count > targetCount) { count = 0; splits.add(new FileSplit(srcs, prev, delta, (String[])null)); prev = cur; } } } finally { in.close(); } long remaining = fs.getFileStatus(srcs).getLen() - prev; if (0 != remaining) { splits.add(new FileSplit(srcs, prev, remaining, (String[])null)); } return splits.toArray(new FileSplit[splits.size()]); } @Override public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { return new SequenceFileRecordReader<Text, Text>(job, (FileSplit) split); } } static class FileCheckMapper implements Mapper<Text, Text, Text, Text> { private JobConf jobConf; private int failCount = 0; private int succeedCount = 0; private boolean sourceOnly = false; @Override public void configure(JobConf job) { this.jobConf = job; this.sourceOnly = job.getBoolean(SOURCE_ONLY_CONF, false); } @Override public void close() throws IOException { } private String getCountString() { return "Succeeded: " + succeedCount + " Failed: " + failCount; } @Override public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { // run a file operation Path p = new Path(key.toString()); String v; try { if (sourceOnly) { v = processSourceFile(p, reporter, jobConf); } else { v = processFile(p, reporter, jobConf); } LOG.info("File: " + p + ", result: " + v); output.collect(key, new Text(v)); reporter.progress(); ++ succeedCount; } catch (InterruptedException e) { ++ failCount; LOG.warn("Interrupted when processing file: " + p); throw new IOException(e); } finally { reporter.setStatus(getCountString()); } } /** * check a source file. 
     */
    String processSourceFile(Path p, Progressable reporter, Configuration conf)
        throws IOException, InterruptedException {
      LOG.info("Processing source file: " + p);
      FileSystem fs = p.getFileSystem(conf);
      if (!fs.exists(p)) {
        return State.NOT_FOUND.toString();
      }
      Codec codec = Codec.getCodec("rs");
      boolean result = false;
      try {
        result = checkFile(conf, fs, fs, p, null, codec, reporter, true);
      } catch (IOException ex) {
        LOG.warn("Encountered exception when checking file: " + p + ", "
            + ex.getMessage());
        return State.UNREADABLE.toString();
      }
      return result ? State.GOOD_FILE.toString() : State.BAD_FILE.toString();
    }

    /**
     * Check a single file.
     */
    String processFile(Path p, Progressable reporter, Configuration conf)
        throws IOException, InterruptedException {
      LOG.info("Processing file: " + p);
      FileSystem fs = p.getFileSystem(conf);
      if (!fs.exists(p)) {
        return State.NOT_FOUND.toString();
      }
      FileStatus srcStat = null;
      try {
        srcStat = fs.getFileStatus(p);
      } catch (Exception e) {
        return State.NOT_FOUND.toString();
      }
      boolean result = false;
      boolean raided = false;
      for (Codec codec : Codec.getCodecs()) {
        ParityFilePair pfPair = ParityFilePair.getParityFile(codec, srcStat,
            conf);
        if (pfPair != null) {
          raided = true;
          Path parityPath = pfPair.getPath();
          try {
            result = checkFile(conf, fs, fs, p, parityPath, codec, reporter,
                false);
          } catch (IOException ex) {
            LOG.warn("Encountered exception when checking the file: " + p, ex);
            LogUtils.logFileCheckMetrics(LOGRESULTS.FAILURE, codec, p, fs,
                -1, -1, ex, reporter);
            return State.UNREADABLE.toString();
          }
          break;
        }
      }
      if (!raided) {
        return State.NOT_RAIDED.toString();
      }
      return result ? State.GOOD_FILE.toString() : State.BAD_FILE.toString();
    }
  }

  private JobConf createJobConf(Configuration conf) {
    JobConf jobConf = new JobConf(conf);
    String jobName = NAME + "_"
        + dateForm.format(new Date(System.currentTimeMillis()));
    jobConf.setJobName(jobName);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setJarByClass(FastFileCheck.class);
    jobConf.setInputFormat(FileCheckInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setMapperClass(FileCheckMapper.class);
    jobConf.setNumReduceTasks(0);
    jobConf.setBoolean(SOURCE_ONLY_CONF, sourceOnly);
    return jobConf;
  }

  /**
   * Check a file by verifying a small sample of each stripe.
   */
  public static boolean checkFile(Configuration conf, FileSystem srcFs,
      FileSystem parityFs, Path srcPath, Path parityPath, Codec codec,
      Progressable reporter, boolean sourceOnly)
      throws IOException, InterruptedException {
    FileStatus stat = srcFs.getFileStatus(srcPath);
    long blockSize = stat.getBlockSize();
    long len = stat.getLen();
    List<Long> offsets = new ArrayList<Long>();
    // check a small part of each stripe.
    for (int i = 0; i * blockSize < len; i += codec.stripeLength) {
      offsets.add(i * blockSize);
    }
    for (long blockOffset : offsets) {
      if (sourceOnly) {
        if (!verifySourceFile(conf, srcFs, stat, codec, blockOffset,
            reporter)) {
          return false;
        }
      } else {
        if (!verifyFile(conf, srcFs, parityFs, stat, parityPath, codec,
            blockOffset, reporter)) {
          return false;
        }
      }
    }
    return true;
  }

  private static boolean verifySourceFile(Configuration conf, FileSystem srcFs,
      FileStatus stat, Codec codec, long blockOffset, Progressable reporter)
      throws IOException, InterruptedException {
    Path srcPath = stat.getPath();
    LOG.info("Verify file: " + srcPath + " at offset: " + blockOffset);
    int limit = (int) Math.min(stat.getBlockSize(), DEFAULT_VERIFY_LEN);
    if (reporter == null) {
      reporter = RaidUtils.NULL_PROGRESSABLE;
    }
    List<Long> errorOffsets = new ArrayList<Long>();
    // first limit bytes
    errorOffsets.add(blockOffset);
    long left = Math.min(stat.getBlockSize(), stat.getLen() - blockOffset);
    if (left > limit) {
      // last limit bytes
      errorOffsets.add(blockOffset + left - limit);
      // random limit bytes.
      errorOffsets.add(blockOffset + rand.nextInt((int) (left - limit)));
    }
    long blockSize = stat.getBlockSize();
    long fileLen = stat.getLen();
    List<InputStream> streamList = new ArrayList<InputStream>();
    List<InputStream> tmpList = new ArrayList<InputStream>();
    try {
      for (long errorOffset : errorOffsets) {
        int k = 0;
        int len = streamList.size();
        tmpList.clear();
        for (int i = 0; i < codec.stripeLength; i++) {
          if (errorOffset + blockSize * i >= fileLen) {
            break;
          }
          FSDataInputStream is = null;
          if (k < len) {
            // reuse the input stream
            is = (FSDataInputStream) streamList.get(k);
            k++;
          } else {
            is = srcFs.open(srcPath);
            streamList.add(is);
          }
          is.seek(errorOffset + blockSize * i);
          tmpList.add(is);
        }
        if (tmpList.size() == 0) {
          continue;
        }
        InputStream[] streams = tmpList.toArray(new InputStream[] {});
        ParallelStreamReader reader = null;
        try {
          reader = new ParallelStreamReader(reporter, streams, limit, 4, 2,
              limit);
          reader.start();
          int readNum = 0;
          while (readNum < limit) {
            ReadResult result = reader.getReadResult();
            for (IOException ex : result.ioExceptions) {
              if (ex != null) {
                LOG.warn("Encountered exception when checking file: " + srcPath
                    + ", " + ex.getMessage());
                return false;
              }
            }
            readNum += result.readBufs[0].length;
          }
        } finally {
          if (null != reader) {
            reader.shutdown();
          }
          reporter.progress();
        }
      }
    } finally {
      if (streamList.size() > 0) {
        RaidUtils.closeStreams(streamList.toArray(new InputStream[] {}));
      }
    }
    return true;
  }

  /**
   * Verify a file at the given offset by comparing the CRC of the source
   * data with the CRC of the data reconstructed from parity.
   */
  private static boolean verifyFile(Configuration conf, FileSystem srcFs,
      FileSystem parityFs, FileStatus stat, Path parityPath, Codec codec,
      long blockOffset, Progressable reporter)
      throws IOException, InterruptedException {
    Path srcPath = stat.getPath();
    LOG.info("Verify file: " + srcPath + " at offset: " + blockOffset);
    int limit = (int) Math.min(stat.getBlockSize(), DEFAULT_VERIFY_LEN);
    if (reporter == null) {
      reporter = RaidUtils.NULL_PROGRESSABLE;
    }
    // try to decode.
    Decoder decoder = new Decoder(conf, codec);
    if (codec.isDirRaid) {
      decoder.connectToStore(srcPath);
    }
    List<Long> errorOffsets = new ArrayList<Long>();
    // first limit bytes
    errorOffsets.add(blockOffset);
    long left = Math.min(stat.getBlockSize(), stat.getLen() - blockOffset);
    if (left > limit) {
      // last limit bytes
      errorOffsets.add(blockOffset + left - limit);
      // random limit bytes.
      errorOffsets.add(blockOffset + rand.nextInt((int) (left - limit)));
    }
    byte[] buffer = new byte[limit];
    FSDataInputStream is = srcFs.open(srcPath);
    try {
      for (long errorOffset : errorOffsets) {
        is.seek(errorOffset);
        is.read(buffer);
        // calculate the oldCRC.
        CRC32 oldCrc = new CRC32();
        oldCrc.update(buffer);
        CRC32 newCrc = new CRC32();
        DecoderInputStream stream = decoder.new DecoderInputStream(
            RaidUtils.NULL_PROGRESSABLE, limit, stat.getBlockSize(),
            errorOffset, srcFs, srcPath, parityFs, parityPath, null, null,
            false);
        try {
          stream.read(buffer);
          newCrc.update(buffer);
          if (oldCrc.getValue() != newCrc.getValue()) {
            LogUtils.logFileCheckMetrics(LOGRESULTS.FAILURE, codec, srcPath,
                srcFs, errorOffset, limit, null, reporter);
            LOG.error("mismatch crc, old " + oldCrc.getValue() + ", new "
                + newCrc.getValue() + ", for file: " + srcPath + " at offset "
                + errorOffset + ", read limit " + limit);
            return false;
          }
        } finally {
          reporter.progress();
          if (stream != null) {
            stream.close();
          }
        }
      }
      return true;
    } finally {
      is.close();
    }
  }

  private static class JobContext {
    public RunningJob job;
    public JobConf jobConf;

    public JobContext(RunningJob job, JobConf jobConf) {
      this.job = job;
      this.jobConf = jobConf;
    }
  }

  private List<JobContext> submitJobs(BufferedReader reader, int filesPerJob,
      Configuration conf) throws IOException {
    List<JobContext> submitted = new ArrayList<JobContext>();
    boolean done = false;
    Random rand = new Random(new Date().getTime());
    filesPerTask = (long) Math.ceil((double) filesPerJob / TASKS_PER_JOB);
    filesPerTask = Math.min(filesPerTask, MAX_FILES_PER_TASK);
    do {
      JobConf jobConf = createJobConf(conf);
      JobClient jobClient = new JobClient(jobConf);
      String randomId = Integer.toString(rand.nextInt(Integer.MAX_VALUE), 36);
      Path jobDir = new Path(jobClient.getSystemDir(), NAME + "_" + randomId);
      jobConf.set(JOB_DIR_LABEL, jobDir.toString());
      Path log = new Path(jobDir, "_logs");
      FileOutputFormat.setOutputPath(jobConf, log);
      LOG.info("log=" + log);

      // create operation list
      FileSystem fs = jobDir.getFileSystem(jobConf);
      Path opList = new Path(jobDir, "_" + OP_LIST_LABEL);
      jobConf.set(OP_LIST_LABEL, opList.toString());
      int opCount = 0;
      int synCount = 0;
      SequenceFile.Writer opWriter = null;
      try {
        opWriter = SequenceFile.createWriter(fs, jobConf, opList, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);
        String f = null;
        do {
          f = reader.readLine();
          if (f == null) {
            // no more file
            done = true;
            break;
          }
          opWriter.append(new Text(f), new Text(f));
          opCount++;
          if (++synCount > filesPerTask) {
            opWriter.sync();
            synCount = 0;
          }
        } while (opCount < filesPerJob);
      } finally {
        if (opWriter != null) {
          opWriter.close();
        }
        fs.setReplication(opList, OP_LIST_REPLICATION);
      }
      jobConf.setInt(OP_COUNT_LABEL, opCount);
      RunningJob job = jobClient.submitJob(jobConf);
      submitted.add(new JobContext(job, jobConf));
    } while (!done);
    return submitted;
  }

  private void waitForJobs(List<JobContext> submitted, Configuration conf)
      throws IOException, InterruptedException {
    JobConf jobConf = createJobConf(conf);
    JobClient jobClient = new JobClient(jobConf);
    List<JobContext> running = new ArrayList<JobContext>(submitted);
    while (!running.isEmpty()) {
      Thread.sleep(60000);
      LOG.info("Checking " + running.size() + " running jobs");
      for (Iterator<JobContext> it = running.iterator(); it.hasNext();) {
        Thread.sleep(2000);
        JobContext context = it.next();
        try {
          if (context.job.isComplete()) {
            it.remove();
            LOG.info("Job " + context.job.getID() + " complete. URL: "
                + context.job.getTrackingURL());
URL: " + context.job.getTrackingURL()); } else { LOG.info("Job " + context.job.getID() + " still running. URL: " + context.job.getTrackingURL()); } } catch (Exception ex) { LOG.error("Hit error while checking job status.", ex); it.remove(); try { context.job.killJob(); } catch (Exception ex2) { // ignore the exception. } } } } } private void printResult(List<JobContext> submitted, Configuration conf) throws IOException { Text key = new Text(); Text value = new Text(); Map<State, Integer> stateToCountMap = new HashMap<State, Integer>(); for (State s : State.values()) { stateToCountMap.put(s, 0); } for (JobContext context : submitted) { Path outputpath = SequenceFileOutputFormat.getOutputPath(context.jobConf); FileSystem fs = outputpath.getFileSystem(context.jobConf); Path dir = SequenceFileOutputFormat.getOutputPath(context.jobConf); Path[] names = FileUtil.stat2Paths(fs.listStatus(dir)); List<Path> resultPart = new ArrayList<Path>(); for (Path name : names) { String fileName = name.toUri().getPath(); int index = fileName.lastIndexOf('/'); fileName = fileName.substring(index + 1); if (fileName.startsWith("part-")) { resultPart.add(name); } } names = resultPart.toArray(new Path[] {}); // sort names, so that hash partitioning works Arrays.sort(names); SequenceFile.Reader[] jobOutputs = new SequenceFile.Reader[names.length]; for (int i = 0; i < names.length; i++) { jobOutputs[i] = new SequenceFile.Reader(fs, names[i], conf); } // read ouput of job. try { for (SequenceFile.Reader r : jobOutputs) { while (r.next(key, value)) { State state = State.valueOf(value.toString()); stateToCountMap.put(state, stateToCountMap.get(state) + 1); // print the file result to stdout. System.out.println(key + " " + value); } } } finally { for (SequenceFile.Reader r : jobOutputs) { r.close(); } } } // print summary to std error. for (State s : State.values()) { String output = s + " " + stateToCountMap.get(s); System.err.println(output); } } private void printUsage() { System.err.println( "java FastFileCheck [options] [-filesPerJob N] [-sourceOnly] /path/to/inputfile\n"); ToolRunner.printGenericCommandUsage(System.err); } public void startFileCheck(String[] args, int startIndex, Configuration conf) throws IOException, InterruptedException { JobConf jobConf = createJobConf(conf); String inputFile = null; int filesPerJob = Integer.MAX_VALUE; sourceOnly = false; for (int i = startIndex; i < args.length; i++) { String arg = args[i]; if (arg.equalsIgnoreCase("-filesPerJob")) { i ++; filesPerJob = Integer.parseInt(args[i]); } else if (arg.equalsIgnoreCase("-sourceOnly")) { sourceOnly = true; } else { inputFile = arg; } } InputStream in = inputFile == null ? System.in : new FileInputStream(inputFile); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); List<JobContext> submitted = submitJobs(reader, filesPerJob, conf); waitForJobs(submitted, conf); printResult(submitted, conf); } }