/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.BlockMissingException;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;

/**
 * Represents a generic decoder that can be used to read a file with
 * corrupt blocks by using the parity file.
 * This is an abstract class, concrete subclasses need to implement
 * fixErasedBlock.
 */
public abstract class Decoder {
  public static final Log LOG = LogFactory.getLog(
                                  "org.apache.hadoop.raid.Decoder");
  protected Configuration conf;
  protected int stripeSize;
  protected int paritySize;
  protected Random rand;
  protected int bufSize;
  protected byte[][] readBufs;
  protected byte[][] writeBufs;

  Decoder(Configuration conf, int stripeSize, int paritySize) {
    this.conf = conf;
    this.stripeSize = stripeSize;
    this.paritySize = paritySize;
    this.rand = new Random();
    this.bufSize = conf.getInt("raid.decoder.bufsize", 1024 * 1024);
    this.readBufs = new byte[stripeSize + paritySize][];
    this.writeBufs = new byte[paritySize][];
    allocateBuffers();
  }

  private void allocateBuffers() {
    for (int i = 0; i < stripeSize + paritySize; i++) {
      readBufs[i] = new byte[bufSize];
    }
    for (int i = 0; i < paritySize; i++) {
      writeBufs[i] = new byte[bufSize];
    }
  }

  private void configureBuffers(long blockSize) {
    if ((long)bufSize > blockSize) {
      bufSize = (int)blockSize;
      allocateBuffers();
    } else if (blockSize % bufSize != 0) {
      bufSize = (int)(blockSize / 256L); // heuristic.
      if (bufSize == 0) {
        bufSize = 1024;
      }
      bufSize = Math.min(bufSize, 1024 * 1024);
      allocateBuffers();
    }
  }
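  // Worked example for the heuristic above (illustrative numbers, not from
  // the original source): with the default 1 MB buffer (1048576 bytes) and a
  // 128 MB HDFS block (134217728 bytes), 134217728 % 1048576 == 0, so the
  // buffer is left as-is.  For a blockSize of 100000000 bytes the remainder
  // is non-zero, so bufSize becomes 100000000 / 256 = 390625 bytes, which is
  // below the 1 MB cap and divides that block size evenly.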
  /**
   * The interface to generate a decoded file using the good portion of the
   * source file and the parity file.
   * @param fs The filesystem containing the source file.
   * @param srcFile The damaged source file.
   * @param parityFs The filesystem containing the parity file. This could be
   *        different from fs in case the parity file is part of a HAR archive.
   * @param parityFile The parity file.
   * @param errorOffset Known location of error in the source file. There could
   *        be additional errors in the source file that are discovered during
   *        the decode process.
   * @param decodedFile The decoded file. This will have the exact same
   *        contents as the source file on success.
   */
  public void decodeFile(
    FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
    long errorOffset, Path decodedFile) throws IOException {

    LOG.info("Create " + decodedFile + " for error at " +
             srcFile + ":" + errorOffset);
    FileStatus srcStat = fs.getFileStatus(srcFile);
    long blockSize = srcStat.getBlockSize();
    configureBuffers(blockSize);
    // Move the offset to the start of the block.
    errorOffset = (errorOffset / blockSize) * blockSize;

    // Create the decoded file.
    FSDataOutputStream out = fs.create(
      decodedFile, false, conf.getInt("io.file.buffer.size", 64 * 1024),
      srcStat.getReplication(), srcStat.getBlockSize());

    // Copy the data block-by-block, reopening the source file for each block
    // so that a read failure in one block does not poison the stream used
    // for the next one.
    for (long offset = 0; offset < srcStat.getLen(); offset += blockSize) {
      long limit = Math.min(blockSize, srcStat.getLen() - offset);
      long bytesAlreadyCopied = 0;
      if (offset != errorOffset) {
        FSDataInputStream in = fs.open(
          srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
        try {
          in.seek(offset);
          RaidUtils.copyBytes(in, out, readBufs[0], limit);
          assert(out.getPos() == offset + limit);
          LOG.info("Copied till " + out.getPos() + " from " + srcFile);
          continue;
        } catch (BlockMissingException e) {
          LOG.warn("Encountered BME at " + srcFile + ":" + offset);
          bytesAlreadyCopied = out.getPos() - offset;
        } catch (ChecksumException e) {
          LOG.warn("Encountered CE at " + srcFile + ":" + offset);
          bytesAlreadyCopied = out.getPos() - offset;
        } finally {
          try {
            in.close();
          } catch (IOException e) {
            LOG.warn("Could not close " + srcFile + " after copy attempt", e);
          }
        }
      }
      // If we are here, offset == errorOffset or we got an exception.
      // Recover the block starting at offset.
      fixErasedBlock(fs, srcFile, parityFs, parityFile, blockSize,
                     offset, bytesAlreadyCopied, limit, out);
    }
    out.close();

    try {
      fs.setOwner(decodedFile, srcStat.getOwner(), srcStat.getGroup());
      fs.setPermission(decodedFile, srcStat.getPermission());
      fs.setTimes(decodedFile, srcStat.getModificationTime(),
                  srcStat.getAccessTime());
    } catch (Exception exc) {
      LOG.warn("Didn't manage to copy meta information because of " + exc +
               ". Ignoring...");
    }
  }

  /**
   * Recovers a corrupt block to a local file.
   *
   * @param srcFs The filesystem containing the source file.
   * @param srcPath The damaged source file.
   * @param parityFs The filesystem containing the parity file. This could be
   *        different from srcFs in case the parity file is part of a HAR
   *        archive.
   * @param parityPath The parity file.
   * @param blockSize The block size of the file.
   * @param blockOffset Known location of error in the source file. There could
   *        be additional errors in the source file that are discovered during
   *        the decode process.
   * @param localBlockFile The file to write the block to.
   * @param limit The maximum number of bytes to be written out.
   *        This is to prevent writing beyond the end of the file.
   */
  public void recoverBlockToFile(
    FileSystem srcFs, Path srcPath, FileSystem parityFs, Path parityPath,
    long blockSize, long blockOffset, File localBlockFile, long limit)
    throws IOException {
    OutputStream out = new FileOutputStream(localBlockFile);
    try {
      fixErasedBlock(srcFs, srcPath, parityFs, parityPath, blockSize,
                     blockOffset, 0, limit, out);
    } finally {
      out.close();
    }
  }
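  // Illustrative caller-side sketch for recoverBlockToFile above (not from
  // the original source; variable names and the temp-file handling are
  // assumptions):
  //
  //   Decoder decoder = ...;  // some concrete subclass
  //   File localBlock = File.createTempFile("recovered-block", ".tmp");
  //   long limit = Math.min(blockSize, fileLen - corruptBlockOffset);
  //   decoder.recoverBlockToFile(srcFs, corruptFile, parityFs, parityFile,
  //                              blockSize, corruptBlockOffset,
  //                              localBlock, limit);
  //   // localBlock now holds the reconstructed block contents.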
  /**
   * Implementation-specific mechanism of writing a fixed block.
   * @param fs The filesystem containing the source file.
   * @param srcFile The damaged source file.
   * @param parityFs The filesystem containing the parity file. This could be
   *        different from fs in case the parity file is part of a HAR archive.
   * @param parityFile The parity file.
   * @param blockSize The maximum size of a block.
   * @param errorOffset Known location of error in the source file. There could
   *        be additional errors in the source file that are discovered during
   *        the decode process.
   * @param bytesToSkip After the block is generated, this many bytes should be
   *        skipped before writing to the output. This is needed because the
   *        output may have a portion of the block written from the source file
   *        before a new corruption is discovered in the block.
   * @param limit The maximum number of bytes to be written out, including
   *        bytesToSkip. This is to prevent writing beyond the end of the file.
   * @param out The output.
   */
  protected abstract void fixErasedBlock(
    FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
    long blockSize, long errorOffset, long bytesToSkip, long limit,
    OutputStream out) throws IOException;
}
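/**
 * A minimal illustrative sketch, not the concrete decoder that ships with
 * Hadoop RAID: it shows one way a subclass could implement fixErasedBlock
 * for a simple XOR code with a single parity block per stripe.  The class
 * name, the readZeroPadded helper and the assumed parity-file layout (parity
 * block i stored at offset i * blockSize) are assumptions made for this
 * example, and error handling is deliberately simplified.
 */
class SketchXORDecoder extends Decoder {
  SketchXORDecoder(Configuration conf, int stripeSize) {
    super(conf, stripeSize, 1);
  }

  @Override
  protected void fixErasedBlock(
    FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
    long blockSize, long errorOffset, long bytesToSkip, long limit,
    OutputStream out) throws IOException {

    long erasedBlockIdx = errorOffset / blockSize;
    long stripeIdx = erasedBlockIdx / stripeSize;
    long srcLen = fs.getFileStatus(srcFile).getLen();
    long parityLen = parityFs.getFileStatus(parityFile).getLen();

    // Rebuild the erased block one buffer-sized chunk at a time.
    for (long chunk = 0; chunk < limit; chunk += bufSize) {
      int toRead = (int) Math.min(bufSize, limit - chunk);
      byte[] result = new byte[toRead];
      byte[] tmp = new byte[toRead];

      // Start from the parity block of this stripe ...
      readZeroPadded(parityFs, parityFile, stripeIdx * blockSize + chunk,
                     result, toRead, parityLen);

      // ... and XOR in every surviving source block of the stripe.
      for (int i = 0; i < stripeSize; i++) {
        long blockIdx = stripeIdx * stripeSize + i;
        if (blockIdx == erasedBlockIdx) {
          continue;
        }
        readZeroPadded(fs, srcFile, blockIdx * blockSize + chunk,
                       tmp, toRead, srcLen);
        for (int j = 0; j < toRead; j++) {
          result[j] ^= tmp[j];
        }
      }

      // Honor the bytesToSkip/limit contract described on fixErasedBlock.
      int from = (int) Math.min((long) toRead,
                                Math.max(0L, bytesToSkip - chunk));
      if (from < toRead) {
        out.write(result, from, toRead - from);
      }
    }
  }

  /** Reads len bytes at the given offset, zero-filling anything past EOF. */
  private static void readZeroPadded(FileSystem fs, Path file, long offset,
      byte[] buf, int len, long fileLen) throws IOException {
    java.util.Arrays.fill(buf, 0, len, (byte) 0);
    if (offset >= fileLen) {
      return;
    }
    int available = (int) Math.min(len, fileLen - offset);
    FSDataInputStream in = fs.open(file);
    try {
      in.readFully(offset, buf, 0, available);
    } finally {
      in.close();
    }
  }
}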