/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.Vector;
import java.util.zip.CRC32;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockMissingException;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.raid.DistRaid.EncodingCandidate;
import org.apache.hadoop.raid.LogUtils.LOGRESULTS;
import org.apache.hadoop.raid.LogUtils.LOGTYPES;
import org.apache.hadoop.raid.BlockReconstructor.CorruptBlockReconstructor;
import org.apache.hadoop.raid.StripeReader.StripeInputInfo;
import org.apache.hadoop.util.InjectionHandler;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.io.MD5Hash;
/**
* Represents a generic encoder that can generate a parity file for a source
* file.
*/
public class Encoder {
// Logger shared by the encoder; registered under the class's fully qualified name.
public static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.Encoder");
// Fallback for "raid.encoder.parallelism" when the configuration does not set it.
public static final int DEFAULT_PARALLELISM = 4;
// Fallback for ENCODING_MAX_BUFFER_SIZE_KEY: 1 MiB.
public static final int DEFAULT_MAX_BUFFER_SIZE = 1024*1024;
// Configuration key for the upper bound on the encode buffer size.
public static final String ENCODING_MAX_BUFFER_SIZE_KEY =
"raid.encoder.max.buffer.size";
// Not referenced inside this class. NOTE(review): presumably the name of the
// partial parity file that starts at stripe 0 and becomes the concat target;
// confirm against the callers that use it.
public static final String FINAL_TMP_PARITY_NAME = "0";
// Fallback for RETRY_COUNT_PARTIAL_ENCODING_KEY.
public static final int DEFAULT_RETRY_COUNT_PARTIAL_ENCODING = 3;
// Configuration key for how many times finishAllPartialEncoding lists the
// temporary partial parity directory before giving up.
public static final String RETRY_COUNT_PARTIAL_ENCODING_KEY =
"raid.encoder.retry.count.partial.encoding";
// Configuration captured at construction time.
protected Configuration conf;
// Read from "raid.encoder.parallelism"; handed to ParallelStreamReader.
protected int parallelism;
// Codec describing the layout (parityLength, stripeLength, isDirRaid, tmp dir).
protected Codec codec;
// Erasure code created from the codec; performs the actual encodeBulk calls.
protected ErasureCode code;
// Used for temp-file name suffixes and for the retry back-off delay.
protected Random rand;
// Current size of each write buffer; may be changed by configureBuffers.
protected int bufSize;
// Cap applied to bufSize when configureBuffers picks a new size.
protected int maxBufSize;
// Number of directory listings attempted by finishAllPartialEncoding.
protected int retryCountPartialEncoding;
// Never assigned or read in this class as shown.
protected byte[][] readBufs;
// One buffer per parity block (codec.parityLength entries of bufSize bytes).
protected byte[][] writeBufs;
// Optional store for parity block CRCs; null checks guard every use here.
protected ChecksumStore checksumStore;
// Only created when codec.isDirRaid is true; otherwise stays null.
protected StripeStore stripeStore;
// When true, verifyStore rejects a null checksumStore.
protected boolean requiredChecksumStore = false;
/**
 * An output stream that silently discards everything written to it,
 * in the spirit of /dev/null.
 */
static class NullOutputStream extends OutputStream {
  /** Drops a single byte. */
  @Override
  public void write(int b) throws IOException {
    // intentionally empty: data is discarded
  }

  /** Drops a whole array. */
  @Override
  public void write(byte[] b) throws IOException {
    // intentionally empty: data is discarded
  }

  /** Drops a slice of an array; the bounds are not inspected. */
  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    // intentionally empty: data is discarded
  }
}
Encoder(Configuration conf, Codec codec) {
this.conf = conf;
this.parallelism = conf.getInt("raid.encoder.parallelism",
DEFAULT_PARALLELISM);
this.codec = codec;
this.code = codec.createErasureCode(conf);
this.rand = new Random();
this.maxBufSize = conf.getInt(ENCODING_MAX_BUFFER_SIZE_KEY,
DEFAULT_MAX_BUFFER_SIZE);
this.bufSize = conf.getInt("raid.encoder.bufsize", maxBufSize);
this.writeBufs = new byte[codec.parityLength][];
this.checksumStore = RaidNode.createChecksumStore(conf, false);
this.requiredChecksumStore = conf.getBoolean(
RaidNode.RAID_CHECKSUM_STORE_REQUIRED_KEY,
false);
if (codec.isDirRaid) {
// only need by directory raid
this.stripeStore = RaidNode.createStripeStore(conf, false, null);
}
this.retryCountPartialEncoding = conf.getInt(RETRY_COUNT_PARTIAL_ENCODING_KEY,
DEFAULT_RETRY_COUNT_PARTIAL_ENCODING);
allocateBuffers();
}
/**
 * Checks that every store this encoder is required to have was created.
 *
 * @throws IOException if the checksum store is flagged as required but is
 *         null, or if the codec is a directory-raid codec and the stripe
 *         store is null
 */
public void verifyStore() throws IOException {
  final boolean checksumStoreMissing =
      this.requiredChecksumStore && (checksumStore == null);
  if (checksumStoreMissing) {
    throw new IOException("Checksum store is required but is null");
  }
  final boolean stripeStoreMissing =
      codec.isDirRaid && (stripeStore == null);
  if (stripeStoreMissing) {
    throw new IOException("Stripe store is required but is null");
  }
}
/**
 * Replaces each of the codec.parityLength write buffers with a fresh
 * array of the current {@code bufSize}.
 */
private void allocateBuffers() {
  int slot = 0;
  while (slot < codec.parityLength) {
    writeBufs[slot] = new byte[bufSize];
    slot++;
  }
}
/**
 * Adjusts {@code bufSize} to the given block size and reallocates the write
 * buffers when the size changes path is taken.
 *
 * A buffer larger than the block shrinks to the block size. A buffer that
 * does not evenly divide the block is replaced by blockSize / 256 (1024 if
 * that is zero), capped at {@code maxBufSize}. Otherwise nothing changes.
 *
 * @param blockSize block size the next encode will run with
 */
private void configureBuffers(long blockSize) {
  if (blockSize < (long) bufSize) {
    bufSize = (int) blockSize;
  } else if (blockSize % bufSize == 0) {
    // The current size already divides the block evenly; keep the buffers.
    return;
  } else {
    int candidate = (int) (blockSize / 256L); // heuristic.
    if (candidate == 0) {
      candidate = 1024;
    }
    bufSize = Math.min(candidate, maxBufSize);
  }
  allocateBuffers();
}
/**
 * Stores the CRC of each parity block of {@code parityTmp} in the checksum
 * store, pairing the i-th located block with the i-th CRC and skipping
 * blocks the store already knows (putIfAbsentChecksum).
 *
 * @param dfs                    file system holding the parity file
 * @param crcOuts                one CRC per parity block, in block order
 * @param parityTmp              parity file whose blocks are looked up
 * @param expectedParityFileSize length of the range to locate blocks in
 * @param reporter               notified after every stored checksum
 * @throws IOException if the block lookup or the store update fails
 */
private void writeToChecksumStore(DistributedFileSystem dfs,
    CRC32[] crcOuts, Path parityTmp, long expectedParityFileSize,
    Progressable reporter) throws IOException {
  final LocatedBlocks parityBlocks =
      dfs.getLocatedBlocks(parityTmp, 0L, expectedParityFileSize);
  int blockIdx = 0;
  for (CRC32 crc : crcOuts) {
    final Block parityBlock = parityBlocks.get(blockIdx).getBlock();
    this.checksumStore.putIfAbsentChecksum(parityBlock, crc.getValue());
    reporter.progress();
    blockIdx++;
  }
  LOG.info("Wrote checksums of parity file into checksum store");
}
/**
 * Records in the stripe store which parity blocks belong to which source
 * stripe. Parity blocks are taken from {@code parityFile} in order,
 * codec.parityLength per stripe.
 *
 * On any failure the encoding failure is logged through LogUtils, the
 * parity file is renamed back to {@code finalTmpParity} so that a later
 * round can retry, and the failure is rethrown wrapped in an IOException.
 * A failure of that rollback rename is logged and does not replace the
 * original failure.
 *
 * @param srcStripes             source blocks grouped by stripe; must not be null
 * @param dfs                    file system holding the parity file
 * @param srcFs                  source file system, used for failure logging
 * @param srcFile                source path, used for failure logging
 * @param parityFs               not used by this method
 * @param parityFile             final parity file whose blocks are recorded
 * @param expectedParityFileSize length of the range to locate blocks in
 * @param reporter               notified after every stored stripe
 * @param finalTmpParity         location the parity file is moved back to on failure
 * @throws IOException wrapping whatever went wrong
 */
private void writeToStripeStore(List<List<Block>> srcStripes, DistributedFileSystem
    dfs, FileSystem srcFs, Path srcFile, FileSystem parityFs,
    Path parityFile, long expectedParityFileSize, Progressable reporter,
    Path finalTmpParity)
    throws IOException {
  try {
    if (srcStripes == null) {
      throw new IOException("source blocks are null");
    }
    LocatedBlocks lbks = dfs.getLocatedBlocks(parityFile, 0L,
        expectedParityFileSize);
    if (srcStripes.size() * codec.parityLength !=
        lbks.locatedBlockCount()) {
      throw new IOException("The number of stripes " + srcStripes.size()
          + " doesn't match the number of parity blocks " +
          lbks.locatedBlockCount() + " and parity length is " +
          codec.parityLength);
    }
    InjectionHandler.processEventIO(
        InjectionEvent.RAID_ENCODING_FAILURE_PUT_STRIPE, parityFile,
        finalTmpParity);
    // i walks the stripes, j walks the first parity block of each stripe.
    for (int i = 0, j = 0; i < srcStripes.size(); i++,
        j += codec.parityLength) {
      ArrayList<Block> parityBlocks =
          new ArrayList<Block>(codec.parityLength);
      for (int k = 0; k < codec.parityLength; k++) {
        parityBlocks.add(lbks.get(j + k).getBlock());
      }
      stripeStore.putStripe(codec, parityBlocks, srcStripes.get(i));
      reporter.progress();
    }
    LOG.info("Wrote stripe information into stripe store");
  } catch (Exception ex) {
    LogUtils.logRaidEncodingMetrics(LOGRESULTS.FAILURE, codec,
        -1L, -1L, -1L, -1L, -1L, -1L,
        srcFile, LOGTYPES.ENCODING, srcFs, ex, reporter);
    try {
      if (!dfs.rename(parityFile, finalTmpParity)) {
        LOG.warn("Fail to rename " + parityFile + " back to " + finalTmpParity);
      } else {
        LOG.info("Rename parity file " + parityFile +
            " back to " + finalTmpParity + " so that we could retry putStripe " +
            " in the next round");
      }
    } catch (IOException renameEx) {
      // Keep the original failure as the one that is reported to the caller.
      LOG.warn("Fail to rename " + parityFile + " back to " + finalTmpParity,
          renameEx);
    }
    throw new IOException(ex);
  }
}
/**
 * Validates the partial parity files and returns their paths ordered by the
 * stripe range they cover.
 *
 * Each file must be named after its starting stripe index, that index must
 * be a multiple of {@code encodingUnit}, and the file must contain exactly
 * min(encodingUnit, numStripes - start) * codec.parityLength blocks. The
 * file starting at stripe s is placed in slot s / encodingUnit.
 *
 * @param encodingUnit number of stripes covered by one partial file
 * @param expectedNum  number of partial files expected
 * @param stats        listing of the partial parity directory
 * @param codec        codec supplying the parity length
 * @param numStripes   total number of stripes of the source
 * @return a vector of size {@code expectedNum} with every slot filled
 * @throws IOException if the listing is null, a file is misnamed, misaligned,
 *         out of range or has the wrong block count, or a slot has no file
 */
private Vector<Path> getPartialPaths(int encodingUnit, int expectedNum,
    FileStatus[] stats, Codec codec, long numStripes) throws IOException {
  if (stats == null) {
    throw new IOException("Cannot list partial files, expected " +
        expectedNum);
  }
  Vector<Path> partialPaths = new Vector<Path>(expectedNum);
  partialPaths.setSize(expectedNum);
  for (FileStatus stat : stats) {
    int startStripeIdx;
    try {
      startStripeIdx = Integer.parseInt(stat.getPath().getName());
    } catch (NumberFormatException nfe) {
      throw new IOException("partial file " + stat.getPath() +
          " is not a number");
    }
    if (startStripeIdx % encodingUnit != 0) {
      throw new IOException("partial file " + stat.getPath() + " couldn't " +
          "match " + encodingUnit);
    }
    int slot = startStripeIdx / encodingUnit;
    // Report a stray file as an IOException instead of letting
    // Vector.set fail with an unchecked index error.
    if (startStripeIdx < 0 || slot >= expectedNum) {
      throw new IOException("partial file " + stat.getPath() +
          " is outside of the expected " + expectedNum + " partial files");
    }
    long numBlocks = RaidNode.numBlocks(stat);
    long expectedNumBlocks = Math.min(encodingUnit, numStripes - startStripeIdx)
        * codec.parityLength;
    if (numBlocks != expectedNumBlocks) {
      throw new IOException("partial file " + stat.getPath() + " has " + numBlocks +
          " blocks, but it should be " + expectedNumBlocks);
    }
    partialPaths.set(slot, stat.getPath());
  }
  // A missing file would otherwise surface later as a null path.
  for (int slot = 0; slot < expectedNum; slot++) {
    if (partialPaths.get(slot) == null) {
      throw new IOException("partial file for stripe " +
          ((long) slot * encodingUnit) + " is missing");
    }
  }
  return partialPaths;
}
/**
 * Collects the blocks of every file listed for {@code srcFile} and groups
 * them, in listing order, into stripes of codec.stripeLength blocks; a
 * shorter trailing group becomes the last stripe.
 *
 * The loop pauses for one second after every tenth file and reports
 * progress after each file.
 *
 * @param jobConf    configuration used for the directory listing
 * @param dfs        file system holding the source files
 * @param srcFile    source path being raided
 * @param codec      codec supplying the stripe length
 * @param numStripes number of stripes the result must contain
 * @param sReader    reader whose block count the result must match; it is
 *                   cast to DirectoryStripeReader for that check
 * @param reporter   progress callback
 * @return the stripes, or null if the directory listing returned null
 * @throws IOException if the stripe or block totals do not match
 * @throws InterruptedException if interrupted while pausing
 */
private List<List<Block>> getSrcStripes(Configuration jobConf,
    DistributedFileSystem dfs, Path srcFile, Codec codec, long numStripes,
    StripeReader sReader, Progressable reporter)
    throws IOException, InterruptedException {
  final List<FileStatus> leafFiles =
      RaidNode.listDirectoryRaidFileStatus(jobConf, dfs, srcFile);
  if (leafFiles == null) {
    return null;
  }

  final List<List<Block>> stripes = new ArrayList<List<Block>>();
  ArrayList<Block> pending = new ArrayList<Block>();
  long blockCount = 0L;
  int filesSeen = 0;
  for (FileStatus leaf : leafFiles) {
    final LocatedBlocks located =
        dfs.getLocatedBlocks(leaf.getPath(), 0L, leaf.getLen());
    for (LocatedBlock lb : located.getLocatedBlocks()) {
      pending.add(lb.getBlock());
      if (pending.size() != codec.stripeLength) {
        continue;
      }
      // A full stripe has been gathered; start the next one.
      stripes.add(pending);
      blockCount += pending.size();
      pending = new ArrayList<Block>();
    }
    filesSeen++;
    if (filesSeen % 10 == 0) {
      Thread.sleep(1000);
    }
    reporter.progress();
  }
  if (!pending.isEmpty()) {
    stripes.add(pending);
    blockCount += pending.size();
  }

  final boolean consistent = stripes.size() == numStripes
      && blockCount == ((DirectoryStripeReader) sReader).numBlocks;
  if (consistent) {
    return stripes;
  }

  // Dump every block, stripe by stripe, to make the mismatch diagnosable.
  final StringBuilder dump = new StringBuilder();
  for (List<Block> stripe : stripes) {
    for (Block blk : stripe) {
      dump.append(blk.toString()).append(" ");
    }
    dump.append(";");
  }
  throw new IOException("srcStripes has " + stripes.size() +
      " stripes and " + blockCount + " blocks : " + dump +
      " Doesn't match " + srcFile);
}
/**
 * Encodes the stripes served by {@code sReader} into a uniquely named
 * scratch file under the job's tmp directory, verifies its length, stores
 * the parity checksums when a checksum store exists, and finally renames
 * the scratch file to {@code partialTmpParity}.
 *
 * The scratch path is deleted on the way out in every case.
 *
 * @param jobConf                       configuration the job id is read from
 * @param sReader                       source of the stripes to encode
 * @param parityFs                      file system receiving the parity data
 * @param partialTmpParity              destination of the rename
 * @param parityFile                    final parity path; only its name is used
 * @param tmpRepl                       replication of the scratch file
 * @param blockSize                     block size of the scratch file
 * @param expectedPartialParityBlocks   number of parity blocks to be produced
 * @param expectedPartialParityFileSize required length of the scratch file
 * @param reporter                      progress callback
 * @return true if the rename succeeded, false if it was refused
 * @throws IOException if the tmp dir cannot be created, encoding fails or
 *         the written length differs from the expected one
 * @throws InterruptedException propagated from the encoding step
 */
public boolean encodeTmpParityFile(Configuration jobConf, StripeReader sReader,
    FileSystem parityFs, Path partialTmpParity, Path parityFile,
    short tmpRepl, long blockSize, long expectedPartialParityBlocks,
    long expectedPartialParityFileSize, Progressable reporter)
    throws IOException, InterruptedException {
  // Everything is written to a scratch file first.
  final String jobId = RaidNode.getJobID(jobConf);
  final Path jobTmpDir = new Path(codec.tmpParityDirectory, jobId);
  if (!parityFs.mkdirs(jobTmpDir)) {
    throw new IOException("Could not create tmp dir " + jobTmpDir);
  }
  final Path scratchParity =
      new Path(jobTmpDir, parityFile.getName() + rand.nextLong());
  FSDataOutputStream stream = parityFs.create(
      scratchParity,
      true,
      conf.getInt("io.file.buffer.size", 64 * 1024),
      tmpRepl,
      blockSize);
  try {
    final CRC32[] parityCrcs = (checksumStore == null)
        ? null
        : new CRC32[(int) expectedPartialParityBlocks];
    encodeFileToStream(sReader, blockSize, stream, parityCrcs, reporter);
    stream.close();
    stream = null; // tells the finally block that the stream is closed
    LOG.info("Wrote temp parity file " + scratchParity);

    final FileStatus written = parityFs.getFileStatus(scratchParity);
    if (written.getLen() != expectedPartialParityFileSize) {
      InjectionHandler.processEventIO(
          InjectionEvent.RAID_ENCODING_FAILURE_PARTIAL_PARITY_SIZE_MISMATCH);
      throw new IOException("Expected partial parity size " +
          expectedPartialParityFileSize + " does not match actual " +
          written.getLen() + " in path " + written.getPath());
    }
    InjectionHandler.processEventIO(
        InjectionEvent.RAID_ENCODING_FAILURE_PUT_CHECKSUM);
    if (checksumStore != null) {
      this.writeToChecksumStore((DistributedFileSystem) parityFs, parityCrcs,
          scratchParity, expectedPartialParityFileSize, reporter);
    }

    final boolean moved = parityFs.rename(scratchParity, partialTmpParity);
    if (moved) {
      LOG.info("renamed " + scratchParity + " to " + partialTmpParity);
    } else {
      LOG.warn("Fail to rename file " + scratchParity + " to " + partialTmpParity);
    }
    return moved;
  } finally {
    try {
      if (stream != null) {
        stream.close();
      }
    } finally {
      parityFs.delete(scratchParity, false);
    }
  }
}
/**
 * Checks whether the temporary partial parity directory holds the expected
 * number of partial files.
 *
 * The directory is listed up to {@code retryCountPartialEncoding} times.
 * Between two attempts the method sleeps for a random delay below two
 * seconds; no delay is added after the last attempt.
 *
 * @param parityFs            file system holding the partial parity files
 * @param tmpPartialParityDir directory to list
 * @param expectedNum         number of partial files that must be present
 * @return true as soon as a listing has exactly {@code expectedNum} entries;
 *         false if no attempt matched or the directory does not exist
 * @throws IOException if listing fails for a reason other than a missing
 *         directory
 * @throws InterruptedException if interrupted while waiting between attempts
 */
public boolean finishAllPartialEncoding(FileSystem parityFs,
    Path tmpPartialParityDir, long expectedNum)
    throws IOException, InterruptedException {
  // Verify if we finish all partial encoding
  try {
    long len = 0;
    for (int i = 0; i < this.retryCountPartialEncoding; i++) {
      FileStatus[] stats = parityFs.listStatus(tmpPartialParityDir);
      len = stats != null ? stats.length : 0;
      if (len == expectedNum) {
        return true;
      }
      // Back off only when another attempt follows.
      if (i + 1 < this.retryCountPartialEncoding) {
        Thread.sleep(rand.nextInt(2000));
      }
    }
    LOG.info("Number of partial files in the directory " +
        tmpPartialParityDir + " is " + len +
        ". It's different from expected number " + expectedNum);
    return false;
  } catch (FileNotFoundException fnfe) {
    LOG.info("The temp directory is already moved to final partial" +
        " directory");
    return false;
  }
}
/**
* The interface to use to generate a parity file.
* This method can be called multiple times with the same Encoder object,
* thus allowing reuse of the buffers allocated by the Encoder object.
*
* Encoding is checkpointed through the flags on {@code ec} (isEncoded,
* isRenamed, isConcated) so that a retried call skips the steps that
* already succeeded. The steps are: encode this reader's stripes into a
* partial parity file, wait until all partial files exist and move their
* directory to "final", concatenate the partial files, validate block size
* and length, move the result to {@code parityFile}, record the stripes in
* the stripe store (if any), adjust replication and delete the partial
* parity directory.
*
* @param jobConf job configuration; the job id and the directory-raid
* listing are derived from it
* @param fs The filesystem containing the source file; only passed on for
* failure logging when writing the stripe store.
* @param parityFs file system for the parity data; converted to a
* DistributedFileSystem via DFSUtil.convertToDFS
* @param parityFile The parity file to be generated.
* @param parityRepl replication applied to the parity file at the end if it
* differs from the initial replication
* @param numStripes total number of stripes of the source
* @param blockSize block size of the parity file
* @param reporter progress callback
* @param sReader reader supplying the stripes this call encodes
* @param ec encoding candidate carrying the source status, the encoding
* unit/id and the checkpoint flags, which this method updates
* @return true when the parity file is in place; false when a step could
* not complete yet (partial file rename refused, not all partial files
* present, directory rename refused, no file left under the final
* directory, or source stripes unavailable)
* @throws IOException on directory creation, validation, concat or rename
* failures
* @throws InterruptedException if interrupted while waiting or encoding
*/
public boolean encodeFile(
Configuration jobConf, FileSystem fs, FileSystem parityFs,
Path parityFile, short parityRepl, long numStripes, long blockSize,
Progressable reporter, StripeReader sReader, EncodingCandidate ec)
throws IOException, InterruptedException {
DistributedFileSystem dfs = DFSUtil.convertToDFS(parityFs);
Path srcFile = ec.srcStat.getPath();
// Size of the complete parity file: parityLength blocks per stripe.
long expectedParityFileSize = numStripes * blockSize * codec.parityLength;
// Size of the part this reader is responsible for.
long expectedPartialParityBlocks =
(sReader.stripeEndIdx - sReader.stripeStartIdx) * codec.parityLength;
long expectedPartialParityFileSize = expectedPartialParityBlocks
* blockSize;
// Create a tmp file to which we will write first.
String jobID = RaidNode.getJobID(jobConf);
Path tmpDir = new Path(codec.tmpParityDirectory, jobID);
if (!parityFs.mkdirs(tmpDir)) {
throw new IOException("Could not create tmp dir " + tmpDir);
}
// The directory name is built from the MD5 of the source path, the source
// modification time, the encoding unit and the encoding id.
String partialParityName = "partial_" + MD5Hash.digest(srcFile.toUri().getPath()) +
"_" + ec.srcStat.getModificationTime() + "_" + ec.encodingUnit + "_" +
ec.encodingId;
Path partialParityDir = new Path(tmpDir, partialParityName);
Path tmpPartialParityDir = new Path(partialParityDir, "tmp");
Path finalPartialParityDir = new Path(partialParityDir, "final");
if (!parityFs.mkdirs(partialParityDir)) {
throw new IOException("Could not create partial parity directory " +
partialParityDir);
}
// If we write a parity for a large directory,
// Use 3 replicas to guarantee the durability by default
short tmpRepl = (short)conf.getInt(RaidNode.RAID_PARITY_INITIAL_REPL_KEY,
RaidNode.DEFAULT_RAID_PARITY_INITIAL_REPL);
Path finalTmpParity = null;
/**
* To support retriable encoding, we use three checkpoints to represent
* the last success state.
* 1. isEncoded: Set to true when partial parity is generated
* 2. isRenamed: Set to true when all partial parity are generated and
* tmpPartialParityDir is moved to finalPartialParityDir
* 3. isConcated: Set to true when partial parities are concatenated into
* a final parity.
*/
if (!ec.isConcated) {
if (!ec.isEncoded) {
if (!parityFs.mkdirs(tmpPartialParityDir)) {
throw new IOException("Could not create " + tmpPartialParityDir);
}
// The partial file is named after the stripe index the reader is
// currently positioned at.
Path partialTmpParity = new Path(tmpPartialParityDir,
Long.toString(sReader.getCurrentStripeIdx()));
LOG.info("Encoding partial parity " + partialTmpParity);
if (!encodeTmpParityFile(jobConf, sReader, dfs,
partialTmpParity, parityFile, tmpRepl, blockSize,
expectedPartialParityBlocks, expectedPartialParityFileSize,
reporter)) {
return false;
}
LOG.info("Encoded partial parity " + partialTmpParity);
}
ec.isEncoded = true;
// Number of partial files needed to cover all stripes.
long expectedNum = (long) Math.ceil(numStripes * 1.0 / ec.encodingUnit);
if (!ec.isRenamed) {
if (!finishAllPartialEncoding(parityFs, tmpPartialParityDir, expectedNum)) {
return false;
}
InjectionHandler.processEventIO(
InjectionEvent.RAID_ENCODING_FAILURE_RENAME_FILE);
// Move the directory to final
if (!dfs.rename(tmpPartialParityDir, finalPartialParityDir)) {
LOG.info("Fail to rename " + tmpPartialParityDir + " to " +
finalPartialParityDir);
return false;
}
LOG.info("Renamed " + tmpPartialParityDir + " to " +
finalPartialParityDir);
ec.isRenamed = true;
}
FileStatus[] stats = parityFs.listStatus(finalPartialParityDir);
// Verify partial parities are correct
Vector<Path> partialPaths = getPartialPaths((int)ec.encodingUnit,
(int)expectedNum, stats, codec, numStripes);
// The first partial file is the concat target and becomes the full parity.
finalTmpParity = partialPaths.get(0);
InjectionHandler.processEventIO(
InjectionEvent.RAID_ENCODING_FAILURE_CONCAT_FILE);
if (partialPaths.size() > 1) {
Path[] restPaths = partialPaths.subList(1,
partialPaths.size()).toArray(new Path[partialPaths.size() - 1]);
try {
// Concat requires source and target files are in the same directory
dfs.concat(finalTmpParity, restPaths, true);
LOG.info("Concated " + partialPaths.size() + " files into " + finalTmpParity);
} catch (IOException ioe) {
// Maybe other tasks already finish concating.
LOG.info("Fail to concat " + partialPaths.size() +
" files into " + finalTmpParity, ioe);
throw ioe;
}
}
ec.isConcated = true;
} else {
// Concatenation already happened in an earlier call: exactly one file is
// expected under the final directory.
FileStatus[] stats = parityFs.listStatus(finalPartialParityDir);
if (stats == null || stats.length == 0) {
return false;
}
if (stats.length > 1) {
throw new IOException("We shouldn't have more than 1 files under"
+ finalPartialParityDir);
}
finalTmpParity = stats[0].getPath();
}
// Validate the concatenated parity before publishing it.
FileStatus tmpStat = parityFs.getFileStatus(finalTmpParity);
if (tmpStat.getBlockSize() != blockSize) {
throw new IOException("Expected parity block size " +
blockSize + " does not match actual " +
tmpStat.getBlockSize() + " in path " + finalTmpParity);
}
if (tmpStat.getLen() != expectedParityFileSize) {
throw new IOException("Expected parity size " +
expectedParityFileSize + " does not match actual " +
tmpStat.getLen() + " in path " + finalTmpParity);
}
// Source stripes are only gathered when a stripe store exists, and are
// cached on ec so a retry does not collect them again.
if (ec.srcStripes == null && stripeStore != null) {
InjectionHandler.processEventIO(
InjectionEvent.RAID_ENCODING_FAILURE_GET_SRC_STRIPES);
ec.srcStripes = getSrcStripes(jobConf, dfs, srcFile, codec, numStripes,
sReader, reporter);
if (ec.srcStripes == null) {
LOG.error("Cannot get srcStripes for " + srcFile);
return false;
}
}
// delete destination if exists
if (dfs.exists(parityFile)){
dfs.delete(parityFile, false);
}
// NOTE(review): the results of delete and mkdirs are not checked; a
// failure here surfaces through the rename below.
dfs.mkdirs(parityFile.getParent());
if (!dfs.rename(finalTmpParity, parityFile)) {
String msg = "Unable to rename file " + finalTmpParity + " to " + parityFile;
throw new IOException (msg);
}
LOG.info("Wrote parity file " + parityFile);
if (stripeStore != null) {
// On failure writeToStripeStore renames parityFile back to finalTmpParity.
this.writeToStripeStore(ec.srcStripes, dfs, fs,
srcFile, parityFs, parityFile, expectedParityFileSize, reporter,
finalTmpParity);
}
if (tmpRepl != parityRepl) {
dfs.setReplication(parityFile, parityRepl);
LOG.info("Reduce replication of " + parityFile + " to " + parityRepl);
}
// Clean up the partial parity working directory.
dfs.delete(partialParityDir, true);
return true;
}
/**
 * Regenerates one corrupt parity block by re-encoding its source stripe
 * and writes the block to {@code out}.
 *
 * Encoding a stripe always yields codec.parityLength parity blocks. Only
 * the block at the corrupt position is sent to {@code out}; the others go
 * to discarding streams.
 *
 * @param fs            The filesystem the stripe reader reads the source from.
 * @param srcStat       fileStatus of the source file.
 * @param blockSize     The block size for the parity files.
 * @param parityFile    The parity file containing the corrupt block; used
 *                      for logging only.
 * @param corruptOffset The location of corruption in the parity file; it
 *                      is rounded down to the start of its block.
 * @param out           The destination for the recovered block.
 * @param progress      A reporter for progress.
 * @return the CRC of the recovered block when a checksum store is
 *         configured, otherwise null
 * @throws IOException if reading the stripe or writing the block fails
 */
public CRC32 recoverParityBlockToStream(
    FileSystem fs, FileStatus srcStat, long blockSize,
    Path parityFile, long corruptOffset,
    OutputStream out, Progressable progress) throws IOException {
  LOG.info("Recovering parity block" + parityFile + ":" + corruptOffset);
  final Path srcFile = srcStat.getPath();

  // Snap the offset to the beginning of the corrupt block.
  final long corruptBlockIdx = corruptOffset / blockSize;
  corruptOffset = corruptBlockIdx * blockSize;
  final long indexOfCorruptBlockInParityStripe =
      (corruptOffset / blockSize) % codec.parityLength;
  LOG.info("Index of corrupt block in parity stripe: " +
      indexOfCorruptBlockInParityStripe);

  final boolean trackCrc = (checksumStore != null);
  final CRC32[] crcOuts = trackCrc ? new CRC32[codec.parityLength] : null;

  // The wanted block gets the real stream; every other block is discarded.
  final OutputStream[] outs = new OutputStream[codec.parityLength];
  for (int pos = 0; pos < codec.parityLength; pos++) {
    if (pos != indexOfCorruptBlockInParityStripe) {
      outs[pos] = new NullOutputStream();
      continue;
    }
    outs[pos] = out;
    if (checksumStore != null) {
      crcOuts[pos] = new CRC32();
    }
  }

  // Locate the source stripe the corrupt parity block was computed from.
  final long stripeIdx = corruptOffset / (codec.parityLength * blockSize);
  final StripeReader sReader = StripeReader.getStripeReader(codec, conf,
      blockSize, fs, stripeIdx, srcStat);
  assert sReader.hasNext() == true;
  final InputStream[] blocks = sReader.getNextStripeInputs().getInputs();
  LOG.info("Starting recovery by using source stripe " +
      srcFile + ": stripe " + stripeIdx);
  try {
    // Re-encode the stripe; only the wanted parity block reaches out.
    encodeStripe(blocks, blockSize, outs, crcOuts, progress, false, null);
    if (checksumStore == null) {
      return null;
    }
    return crcOuts[(int) indexOfCorruptBlockInParityStripe];
  } finally {
    RaidUtils.closeStreams(blocks);
  }
}
/**
 * Encodes every stripe still offered by {@code sReader} and appends the
 * resulting parity blocks to {@code out}.
 *
 * Each stripe yields codec.parityLength parity blocks but there is only one
 * output stream, so the first parity block is written to {@code out}
 * directly while the remaining ones are spooled to local temp files and
 * copied to {@code out} once the stripe is encoded.
 *
 * For directory-raid codecs, a BlockMissingException or ChecksumException
 * raised before anything was written for the stripe triggers reconstruction
 * of the affected source files and a retry (three attempts per stripe).
 * Any other failure, or a failure after data was written, is rethrown.
 *
 * @param sReader   source of the stripes to encode
 * @param blockSize The block size for the source/parity files.
 * @param out       The destination for the parity data.
 * @param crcOuts   if non-null, receives one CRC32 per parity block in the
 *                  order the blocks are produced
 * @param reporter  progress callback
 * @throws IOException if encoding, reconstruction or copying fails
 * @throws InterruptedException declared for callers; may come from the
 *         helpers invoked here
 */
private void encodeFileToStream(StripeReader sReader,
    long blockSize, FSDataOutputStream out, CRC32[] crcOuts,
    Progressable reporter)
    throws IOException, InterruptedException {
  OutputStream[] tmpOuts = new OutputStream[codec.parityLength];
  // One parity block can be written directly to out, rest to local files.
  tmpOuts[0] = out;
  File[] tmpFiles = new File[codec.parityLength - 1];
  for (int i = 0; i < codec.parityLength - 1; i++) {
    tmpFiles[i] = File.createTempFile("parity", "_" + i);
    LOG.info("Created tmp file " + tmpFiles[i]);
    tmpFiles[i].deleteOnExit();
  }
  int finishedParityBlockIdx = 0;
  List<Integer> errorLocations = new ArrayList<Integer>();
  try {
    // Loop over stripe
    boolean redo;
    while (sReader.hasNext()) {
      reporter.progress();
      StripeInputInfo stripeInputInfo = null;
      InputStream[] blocks = null;
      // Create input streams for blocks in the stripe.
      long currentStripeIdx = sReader.getCurrentStripeIdx();
      stripeInputInfo = sReader.getNextStripeInputs();
      // The offset of first temporary output stream
      long encodeStartOffset = out.getPos();
      int retry = 3;
      do {
        redo = false;
        retry--;
        try {
          blocks = stripeInputInfo.getInputs();
          CRC32[] curCRCOuts = new CRC32[codec.parityLength];
          if (crcOuts != null) {
            for (int i = 0; i < codec.parityLength; i++) {
              crcOuts[finishedParityBlockIdx + i] = curCRCOuts[i]
                  = new CRC32();
            }
          }
          // Create output streams to the temp files.
          for (int i = 0; i < codec.parityLength - 1; i++) {
            // A failed earlier attempt leaves its stream behind; release it
            // before the file is reopened (and thereby truncated).
            if (tmpOuts[i + 1] != null) {
              tmpOuts[i + 1].close();
              tmpOuts[i + 1] = null;
            }
            tmpOuts[i + 1] = new FileOutputStream(tmpFiles[i]);
          }
          // Call the implementation of encoding.
          encodeStripe(blocks, blockSize, tmpOuts, curCRCOuts, reporter,
              true, errorLocations);
        } catch (IOException e) {
          if (out.getPos() > encodeStartOffset) {
            // Partial data is already written, throw the exception
            InjectionHandler.processEventIO(
                InjectionEvent.RAID_ENCODING_PARTIAL_STRIPE_ENCODED);
            throw e;
          }
          // try to fix the missing block in the stripe using stripe store.
          if ((e instanceof BlockMissingException ||
              e instanceof ChecksumException) && codec.isDirRaid) {
            if (retry <= 0) {
              throw e;
            }
            redo = true;
            CorruptBlockReconstructor constructor =
                new CorruptBlockReconstructor(conf);
            // Several failed inputs may belong to the same source file.
            Set<Path> srcPaths = new HashSet<Path>();
            for (int idx : errorLocations) {
              Path srcPath = stripeInputInfo.getSrcPaths()[idx];
              if (srcPath != null) {
                srcPaths.add(srcPath);
              }
            }
            for (Path srcPath : srcPaths) {
              Decoder decoder = new Decoder(conf, codec);
              decoder.connectToStore(srcPath);
              LOG.info("In Encoding: try to reconstruct the file: " + srcPath);
              // will throw exception if it fails to reconstruct the lost
              // blocks.
              constructor.processFile(srcPath, null, decoder, true, null);
              LOG.info("In Encoding: finished to reconstruct the file: " + srcPath);
            }
          } else {
            throw e;
          }
        } finally {
          if (blocks != null) {
            RaidUtils.closeStreams(blocks);
          }
        }
        if (redo) {
          // rebuild the inputs.
          stripeInputInfo = sReader.getStripeInputs(currentStripeIdx);
        }
      } while (redo);
      // Close output streams to the temp files and write the temp files
      // to the output provided.
      for (int i = 0; i < codec.parityLength - 1; i++) {
        tmpOuts[i + 1].close();
        tmpOuts[i + 1] = null;
        InputStream in = new FileInputStream(tmpFiles[i]);
        try {
          RaidUtils.copyBytes(in, out, writeBufs[i], blockSize);
        } finally {
          // Release the file descriptor of the spooled parity block.
          in.close();
        }
        reporter.progress();
      }
      finishedParityBlockIdx += codec.parityLength;
    }
  } finally {
    for (int i = 0; i < codec.parityLength - 1; i++) {
      try {
        if (tmpOuts[i + 1] != null) {
          tmpOuts[i + 1].close();
        }
      } finally {
        // Remove the temp file even if closing its stream failed.
        tmpFiles[i].delete();
        LOG.info("Deleted tmp file " + tmpFiles[i]);
      }
    }
  }
}
/**
 * Encodes one stripe: configures the buffers for {@code blockSize}, reads
 * the source blocks through a ParallelStreamReader and writes the parity
 * bytes produced by the erasure code to {@code outs}.
 *
 * Having buffers of the right size is extremely important. If the
 * buffer size is not a divisor of the block size, we may end up reading
 * across block boundaries.
 *
 * @param blocks             input streams of the stripe's source blocks
 * @param blockSize          number of bytes to produce per parity block
 * @param outs               one output stream per parity block
 * @param crcOuts            optional; non-null entries are updated with the
 *                           bytes written to the matching output
 * @param reporter           progress callback
 * @param computeSrcChecksum passed to the reader; when true the source block
 *                           checksums are collected into the checksum store
 *                           after the stripe is encoded
 * @param errorLocations     optional; on a read failure it is cleared and
 *                           filled with the indexes reported by the reader
 * @throws IOException on any read or write error, or if the thread is
 *         interrupted while waiting for data (the InterruptedException is
 *         attached as the cause)
 */
void encodeStripe(
    InputStream[] blocks,
    long blockSize,
    OutputStream[] outs,
    CRC32[] crcOuts,
    Progressable reporter,
    boolean computeSrcChecksum,
    List<Integer> errorLocations) throws IOException {
  configureBuffers(blockSize);
  int boundedBufferCapacity = 1;
  ParallelStreamReader parallelReader = new ParallelStreamReader(
      reporter, blocks, bufSize,
      parallelism, boundedBufferCapacity, blockSize, computeSrcChecksum,
      outs);
  parallelReader.start();
  try {
    for (long encoded = 0; encoded < blockSize; encoded += bufSize) {
      ParallelStreamReader.ReadResult readResult = null;
      try {
        readResult = parallelReader.getReadResult();
      } catch (InterruptedException e) {
        // NOTE(review): the interrupt status is not restored here, as
        // before; confirm whether callers rely on that.
        throw new IOException("Interrupted while waiting for read result", e);
      }
      // Cannot tolerate any IO errors.
      IOException readEx = readResult.getException();
      if (readEx != null) {
        if (errorLocations != null) {
          errorLocations.clear();
          for (int idx : readResult.getErrorIdx()) {
            errorLocations.add(idx);
          }
        }
        throw readEx;
      }
      code.encodeBulk(readResult.readBufs, writeBufs);
      reporter.progress();
      // Assume each byte is independently encoded
      int toWrite = (int) Math.min(blockSize - encoded, bufSize);
      // Now that we have some data to write, send it to the temp files.
      for (int i = 0; i < codec.parityLength; i++) {
        outs[i].write(writeBufs[i], 0, toWrite);
        if (crcOuts != null && crcOuts[i] != null) {
          crcOuts[i].update(writeBufs[i], 0, toWrite);
        }
        reporter.progress();
      }
    }
    if (computeSrcChecksum) {
      parallelReader.collectSrcBlocksChecksum(checksumStore);
    }
  } finally {
    parallelReader.shutdown();
  }
}
}