/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.HarFileSystem;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.DFSClient.DFSDataInputStream;
import org.apache.hadoop.hdfs.DistributedRaidFileSystem;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.raid.StripeReader.LocationPair;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.StringUtils;
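/**
 * Miscellaneous utilities for HDFS RAID: parity file lookup,
 * corrupt-block accounting per stripe, and stream helpers.
 */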
public class RaidUtils {
public static final Log LOG = LogFactory.getLog(RaidUtils.class);
  public static final Progressable NULL_PROGRESSABLE = new Progressable() {
    /**
     * Do nothing.
     */
@Override
public void progress() {
}
};
/**
* Removes files matching the trash/tmp file pattern.
*/
public static void filterTrash(Configuration conf, List<String> files) {
filterTrash(conf, files.iterator());
}
public static void filterTrash(Configuration conf, Iterator<String> fileIt) {
// Remove files under Trash.
String trashPattern = conf.get("raid.blockfixer.trash.pattern",
"^/user/.*/\\.Trash.*|^/tmp/.*");
Pattern compiledPattern = Pattern.compile(trashPattern);
while (fileIt.hasNext()) {
Matcher m = compiledPattern.matcher(fileIt.next());
if (m.matches()) {
fileIt.remove();
}
}
}
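  /*
   * Usage sketch for filterTrash (illustrative paths, default pattern):
   *
   *   List<String> files = new ArrayList<String>(Arrays.asList(
   *       "/user/alice/.Trash/Current/data/f1",   // removed
   *       "/tmp/job_123/part-00000",              // removed
   *       "/data/warehouse/f2"));                 // kept
   *   RaidUtils.filterTrash(conf, files);
   */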
  /**
   * Holds the codec and parity file pair for a raided file.
   */
public static class RaidInfo {
public RaidInfo(final Codec codec,
final ParityFilePair parityPair,
final int parityBlocksPerStripe) {
this.codec = codec;
this.parityPair = parityPair;
this.parityBlocksPerStripe = parityBlocksPerStripe;
}
public final Codec codec;
public final ParityFilePair parityPair;
public final int parityBlocksPerStripe;
}
public static RaidInfo getFileRaidInfo(final FileStatus stat,
Configuration conf) throws IOException {
return getFileRaidInfo(stat, conf, false);
}
  /**
   * Returns the RAID info for a given file: the codec and parity file pair
   * if the file is raided, or a RaidInfo with a null codec otherwise.
   */
public static RaidInfo getFileRaidInfo(final FileStatus stat,
Configuration conf, boolean skipHarChecking)
throws IOException {
    // Look for a parity file under each configured codec.
ParityFilePair ppair = null;
for (Codec c : Codec.getCodecs()) {
ppair = ParityFilePair.getParityFile(c, stat, conf, skipHarChecking);
if (ppair != null) {
return new RaidInfo(c, ppair, c.parityLength);
}
}
return new RaidInfo(null, ppair, 0);
}
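  /*
   * Usage sketch for getFileRaidInfo (illustrative): a null codec in the
   * returned RaidInfo means no parity file exists under any codec.
   *
   *   FileStatus stat = fs.getFileStatus(new Path("/data/f"));
   *   RaidUtils.RaidInfo info = RaidUtils.getFileRaidInfo(stat, conf);
   *   if (info.codec != null) {
   *     // info.parityPair locates the parity file for info.codec
   *   }
   */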
public static void collectFileCorruptBlocksInStripe(
final DistributedFileSystem dfs,
final RaidInfo raidInfo, final FileStatus fileStatus,
final Map<Integer, Integer> corruptBlocksPerStripe)
throws IOException {
    // stripe geometry comes from the codec
final int stripeBlocks = raidInfo.codec.stripeLength;
// figure out which blocks are missing/corrupted
final Path filePath = fileStatus.getPath();
final long blockSize = fileStatus.getBlockSize();
final long fileLength = fileStatus.getLen();
final long fileLengthInBlocks = RaidNode.numBlocks(fileStatus);
final long fileStripes = RaidNode.numStripes(fileLengthInBlocks,
stripeBlocks);
final BlockLocation[] fileBlocks =
dfs.getFileBlockLocations(fileStatus, 0, fileLength);
// figure out which stripes these corrupted blocks belong to
for (BlockLocation fileBlock: fileBlocks) {
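      // Map each block to its stripe by offset. E.g. (illustrative), with a
      // 256MB block size and stripeLength 10, the block at offset 2560MB is
      // block 10, which falls in stripe 1.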
int blockNo = (int) (fileBlock.getOffset() / blockSize);
final int stripe = blockNo / stripeBlocks;
if (isBlockCorrupt(fileBlock)) {
incCorruptBlocksPerStripe(corruptBlocksPerStripe, stripe);
if (LOG.isDebugEnabled()) {
LOG.debug("file " + filePath.toString() + " corrupt in block " +
blockNo + "/" + fileLengthInBlocks + ", stripe " + stripe +
"/" + fileStripes);
}
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("file " + filePath.toString() + " OK in block " + blockNo +
"/" + fileLengthInBlocks + ", stripe " + stripe + "/" +
fileStripes);
}
}
}
checkParityBlocks(filePath, corruptBlocksPerStripe, blockSize, 0, fileStripes,
fileStripes, raidInfo);
}
public static void collectDirectoryCorruptBlocksInStripe(
final Configuration conf,
final DistributedFileSystem dfs,
final RaidInfo raidInfo, final FileStatus fileStatus,
Map<Integer, Integer> corruptBlocksPerStripe)
throws IOException {
final int stripeSize = raidInfo.codec.stripeLength;
final Path filePath = fileStatus.getPath();
final BlockLocation[] fileBlocks =
dfs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
LocationPair lp = StripeReader.getBlockLocation(raidInfo.codec, dfs,
filePath, 0, conf, raidInfo.parityPair.getListFileStatus());
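    // Translate the file's first block into directory-wide coordinates: the
    // directory layout packs the blocks of all files consecutively, so the
    // file's global block index is its stripe index times the stripe size
    // plus its offset within that stripe.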
int startBlockIdx = lp.getStripeIdx() * stripeSize +
lp.getBlockIdxInStripe();
int startStripeIdx = lp.getStripeIdx();
long endStripeIdx = RaidNode.numStripes((long)(startBlockIdx + fileBlocks.length),
stripeSize);
long blockSize = DirectoryStripeReader.getParityBlockSize(conf,
lp.getListFileStatus());
long numBlocks = DirectoryStripeReader.getBlockNum(lp.getListFileStatus());
HashMap<Integer, Integer> allCorruptBlocksPerStripe =
new HashMap<Integer, Integer>();
checkParityBlocks(filePath, allCorruptBlocksPerStripe, blockSize,
startStripeIdx, endStripeIdx,
RaidNode.numStripes(numBlocks, stripeSize), raidInfo);
DirectoryStripeReader sReader = new DirectoryStripeReader(conf,
raidInfo.codec, dfs, lp.getStripeIdx(), -1L, filePath.getParent(),
lp.getListFileStatus());
// Get the corrupt block information for all stripes related to the file
while (sReader.getCurrentStripeIdx() < endStripeIdx) {
int stripe = (int)sReader.getCurrentStripeIdx();
BlockLocation[] bls = sReader.getNextStripeBlockLocations();
for (BlockLocation bl : bls) {
if (isBlockCorrupt(bl)) {
incCorruptBlocksPerStripe(allCorruptBlocksPerStripe,
stripe);
}
}
}
// figure out which stripes these corrupted blocks belong to
for (BlockLocation fileBlock: fileBlocks) {
int blockNo = startBlockIdx + (int) (fileBlock.getOffset() /
fileStatus.getBlockSize());
final int stripe = blockNo / stripeSize;
if (isBlockCorrupt(fileBlock)) {
corruptBlocksPerStripe.put(stripe, allCorruptBlocksPerStripe.get(stripe));
if (LOG.isDebugEnabled()) {
LOG.debug("file " + filePath.toString() + " corrupt in block " +
blockNo + ", stripe " + stripe);
}
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("file " + filePath.toString() + " OK in block " +
blockNo + ", stripe " + stripe);
}
}
}
}
  /**
   * Gets the parity blocks corresponding to a file:
   * the parity blocks themselves in the case of DFS,
   * or the HAR part-file blocks that contain them
   * in the case of a HAR file system.
   */
private static BlockLocation[] getParityBlocks(final Path filePath,
final long blockSize,
final long numStripes,
final RaidInfo raidInfo)
throws IOException {
FileSystem parityFS = raidInfo.parityPair.getFileSystem();
// get parity file metadata
FileStatus parityFileStatus = raidInfo.parityPair.getFileStatus();
long parityFileLength = parityFileStatus.getLen();
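    // The parity file holds exactly parityBlocksPerStripe blocks per source
    // stripe, so its length is fully determined by the stripe count.
    // E.g. (illustrative), 100 stripes * 4 parity blocks * 128MB blocks
    // = 51200MB.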
if (parityFileLength != numStripes * raidInfo.parityBlocksPerStripe *
blockSize) {
throw new IOException("expected parity file of length" +
(numStripes * raidInfo.parityBlocksPerStripe *
blockSize) +
" but got parity file of length " +
parityFileLength);
}
BlockLocation[] parityBlocks =
parityFS.getFileBlockLocations(parityFileStatus, 0L, parityFileLength);
if (parityFS instanceof DistributedFileSystem ||
parityFS instanceof DistributedRaidFileSystem) {
long parityBlockSize = parityFileStatus.getBlockSize();
if (parityBlockSize != blockSize) {
throw new IOException("file block size is " + blockSize +
" but parity file block size is " +
parityBlockSize);
}
} else if (parityFS instanceof HarFileSystem) {
LOG.debug("HAR FS found");
} else {
LOG.warn("parity file system is not of a supported type");
}
return parityBlocks;
}
  /**
   * Checks the parity blocks for a given file and updates
   * corruptBlocksPerStripe accordingly.
   */
private static void checkParityBlocks(final Path filePath,
final Map<Integer, Integer>
corruptBlocksPerStripe,
final long blockSize,
final long startStripeIdx,
final long endStripeIdx,
final long numStripes,
final RaidInfo raidInfo)
throws IOException {
    // Get the blocks of the parity file. Because of HAR, multiple parity
    // blocks may be returned as one container block.
BlockLocation[] containerBlocks = getParityBlocks(filePath, blockSize,
numStripes, raidInfo);
long parityStripeLength = blockSize *
((long) raidInfo.parityBlocksPerStripe);
long parityBlocksFound = 0L;
for (BlockLocation cb: containerBlocks) {
if (cb.getLength() % blockSize != 0) {
throw new IOException("container block size is not " +
"multiple of parity block size");
}
if (LOG.isDebugEnabled()) {
LOG.debug("found container with offset " + cb.getOffset() +
", length " + cb.getLength());
}
for (long offset = cb.getOffset();
offset < cb.getOffset() + cb.getLength();
offset += blockSize) {
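        // A container block may pack several parity blocks back to back;
        // step through it in block-size increments and map each parity block
        // to its stripe. E.g. (illustrative), with blockSize 128MB and 4
        // parity blocks per stripe, the parity block at offset 640MB is
        // block 5, stripe 1.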
long block = offset / blockSize;
int stripe = (int) (offset / parityStripeLength);
if (stripe < 0) {
// before the beginning of the parity file
continue;
}
if (stripe >= numStripes) {
// past the end of the parity file
break;
}
parityBlocksFound++;
if (stripe < startStripeIdx || stripe >= endStripeIdx) {
continue;
}
if (isBlockCorrupt(cb)) {
if (LOG.isDebugEnabled()) {
LOG.debug("parity file for " + filePath.toString() +
" corrupt in block " + block +
", stripe " + stripe + "/" + numStripes);
}
incCorruptBlocksPerStripe(corruptBlocksPerStripe, stripe);
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("parity file for " + filePath.toString() +
" OK in block " + block +
", stripe " + stripe + "/" + numStripes);
}
}
}
}
long parityBlocksExpected = raidInfo.parityBlocksPerStripe * numStripes;
    if (parityBlocksFound != parityBlocksExpected) {
throw new IOException("expected " + parityBlocksExpected +
" parity blocks but got " + parityBlocksFound);
}
}
private static void incCorruptBlocksPerStripe(Map<Integer, Integer>
corruptBlocksPerStripe, int stripe) {
Integer value = corruptBlocksPerStripe.get(stripe);
if (value == null) {
value = 0;
}
corruptBlocksPerStripe.put(stripe, value + 1);
}
private static boolean isBlockCorrupt(BlockLocation fileBlock)
throws IOException {
    if (fileBlock == null) {
      // null location: treat as an empty block
      return false;
    }
return fileBlock.isCorrupt() ||
(fileBlock.getNames().length == 0 && fileBlock.getLength() > 0);
}
  /**
   * Returns the source file path corresponding to a parity file,
   * or null if no matching source file exists.
   * @throws IOException
   */
public static Path sourcePathFromParityPath(Path parityPath, FileSystem fs)
throws IOException {
String parityPathStr = parityPath.toUri().getPath();
for (Codec codec : Codec.getCodecs()) {
String prefix = codec.getParityPrefix();
if (parityPathStr.startsWith(prefix)) {
// Remove the prefix to get the source file.
String src = parityPathStr.replaceFirst(prefix, Path.SEPARATOR);
Path srcPath = new Path(src);
if (fs.exists(srcPath)) {
return srcPath;
}
}
}
return null;
}
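  /*
   * Illustrative example (parity prefixes are codec-configuration
   * dependent): if a codec's parity prefix is "/raidrs", the parity path
   * "/raidrs/data/f" maps back to the source path "/data/f", provided that
   * source still exists.
   */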
public static int readTillEnd(InputStream in, byte[] buf, boolean eofOK,
long endOffset, int toRead)
throws IOException {
int numRead = 0;
while (numRead < toRead) {
int readLen = toRead - numRead;
      if (in instanceof DFSDataInputStream) {
        // Use a long to avoid int overflow when the remaining range is large.
        long available = endOffset - ((DFSDataInputStream)in).getPos();
        if (available < readLen) {
          readLen = (int) available;
        }
      }
      int nread = readLen > 0 ? in.read(buf, numRead, readLen) : 0;
if (nread < 0) {
if (eofOK) {
// EOF hit, fill with zeros
Arrays.fill(buf, numRead, toRead, (byte)0);
break;
} else {
// EOF hit, throw.
throw new IOException("Premature EOF");
}
} else if (nread == 0) {
        // reached endOffset, fill the rest with zeros
Arrays.fill(buf, numRead, toRead, (byte)0);
break;
} else {
numRead += nread;
}
}
    // A ZeroInputStream carries no real data, so report zero bytes read.
if (in instanceof ZeroInputStream) {
return 0;
}
return numRead;
}
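  /*
   * Usage sketch for readTillEnd (illustrative names): read one buffer's
   * worth of data, zero-padding if the stream ends early (eofOK == true),
   * as a decoder does for partial stripes.
   *
   *   byte[] buf = new byte[bufSize];
   *   int n = RaidUtils.readTillEnd(in, buf, true, endOffset, bufSize);
   */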
public static void copyBytes(
InputStream in, OutputStream out, byte[] buf, long count)
throws IOException {
for (long bytesRead = 0; bytesRead < count; ) {
int toRead = Math.min(buf.length, (int)(count - bytesRead));
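      // IOUtils.readFully throws an IOException on premature EOF, so a
      // source shorter than count fails loudly here.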
IOUtils.readFully(in, buf, 0, toRead);
bytesRead += toRead;
out.write(buf, 0, toRead);
}
}
/**
* Parse a condensed configuration option and set key:value pairs.
* @param conf the configuration object.
* @param optionKey the name of condensed option. The value corresponding
* to this should be formatted as key:value,key:value...
*/
public static void parseAndSetOptions(
Configuration conf, String optionKey) {
String optionValue = conf.get(optionKey);
if (optionValue != null) {
RaidNode.LOG.info("Parsing option " + optionKey);
// Parse the option value to get key:value pairs.
String[] keyValues = optionValue.trim().split(",");
      for (String keyValue: keyValues) {
        // Split on the first ':' only, and skip malformed pairs instead of
        // throwing ArrayIndexOutOfBoundsException.
        String[] fields = keyValue.trim().split(":", 2);
        if (fields.length != 2) {
          RaidNode.LOG.error("Malformed key:value pair " + keyValue +
                             " in option " + optionKey);
          continue;
        }
        String key = fields[0].trim();
        String value = fields[1].trim();
        conf.set(key, value);
      }
} else {
RaidNode.LOG.error("Option " + optionKey + " not found");
}
}
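  /*
   * Example for parseAndSetOptions (hypothetical option and key names):
   *
   *   conf.set("raid.scheduler.options",
   *            "mapred.priority:HIGH,mapred.map.tasks:10");
   *   RaidUtils.parseAndSetOptions(conf, "raid.scheduler.options");
   *   // sets mapred.priority=HIGH and mapred.map.tasks=10 on conf
   */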
public static void closeStreams(InputStream[] streams) throws IOException {
for (InputStream stm: streams) {
if (stm != null) {
stm.close();
}
}
}
public static class ZeroInputStream extends InputStream
implements Seekable, PositionedReadable {
private long endOffset;
private long pos;
public ZeroInputStream(long endOffset) {
this.endOffset = endOffset;
this.pos = 0;
}
@Override
public int read() throws IOException {
if (pos < endOffset) {
pos++;
return 0;
}
return -1;
}
    @Override
    public int available() throws IOException {
      // Clamp to Integer.MAX_VALUE to avoid int overflow for long streams.
      return (int) Math.min(Integer.MAX_VALUE, endOffset - pos);
    }
@Override
public long getPos() throws IOException {
return pos;
}
@Override
public void seek(long seekOffset) throws IOException {
if (seekOffset < endOffset) {
pos = seekOffset;
} else {
throw new IOException("Illegal Offset" + pos);
}
}
@Override
public boolean seekToNewSource(long targetPos) throws IOException {
return false;
}
@Override
public int read(long position, byte[] buffer, int offset, int length)
throws IOException {
int count = 0;
for (; position < endOffset && count < length; position++) {
buffer[offset + count] = 0;
count++;
}
return count;
}
@Override
public void readFully(long position, byte[] buffer, int offset, int length)
throws IOException {
int count = 0;
for (; position < endOffset && count < length; position++) {
buffer[offset + count] = 0;
count++;
}
if (count < length) {
throw new IOException("Premature EOF");
}
}
@Override
public void readFully(long position, byte[] buffer) throws IOException {
readFully(position, buffer, 0, buffer.length);
}
public List<ByteBuffer> readFullyScatterGather(long position, int length)
throws IOException {
throw new IOException("ScatterGather not implemeted for Raid.");
}
}
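  /*
   * Usage sketch for ZeroInputStream (illustrative): stand in for a missing
   * or empty block so a reader sees zeros instead of failing.
   *
   *   InputStream in = new RaidUtils.ZeroInputStream(blockSize);
   */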
private static int computeMaxMissingBlocks() {
int max = 0;
for (Codec codec : Codec.getCodecs()) {
if (max < codec.stripeLength + codec.parityLength) {
max = codec.stripeLength + codec.parityLength;
}
}
return max;
}
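  /*
   * E.g. (illustrative), a codec with stripeLength 10 and parityLength 4
   * yields 14, the largest number of missing blocks any one stripe can have.
   */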
public static String getMissingBlksHtmlTable(long numNonRaidedMissingBlocks,
Map<String, long[]> numStrpWithMissingBlksMap) {
int max = computeMaxMissingBlocks();
String head = "";
for (int i = 1; i <= max; ++i) {
head += JspUtils.td(i + "");
}
head = JspUtils.tr(JspUtils.td("CODE") + head);
String result = head;
String row = JspUtils.td("Not Raided");
row += JspUtils.td(StringUtils.humanReadableInt(
numNonRaidedMissingBlocks));
row = JspUtils.tr(row);
result += row;
for (Codec codec : Codec.getCodecs()) {
String code = codec.id;
row = JspUtils.td(code);
long[] numStrps = numStrpWithMissingBlksMap.get(code);
if (null == numStrps) {
continue;
}
for (int i = 0; i < numStrps.length; ++i) {
row += JspUtils.td(StringUtils.humanReadableInt(
numStrps[i] * (i + 1)));
}
row = JspUtils.tr(row);
result += row;
}
return JspUtils.table(result);
}
}