/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;
/**
* Represents a generic encoder that can generate a parity file for a source
* file.
*/
public class Encoder {
public static final Log LOG = LogFactory
.getLog("org.apache.hadoop.raid.Encoder");
public static final int DEFAULT_PARALLELISM = 4;
protected Configuration conf;
protected int parallelism;
protected Codec codec;
protected ErasureCode code;
protected Random rand;
protected int bufSize;
protected byte[][] readBufs;
protected byte[][] writeBufs;
// added by jason
public static final int DEFAULT_CODING_THREADNUM = 5;
protected int codingThreadNum;
public static final boolean DEFAULT_CODING_IFPARALLELISM = true;
protected boolean ifCodingParallelism;
// added by jason ended
/**
* A class that acts as a sink for data, similar to /dev/null.
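* Instances are handed to encodeStripe() by recoverParityBlockToStream()
* to discard the parity blocks of a stripe that are not being recovered.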
*/
static class NullOutputStream extends OutputStream {
public void write(byte[] b) throws IOException {
}
public void write(int b) throws IOException {
}
public void write(byte[] b, int off, int len) throws IOException {
}
}
Encoder(Configuration conf, Codec codec) {
this.conf = conf;
this.parallelism = conf.getInt("raid.encoder.parallelism",
DEFAULT_PARALLELISM);
this.codec = codec;
this.code = codec.createErasureCode(conf);
this.rand = new Random();
this.bufSize = conf.getInt("raid.encoder.bufsize", 1024 * 1024);
this.writeBufs = new byte[codec.parityLength][];
allocateBuffers();
// added by jason
this.codingThreadNum = conf.getInt("raid.encoder.threadnum", DEFAULT_CODING_THREADNUM);
this.ifCodingParallelism = conf.getBoolean("raid.encoder.ifparallelism", DEFAULT_CODING_IFPARALLELISM);
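// Set up the erasure code's worker threads up front so that
// encodeBulkParallel (used in encodeStripe below) has a pool ready.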
code.initThreadPool(codingThreadNum);
// added by jason ended
}
private void allocateBuffers() {
for (int i = 0; i < codec.parityLength; i++) {
writeBufs[i] = new byte[bufSize];
}
}
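// Worked example for the heuristic in configureBuffers() below, with the
// default 1 MB bufSize (illustrative numbers): a 256 MB block divides
// evenly, so the buffers are left alone; a 3,000,000-byte block does not,
// so bufSize is reset to 3,000,000 / 256 = 11,718 bytes and the buffers
// are reallocated (the result is clamped to the range [1024, 1 MB]).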
private void configureBuffers(long blockSize) {
if ((long) bufSize > blockSize) {
bufSize = (int) blockSize;
allocateBuffers();
} else if (blockSize % bufSize != 0) {
bufSize = (int) (blockSize / 256L); // heuristic.
if (bufSize == 0) {
bufSize = 1024;
}
bufSize = Math.min(bufSize, 1024 * 1024);
allocateBuffers();
}
}
/**
* The interface to use to generate a parity file. This method can be called
* multiple times with the same Encoder object, thus allowing reuse of the
* buffers allocated by the Encoder object.
*
* @param jobConf
* The configuration of the job generating the parity file.
* @param fs
* The filesystem containing the source file.
* @param srcFile
* The source file.
* @param parityFs
* The filesystem to write the parity file to.
* @param parityFile
* The parity file to be generated.
* @param parityRepl
* The replication factor for the final parity file.
* @param numStripes
* The number of stripes in the source file.
* @param blockSize
* The block size to use for the parity file.
* @param reporter
* A reporter for progress.
* @param sReader
* The StripeReader supplying the source stripes.
*/
public void encodeFile(Configuration jobConf, FileSystem fs, Path srcFile,
FileSystem parityFs, Path parityFile, short parityRepl,
long numStripes, long blockSize, Progressable reporter,
StripeReader sReader) throws IOException {
// --Test
TimeStatistics.clear();
long start = System.currentTimeMillis();
long expectedParityBlocks = numStripes * codec.parityLength;
long expectedParityFileSize = numStripes * blockSize
* codec.parityLength;
// Create a tmp file to which we will write first.
String jobID = RaidNode.getJobID(jobConf);
Path tmpDir = new Path(codec.tmpParityDirectory, jobID);
if (!parityFs.mkdirs(tmpDir)) {
throw new IOException("Could not create tmp dir " + tmpDir);
}
Path parityTmp = new Path(tmpDir, parityFile.toString().substring(1));
//LOG.info("encoder:jobID:" + jobID
// + ",parityFile:" + parityFile
// + ",parityTmp:" + parityTmp);
Path parent = parityTmp.getParent();
if (!parityFs.exists(parent)) {
if (!parityFs.mkdirs(parent)) {
throw new IOException("Could not create parent dir of parityTmp " + parent);
}
}
// Writing out a large parity file at replication 1 is difficult since
// some datanode could die and we would not be able to close() the file.
// So write at replication 2 and then reduce it after close() succeeds.
short tmpRepl = parityRepl;
if (expectedParityBlocks >= conf.getInt(
"raid.encoder.largeparity.blocks", 20)) {
if (parityRepl == 1) {
tmpRepl = 2;
}
}
FSDataOutputStream out = parityFs.create(parityTmp, true,
conf.getInt("io.file.buffer.size", 64 * 1024), tmpRepl,
blockSize);
try {
encodeFileToStream(sReader, blockSize, out, reporter);
out.close();
out = null;
LOG.info("Wrote temp parity file " + parityTmp);
FileStatus tmpStat = parityFs.getFileStatus(parityTmp);
if (tmpStat.getLen() != expectedParityFileSize) {
throw new IOException("Expected parity size "
+ expectedParityFileSize + " does not match actual "
+ tmpStat.getLen());
}
// delete destination if exists
if (parityFs.exists(parityFile)) {
parityFs.delete(parityFile, false);
}
parityFs.mkdirs(parityFile.getParent());
if (tmpRepl > parityRepl) {
parityFs.setReplication(parityTmp, parityRepl);
}
if (!parityFs.rename(parityTmp, parityFile)) {
String msg = "Unable to rename file " + parityTmp + " to "
+ parityFile;
throw new IOException(msg);
}
LOG.info("Wrote parity file " + parityFile);
} finally {
try {
if (out != null) {
out.close();
}
} finally {
parityFs.delete(parityTmp, false);
}
}
// --Test
TimeStatistics.setTotalTime(System.currentTimeMillis() - start);
}
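// Illustrative caller-side sketch (not taken from this file; the variable
// names jobConf, fs, parityFs, srcStat, and reporter are assumptions): a
// driver such as RaidNode would construct a reader for the source file and
// then invoke encodeFile() roughly as follows.
//
// Encoder encoder = new Encoder(conf, codec);
// StripeReader sReader = StripeReader.getStripeReader(codec, conf,
//         blockSize, fs, 0, srcStat);
// encoder.encodeFile(jobConf, fs, srcStat.getPath(), parityFs, parityFile,
//         (short) 1, numStripes, blockSize, reporter, sReader);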
/**
* Recovers a corrupt block in a parity file to a local file.
*
* The encoder generates codec.parityLength parity blocks for a source file
* stripe. Since we want only one of the parity blocks, this function
* creates null outputs for the blocks to be discarded.
*
* @param fs
* The filesystem in which both srcFile and parityFile reside.
* @param srcStat
* The FileStatus of the source file.
* @param blockSize
* The block size for the parity files.
* @param parityFile
* The parity file containing the corrupt block.
* @param corruptOffset
* The location of corruption in the parity file.
* @param localBlockFile
* The destination for the recovered block.
* @param progress
* A reporter for progress.
*/
public void recoverParityBlockToFile(FileSystem fs, FileStatus srcStat,
long blockSize, Path parityFile, long corruptOffset,
File localBlockFile, Progressable progress) throws IOException {
OutputStream out = new FileOutputStream(localBlockFile);
try {
recoverParityBlockToStream(fs, srcStat, blockSize, parityFile,
corruptOffset, out, progress);
} finally {
out.close();
}
}
/**
* Recovers a corrupt block in a parity file to an output stream.
*
* The encoder generates codec.parityLength parity blocks for a source file
* stripe. Since we want only one of the parity blocks, this function
* creates null outputs for the blocks to be discarded.
*
* @param fs
* The filesystem in which both srcFile and parityFile reside.
* @param srcStat
* The FileStatus of the source file.
* @param blockSize
* The block size for the parity files.
* @param parityFile
* The parity file containing the corrupt block.
* @param corruptOffset
* The location of corruption in the parity file.
* @param out
* The destination for the recovered block.
* @param progress
* A reporter for progress.
*/
public void recoverParityBlockToStream(FileSystem fs, FileStatus srcStat,
long blockSize, Path parityFile, long corruptOffset,
OutputStream out, Progressable progress) throws IOException {
LOG.info("Recovering parity block" + parityFile + ":" + corruptOffset);
Path srcFile = srcStat.getPath();
// Get the start offset of the corrupt block.
corruptOffset = (corruptOffset / blockSize) * blockSize;
// Output streams to each block in the parity file stripe.
OutputStream[] outs = new OutputStream[codec.parityLength];
long indexOfCorruptBlockInParityStripe = (corruptOffset / blockSize)
% codec.parityLength;
LOG.info("Index of corrupt block in parity stripe: "
+ indexOfCorruptBlockInParityStripe);
// Create a real output stream for the block we want to recover,
// and create null streams for the rest.
for (int i = 0; i < codec.parityLength; i++) {
if (indexOfCorruptBlockInParityStripe == i) {
outs[i] = out;
} else {
outs[i] = new NullOutputStream();
}
}
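// Worked example of the index arithmetic here: with codec.parityLength == 4
// and a corruptOffset that rounds down to 5 * blockSize, the corrupt block
// is block 5 % 4 = 1 within parity stripe 5 / 4 = 1.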
// Get the stripe index and start offset of stripe.
long stripeIdx = corruptOffset / (codec.parityLength * blockSize);
StripeReader sReader = StripeReader.getStripeReader(codec, conf,
blockSize, fs, stripeIdx, srcStat);
// Get input streams to each block in the source file stripe.
assert sReader.hasNext();
InputStream[] blocks = sReader.getNextStripeInputs();
LOG.info("Starting recovery by using source stripe " + srcFile
+ ": stripe " + stripeIdx);
try {
// Read the data from the blocks and write to the parity file.
encodeStripe(blocks, blockSize, outs, progress);
} finally {
RaidUtils.closeStreams(blocks);
}
}
/**
* Encodes a source file into parity data and writes it to an output stream.
*
* The encoder generates codec.parityLength parity blocks for each source
* file stripe. Since only one output stream is provided, all but the first
* parity block of each stripe are staged in temp files before being copied
* to the output.
*
* @param sReader
* The StripeReader supplying the source stripes.
* @param blockSize
* The block size for the source/parity files.
* @param out
* The destination for the parity data.
* @param reporter
* A reporter for progress.
*/
private void encodeFileToStream(StripeReader sReader, long blockSize,
OutputStream out, Progressable reporter) throws IOException {
OutputStream[] tmpOuts = new OutputStream[codec.parityLength];
// One parity block can be written directly to out, rest to local files.
tmpOuts[0] = out;
File[] tmpFiles = new File[codec.parityLength - 1];
for (int i = 0; i < codec.parityLength - 1; i++) {
tmpFiles[i] = File.createTempFile("parity", "_" + i);
LOG.info("Created tmp file " + tmpFiles[i]);
tmpFiles[i].deleteOnExit();
}
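// Per stripe, encodeStripe() streams the first parity block directly to
// 'out' while the remaining parityLength - 1 blocks land in the temp files;
// copying those files afterwards keeps the parity output laid out stripe by
// stripe, block by block.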
try {
// Loop over stripe
while (sReader.hasNext()) {
reporter.progress();
// Create input streams for blocks in the stripe.
InputStream[] blocks = sReader.getNextStripeInputs();
try {
// Create output streams to the temp files.
for (int i = 0; i < codec.parityLength - 1; i++) {
tmpOuts[i + 1] = new FileOutputStream(tmpFiles[i]);
}
// Call the implementation of encoding.
encodeStripe(blocks, blockSize, tmpOuts, reporter);
} finally {
RaidUtils.closeStreams(blocks);
}
// --Test
long start = System.currentTimeMillis();
// Close output streams to the temp files and write the temp
// files
// to the output provided.
for (int i = 0; i < codec.parityLength - 1; i++) {
tmpOuts[i + 1].close();
tmpOuts[i + 1] = null;
InputStream in = new FileInputStream(tmpFiles[i]);
try {
RaidUtils.copyBytes(in, out, writeBufs[i], blockSize);
} finally {
// Make sure the temp-file input stream is closed even if the copy fails.
in.close();
}
reporter.progress();
}
// --Test
TimeStatistics.addCopyTime(System.currentTimeMillis() - start);
}
} finally {
for (int i = 0; i < codec.parityLength - 1; i++) {
if (tmpOuts[i + 1] != null) {
tmpOuts[i + 1].close();
}
tmpFiles[i].delete();
LOG.info("Deleted tmp file " + tmpFiles[i]);
}
}
}
/**
* Wraps around encodeStripeImpl in order to configure buffers. Having
* buffers of the right size is extremely important. If the buffer size
* is not a divisor of the block size, we may end up reading across block
* boundaries.
*/
void encodeStripe(InputStream[] blocks, long blockSize,
OutputStream[] outs, Progressable reporter) throws IOException {
configureBuffers(blockSize);
int boundedBufferCapacity = 1;
ParallelStreamReader parallelReader = new ParallelStreamReader(
reporter, blocks, bufSize, parallelism, boundedBufferCapacity,
blockSize);
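// A bounded buffer capacity of 1 keeps the parallel reader at most one
// read result ahead of the encoding loop, bounding memory use.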
parallelReader.start();
try {
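// Each iteration pulls bufSize bytes from every source block in the stripe
// and emits bufSize bytes to each of the parityLength outputs, until a full
// block (blockSize bytes) has been produced per output.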
for (long encoded = 0; encoded < blockSize; encoded += bufSize) {
ParallelStreamReader.ReadResult readResult = null;
try {
readResult = parallelReader.getReadResult();
} catch (InterruptedException e) {
// Restore the interrupt flag before surfacing the failure as an IOException.
Thread.currentThread().interrupt();
throw new IOException(
"Interrupted while waiting for read result");
}
// Cannot tolerate any IO errors.
IOException readEx = readResult.getException();
if (readEx != null) {
throw readEx;
}
// --Test
long start = System.currentTimeMillis();
// added by jason
if (ifCodingParallelism) {
code.encodeBulkParallel(readResult.readBufs, writeBufs, codingThreadNum);
} else {
code.encodeBulk(readResult.readBufs, writeBufs);
}
// added by jason ended
// --Test
TimeStatistics.addComputeTime(System.currentTimeMillis()
- start);
reporter.progress();
// --Test
start = System.currentTimeMillis();
// Now that we have some data to write, send it to the temp
// files.
for (int i = 0; i < codec.parityLength; i++) {
outs[i].write(writeBufs[i], 0, bufSize);
reporter.progress();
}
// --Test
TimeStatistics.addWriteTime(System.currentTimeMillis() - start);
}
} finally {
// --Test
TimeStatistics.addReadTime(parallelReader.readTime);
parallelReader.shutdown();
}
}
}