/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.azure;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Locale;
import java.util.List;
import java.util.UUID;
import java.util.Random;
import java.util.TimeZone;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.fs.FSExceptionMessages;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.fs.azure.StorageInterface.CloudBlockBlobWrapper;
import org.mortbay.log.Log;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.microsoft.azure.storage.AccessCondition;
import com.microsoft.azure.storage.OperationContext;
import com.microsoft.azure.storage.StorageException;
import com.microsoft.azure.storage.blob.BlobRequestOptions;
import com.microsoft.azure.storage.blob.BlockEntry;
import com.microsoft.azure.storage.blob.BlockListingFilter;
/**
 * Stream object that implements append for Block Blobs in WASB.
 *
 * <p>Lifecycle, as enforced by the code below:
 * <ol>
 *   <li>The constructor reads the committed block list of the blob and sets
 *       the {@link #APPEND_LEASE} metadata on it.</li>
 *   <li>{@link #initialize()} must be called before any write or close; it
 *       starts the lease renewer thread and the uploader thread pool.</li>
 *   <li>Writes are buffered and handed to the pool in blocks of
 *       {@code bufferSize} bytes; the blocks only become part of the blob
 *       when {@link #close()} commits the block list.</li>
 * </ol>
 */
public class BlockBlobAppendStream extends OutputStream {

  /** Name of the blob; used in log messages and thread names. */
  private final String key;

  /** Size in bytes of every block handed to the uploader pool by a write. */
  private final int bufferSize;

  /** Bytes accepted by write calls that have not been queued for upload yet. */
  private ByteArrayOutputStream outBuffer;

  private final CloudBlockBlobWrapper blob;

  private final OperationContext opContext;

  /**
   * Variable to track if the stream has been closed.
   */
  private boolean closed = false;

  /**
   * Variable to track if the append lease is released.
   */
  private volatile boolean leaseFreed;

  /**
   * Variable to track if the append stream has been
   * initialized.
   */
  private boolean initialized = false;

  /**
   * Last IOException encountered. Written by the uploader threads and by
   * cleanup, read by close.
   */
  private volatile IOException lastError = null;

  /**
   * List to keep track of the uncommitted azure storage
   * block ids.
   */
  private final List<BlockEntry> uncommittedBlockEntries;

  private static final int UNSET_BLOCKS_COUNT = -1;

  /**
   * Variable to hold the next block id to be used for azure
   * storage blocks.
   */
  private long nextBlockCount = UNSET_BLOCKS_COUNT;

  /**
   * Variable to hold the block id prefix to be used for azure
   * storage blocks from azure-storage-java sdk version 4.2.0 onwards.
   * An empty string selects the older (2.2.0) block id format.
   */
  private String blockIdPrefix = null;

  private final Random sequenceGenerator = new Random();

  /**
   * Time to wait to renew lease in milliseconds.
   */
  private static final int LEASE_RENEWAL_PERIOD = 10000;

  /**
   * Number of times to retry for lease renewal.
   */
  private static final int MAX_LEASE_RENEWAL_RETRY_COUNT = 3;

  /**
   * Time to wait before retrying to set the lease, in milliseconds.
   */
  private static final int LEASE_RENEWAL_RETRY_SLEEP_PERIOD = 500;

  /**
   * Metadata key used on the blob to indicate append lease is active.
   */
  public static final String APPEND_LEASE = "append_lease";

  /**
   * Timeout value for the append lease in millisecs. If the lease is not
   * renewed within 30 seconds then another thread can acquire the append lease
   * on the blob.
   */
  public static final int APPEND_LEASE_TIMEOUT = 30000;

  /**
   * Metadata key used on the blob to indicate last modified time of append lease.
   */
  public static final String APPEND_LEASE_LAST_MODIFIED = "append_lease_last_modified";

  /**
   * Number of times a block upload is attempted before it is given up.
   */
  private static final int MAX_BLOCK_UPLOAD_RETRIES = 3;

  /**
   * Wait time between block upload retries in millisecs.
   */
  private static final int BLOCK_UPLOAD_RETRY_INTERVAL = 1000;

  private static final Logger LOG = LoggerFactory.getLogger(BlockBlobAppendStream.class);

  /** Head room left below Integer.MAX_VALUE when seeding older-format block ids. */
  private static final int MAX_BLOCK_COUNT = 100000;

  /** Pool running the block uploads; created by {@link #initialize()}. */
  private ThreadPoolExecutor ioThreadPool;

  /**
   * Atomic integer to provide thread id for thread names for uploader threads.
   */
  private final AtomicInteger threadSequenceNumber;

  /**
   * Prefix to be used for thread names for uploader threads.
   */
  private static final String THREAD_ID_PREFIX = "BlockBlobAppendStream";

  /**
   * Creates the stream and sets the append lease metadata on the blob.
   *
   * @param blob the block blob to append to.
   * @param aKey name of the blob; must not be null or empty.
   * @param bufferSize size in bytes of the blocks to upload; must be positive.
   * @param opContext operation context passed to every storage call.
   * @throws IllegalArgumentException if aKey is null/empty or bufferSize is
   *         not positive.
   * @throws IOException if the block list cannot be read or the append lease
   *         cannot be set, e.g. because another stream holds it.
   */
  public BlockBlobAppendStream(final CloudBlockBlobWrapper blob,
      final String aKey, final int bufferSize, final OperationContext opContext)
      throws IOException {

    if (null == aKey || 0 == aKey.length()) {
      throw new IllegalArgumentException(
          "Illegal argument: The key string is null or empty");
    }

    if (0 >= bufferSize) {
      throw new IllegalArgumentException(
          "Illegal argument bufferSize cannot be zero or negative");
    }

    this.blob = blob;
    this.opContext = opContext;
    this.key = aKey;
    this.bufferSize = bufferSize;
    this.threadSequenceNumber = new AtomicInteger(0);
    this.blockIdPrefix = null;
    setBlocksCountAndBlockIdPrefix();

    this.outBuffer = new ByteArrayOutputStream(bufferSize);
    this.uncommittedBlockEntries = new ArrayList<BlockEntry>();

    // Acquire append lease on the blob.
    try {
      // Set the append lease if the value of the append lease is false
      if (!updateBlobAppendMetadata(true, false)) {
        LOG.error("Unable to set Append Lease on the Blob : {} "
            + "Possibly because another client already has a create or append stream open on the Blob", key);
        throw new IOException("Unable to set Append lease on the Blob. "
            + "Possibly because another client already had an append stream open on the Blob.");
      }
    } catch (StorageException ex) {
      LOG.error("Encountered Storage exception while acquiring append "
          + "lease on blob : {}. Storage Exception : {} ErrorCode : {}",
          key, ex, ex.getErrorCode());
      throw new IOException(ex);
    }

    leaseFreed = false;
  }

  /**
   * Helper method that starts an Append Lease renewer thread and the
   * thread pool. Calling it again after the first call has no effect.
   */
  public synchronized void initialize() {

    if (initialized) {
      return;
    }

    /*
     * Start the thread for Append lease renewer.
     */
    Thread appendLeaseRenewer = new Thread(new AppendRenewer());
    appendLeaseRenewer.setDaemon(true);
    appendLeaseRenewer.setName(String.format("%s-AppendLeaseRenewer", key));
    appendLeaseRenewer.start();

    /*
     * Parameters to ThreadPoolExecutor:
     * corePoolSize : the number of threads to keep in the pool, even if they are idle,
     *                unless allowCoreThreadTimeOut is set
     * maximumPoolSize : the maximum number of threads to allow in the pool
     * keepAliveTime - when the number of threads is greater than the core,
     *                 this is the maximum time that excess idle threads will
     *                 wait for new tasks before terminating.
     * unit - the time unit for the keepAliveTime argument
     * workQueue - the queue to use for holding tasks before they are executed
     *  This queue will hold only the Runnable tasks submitted by the execute method.
     */
    this.ioThreadPool = new ThreadPoolExecutor(4, 4, 2, TimeUnit.SECONDS,
        new LinkedBlockingQueue<Runnable>(), new UploaderThreadFactory());

    initialized = true;
  }

  /**
   * Get the blob name.
   *
   * @return String Blob name.
   */
  public String getKey() {
    return key;
  }

  /**
   * Get the buffer size of the stream.
   *
   * @return buffer size of the stream.
   */
  public int getBufferSize() {
    return bufferSize;
  }

  /**
   * Writes the specified byte to this output stream. The general contract for
   * write is that one byte is written to the output stream. The byte to be
   * written is the eight low-order bits of the argument b. The 24 high-order
   * bits of b are ignored.
   *
   * @param byteVal
   *          the byteValue to write.
   * @throws IOException
   *           if an I/O error occurs. In particular, an IOException may be
   *           thrown if the output stream has been closed.
   */
  @Override
  public void write(final int byteVal) throws IOException {
    write(new byte[] { (byte) (byteVal & 0xFF) });
  }

  /**
   * Writes b.length bytes from the specified byte array to this output stream.
   *
   * @param data
   *          the byte array to write.
   *
   * @throws IOException
   *           if an I/O error occurs. In particular, an IOException may be
   *           thrown if the output stream has been closed.
   */
  @Override
  public void write(final byte[] data) throws IOException {
    write(data, 0, data.length);
  }

  /**
   * Writes length bytes from the specified byte array starting at offset to
   * this output stream.
   *
   * @param data
   *          the byte array to write.
   * @param offset
   *          the start offset in the data.
   * @param length
   *          the number of bytes to write.
   * @throws IndexOutOfBoundsException
   *           if offset or length is negative, or the range does not fit in
   *           data.
   * @throws IOException
   *           if an I/O error occurs. In particular, an IOException may be
   *           thrown if the output stream has been closed.
   */
  @Override
  public void write(final byte[] data, final int offset, final int length)
      throws IOException {

    if (offset < 0 || length < 0 || length > data.length - offset) {
      throw new IndexOutOfBoundsException("write API in append stream called with invalid arguments");
    }

    writeInternal(data, offset, length);
  }

  /**
   * Queues the remaining buffered bytes for upload, waits for all uploads to
   * finish, commits the uploaded blocks if no upload failed and releases the
   * append lease. Closing an already closed stream is a no-op.
   *
   * <p>Once the flush has been attempted, the append lease is released and
   * the uploader pool is stopped even when the close fails, so that a failed
   * close does not leave the blob locked or threads running.
   *
   * <p>NOTE(review): this method holds the monitor of this stream while it
   * waits for the uploads, and the lease renewal also synchronizes on this
   * stream, so the lease is not renewed while close is waiting.
   *
   * @throws IOException if the stream was never initialized, no longer holds
   *         the append lease, or if uploading, committing or releasing the
   *         lease failed.
   */
  @Override
  public synchronized void close() throws IOException {

    if (!initialized) {
      throw new IOException("Trying to close an uninitialized Append stream");
    }

    if (closed) {
      return;
    }

    if (leaseFreed) {
      // Nothing can be committed without the lease, but the pool must still be
      // stopped: its core threads are otherwise kept around forever.
      ioThreadPool.shutdownNow();
      throw new IOException(String.format("Attempting to close an append stream on blob : %s "
          + " that does not have lease on the Blob. Failing close", key));
    }

    try {
      if (outBuffer.size() > 0) {
        uploadBlockToStorage(outBuffer.toByteArray());
      }

      ioThreadPool.shutdown();

      try {
        if (!ioThreadPool.awaitTermination(10, TimeUnit.MINUTES)) {
          LOG.error("Time out occured while waiting for IO request to finish in append"
              + " for blob : {}", key);
          NativeAzureFileSystemHelper.logAllLiveStackTraces();
          throw new IOException("Timed out waiting for IO requests to finish");
        }
      } catch (InterruptedException intrEx) {
        // Restore the interrupted status
        Thread.currentThread().interrupt();
        LOG.error("Upload block operation in append interrupted for blob {}. Failing close", key);
        throw new IOException("Append Commit interrupted.");
      }

      // Calling commit after all blocks are successfully uploaded.
      if (lastError == null) {
        commitAppendBlocks();
      }
    } finally {
      // No-op when the pool terminated normally; stops leftover uploads when
      // the close is being abandoned because of a failure above.
      ioThreadPool.shutdownNow();
      // Perform cleanup on every exit path so the append lease is released.
      cleanup();
    }

    if (lastError != null) {
      throw lastError;
    }
  }

  /**
   * Helper method that cleans up the append stream: marks it closed and
   * resets the append lease metadata on the blob. A failure to reset the
   * metadata is recorded in lastError without hiding an earlier error.
   */
  private synchronized void cleanup() {

    closed = true;

    try {
      // Set the value of append lease to false if the value is set to true.
      updateBlobAppendMetadata(false, true);
    } catch (StorageException ex) {
      LOG.debug("Append metadata update on the Blob : {} encountered Storage Exception : {} "
          + "Error Code : {}",
          key, ex, ex.getErrorCode());
      final IOException releaseFailure = new IOException(ex);
      final IOException earlierError = lastError;
      if (earlierError == null) {
        lastError = releaseFailure;
      } else {
        // Keep the first failure (e.g. a failed upload) as the primary error.
        earlierError.addSuppressed(releaseFailure);
      }
    }

    leaseFreed = true;
  }

  /**
   * Method to commit all the uncommitted blocks to azure storage.
   * If the commit fails then blocks are automatically cleaned up
   * by Azure storage.
   *
   * @throws IOException if a storage call fails during the commit.
   */
  private synchronized void commitAppendBlocks() throws IOException {

    if (uncommittedBlockEntries.isEmpty()) {
      return;
    }

    SelfRenewingLease lease = null;

    try {
      // Acquiring lease on the blob.
      lease = new SelfRenewingLease(blob);

      // Downloading existing blocks
      List<BlockEntry> blockEntries = blob.downloadBlockList(BlockListingFilter.COMMITTED,
          new BlobRequestOptions(), opContext);

      // Adding uncommitted blocks.
      blockEntries.addAll(uncommittedBlockEntries);

      AccessCondition accessCondition = new AccessCondition();
      accessCondition.setLeaseID(lease.getLeaseID());
      blob.commitBlockList(blockEntries, accessCondition, new BlobRequestOptions(), opContext);
      uncommittedBlockEntries.clear();
    } catch (StorageException ex) {
      LOG.error("Storage exception encountered during block commit phase of append for blob"
          + " : {} Storage Exception : {} Error Code: {}", key, ex, ex.getErrorCode());
      throw new IOException("Encountered Exception while committing append blocks", ex);
    } finally {
      if (lease != null) {
        try {
          lease.free();
        } catch (StorageException ex) {
          LOG.debug("Exception encountered while releasing lease for "
              + "blob : {} StorageException : {} ErrorCode : {}", key, ex, ex.getErrorCode());
          // Swallowing exception here as the lease is cleaned up by the SelfRenewingLease object.
        }
      }
    }
  }

  /**
   * Helper method used to generate the blockIDs. The algorithm used is similar to the Azure
   * storage SDK. Chooses between the older (2.2.0) and newer (4.2.0) block id
   * format based on the length of the first committed block id of the blob.
   *
   * @throws IOException if the committed block list cannot be downloaded.
   */
  private void setBlocksCountAndBlockIdPrefix() throws IOException {

    try {
      if (nextBlockCount == UNSET_BLOCKS_COUNT && blockIdPrefix == null) {

        List<BlockEntry> blockEntries =
            blob.downloadBlockList(BlockListingFilter.COMMITTED, new BlobRequestOptions(), opContext);

        String blockZeroBlockId = (blockEntries.size() > 0) ? blockEntries.get(0).getId() : "";
        String prefix = UUID.randomUUID().toString() + "-";
        String sampleNewerVersionBlockId = generateNewerVersionBlockId(prefix, 0);

        if (blockEntries.size() > 0 && blockZeroBlockId.length() < sampleNewerVersionBlockId.length()) {

          // If blob has already been created with 2.2.0, append subsequent blocks with older version (2.2.0) blockId
          // compute nextBlockCount, the way it was done before; and don't use blockIdPrefix
          this.blockIdPrefix = "";
          nextBlockCount = (long) (sequenceGenerator.nextInt(Integer.MAX_VALUE))
              + sequenceGenerator.nextInt(Integer.MAX_VALUE - MAX_BLOCK_COUNT);
          nextBlockCount += blockEntries.size();

        } else {

          // If there are no existing blocks, create the first block with newer version (4.2.0) blockId
          // If blob has already been created with 4.2.0, append subsequent blocks with newer version (4.2.0) blockId
          this.blockIdPrefix = prefix;
          nextBlockCount = blockEntries.size();
        }
      }
    } catch (StorageException ex) {
      LOG.debug("Encountered storage exception during setting next Block Count and BlockId prefix."
          + " StorageException : {} ErrorCode : {}", ex, ex.getErrorCode());
      throw new IOException(ex);
    }
  }

  /**
   * Helper method that generates the next block id for uploading a block to azure storage.
   *
   * @return String representing the block ID generated.
   * @throws IOException if the block counter or the block id prefix was never set.
   */
  private String generateBlockId() throws IOException {

    if (nextBlockCount == UNSET_BLOCKS_COUNT) {
      throw new IOException("Append Stream in invalid state. nextBlockCount not set correctly");
    }

    if (this.blockIdPrefix == null) {
      throw new IOException("Append Stream in invalid state. blockIdPrefix not set correctly");
    }

    if (!this.blockIdPrefix.equals("")) {
      return generateNewerVersionBlockId(this.blockIdPrefix, nextBlockCount++);
    } else {
      return generateOlderVersionBlockId(nextBlockCount++);
    }
  }

  /**
   * Helper method that generates an older (2.2.0) version blockId.
   *
   * @param id the block sequence number.
   * @return String representing the block ID generated.
   */
  private String generateOlderVersionBlockId(long id) {
    byte[] blockIdInBytes = getBytesFromLong(id);
    return new String(Base64.encodeBase64(blockIdInBytes), StandardCharsets.UTF_8);
  }

  /**
   * Helper method that generates a newer (4.2.0) version blockId.
   *
   * @param prefix the block id prefix.
   * @param id the block sequence number.
   * @return String representing the block ID generated.
   */
  private String generateNewerVersionBlockId(String prefix, long id) {
    // Locale.ROOT keeps the digits ASCII; with the default locale the digits,
    // and therefore the encoded block id, could differ between JVMs.
    String blockIdSuffix = String.format(Locale.ROOT, "%06d", id);
    byte[] blockIdInBytes = (prefix + blockIdSuffix).getBytes(StandardCharsets.UTF_8);
    return new String(Base64.encodeBase64(blockIdInBytes), StandardCharsets.UTF_8);
  }

  /**
   * Returns a byte array that represents the data of a <code>long</code> value. This
   * utility method is copied from com.microsoft.azure.storage.core.Utility class.
   * This class is marked as internal, hence we clone the method here and not express
   * dependency on the Utility Class
   *
   * @param value
   *            The value from which the byte array will be returned.
   *
   * @return A byte array that represents the data of the specified <code>long</code> value,
   *         most significant byte first.
   */
  private static byte[] getBytesFromLong(final long value) {

    final byte[] tempArray = new byte[8];

    for (int m = 0; m < 8; m++) {
      tempArray[7 - m] = (byte) ((value >> (8 * m)) & 0xFF);
    }

    return tempArray;
  }

  /**
   * Helper method that hands a block to the uploader pool for upload to azure storage.
   *
   * @param payload the bytes of the block.
   * @throws IOException if no block id can be generated.
   */
  private synchronized void uploadBlockToStorage(byte[] payload)
      throws IOException {

    // upload payload to azure storage
    String blockId = generateBlockId();

    // Since uploads of the Azure storage are done in parallel threads, we go ahead
    // add the blockId in the uncommitted list. If the upload of the block fails
    // we don't commit the blockIds.
    BlockEntry blockEntry = new BlockEntry(blockId);
    blockEntry.setSize(payload.length);
    uncommittedBlockEntries.add(blockEntry);
    ioThreadPool.execute(new WriteRequest(payload, blockId));
  }

  /**
   * Helper method to update the Blob metadata during Append lease operations.
   * Blob metadata is updated to holdLease value only if the current lease
   * status is equal to testCondition and the last update on the blob metadata
   * is less than 30 secs old.
   *
   * <p>NOTE(review): if the append lease key is present but the last-modified
   * key is missing or not a number, the Long.parseLong call below throws an
   * unchecked NumberFormatException.
   *
   * @param holdLease value to store for the append lease.
   * @param testCondition lease value that must currently be stored, unless
   *        the stored lease has timed out or is absent.
   * @return true if the updated lease operation was successful or false otherwise
   * @throws StorageException the last storage failure once all retries are used up.
   */
  private boolean updateBlobAppendMetadata(boolean holdLease, boolean testCondition)
      throws StorageException {

    StorageException lastStorageException = null;
    int leaseRenewalRetryCount = 0;

    /*
     * Updating the Blob metadata honours following algorithm based on
     *  1) If the append lease metadata is present
     *  2) Last updated time of the append lease
     *  3) Previous value of the Append lease metadata.
     *
     * The algorithm:
     *  1) If append lease metadata is not part of the Blob. In this case
     *     this is the first client to Append so we update the metadata.
     *  2) If append lease metadata is present and timeout has occurred.
     *     In this case irrespective of what the value of the append lease is we update the metadata.
     *  3) If append lease metadata is present and is equal to testCondition value (passed as parameter)
     *     and timeout has not occurred, we update the metadata.
     *  4) If append lease metadata is present and is not equal to testCondition value (passed as parameter)
     *     and timeout has not occurred, we do not update metadata and return false.
     *
     */
    while (leaseRenewalRetryCount < MAX_LEASE_RENEWAL_RETRY_COUNT) {

      lastStorageException = null;

      synchronized (this) {
        SelfRenewingLease lease = null;
        try {
          // Milliseconds since the epoch; independent of time zone and locale.
          final long currentTime = System.currentTimeMillis();

          // Acquire lease on the blob.
          lease = new SelfRenewingLease(blob);

          blob.downloadAttributes(opContext);
          HashMap<String, String> metadata = blob.getMetadata();

          if (metadata.containsKey(APPEND_LEASE)
              && currentTime - Long.parseLong(
                  metadata.get(APPEND_LEASE_LAST_MODIFIED)) <= BlockBlobAppendStream.APPEND_LEASE_TIMEOUT
              && !metadata.get(APPEND_LEASE).equals(Boolean.toString(testCondition))) {
            return false;
          }

          metadata.put(APPEND_LEASE, Boolean.toString(holdLease));
          metadata.put(APPEND_LEASE_LAST_MODIFIED, Long.toString(currentTime));
          blob.setMetadata(metadata);
          AccessCondition accessCondition = new AccessCondition();
          accessCondition.setLeaseID(lease.getLeaseID());
          blob.uploadMetadata(accessCondition, null, opContext);
          return true;

        } catch (StorageException ex) {

          lastStorageException = ex;
          LOG.debug("Lease renewal for Blob : {} encountered Storage Exception : {} "
              + "Error Code : {}",
              key, ex, ex.getErrorCode());
          leaseRenewalRetryCount++;

        } finally {

          if (lease != null) {
            try {
              lease.free();
            } catch (StorageException ex) {
              LOG.debug("Encountered Storage exception while releasing lease for Blob {} "
                  + "during Append  metadata operation. Storage Exception {} "
                  + "Error Code : {} ", key, ex, ex.getErrorCode());
            }
          }
        }
      }

      if (leaseRenewalRetryCount == MAX_LEASE_RENEWAL_RETRY_COUNT) {
        throw lastStorageException;
      } else {
        try {
          Thread.sleep(LEASE_RENEWAL_RETRY_SLEEP_PERIOD);
        } catch (InterruptedException ex) {
          LOG.debug("Blob append metadata updated method interrupted");
          Thread.currentThread().interrupt();
        }
      }
    }

    // The code should not enter here because the while loop will
    // always be executed and if the while loop is executed we
    // would returning from the while loop.
    return false;
  }

  /**
   * This is the only method that should be writing to outBuffer to maintain consistency of the outBuffer.
   * Whenever the buffered bytes plus the incoming bytes exceed bufferSize, a
   * block of exactly bufferSize bytes is queued for upload; what is left over
   * stays in outBuffer.
   *
   * @param data the byte array to take the bytes from.
   * @param offset the start offset in the data.
   * @param length the number of bytes to write.
   * @throws IOException if the stream is not initialized, is closed or no
   *         longer holds the append lease.
   */
  private synchronized void writeInternal(final byte[] data, final int offset, final int length)
      throws IOException {

    if (!initialized) {
      throw new IOException("Trying to write to an un-initialized Append stream");
    }

    if (closed) {
      throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
    }

    if (leaseFreed) {
      throw new IOException("Write called on a append stream not holding lease. Failing Write");
    }

    // Walk through the caller's array by position instead of re-copying the
    // not yet consumed tail on every iteration.
    int position = offset;
    int remaining = length;

    // check to see if the data to be appended exceeds the
    // buffer size. If so we upload a block to azure storage.
    while ((outBuffer.size() + remaining) > bufferSize) {

      final int buffered = outBuffer.size();
      final int availableSpaceInPayload = bufferSize - buffered;
      final byte[] payload = new byte[bufferSize];

      // Add data from the existing buffer
      if (buffered > 0) {
        System.arraycopy(outBuffer.toByteArray(), 0, payload, 0, buffered);
      }

      // Adding data from the current call
      System.arraycopy(data, position, payload, buffered, availableSpaceInPayload);

      uploadBlockToStorage(payload);

      // The payload is a private copy, so the buffer can be emptied and reused.
      outBuffer.reset();
      position += availableSpaceInPayload;
      remaining -= availableSpaceInPayload;
    }

    outBuffer.write(data, position, remaining);
  }

  /**
   * Runnable instance that uploads the block of data to azure storage.
   * The upload is attempted up to MAX_BLOCK_UPLOAD_RETRIES times; if the
   * block could not be uploaded the failure is stored in lastError.
   */
  private class WriteRequest implements Runnable {
    private final byte[] dataPayload;
    private final String blockId;

    public WriteRequest(byte[] dataPayload, String blockId) {
      this.dataPayload = dataPayload;
      this.blockId = blockId;
    }

    @Override
    public void run() {

      IOException lastLocalException = null;

      for (int attempt = 1; attempt <= MAX_BLOCK_UPLOAD_RETRIES; attempt++) {
        try {
          blob.uploadBlock(blockId, new ByteArrayInputStream(dataPayload),
              dataPayload.length, new BlobRequestOptions(), opContext);
          return;
        } catch (Exception ioe) {
          LOG.debug("Encountered exception during uploading block for Blob : {} Exception : {}", key, ioe);
          lastLocalException = new IOException("Encountered Exception while uploading block", ioe);
        }

        if (attempt == MAX_BLOCK_UPLOAD_RETRIES) {
          // No further attempt follows, so there is nothing to wait for.
          break;
        }

        try {
          Thread.sleep(BLOCK_UPLOAD_RETRY_INTERVAL);
        } catch (InterruptedException ie) {
          Thread.currentThread().interrupt();
          break;
        }
      }

      // Reached only when the block was not uploaded, either because the
      // attempts are used up or because the retry wait was interrupted. The
      // failure has to be recorded in both cases: the block id is already in
      // the uncommitted list and must not be committed.
      if (lastLocalException != null) {
        lastError = lastLocalException;
      }
    }
  }

  /**
   * A ThreadFactory that creates uploader thread with
   * meaningful names helpful for debugging purposes.
   */
  class UploaderThreadFactory implements ThreadFactory {

    @Override
    public Thread newThread(Runnable r) {
      Thread t = new Thread(r);
      t.setName(String.format("%s-%s-%d", THREAD_ID_PREFIX, key,
          threadSequenceNumber.getAndIncrement()));
      return t;
    }
  }

  /**
   * A daemon thread that renews the Append lease on the blob.
   * The thread sleeps for LEASE_RENEWAL_PERIOD time before renewing
   * the lease. If an error is encountered while renewing the lease,
   * or the thread is interrupted, the lease is marked as released by
   * this thread, which fails all other operations.
   */
  private class AppendRenewer implements Runnable {

    @Override
    public void run() {

      while (!leaseFreed) {

        try {
          Thread.sleep(LEASE_RENEWAL_PERIOD);
        } catch (InterruptedException ie) {
          LOG.debug("Appender Renewer thread interrupted");
          Thread.currentThread().interrupt();
          // With the interrupt status restored every further sleep would fail
          // immediately and this loop would issue renewals back to back. Stop
          // renewing instead and mark the lease as lost so that the stream
          // fails rather than writing without a renewed lease.
          leaseFreed = true;
          return;
        }

        LOG.debug("Attempting to renew append lease on {}", key);

        try {
          if (!leaseFreed) {
            // Update the blob metadata to renew the append lease
            if (!updateBlobAppendMetadata(true, true)) {
              LOG.error("Unable to re-acquire append lease on the Blob {} ", key);
              leaseFreed = true;
            }
          }
        } catch (StorageException ex) {

          LOG.debug("Lease renewal for Blob : {} encountered "
              + "Storage Exception : {} Error Code : {}", key, ex, ex.getErrorCode());

          // We swallow the exception here because if the blob metadata is not updated for
          // APPEND_LEASE_TIMEOUT period, another thread would be able to detect this and
          // continue forward if it needs to append.
          leaseFreed = true;
        }
      }
    }
  }
}