/*
* Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.services.kinesisfirehose.model.BufferingHints;
import com.amazonaws.services.kinesisfirehose.model.CopyCommand;
import com.amazonaws.services.kinesisfirehose.model.CreateDeliveryStreamRequest;
import com.amazonaws.services.kinesisfirehose.model.DeliveryStreamDescription;
import com.amazonaws.services.kinesisfirehose.model.RedshiftDestinationConfiguration;
import com.amazonaws.services.kinesisfirehose.model.RedshiftDestinationUpdate;
import com.amazonaws.services.kinesisfirehose.model.S3DestinationConfiguration;
import com.amazonaws.services.kinesisfirehose.model.UpdateDestinationRequest;
import com.amazonaws.util.StringUtils;
/**
* Amazon Kinesis Firehose is a fully managed service for real-time streaming data delivery
* to destinations such as Amazon S3 and Amazon Redshift. Firehose is part of the Amazon Kinesis
* streaming data family, along with Amazon Kinesis Streams. With Firehose, you do not need to
* write any applications or manage any resources. You configure your data producers to send data
* to Firehose and it automatically delivers the data to the destination that you specified.
*
* Detailed Amazon Kinesis Firehose documentation can be found here:
* https://aws.amazon.com/documentation/firehose/
*
* This is a sample java application to deliver data to Amazon Redshift destination.
*/
public class AmazonKinesisFirehoseToRedshiftSample extends AbstractAmazonKinesisFirehoseDelivery {
/*
* Before running the code:
*
* Step 1: Please check you have AWS access credentials set under
* (~/.aws/credentials). If not, fill in your AWS access credentials in the
* provided credentials file template, and be sure to move the file to the
* default location (~/.aws/credentials) where the sample code will load the
* credentials from.
* https://console.aws.amazon.com/iam/home?#security_credential
*
* WARNING: To avoid accidental leakage of your credentials, DO NOT keep the
* credentials file in your source directory.
*
* Step 2: Update the firehosetoredshiftsample.properties file with required parameters.
*/
// Redshift properties
private static String clusterJDBCUrl;
private static String username;
private static String password;
private static String dataTableName;
private static String copyOptions;
private static String updatedCopyOptions;
// Properties file
private static final String CONFIG_FILE = "firehosetoredshiftsample.properties";
// Logger
private static final Log LOG = LogFactory.getLog(AmazonKinesisFirehoseToRedshiftSample.class);
/**
* Initialize the parameters.
*
* @throws Exception
*/
private static void init() throws Exception {
// Load the parameters from properties file
loadConfig();
// Initialize the clients
initClients();
// Validate AccountId parameter is set
if (StringUtils.isNullOrEmpty(accountId)) {
throw new IllegalArgumentException("AccountId is empty. Please enter the accountId in "
+ CONFIG_FILE + " file");
}
}
/**
* Load the input parameters from properties file.
*
* @throws FileNotFoundException
* @throws IOException
*/
private static void loadConfig() throws FileNotFoundException, IOException {
try (InputStream configStream = Thread.currentThread().getContextClassLoader().getResourceAsStream(CONFIG_FILE)) {
if (configStream == null) {
throw new FileNotFoundException();
}
properties = new Properties();
properties.load(configStream);
}
// Read properties
accountId = properties.getProperty("customerAccountId");
createS3Bucket = Boolean.valueOf(properties.getProperty("createS3Bucket"));
s3RegionName = properties.getProperty("s3RegionName");
s3BucketName = properties.getProperty("s3BucketName").trim();
s3BucketARN = getBucketARN(s3BucketName);
s3ObjectPrefix = properties.getProperty("s3ObjectPrefix").trim();
String sizeInMBsProperty = properties.getProperty("destinationSizeInMBs");
s3DestinationSizeInMBs = StringUtils.isNullOrEmpty(sizeInMBsProperty) ? null : Integer.parseInt(sizeInMBsProperty.trim());
String intervalInSecondsProperty = properties.getProperty("destinationIntervalInSeconds");
s3DestinationIntervalInSeconds = StringUtils.isNullOrEmpty(intervalInSecondsProperty) ? null : Integer.parseInt(intervalInSecondsProperty.trim());
clusterJDBCUrl = properties.getProperty("clusterJDBCUrl");
username = properties.getProperty("username");
password = properties.getProperty("password");
dataTableName = properties.getProperty("dataTableName");
copyOptions = properties.getProperty("copyOptions");
deliveryStreamName = properties.getProperty("deliveryStreamName");
firehoseRegion = properties.getProperty("firehoseRegion");
iamRoleName = properties.getProperty("iamRoleName");
iamRegion = properties.getProperty("iamRegion");
// Update Delivery Stream Destination related properties
enableUpdateDestination = Boolean.valueOf(properties.getProperty("updateDestination"));
updatedCopyOptions = properties.getProperty("updatedCopyOptions");
}
public static void main(String[] args) throws Exception {
init();
try {
// Create S3 bucket for DeliveryStream to deliver data
createS3Bucket();
// Create the DeliveryStream
createDeliveryStream();
// Print the list of delivery streams
printDeliveryStreams();
// Put records into DeliveryStream
LOG.info("Putting records in DeliveryStream : " + deliveryStreamName + " via Put Record method.");
putRecordIntoDeliveryStream();
// Batch Put records into DeliveryStream
LOG.info("Putting records in DeliveryStream : " + deliveryStreamName
+ " via Put Record Batch method. Now you can check your S3 bucket " + s3BucketName
+ " for the data delivered by DeliveryStream.");
putRecordBatchIntoDeliveryStream();
// Wait for some interval for the firehose to write data to redshift destination
int waitTimeSecs = s3DestinationIntervalInSeconds == null ? DEFAULT_WAIT_INTERVAL_FOR_DATA_DELIVERY_SECS : s3DestinationIntervalInSeconds;
waitForDataDelivery(waitTimeSecs);
// Update the DeliveryStream and Put records into updated DeliveryStream, only if the flag is set
if (enableUpdateDestination) {
// Update the DeliveryStream
updateDeliveryStream();
// Wait for some interval to propagate the updated configuration options before ingesting data
LOG.info("Waiting for few seconds to propagate the updated configuration options.");
TimeUnit.SECONDS.sleep(60);
// Put records into updated DeliveryStream.
LOG.info("Putting records in updated DeliveryStream : " + deliveryStreamName + " via Put Record method.");
putRecordIntoDeliveryStream();
// Batch Put records into updated DeliveryStream.
LOG.info("Putting records in updated DeliveryStream : " + deliveryStreamName + " via Put Record Batch method.");
putRecordBatchIntoDeliveryStream();
// Wait for some interval for the DeliveryStream to write data to redshift destination
waitForDataDelivery(waitTimeSecs);
}
} catch (AmazonServiceException ase) {
LOG.error("Caught Amazon Service Exception");
LOG.error("Status Code " + ase.getErrorCode());
LOG.error("Message: " + ase.getErrorMessage(), ase);
} catch (AmazonClientException ace) {
LOG.error("Caught Amazon Client Exception");
LOG.error("Exception Message " + ace.getMessage(), ace);
}
}
/**
* Method to create delivery stream with Redshift destination configuration.
*
* @throws Exception
*/
private static void createDeliveryStream() throws Exception {
boolean deliveryStreamExists = false;
LOG.info("Checking if " + deliveryStreamName + " already exits");
List<String> deliveryStreamNames = listDeliveryStreams();
if (deliveryStreamNames != null && deliveryStreamNames.contains(deliveryStreamName)) {
deliveryStreamExists = true;
LOG.info("DeliveryStream " + deliveryStreamName + " already exists. Not creating the new delivery stream");
} else {
LOG.info("DeliveryStream " + deliveryStreamName + " does not exist");
}
if (!deliveryStreamExists) {
// Create DeliveryStream
CreateDeliveryStreamRequest createDeliveryStreamRequest = new CreateDeliveryStreamRequest();
createDeliveryStreamRequest.setDeliveryStreamName(deliveryStreamName);
S3DestinationConfiguration redshiftS3Configuration = new S3DestinationConfiguration();
redshiftS3Configuration.setBucketARN(s3BucketARN);
redshiftS3Configuration.setPrefix(s3ObjectPrefix);
BufferingHints bufferingHints = null;
if (s3DestinationSizeInMBs != null || s3DestinationIntervalInSeconds != null) {
bufferingHints = new BufferingHints();
bufferingHints.setSizeInMBs(s3DestinationSizeInMBs);
bufferingHints.setIntervalInSeconds(s3DestinationIntervalInSeconds);
}
redshiftS3Configuration.setBufferingHints(bufferingHints);
// Create and set IAM role so that firehose service has access to the S3Buckets to put data.
// Please check the trustPolicyDocument.json and permissionsPolicyDocument.json files
// for the trust and permissions policies set for the role.
String iamRoleArn = createIamRole(s3ObjectPrefix);
redshiftS3Configuration.setRoleARN(iamRoleArn);
CopyCommand copyCommand = new CopyCommand();
copyCommand.withCopyOptions(copyOptions)
.withDataTableName(dataTableName);
RedshiftDestinationConfiguration redshiftDestinationConfiguration = new RedshiftDestinationConfiguration();
redshiftDestinationConfiguration.withClusterJDBCURL(clusterJDBCUrl)
.withRoleARN(iamRoleArn)
.withUsername(username)
.withPassword(password)
.withCopyCommand(copyCommand)
.withS3Configuration(redshiftS3Configuration);
createDeliveryStreamRequest.setRedshiftDestinationConfiguration(redshiftDestinationConfiguration);
firehoseClient.createDeliveryStream(createDeliveryStreamRequest);
// The Delivery Stream is now being created.
LOG.info("Creating DeliveryStream : " + deliveryStreamName);
waitForDeliveryStreamToBecomeAvailable(deliveryStreamName);
}
}
/**
* Method to update redshift destination with updated copy options.
*/
private static void updateDeliveryStream() {
DeliveryStreamDescription deliveryStreamDescription = describeDeliveryStream(deliveryStreamName);
LOG.info("Updating DeliveryStream Destination: " + deliveryStreamName + " with new configuration options");
// get(0) -> DeliveryStream currently supports only one destination per DeliveryStream
UpdateDestinationRequest updateDestinationRequest =
new UpdateDestinationRequest()
.withDeliveryStreamName(deliveryStreamName)
.withCurrentDeliveryStreamVersionId(deliveryStreamDescription.getVersionId())
.withDestinationId(deliveryStreamDescription.getDestinations().get(0).getDestinationId());
CopyCommand updatedCopyCommand = new CopyCommand()
.withDataTableName(dataTableName)
.withCopyOptions(updatedCopyOptions);
RedshiftDestinationUpdate redshiftDestinationUpdate = new RedshiftDestinationUpdate()
.withCopyCommand(updatedCopyCommand);
updateDestinationRequest.setRedshiftDestinationUpdate(redshiftDestinationUpdate);
// Update DeliveryStream destination with new configuration options such as s3Prefix and Buffering Hints.
// Can also update Compression format, KMS key values and IAM Role.
firehoseClient.updateDestination(updateDestinationRequest);
}
}