/*
* Copyright 2013-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Amazon Software License (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/asl/
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.amazonaws.hbase.connector;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import com.amazonaws.hbase.kinesis.utils.HBaseUtils;
import com.amazonaws.services.kinesis.connectors.UnmodifiableBuffer;
import com.amazonaws.services.kinesis.connectors.interfaces.IEmitter;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClient;
/**
* This implementation of IEmitter is used to store records from an Amazon Kinesis stream in Apache HBase. The use of
* this class requires the configuration of an Amazon EMR cluster with Apache HBase installed. When the buffer is full,
* this class's emit method adds the contents of the buffer to Apache HBase running on Amazon EMR.
*/
public class HBaseEmitter implements IEmitter<Map<String,String>> {
    private static final Log LOG = LogFactory.getLog(HBaseEmitter.class);

    /** Key of the record entry whose value is used as the HBase row key. */
    private static final String ROW_KEY_FIELD = "username";

    /**
     * Columns written for every record, as {column family, qualifier} pairs.
     * The qualifier is also the key used to look the value up in the record map.
     * The order matches the order in which the cells are added to the batch.
     */
    private static final String[][] COLUMNS = {
        {"user", "userid"},
        {"user", "firstname"},
        {"user", "lastname"},
        {"address", "city"},
        {"address", "state"},
        {"contact", "email"},
        {"contact", "phone"},
        {"likes", "likesports"},
        {"likes", "liketheatre"},
        {"likes", "likeconcerts"},
        {"likes", "likejazz"},
        {"likes", "likeclassical"},
        {"likes", "likeopera"},
        {"likes", "likerock"},
        {"likes", "likevegas"},
        {"likes", "likebroadway"},
        {"likes", "likemusicals"},
    };

    protected final String emrEndpoint;
    protected final String hbaseTableName;
    protected final int hbaseRestPort;
    protected final String emrPublicDns;
    protected final AmazonElasticMapReduce emrClient;

    /**
     * Creates an emitter from the connector configuration.
     *
     * @param configuration supplies the EMR endpoint, the HBase table name, the HBase REST port, the
     *        public DNS name of the EMR cluster and the AWS credentials provider
     */
    public HBaseEmitter(EMRHBaseKinesisConnectorConfiguration configuration) {
        // EMR / HBase config
        this.emrEndpoint = configuration.EMR_ENDPOINT;
        this.hbaseTableName = configuration.HBASE_TABLE_NAME;
        this.hbaseRestPort = configuration.HBASE_REST_PORT;
        this.emrPublicDns = configuration.EMR_CLUSTER_PUBLIC_DNS;
        // Client
        this.emrClient = new AmazonElasticMapReduceClient(configuration.AWS_CREDENTIALS_PROVIDER);
        this.emrClient.setEndpoint(this.emrEndpoint);
        LOG.info("EMRHBaseEmitter.....");
    }

    /**
     * Converts every record in the buffer into HBase {@link Put}s (one per column listed in
     * {@code COLUMNS}, keyed by the record's {@code username}) and hands the whole batch to
     * {@code HBaseUtils.addRecords}.
     *
     * @param buffer the buffered records to write
     * @return always an empty list; this method never reports individual records as failed
     * @throws IOException presumably propagated from {@code HBaseUtils.addRecords} when the write fails
     */
    @Override
    public List<Map<String,String>> emit(final UnmodifiableBuffer<Map<String,String>> buffer) throws IOException {
        List<Map<String, String>> records = buffer.getRecords();
        if (records.isEmpty()) {
            // Nothing to write; avoid a pointless call to HBase.
            return Collections.emptyList();
        }

        List<Put> batch = new ArrayList<Put>(records.size() * COLUMNS.length);
        // Iterate over every record (including the last one) through the Map interface,
        // so any Map implementation is accepted.
        for (Map<String, String> record : records) {
            // start with the row key followed by column family
            byte[] rowKey = Bytes.toBytes(record.get(ROW_KEY_FIELD));
            for (String[] column : COLUMNS) {
                String family = column[0];
                String qualifier = column[1];
                batch.add(new Put(rowKey)
                        .add(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(record.get(qualifier))));
            }
        }

        LOG.info("EMIT: " + "records ....." + batch.size());
        HBaseUtils.addRecords(hbaseTableName, emrPublicDns, hbaseRestPort, batch);
        return Collections.emptyList();
    }

    /**
     * Logs each failed record at error level; no retry is attempted.
     *
     * @param records the records that could not be emitted
     */
    @Override
    public void fail(List<Map<String,String>> records) {
        for (Map<String,String> record : records) {
            LOG.error("Record failed: " + record);
        }
    }

    /**
     * Releases the EMR client created in the constructor.
     */
    @Override
    public void shutdown() {
        LOG.info("HBaseEmitter shutting down");
        emrClient.shutdown();
    }
}