package brickhouse.hbase;
/**
* Copyright 2012 Klout, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.log4j.Logger;
/**
 * Insert into HBase by doing bulk puts from an aggregate function call.
 */
@Description(name = "hbase_batch_put",
value = "_FUNC_(config_map, key, value) - Perform batch HBase updates of a table "
)
public class BatchPutUDAF extends AbstractGenericUDAFResolver {
private static final Logger LOG = Logger.getLogger(BatchPutUDAF.class);
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
        throws SemanticException {
    // Expect exactly (config_map, key, value); fail early with a clear
    // message rather than an ArrayIndexOutOfBoundsException in init().
    if (parameters.length != 3) {
        throw new SemanticException(
                "hbase_batch_put requires three arguments: config_map, key, value");
    }
    for (int i = 0; i < parameters.length; ++i) {
        LOG.info(" BATCH PUT PARAMETERS : " + i + " -- " + parameters[i].getTypeName()
                + " cat = " + parameters[i].getCategory());
    }
    return new BatchPutUDAFEvaluator();
}
public static class BatchPutUDAFEvaluator extends GenericUDAFEvaluator {
/**
 * Aggregation buffer holding the Puts accumulated so far; flushed to
 * HBase by batchUpdate() whenever batchSize rows are pending.
 */
public class PutBuffer implements AggregationBuffer {
    public List<Put> putList;

    public PutBuffer() {
    }

    /** Starts a fresh, empty batch. */
    public void reset() {
        putList = new ArrayList<Put>();
    }

    /**
     * Queues one row update for the configured family/qualifier.
     * UTF-8 is used explicitly so the stored bytes do not depend on the
     * JVM's platform default charset (matches HBase's Bytes.toBytes
     * convention).
     *
     * @param key the HBase row key
     * @param val the cell value
     */
    public void addKeyValue(String key, String val) throws HiveException {
        Put thePut = new Put(key.getBytes(StandardCharsets.UTF_8));
        thePut.add(getFamily(), getQualifier(), val.getBytes(StandardCharsets.UTF_8));
        // WAL is skipped deliberately for bulk-load speed; rows may be
        // lost on a region server crash.
        thePut.setWriteToWAL(false);
        putList.add(thePut);
    }
}
/** Column family bytes, taken from the UDAF's config map (UTF-8, not platform charset). */
private byte[] getFamily() {
    String family = configMap.get(HTableFactory.FAMILY_TAG);
    return family.getBytes(StandardCharsets.UTF_8);
}
/** Column qualifier bytes, taken from the UDAF's config map (UTF-8, not platform charset). */
private byte[] getQualifier() {
    // Renamed the misleading local "famStr" — this is the qualifier.
    String qualifier = configMap.get(HTableFactory.QUALIFIER_TAG);
    return qualifier.getBytes(StandardCharsets.UTF_8);
}
// Flush pending Puts once this many rows are buffered; overridable via
// the "batch_size" entry of the config map.
private int batchSize = 10000;
// Running total of rows written by this evaluator, reported by terminate().
private int numPutRecords = 0;
public static final String BATCH_SIZE_TAG = "batch_size";
// For PARTIAL1 and COMPLETE: ObjectInspectors for original data
private PrimitiveObjectInspector inputKeyOI;
private PrimitiveObjectInspector inputValOI;
// For PARTIAL2 and FINAL: ObjectInspectors for partial aggregations (list
// of objs)
private StandardListObjectInspector listKVOI;
// Table/zookeeper/family/qualifier settings; parsed from the const map
// argument on the map side, rebuilt from the partial result in merge().
private Map<String, String> configMap;
private HTable table;
/**
 * Sets up inspectors per aggregation mode.  Map side (PARTIAL1/COMPLETE)
 * receives (config_map, key, value); reduce side (PARTIAL2/FINAL)
 * receives the list-of-lists partial produced by terminatePartial().
 */
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters)
        throws HiveException {
    super.init(m, parameters);
    LOG.info(" Init mode = " + m);
    // Empty by default; merge() repopulates it on the reduce side.
    configMap = new HashMap<String, String>();
    for (int k = 0; k < parameters.length; ++k) {
        LOG.info("Param " + k + " is " + parameters[k]);
    }
    if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
        configMap = HTableFactory.getConfigFromConstMapInspector(parameters[0]);
        HTableFactory.checkConfig(configMap);
        inputKeyOI = (PrimitiveObjectInspector) parameters[1];
        inputValOI = (PrimitiveObjectInspector) parameters[2];
        try {
            LOG.info(" Initializing HTable ");
            table = HTableFactory.getHTable(configMap);
            if (configMap.containsKey(BATCH_SIZE_TAG)) {
                batchSize = Integer.parseInt(configMap.get(BATCH_SIZE_TAG));
            }
        } catch (IOException e) {
            throw new HiveException(e);
        }
    } else {
        listKVOI = (StandardListObjectInspector) parameters[0];
    }
    if (m == Mode.PARTIAL1 || m == Mode.PARTIAL2) {
        // Partial shape: first sub-list is the config header, the rest
        // are (rowkey, value) string pairs — see terminatePartial().
        return ObjectInspectorFactory
                .getStandardListObjectInspector(
                        ObjectInspectorFactory.getStandardListObjectInspector(
                                PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    } else {
        /// Otherwise return a message
        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    }
}
@Override
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
    // Allocate a buffer and put it into its initial (empty) state.
    final PutBuffer buffer = new PutBuffer();
    reset(buffer);
    return buffer;
}
/**
 * Accumulates one (key, value) row into the buffer, flushing to HBase
 * whenever batchSize rows are pending.
 */
@Override
public void iterate(AggregationBuffer agg, Object[] parameters)
        throws HiveException {
    String key = getByteString(parameters[1], inputKeyOI);
    String val = getByteString(parameters[2], inputValOI);
    // Null rows can't be written to HBase; fail with a descriptive error
    // instead of the bare NPE the old code produced inside Put.
    if (key == null || val == null) {
        throw new HiveException(
                "hbase_batch_put: null key or value (key=" + key + ")");
    }
    PutBuffer kvBuff = (PutBuffer) agg;
    kvBuff.addKeyValue(key, val);
    if (kvBuff.putList.size() >= batchSize) {
        batchUpdate(kvBuff, false);
    }
}
/**
 * Converts a primitive column value to the String that will be stored
 * in HBase.
 *
 * @param obj     raw value from the current row (may be null)
 * @param objInsp inspector describing the value's primitive type
 * @return the string form of the value, or null when the value is null
 */
private String getByteString(Object obj, PrimitiveObjectInspector objInsp) {
    if (obj == null) {
        return null;
    }
    switch (objInsp.getPrimitiveCategory()) {
        case STRING:
            return ((StringObjectInspector) objInsp).getPrimitiveJavaObject(obj);
        case BINARY:
            // Decode as UTF-8 rather than the platform default charset.
            return new String(((BinaryObjectInspector) objInsp).getPrimitiveJavaObject(obj),
                    StandardCharsets.UTF_8);
        default:
            // Resolves the old TODO ("interpret other types, like ints or
            // doubles"): fall back to the primitive's standard string form
            // instead of returning null for every non-string type.
            Object prim = objInsp.getPrimitiveJavaObject(obj);
            return prim == null ? null : prim.toString();
    }
}
/**
 * Writes the buffered Puts to the configured HBase table and clears the
 * buffer.  Called with flushCommits=false for intermediate batches and
 * flushCommits=true from terminate().
 *
 * NOTE(review): the put/flush runs even when the list is empty — the
 * final flushCommits must still execute to commit earlier batches that
 * were put without a flush.  Do not short-circuit on an empty list.
 *
 * @param kvBuff       buffer whose putList is written then cleared
 * @param flushCommits whether to flush the table's write buffer afterwards
 * @throws HiveException wrapping any IOException from the HBase client
 */
protected void batchUpdate(PutBuffer kvBuff, boolean flushCommits) throws HiveException {
    try {
        // HTableFactory supplies the table handle for this configMap;
        // presumably it caches handles — TODO confirm.
        HTable htable = HTableFactory.getHTable(configMap);
        htable.put(kvBuff.putList);
        if (flushCommits)
            htable.flushCommits();
        numPutRecords += kvBuff.putList.size();
        if (kvBuff.putList.size() > 0)
            LOG.info(" Doing Batch Put " + kvBuff.putList.size() + " records; Total put records = " + numPutRecords + " ; Start = " + (new String(kvBuff.putList.get(0).getRow())) + " ; End = " + (new String(kvBuff.putList.get(kvBuff.putList.size() - 1).getRow())));
        else
            LOG.info(" Doing Batch Put with ZERO 0 records");
        kvBuff.putList.clear();
    } catch (IOException e) {
        throw new HiveException(e);
    }
}
/**
 * Merges a partial produced by terminatePartial(): the first sub-list is
 * the config header [table, zookeeper, family, qualifier, "k=v"...] and
 * each remaining sub-list is a (rowkey, value) pair.
 */
@Override
@SuppressWarnings("unchecked")
public void merge(AggregationBuffer agg, Object partial)
        throws HiveException {
    if (partial == null) {
        return; // nothing to merge from an empty partition
    }
    PutBuffer myagg = (PutBuffer) agg;
    List<Object> partialResult = (List<Object>) this.listKVOI.getList(partial);
    ListObjectInspector subListOI = (ListObjectInspector) listKVOI.getListElementObjectInspector();
    // Derive and cast the element inspector once, instead of repeating
    // the cast for every field access as the old code did.
    StringObjectInspector strOI =
            (StringObjectInspector) subListOI.getListElementObjectInspector();

    List<?> first = subListOI.getList(partialResult.get(0));
    configMap.put(HTableFactory.TABLE_NAME_TAG, strOI.getPrimitiveJavaObject(first.get(0)));
    configMap.put(HTableFactory.ZOOKEEPER_QUORUM_TAG, strOI.getPrimitiveJavaObject(first.get(1)));
    configMap.put(HTableFactory.FAMILY_TAG, strOI.getPrimitiveJavaObject(first.get(2)));
    configMap.put(HTableFactory.QUALIFIER_TAG, strOI.getPrimitiveJavaObject(first.get(3)));
    //// Include arbitrary configurations, by adding strings of the form k=v
    for (int j = 4; j < first.size(); ++j) {
        String kvStr = strOI.getPrimitiveJavaObject(first.get(j));
        String[] kvArr = kvStr.split("=");
        if (kvArr.length == 2) {
            configMap.put(kvArr[0], kvArr[1]);
        }
    }
    // Remaining sub-lists are (rowkey, value) pairs.
    for (int i = 1; i < partialResult.size(); ++i) {
        List<?> kvList = subListOI.getList(partialResult.get(i));
        String key = strOI.getPrimitiveJavaObject(kvList.get(0));
        String val = strOI.getPrimitiveJavaObject(kvList.get(1));
        myagg.addKeyValue(key, val);
    }
    if (myagg.putList.size() >= batchSize) {
        batchUpdate(myagg, false);
    }
}
@Override
public void reset(AggregationBuffer buff) throws HiveException {
    // Delegate to the buffer's own reset, which re-creates the put list.
    ((PutBuffer) buff).reset();
}
@Override
public Object terminate(AggregationBuffer agg) throws HiveException {
    // Final flush: write whatever is still buffered and commit it.
    batchUpdate((PutBuffer) agg, true);
    return "Finished Batch updates ; Num Puts = " + numPutRecords;
}
/**
 * Serializes the buffer as a list of string lists: a config header
 * [table, zookeeper, family, qualifier, extra "k=v" entries...] followed
 * by one [rowkey, value...] sub-list per buffered Put.  merge() decodes
 * this shape on the reduce side.
 */
@Override
public Object terminatePartial(AggregationBuffer agg) throws HiveException {
    PutBuffer myagg = (PutBuffer) agg;
    ArrayList<List<String>> ret = new ArrayList<List<String>>();
    // Typed List<String> replaces the old raw ArrayList.
    ArrayList<String> header = new ArrayList<String>();
    header.add(configMap.get(HTableFactory.TABLE_NAME_TAG));
    header.add(configMap.get(HTableFactory.ZOOKEEPER_QUORUM_TAG));
    header.add(configMap.get(HTableFactory.FAMILY_TAG));
    header.add(configMap.get(HTableFactory.QUALIFIER_TAG));
    // Any other config entries travel as "key=value" strings.
    for (Entry<String, String> entry : configMap.entrySet()) {
        if (!entry.getKey().equals(HTableFactory.TABLE_NAME_TAG)
                && !entry.getKey().equals(HTableFactory.ZOOKEEPER_QUORUM_TAG)
                && !entry.getKey().equals(HTableFactory.FAMILY_TAG)
                && !entry.getKey().equals(HTableFactory.QUALIFIER_TAG)) {
            header.add(entry.getKey() + "=" + entry.getValue());
        }
    }
    ret.add(header);
    for (Put thePut : myagg.putList) {
        ArrayList<String> kvList = new ArrayList<String>();
        // Decode as UTF-8 (matching how the bytes were written) rather
        // than the platform default charset.
        kvList.add(new String(thePut.getRow(), StandardCharsets.UTF_8));
        Map<byte[], List<KeyValue>> familyMap = thePut.getFamilyMap();
        for (List<KeyValue> innerList : familyMap.values()) {
            for (KeyValue kv : innerList) {
                kvList.add(new String(kv.getValue(), StandardCharsets.UTF_8));
            }
        }
        ret.add(kvList);
    }
    return ret;
}
}
}