package org.notmysock.hive;
import hyperloglog.HyperLogLog;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.BytesWritable;
/**
 * Hive UDAF resolver for {@code hyperloglog_merge}.
 *
 * <p>Accepts exactly one argument, which must be a primitive BINARY column, and hands
 * back a {@link HyperLogLogMergeEvaluator}. Everything not overridden here comes from
 * {@link UDAFHyperLogLog}.
 */
@Description(name = "hyperloglog_merge", value = "_FUNC_(x)")
public class UDAFHyperLogLogMerge extends UDAFHyperLogLog {

  /**
   * Evaluator whose {@code iterate} step feeds each non-null BINARY input value, together
   * with the buffer's {@link HyperLogLog}, to the inherited {@code merge} helper.
   *
   * <p>Only {@code iterate} is overridden; the remaining evaluator behaviour is whatever
   * {@code HyperLogLogEvaluator} defines.
   */
  public static final class HyperLogLogMergeEvaluator extends HyperLogLogEvaluator {

    /**
     * Processes one input row.
     *
     * @param agg  aggregation buffer; cast to {@code HyperLogLogBuffer} to reach its sketch
     * @param args row arguments; only {@code args[0]} is read, and a null value is skipped
     * @throws HiveException declared by the overridden method
     */
    @Override
    public void iterate(AggregationBuffer agg, Object[] args)
        throws HiveException {
      // Null inputs contribute nothing to the aggregate.
      if (args[0] == null) {
        return;
      }
      // inputOI is inherited; presumably it was captured during evaluator init and is a
      // BinaryObjectInspector because getEvaluator() only admits BINARY -- confirm in parent.
      final BytesWritable bw =
          ((BinaryObjectInspector) inputOI).getPrimitiveWritableObject(args[0]);
      HyperLogLog hll = ((HyperLogLogBuffer) agg).hll;
      // merge(...) is inherited; presumably it folds the serialized sketch in bw into hll.
      merge(hll, bw);
    }
  }

  /**
   * Validates the call signature and returns the evaluator for this UDAF.
   *
   * @param parameters type information for the arguments supplied in the query
   * @return a new {@link HyperLogLogMergeEvaluator}
   * @throws IllegalArgumentException if the number of arguments is not exactly one
   * @throws SemanticException specifically a {@link UDFArgumentTypeException} when the
   *         single argument is not a primitive BINARY type
   */
  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
    if (parameters.length != 1) {
      throw new IllegalArgumentException("Function only takes 1 parameter");
    } else if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE ||
        ((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory() != PrimitiveCategory.BINARY) {
      // Argument ids are zero-based: the offending (and only) argument is index 0, not 1.
      throw new UDFArgumentTypeException(0,
          "Only BINARY columns in HLL format are accepted but "
          + parameters[0].getTypeName() + " was passed.");
    }
    return new HyperLogLogMergeEvaluator();
  }
}