package brickhouse.udf.hll;
/**
* Copyright 2012 Klout, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.log4j.Logger;
/**
* Aggregate multiple HyperLogLog structures together.
*/
@Description(name = "union_hyperloglog",
        value = "_FUNC_(x) - Merges multiple hyperloglogs together. "
)
public class UnionHyperLogLogUDAF extends AbstractGenericUDAFResolver {
    private static final Logger LOG = Logger.getLogger(UnionHyperLogLogUDAF.class);

    /**
     * Validates the call-site argument types and returns the merging evaluator.
     *
     * @param parameters type info for the arguments passed to the UDAF;
     *                   exactly one binary argument is expected
     * @return an evaluator that unions serialized HyperLogLog partials
     * @throws SemanticException (as {@link UDFArgumentTypeException}) if the
     *                           argument count is not 1, the argument is not a
     *                           primitive, or it is not of binary type
     */
    @Override
    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
            throws SemanticException {
        if (parameters.length != 1) {
            throw new UDFArgumentTypeException(parameters.length - 1,
                    "Please specify one argument.");
        }
        if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new UDFArgumentTypeException(0,
                    "Only primitive type arguments are accepted but "
                            + parameters[0].getTypeName()
                            + " was passed as parameter 1.");
        }
        if (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.BINARY) {
            throw new UDFArgumentTypeException(0,
                    "Only a binary argument is accepted as parameter 1, but "
                            + parameters[0].getTypeName()
                            + " was passed instead.");
        }
        // NOTE(review): an unreachable `parameters.length > 1` check was removed
        // here; the `parameters.length != 1` guard above already rejects extra
        // arguments before this point can be reached.
        return new MergeHyperLogLogUDAFEvaluator();
    }

    /**
     * Evaluator that unions serialized HyperLogLog sketches. In every mode the
     * input and the partial aggregate share the same representation: a byte[]
     * holding a serialized HLL, merged via {@code HLLBuffer.merge(byte[])}.
     */
    public static class MergeHyperLogLogUDAFEvaluator extends GenericUDAFEvaluator {
        // For PARTIAL1 and COMPLETE: ObjectInspector for the original binary input.
        // For PARTIAL2 and FINAL: ObjectInspector for the partial aggregation
        // (binary serialized HLL object). Both are binary, so one field suffices.
        private BinaryObjectInspector inputAndPartialBinaryOI;

        /**
         * Captures the binary object inspector for the (single) argument and
         * declares the output type, which is the same byte[] representation for
         * both partial and final results.
         */
        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters)
                throws HiveException {
            super.init(m, parameters);
            LOG.debug(" MergeHyperLogLogUDAF.init() - Mode= " + m.name());
            // init input object inspectors
            this.inputAndPartialBinaryOI = (BinaryObjectInspector) parameters[0];
            // init output object inspectors
            // The partial aggregate type is the same as the final type
            return PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector;
        }

        /** Creates a fresh, empty HLL aggregation buffer. */
        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
            HLLBuffer buff = new HLLBuffer();
            reset(buff);
            return buff;
        }

        /**
         * Folds one input row into the aggregation buffer. Since the input rows
         * are themselves serialized HLLs, iteration is just a merge; null rows
         * are skipped. Errors are logged and wrapped inside merge().
         */
        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters)
                throws HiveException {
            // merge() handles null, logging, and exception wrapping; the
            // previous extra try/catch here double-wrapped HiveExceptions.
            merge(agg, parameters[0]);
        }

        /**
         * Merges a serialized HLL partial into the aggregation buffer.
         * A null partial is a no-op.
         */
        @Override
        public void merge(AggregationBuffer agg, Object partial)
                throws HiveException {
            if (partial == null) {
                return;
            }
            try {
                HLLBuffer myagg = (HLLBuffer) agg;
                byte[] partialBuffer = this.inputAndPartialBinaryOI.getPrimitiveJavaObject(partial);
                myagg.merge(partialBuffer);
            } catch (Exception e) {
                LOG.error("Error", e);
                throw new HiveException(e);
            }
        }

        /** Clears the buffer so it can be reused for a new group. */
        @Override
        public void reset(AggregationBuffer buff) throws HiveException {
            HLLBuffer hllBuff = (HLLBuffer) buff;
            hllBuff.reset();
        }

        /** Returns the final merged HLL as a serialized byte[]. */
        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {
            try {
                HLLBuffer myagg = (HLLBuffer) agg;
                return myagg.getPartial();
            } catch (Exception e) {
                LOG.error("Error", e);
                throw new HiveException(e);
            }
        }

        /** Partial output has the same byte[] form as the final output. */
        @Override
        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
            return terminate(agg);
        }
    }
}