package com.facebook.hive.udf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import java.util.HashMap;
import java.util.Map;
/**
* Aggregates all input maps into a single map. If there are multiple values
* for the same key, the result may contain any one of those values.
* Because the mappers must keep all of the data in memory, if your data is
* non-trivially large you should set hive.map.aggr=false to ensure that
* UNION_MAP is only executed in the reduce phase.
* @author ikabiljo
*/
@Description(
name = "union_map",
value = "_FUNC_(col) - aggregate given maps into a single map",
extended = "Aggregate maps, returns as a HashMap.")
public class UDAFUnionMap extends AbstractGenericUDAFResolver {
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
return new Evaluator();
}
public static class State implements AggregationBuffer {
HashMap<Object, Object> map = new HashMap<Object, Object>();
}
public static class Evaluator extends GenericUDAFEvaluator {
ObjectInspector inputOI;
MapObjectInspector internalMergeOI;
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
super.init(m, parameters);
if (m == Mode.COMPLETE || m == Mode.PARTIAL1) {
inputOI = (MapObjectInspector) parameters[0];
} else {
internalMergeOI = (MapObjectInspector) parameters[0];
}
return ObjectInspectorUtils.getStandardObjectInspector(parameters[0]);
}
@Override
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
return new State();
}
@Override
public void iterate(AggregationBuffer agg, Object[] input) throws HiveException {
if (input[0] != null) {
State state = (State) agg;
state.map.putAll((Map<?,?>)ObjectInspectorUtils.copyToStandardObject(input[0], inputOI));
}
}
@Override
public void merge(AggregationBuffer agg, Object partial) throws HiveException {
if (partial != null) {
State state = (State) agg;
Map<?,?> pset = (Map<?,?>)ObjectInspectorUtils.copyToStandardObject(partial, internalMergeOI);
state.map.putAll(pset);
}
}
@Override
public void reset(AggregationBuffer agg) throws HiveException {
((State) agg).map.clear();
}
@Override
public Object terminate(AggregationBuffer agg) throws HiveException {
return ((State) agg).map;
}
@Override
public Object terminatePartial(AggregationBuffer agg) throws HiveException {
return ((State) agg).map;
}
}
}