package com.facebook.hive.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import java.util.ArrayList; /** * Repeats each row an arbitrary number of times. If the parameter is * null, then the row will not be repeated. * * Example usage: * hive> SELECT * FROM jonchang_iris LATERAL VIEW REPEAT_ROWS(IF(petal_width > 2.0, 3, 2)) a AS b LIMIT 7; * 6 2.9 4.5 1.5 versicolor 2 * 6 2.9 4.5 1.5 versicolor 2 * 6.9 3.1 5.1 2.3 virginica 3 * 6.9 3.1 5.1 2.3 virginica 3 * 6.9 3.1 5.1 2.3 virginica 3 * 5.4 3.4 1.5 0.4 setosa 2 * 5.4 3.4 1.5 0.4 setosa 2 */ @Description(name = "repeat_rows", value = "_FUNC_(x) - outputs x copies of each row") public class UDTFRepeatRows extends GenericUDTF { private PrimitiveObjectInspector colOI = null; @Override public void close() throws HiveException { } @Override public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException { if (args.length != 1) { throw new UDFArgumentException("repeat_rows() requires exactly one argument"); } if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentException("repeat_rows() expects an integer argument"); } colOI = (PrimitiveObjectInspector)args[0]; if (colOI.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.INT) { throw new UDFArgumentException("repeat_rows() expects an integer argument"); } ArrayList<String> fieldNames = new ArrayList<String>(); ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(); fieldNames.add("col0"); fieldOIs.add(colOI); return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } @Override public void process(Object[] o) throws HiveException { Integer val = (Integer) colOI.getPrimitiveJavaObject(o[0]); if (val == null) { return; } if (val < 0) { throw new HiveException("repeat_rows() expects a non-negative argument"); } for (int ii = 0; ii < val; ++ii) { forward(o); } } @Override public String toString() { return "repeat_rows"; } }