package org.gbif.occurrence.hive.udf; import java.util.Collections; import java.util.List; import com.google.common.collect.Lists; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; /** * A simple UDF that remove nulls from a list. */ public class ArrayNullsRemoverGenericUDF extends GenericUDF { private StandardListObjectInspector retValInspector; private PrimitiveObjectInspector primitiveObjectInspector; @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { List list = retValInspector.getList(arguments[0].get()); List result = Lists.newArrayList(); if (list != null && !list.isEmpty()) { if (primitiveObjectInspector != null && primitiveObjectInspector.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.STRING) { result = handlerStringArray(list); } else { result = Lists.newArrayList(list); result.removeAll(Collections.singleton(null)); } } return result.isEmpty() ? null : result; } /** * Removes empty Strings and null values. */ private List handlerStringArray(List list) { List result = Lists.newArrayList(); for (Object oElement : list) { Object stdObject = ObjectInspectorUtils.copyToStandardJavaObject(oElement, primitiveObjectInspector); if (stdObject != null && !((String)stdObject).trim().isEmpty()) { result.add(stdObject); } } return result; } @Override public String getDisplayString(String[] children) { return "removeNulls( " + children[0] + ")"; } @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { if (arguments.length != 1) { throw new UDFArgumentException("removeNulls takes an array as argument"); } if (arguments[0].getCategory() != Category.LIST) { throw new UDFArgumentException("removeNulls takes an array as argument"); } retValInspector = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(arguments[0]); if (retValInspector.getListElementObjectInspector().getCategory() != Category.PRIMITIVE) { primitiveObjectInspector = (PrimitiveObjectInspector) retValInspector.getListElementObjectInspector(); } return retValInspector; } }