package brickhouse.udf.collect; /** * Copyright 2012 Klout, Inc * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * **/ import java.util.List; import java.util.Set; import java.util.TreeSet; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; import org.apache.log4j.Logger; /** * Return a list of unique entries, for a given set of lists. * * {1, 2} ∪ {1, 2} = {1, 2} * {1, 2} ∪ {2, 3} = {1, 2, 3} * {1, 2, 3} ∪ {3, 4, 5} = {1, 2, 3, 4, 5} */ @Description(name = "array_union", value = "_FUNC_(array1, array2, ...) - Returns the union of a set of arrays " ) public class ArrayUnionUDF extends GenericUDF { private static final Logger LOG = Logger.getLogger(ArrayUnionUDF.class); private StandardListObjectInspector retValInspector; private ListObjectInspector[] listInspectorArr; private class InspectableObject implements Comparable { public Object o; public ObjectInspector oi; public InspectableObject(Object o, ObjectInspector oi) { this.o = o; this.oi = oi; } @Override public int hashCode() { return ObjectInspectorUtils.hashCode(o, oi); } @Override public int compareTo(Object arg0) { InspectableObject otherInsp = (InspectableObject) arg0; return ObjectInspectorUtils.compare(o, oi, otherInsp.o, otherInsp.oi); } @Override public boolean equals(Object other) { return compareTo(other) == 0; } } @Override public Object evaluate(DeferredObject[] arg0) throws HiveException { Set<InspectableObject> objects = new TreeSet<InspectableObject>(); for (int i = 0; i < arg0.length; ++i) { Object undeferred = arg0[i].get(); for (int j = 0; j < listInspectorArr[i].getListLength(undeferred); ++j) { Object nonStd = listInspectorArr[i].getListElement(undeferred, j); InspectableObject stdInsp = new InspectableObject(nonStd, listInspectorArr[i].getListElementObjectInspector()); objects.add(stdInsp); } } List retVal = (List) retValInspector.create(0); for (Object io : objects) { InspectableObject inspObj = (InspectableObject) io; Object stdObj = ObjectInspectorUtils.copyToStandardObject(inspObj.o, inspObj.oi); retVal.add(stdObj); } return retVal; } @Override public String getDisplayString(String[] arg0) { return "array_union(" + arg0[0] + ", " + arg0[1] + " )"; } @Override public ObjectInspector initialize(ObjectInspector[] arg0) throws UDFArgumentException { if (arg0.length < 2) { throw new UDFArgumentException(" Expecting at least two arrays as arguments "); } ObjectInspector first = arg0[0]; listInspectorArr = new ListObjectInspector[arg0.length]; if (first.getCategory() == Category.LIST) { listInspectorArr[0] = (ListObjectInspector) first; } else { throw new UDFArgumentException(" Expecting an array as first argument "); } for (int i = 1; i < arg0.length; ++i) { if (arg0[i].getCategory() != Category.LIST) { throw new UDFArgumentException(" Expecting arrays arguments "); } ListObjectInspector checkInspector = (ListObjectInspector) arg0[i]; if (!ObjectInspectorUtils.compareTypes(listInspectorArr[0].getListElementObjectInspector(), checkInspector.getListElementObjectInspector())) { throw new UDFArgumentException(" Array types must match " + listInspectorArr[0].getTypeName() + " != " + checkInspector.getTypeName()); } listInspectorArr[i] = checkInspector; } retValInspector = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(first); return retValInspector; } }