/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.udf.generic; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.List; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; /** * Generic UDF for tuple array sort by desired field[s] with [ordering(ASC or DESC)] * <code>SORT_ARRAY_BY(array(obj1, obj2, obj3...),'f1','f2',..,['ASC','DESC'])</code>. * * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF */ @Description(name = "sort_array_by", value = "_FUNC_(array(obj1, obj2,...),'f1','f2',...,['ASC','DESC']) - " + "Sorts the input tuple array in user specified order(ASC,DESC) by desired field[s] name" + " If sorting order is not mentioned by user then dafault sorting order is ascending", extended = "Example:\n" + " > SELECT _FUNC_(array(struct('g',100),struct('b',200)),'col1','ASC') FROM src LIMIT 1;\n" + " array(struct('b',200),struct('g',100)) ") public class GenericUDFSortArrayByField extends GenericUDF { private transient Converter[] converters; private transient PrimitiveCategory[] inputTypes; /**Output array results*/ private final List<Object> ret = new ArrayList<Object>(); private transient ListObjectInspector listObjectInspector; private transient StructObjectInspector structObjectInspector; /**All sorting fields*/ private transient StructField[] fields; /**Number of fields based on sorting will take place*/ private transient int noOfInputFields; /**All possible ordering constants*/ private enum SORT_ORDER_TYPE { ASC, DESC }; /**default sorting order*/ private transient SORT_ORDER_TYPE sortOrder = SORT_ORDER_TYPE.ASC; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true); /**This UDF requires minimum 2 arguments array_name,field name*/ if (arguments.length < 2) { throw new UDFArgumentLengthException("SORT_ARRAY_BY requires minimum 2 arguments, got " + arguments.length); } /**First argument must be array*/ switch (arguments[0].getCategory()) { case LIST: listObjectInspector = (ListObjectInspector) arguments[0]; break; default: throw new UDFArgumentTypeException(0, "Argument 1 of function SORT_ARRAY_BY must be " + serdeConstants.LIST_TYPE_NAME + ", but " + arguments[0].getTypeName() + " was found."); } /**Elements inside first argument(array) must be tuple(s)*/ switch (listObjectInspector.getListElementObjectInspector().getCategory()) { case STRUCT: structObjectInspector = (StructObjectInspector) listObjectInspector.getListElementObjectInspector(); break; default: throw new UDFArgumentTypeException(0, "Element[s] of first argument array in function SORT_ARRAY_BY must be " + serdeConstants.STRUCT_TYPE_NAME + ", but " + listObjectInspector.getTypeName() + " was found."); } /**All sort fields argument name and sort order name must be in String type*/ converters = new Converter[arguments.length]; inputTypes = new PrimitiveCategory[arguments.length]; fields = new StructField[arguments.length - 1]; noOfInputFields = arguments.length - 1; for (int i = 1; i < arguments.length; i++) { checkArgPrimitive(arguments, i); checkArgGroups(arguments, i, inputTypes, PrimitiveGrouping.STRING_GROUP); if (arguments[i] instanceof ConstantObjectInspector) { String fieldName = getConstantStringValue(arguments, i); /**checking whether any sorting order (ASC,DESC) has specified in last argument*/ if (i != 1 && (i == arguments.length - 1) && (fieldName.trim().toUpperCase().equals(SORT_ORDER_TYPE.ASC.name()) || fieldName.trim().toUpperCase() .equals(SORT_ORDER_TYPE.DESC.name()))) { sortOrder = SORT_ORDER_TYPE.valueOf(fieldName.trim().toUpperCase()); noOfInputFields -= 1; continue; } fields[i - 1] = structObjectInspector.getStructFieldRef(getConstantStringValue(arguments, i)); } obtainStringConverter(arguments, i, inputTypes, converters); } ObjectInspector returnOI = returnOIResolver.get(structObjectInspector); converters[0] = ObjectInspectorConverters.getConverter(structObjectInspector, returnOI); return ObjectInspectorFactory.getStandardListObjectInspector(structObjectInspector); } @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { if (arguments[0].get() == null) { return null; } /**Except first argument all remaining are field names and [sorting order]*/ /**Add all non constant string tuple fields based on which sorting will happen with sorting ordering information if any.*/ String field = null; /**If sorting order is set in initialize method then we are excluding last argument */ for (int i = 0; i < noOfInputFields && fields[i] == null; i++) { field = getStringValue(arguments, i + 1, converters); if (i != 0 && (i == arguments.length - 2) && (field.trim().toUpperCase().equals(SORT_ORDER_TYPE.ASC.name()) || field.trim().toUpperCase() .equals(SORT_ORDER_TYPE.DESC.name()))) { noOfInputFields -= 1; sortOrder = SORT_ORDER_TYPE.valueOf(field.trim().toUpperCase()); continue; } fields[i] = structObjectInspector.getStructFieldRef(field); } Object array = arguments[0].get(); List<Object> retArray = (List<Object>) listObjectInspector.getList(array); /**Sort the tuple*/ Collections.sort(retArray, new Comparator<Object>() { @Override public int compare(Object object1, Object object2) { int result = 0; /**If multiple fields are mentioned for sorting a record then inside the loop we do will do sorting for each field*/ for (int i = 0; i < noOfInputFields; i++) { Object o1 = structObjectInspector.getStructFieldData(object1, fields[i]); Object o2 = structObjectInspector.getStructFieldData(object2, fields[i]); result = ObjectInspectorUtils.compare(o1, fields[i].getFieldObjectInspector(), o2, fields[i].getFieldObjectInspector()); if (result != 0) { /**Ordering*/ if (sortOrder == SORT_ORDER_TYPE.DESC) { result *= -1; } return result; } } return result; } }); ret.clear(); for (int i = 0; i < retArray.size(); i++) { ret.add(converters[0].convert(retArray.get(i))); } return ret; } @Override public String getDisplayString(String[] children) { return getStandardDisplayString("sort_array_by", children); } }