/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.udf.generic; import java.io.Closeable; import java.io.IOException; import java.sql.Timestamp; import java.text.ParseException; import java.util.Date; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hive.common.util.DateUtils; /** * A Generic User-defined function (GenericUDF) for the use with Hive. * * New GenericUDF classes need to inherit from this GenericUDF class. * * The GenericUDF are superior to normal UDFs in the following ways: 1. It can * accept arguments of complex types, and return complex types. 2. It can accept * variable length of arguments. 3. It can accept an infinite number of function * signature - for example, it's easy to write a GenericUDF that accepts * array<int>, array<array<int>> and so on (arbitrary levels of nesting). 4. It * can do short-circuit evaluations using DeferedObject. */ @UDFType(deterministic = true) public abstract class GenericUDF implements Closeable { private static final String[] ORDINAL_SUFFIXES = new String[] { "th", "st", "nd", "rd", "th", "th", "th", "th", "th", "th" }; /** * A Defered Object allows us to do lazy-evaluation and short-circuiting. * GenericUDF use DeferedObject to pass arguments. */ public static interface DeferredObject { void prepare(int version) throws HiveException; Object get() throws HiveException; }; /** * A basic dummy implementation of DeferredObject which just stores a Java * Object reference. */ public static class DeferredJavaObject implements DeferredObject { private final Object value; public DeferredJavaObject(Object value) { this.value = value; } @Override public void prepare(int version) throws HiveException { } @Override public Object get() throws HiveException { return value; } } /** * The constructor. */ public GenericUDF() { } /** * Initialize this GenericUDF. This will be called once and only once per * GenericUDF instance. * * @param arguments * The ObjectInspector for the arguments * @throws UDFArgumentException * Thrown when arguments have wrong types, wrong length, etc. * @return The ObjectInspector for the return value */ public abstract ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException; /** * Additionally setup GenericUDF with MapredContext before initializing. * This is only called in runtime of MapRedTask. * * @param context context */ public void configure(MapredContext context) { } /** * Initialize this GenericUDF. Additionally, if the arguments are constant * and the function is eligible to be folded, then the constant value * returned by this UDF will be computed and stored in the * ConstantObjectInspector returned. Otherwise, the function behaves exactly * like initialize(). */ public ObjectInspector initializeAndFoldConstants(ObjectInspector[] arguments) throws UDFArgumentException { ObjectInspector oi = initialize(arguments); // If the UDF depends on any external resources, we can't fold because the // resources may not be available at compile time. if (getRequiredFiles() != null || getRequiredJars() != null) { return oi; } boolean allConstant = true; for (int ii = 0; ii < arguments.length; ++ii) { if (!ObjectInspectorUtils.isConstantObjectInspector(arguments[ii])) { allConstant = false; break; } } if (allConstant && !ObjectInspectorUtils.isConstantObjectInspector(oi) && FunctionRegistry.isDeterministic(this) && !FunctionRegistry.isStateful(this) && ObjectInspectorUtils.supportsConstantObjectInspector(oi)) { DeferredObject[] argumentValues = new DeferredJavaObject[arguments.length]; for (int ii = 0; ii < arguments.length; ++ii) { argumentValues[ii] = new DeferredJavaObject( ((ConstantObjectInspector)arguments[ii]).getWritableConstantValue()); } try { Object constantValue = evaluate(argumentValues); oi = ObjectInspectorUtils.getConstantObjectInspector(oi, constantValue); } catch (HiveException e) { throw new UDFArgumentException(e); } } return oi; } /** * The following two functions can be overridden to automatically include * additional resources required by this UDF. The return types should be * arrays of paths. */ public String[] getRequiredJars() { return null; } public String[] getRequiredFiles() { return null; } /** * Evaluate the GenericUDF with the arguments. * * @param arguments * The arguments as DeferedObject, use DeferedObject.get() to get the * actual argument Object. The Objects can be inspected by the * ObjectInspectors passed in the initialize call. * @return The */ public abstract Object evaluate(DeferredObject[] arguments) throws HiveException; /** * Get the String to be displayed in explain. */ public abstract String getDisplayString(String[] children); /** * Close GenericUDF. * This is only called in runtime of MapRedTask. */ @Override public void close() throws IOException { } /** * Some functions like comparisons may be affected by appearing order of arguments. * This is to convert a function, such as 3 > x to x < 3. The flip function of * GenericUDFOPGreaterThan is GenericUDFOPLessThan. */ public GenericUDF flip() { return this; } /** * Gets the negative function of the current one. E.g., GenericUDFOPNotEqual for * GenericUDFOPEqual, or GenericUDFOPNull for GenericUDFOPNotNull. * @return Negative function * @throws UDFArgumentException */ public GenericUDF negative() { throw new UnsupportedOperationException("Negative function doesn't exist for " + getFuncName()); } public String getUdfName() { return getClass().getName(); } /** * Some information may be set during initialize() which needs to be saved when the UDF is copied. * This will be called by FunctionRegistry.cloneGenericUDF() */ public void copyToNewInstance(Object newInstance) throws UDFArgumentException { // newInstance should always be the same type of object as this if (this.getClass() != newInstance.getClass()) { throw new UDFArgumentException("Invalid copy between " + this.getClass().getName() + " and " + newInstance.getClass().getName()); } } protected String getStandardDisplayString(String name, String[] children) { return getStandardDisplayString(name, children, ", "); } protected String getStandardDisplayString(String name, String[] children, String delim) { StringBuilder sb = new StringBuilder(); sb.append(name); sb.append("("); if (children.length > 0) { sb.append(children[0]); for (int i = 1; i < children.length; i++) { sb.append(delim); sb.append(children[i]); } } sb.append(")"); return sb.toString(); } protected String getFuncName() { return getClass().getSimpleName().substring(10).toLowerCase(); } protected void checkArgsSize(ObjectInspector[] arguments, int min, int max) throws UDFArgumentLengthException { if (arguments.length < min || arguments.length > max) { StringBuilder sb = new StringBuilder(); sb.append(getFuncName()); sb.append(" requires "); if (min == max) { sb.append(min); } else { sb.append(min).append("..").append(max); } sb.append(" argument(s), got "); sb.append(arguments.length); throw new UDFArgumentLengthException(sb.toString()); } } protected void checkArgPrimitive(ObjectInspector[] arguments, int i) throws UDFArgumentTypeException { ObjectInspector.Category oiCat = arguments[i].getCategory(); if (oiCat != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentTypeException(i, getFuncName() + " only takes primitive types as " + getArgOrder(i) + " argument, got " + oiCat); } } protected void checkArgGroups(ObjectInspector[] arguments, int i, PrimitiveCategory[] inputTypes, PrimitiveGrouping... grps) throws UDFArgumentTypeException { PrimitiveCategory inputType = ((PrimitiveObjectInspector) arguments[i]).getPrimitiveCategory(); for (PrimitiveGrouping grp : grps) { if (PrimitiveObjectInspectorUtils.getPrimitiveGrouping(inputType) == grp) { inputTypes[i] = inputType; return; } } // build error message StringBuilder sb = new StringBuilder(); sb.append(getFuncName()); sb.append(" only takes "); sb.append(grps[0]); for (int j = 1; j < grps.length; j++) { sb.append(", "); sb.append(grps[j]); } sb.append(" types as "); sb.append(getArgOrder(i)); sb.append(" argument, got "); sb.append(inputType); throw new UDFArgumentTypeException(i, sb.toString()); } protected void obtainStringConverter(ObjectInspector[] arguments, int i, PrimitiveCategory[] inputTypes, Converter[] converters) throws UDFArgumentTypeException { PrimitiveObjectInspector inOi = (PrimitiveObjectInspector) arguments[i]; PrimitiveCategory inputType = inOi.getPrimitiveCategory(); Converter converter = ObjectInspectorConverters.getConverter( arguments[i], PrimitiveObjectInspectorFactory.writableStringObjectInspector); converters[i] = converter; inputTypes[i] = inputType; } protected void obtainIntConverter(ObjectInspector[] arguments, int i, PrimitiveCategory[] inputTypes, Converter[] converters) throws UDFArgumentTypeException { PrimitiveObjectInspector inOi = (PrimitiveObjectInspector) arguments[i]; PrimitiveCategory inputType = inOi.getPrimitiveCategory(); switch (inputType) { case BYTE: case SHORT: case INT: case VOID: break; default: throw new UDFArgumentTypeException(i, getFuncName() + " only takes INT/SHORT/BYTE types as " + getArgOrder(i) + " argument, got " + inputType); } Converter converter = ObjectInspectorConverters.getConverter( arguments[i], PrimitiveObjectInspectorFactory.writableIntObjectInspector); converters[i] = converter; inputTypes[i] = inputType; } protected void obtainLongConverter(ObjectInspector[] arguments, int i, PrimitiveCategory[] inputTypes, Converter[] converters) throws UDFArgumentTypeException { PrimitiveObjectInspector inOi = (PrimitiveObjectInspector) arguments[i]; PrimitiveCategory inputType = inOi.getPrimitiveCategory(); switch (inputType) { case BYTE: case SHORT: case INT: case LONG: case VOID: break; default: throw new UDFArgumentTypeException(i, getFuncName() + " only takes LONG/INT/SHORT/BYTE types as " + getArgOrder(i) + " argument, got " + inputType); } Converter converter = ObjectInspectorConverters.getConverter( arguments[i], PrimitiveObjectInspectorFactory.writableLongObjectInspector); converters[i] = converter; inputTypes[i] = inputType; } protected void obtainDoubleConverter(ObjectInspector[] arguments, int i, PrimitiveCategory[] inputTypes, Converter[] converters) throws UDFArgumentTypeException { PrimitiveObjectInspector inOi = (PrimitiveObjectInspector) arguments[i]; PrimitiveCategory inputType = inOi.getPrimitiveCategory(); Converter converter = ObjectInspectorConverters.getConverter( arguments[i], PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); converters[i] = converter; inputTypes[i] = inputType; } protected void obtainDateConverter(ObjectInspector[] arguments, int i, PrimitiveCategory[] inputTypes, Converter[] converters) throws UDFArgumentTypeException { PrimitiveObjectInspector inOi = (PrimitiveObjectInspector) arguments[i]; PrimitiveCategory inputType = inOi.getPrimitiveCategory(); ObjectInspector outOi; switch (inputType) { case STRING: case VARCHAR: case CHAR: outOi = PrimitiveObjectInspectorFactory.writableStringObjectInspector; break; case TIMESTAMP: case DATE: case VOID: case TIMESTAMPTZ: outOi = PrimitiveObjectInspectorFactory.writableDateObjectInspector; break; default: throw new UDFArgumentTypeException(i, getFuncName() + " only takes STRING_GROUP or DATE_GROUP types as " + getArgOrder(i) + " argument, got " + inputType); } converters[i] = ObjectInspectorConverters.getConverter(inOi, outOi); inputTypes[i] = inputType; } protected void obtainTimestampConverter(ObjectInspector[] arguments, int i, PrimitiveCategory[] inputTypes, Converter[] converters) throws UDFArgumentTypeException { PrimitiveObjectInspector inOi = (PrimitiveObjectInspector) arguments[i]; PrimitiveCategory inputType = inOi.getPrimitiveCategory(); ObjectInspector outOi; switch (inputType) { case STRING: case VARCHAR: case CHAR: case TIMESTAMP: case DATE: case TIMESTAMPTZ: break; default: throw new UDFArgumentTypeException(i, getFuncName() + " only takes STRING_GROUP or DATE_GROUP types as " + getArgOrder(i) + " argument, got " + inputType); } outOi = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector; converters[i] = ObjectInspectorConverters.getConverter(inOi, outOi); inputTypes[i] = inputType; } protected String getStringValue(DeferredObject[] arguments, int i, Converter[] converters) throws HiveException { Object obj; if ((obj = arguments[i].get()) == null) { return null; } return converters[i].convert(obj).toString(); } protected Integer getIntValue(DeferredObject[] arguments, int i, Converter[] converters) throws HiveException { Object obj; if ((obj = arguments[i].get()) == null) { return null; } Object writableValue = converters[i].convert(obj); int v = ((IntWritable) writableValue).get(); return v; } protected Long getLongValue(DeferredObject[] arguments, int i, Converter[] converters) throws HiveException { Object obj; if ((obj = arguments[i].get()) == null) { return null; } Object writableValue = converters[i].convert(obj); long v = ((LongWritable) writableValue).get(); return v; } protected Double getDoubleValue(DeferredObject[] arguments, int i, Converter[] converters) throws HiveException { Object obj; if ((obj = arguments[i].get()) == null) { return null; } Object writableValue = converters[i].convert(obj); double v = ((DoubleWritable) writableValue).get(); return v; } protected Date getDateValue(DeferredObject[] arguments, int i, PrimitiveCategory[] inputTypes, Converter[] converters) throws HiveException { Object obj; if ((obj = arguments[i].get()) == null) { return null; } Date date; switch (inputTypes[i]) { case STRING: case VARCHAR: case CHAR: String dateStr = converters[i].convert(obj).toString(); try { date = DateUtils.getDateFormat().parse(dateStr); } catch (ParseException e) { return null; } break; case TIMESTAMP: case DATE: case TIMESTAMPTZ: Object writableValue = converters[i].convert(obj); date = ((DateWritable) writableValue).get(); break; default: throw new UDFArgumentTypeException(0, getFuncName() + " only takes STRING_GROUP and DATE_GROUP types, got " + inputTypes[i]); } return date; } protected Timestamp getTimestampValue(DeferredObject[] arguments, int i, Converter[] converters) throws HiveException { Object obj; if ((obj = arguments[i].get()) == null) { return null; } Object writableValue = converters[i].convert(obj); // if string can not be parsed converter will return null if (writableValue == null) { return null; } Timestamp ts = ((TimestampWritable) writableValue).getTimestamp(); return ts; } protected String getConstantStringValue(ObjectInspector[] arguments, int i) { Object constValue = ((ConstantObjectInspector) arguments[i]).getWritableConstantValue(); String str = constValue == null ? null : constValue.toString(); return str; } protected Boolean getConstantBooleanValue(ObjectInspector[] arguments, int i) throws UDFArgumentTypeException { Object constValue = ((ConstantObjectInspector) arguments[i]).getWritableConstantValue(); if (constValue == null) { return false; } if (constValue instanceof BooleanWritable) { return ((BooleanWritable) constValue).get(); } else { throw new UDFArgumentTypeException(i, getFuncName() + " only takes BOOLEAN types as " + getArgOrder(i) + " argument, got " + constValue.getClass()); } } protected Integer getConstantIntValue(ObjectInspector[] arguments, int i) throws UDFArgumentTypeException { Object constValue = ((ConstantObjectInspector) arguments[i]).getWritableConstantValue(); if (constValue == null) { return null; } int v; if (constValue instanceof IntWritable) { v = ((IntWritable) constValue).get(); } else if (constValue instanceof ShortWritable) { v = ((ShortWritable) constValue).get(); } else if (constValue instanceof ByteWritable) { v = ((ByteWritable) constValue).get(); } else { throw new UDFArgumentTypeException(i, getFuncName() + " only takes INT/SHORT/BYTE types as " + getArgOrder(i) + " argument, got " + constValue.getClass()); } return v; } protected String getArgOrder(int i) { i++; switch (i % 100) { case 11: case 12: case 13: return i + "th"; default: return i + ORDINAL_SUFFIXES[i % 10]; } } }