package com.facebook.hive.udf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
/**
* PREV returns the value of the argument from the previous row and
* NULL for the first row. For example,
*
* hive> SELECT sepal_width, FB_PREV(sepal_width) FROM jonchang_iris LIMIT 5;
* 2.9 NULL
* 3.1 2.9
* 3.4 3.1
* 3.5 3.4
* 3.1 3.5
*
* Note that each instance of this UDF maintains its own state, i.e.,
* PREV will not return previous rows if they span different mappers.
* One application of PREV is "sessionization", that is, breaking up a
* list of events into sessions such that the time difference between
* any two events is less than some threshold. E.g.,
*
* hive> SELECT user,
* IF(time - FB_PREV(time) > THRESHOLD OR
* user <> FB_PREV(user) OR
* FB_PREV(time) IS NULL, 1, 0)
* AS new_session_marker
* FROM (
* SELECT user, time
* FROM events
* DISTRIBUTE BY user
* SORT BY user, time
* ) A
*
* The UDF CUMSUM can be used on new_session_marker to get a unique id
* for each session.
*/
@Description(name = "udfprev",
             value = "_FUNC_(x) - Returns the value of x on the previous " +
                     "row (and NULL on the first row).")
public class UDFPrev extends GenericUDF {
  // NOTE(review): this UDF carries state between evaluate() calls. Consider
  // marking it with Hive's @UDFType(deterministic = false) (and
  // stateful = true where the Hive version supports it) -- confirm against
  // the Hive release this is built for.

  // Standard-object copy of the argument seen by the most recent evaluate()
  // call; null until the first row has been evaluated.
  Object previous;

  // Inspector for the single argument, captured in initialize() and used to
  // copy each incoming value.
  ObjectInspector oi;

  /**
   * Validates the argument list, resets the remembered row and records the
   * argument's inspector.
   *
   * @param arguments inspectors for the call's arguments; exactly one is
   *                  expected
   * @return the standard object inspector corresponding to the argument's
   *         inspector, matching the standard-object copies that
   *         {@link #evaluate} returns
   * @throws UDFArgumentException if the call does not supply exactly one
   *                              argument
   */
  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments)
      throws UDFArgumentException {
    // Fail with a descriptive error instead of an
    // ArrayIndexOutOfBoundsException when the call has no arguments, and
    // reject extra arguments rather than silently ignoring them.
    if (arguments == null || arguments.length != 1) {
      int actual = (arguments == null) ? 0 : arguments.length;
      throw new UDFArgumentException(
          "FB_PREV takes exactly one argument, but " + actual
          + " were given.");
    }

    previous = null;
    oi = arguments[0];
    return ObjectInspectorUtils.getStandardObjectInspector(oi);
  }

  /**
   * Returns the value remembered from the preceding call and remembers the
   * current argument for the next call.
   *
   * @param arguments the deferred argument values; only the first is read
   * @return the standard-object copy of the argument from the previous call,
   *         or null if this is the first call since {@link #initialize}
   * @throws HiveException if the deferred argument cannot be evaluated
   */
  @Override
  public Object evaluate(DeferredObject[] arguments)
      throws HiveException {
    Object result = previous;
    // Keep a standard-object copy rather than the raw argument object
    // (presumably because Hive may reuse the underlying object for later
    // rows -- confirm).
    previous = ObjectInspectorUtils.copyToStandardObject(arguments[0].get(),
                                                         oi);
    return result;
  }

  /**
   * Builds the textual form of this call from the already-rendered operand
   * strings, e.g. {@code fb_prev(sepal_width)}.
   *
   * @param children display strings of the arguments; a null array is
   *                 rendered as an empty argument list
   * @return the function name (as used in the class-level examples, in
   *         lower case) followed by the comma-separated operands in
   *         parentheses
   */
  @Override
  public String getDisplayString(String[] children) {
    StringBuilder display = new StringBuilder("fb_prev(");
    if (children != null) {
      for (int i = 0; i < children.length; i++) {
        if (i > 0) {
          display.append(", ");
        }
        display.append(children[i]);
      }
    }
    display.append(')');
    return display.toString();
  }
}