package com.facebook.hive.udf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* Randomly sample an index with weight proportional to arguments. If any of
* the weights are NULL, negative, infinite, or not numerically interpretable
* then NULL is returned. The argument should be a series of doubles. If N
* denotes the number of arguments, v[i] denotes the ith argument and V denotes
* \sum_i v[i] then this function returns a sample from 0 to N-1 inclusive
* where j has probability v[j] / V of being chosen.
*/
@Description(name = "choose",
value = "_FUNC_(v1, v2, ...) - Randomly samples an element " +
"from a 0-indexed index with weight proportional to" +
"arguments.")
public class UDFChoose extends UDF {
public Integer evaluate(Double... vals) {
double sum = 0.0;
for (int ii = 0; ii < vals.length; ++ii) {
Double v = vals[ii];
if (v == null || v < 0 || v.isNaN() || v.isInfinite()) {
return null;
}
sum += vals[ii];
}
double r = Math.random() * sum ;
for (int ii = 0; ii < vals.length; ++ii) {
if (r < vals[ii]) {
return Integer.valueOf(ii);
}
r -= vals[ii] ;
}
// In case of floating point precision issues, return index.
return Integer.valueOf(vals.length - 1);
}
}