package com.facebook.hive.udf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* Compute the probability of observing at most k occurrences given an
* expected rate of occurrence r. In other words, evaluate the sum of the
* Poisson distribution with lambda=r over all points <= k (its CDF at k).
*
* The first parameter, k, is the value you want to evaluate the likelihood
* of; the second parameter, r, is the expected rate. PPOIS(k, r) is
* pronounced, "p pois of k given r."
*
* k is always an integer (or you may be misunderstanding this function). r
* is potentially a double. For example, "If we usually get r=10.5 apples off of
* this tree per week, and this week we got k=15, is that a lot?"
* 1-FB_PPOIS(14, 10.5) says it is: Only 11% of the time will you get 15 or more.
*/
@Description(name = "ppois",
    value = "_FUNC_(k, r) - Evaluate the likelihood of observing k given expected rate r")
public class UDFPpois extends UDF {
  /**
   * Evaluates the cumulative Poisson probability P(X &lt;= k) for a Poisson
   * distribution with rate {@code r}, i.e. the sum over i = 0..k of
   * {@code exp(-r) * r^i / i!}.
   *
   * <p>Each term is computed in log space as
   * {@code exp(i * log(r) - r - log(i!))} so that large {@code r^i} and
   * {@code i!} factors do not overflow before they cancel.
   *
   * @param k the largest count to include in the sum; must be non-negative
   * @param r the expected rate (lambda); must be a finite value greater than 0
   * @return the cumulative probability, capped at 1.0, or {@code null} when
   *         either argument is {@code null}, {@code k} is negative, or
   *         {@code r} is NaN, infinite, or not strictly positive
   */
  public Double evaluate(Integer k, Double r) {
    if (k == null || r == null) {
      return null;
    }
    // Unbox once so the loop below works on primitives only.
    final int maxCount = k;
    final double rate = r;
    // !(rate > 0.0) rejects zero, negatives and NaN in a single comparison;
    // an infinite rate would otherwise produce inf - inf = NaN in the exponent.
    if (maxCount < 0 || !(rate > 0.0) || Double.isInfinite(rate)) {
      return null;
    }

    double cdf = Math.exp(-rate); // the i = 0 term
    double logFactorial = 0.0;    // log(0!) = log(1)
    final double logRate = Math.log(rate);
    // A long counter cannot wrap around when maxCount == Integer.MAX_VALUE.
    for (long i = 1; i <= maxCount; i++) {
      logFactorial += Math.log(i);
      final double term = Math.exp(i * logRate - rate - logFactorial);
      final double next = cdf + term;
      // Past the mode (i > rate) consecutive terms shrink by the factor
      // rate / (i + 1) < 1, so once a term no longer changes the running sum
      // no later term will either and the remaining iterations can be skipped.
      if (i > rate && next == cdf) {
        break;
      }
      cdf = next;
    }
    // Accumulated rounding can push the sum a hair above 1; a probability
    // must not exceed 1 (callers commonly compute 1 - ppois(...)).
    return Math.min(cdf, 1.0);
  }
}