package com.facebook.hive.udf;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
/**
* Compute the SUM of row items for which a condition is true;
* "true" means "not false and also not null."
*
* This sounds like it is the same as SUM(col) with a WHERE or GROUP BY, but
* it allows for multiple columns to be tracked separately within a single
* aggregation. This is faster and cleaner than
* SUM(col * CAST(example = foo AS INT)). Also very useful for computing percentages,
* for example SUM_WHERE(col_x, thing='blah') / SUM(col_x).
*
* Rows for which either the conditional or the item to sum is null are
* skipped, exactly like rows for which the conditional is false. The result
* is null only when no row contributed a value, i.e. when there was nothing
* to sum.
*/
public final class UDAFSumWhere extends UDAF {

  /**
   * Evaluator that carries the running sum for one aggregation.
   *
   * <p>The sum stays {@code null} until a non-null value or partial sum has
   * been folded in, so an aggregation that never accumulates anything
   * terminates with {@code null}.
   */
  public static class UDAFCountWhereEvaluator implements UDAFEvaluator {

    /** Running sum; {@code null} while nothing has been accumulated. */
    Double s = null;

    /** Creates an evaluator whose running sum starts out empty (null). */
    public UDAFCountWhereEvaluator() {
      super();
      init();
    }

    /** Resets the running sum to its empty (null) state. */
    public void init() {
      s = null;
    }

    /**
     * Folds a single row into the running sum.
     *
     * @param item the value to add; a null item leaves the sum untouched
     * @param ThisBool the row's condition; the row only counts when this is
     *     non-null and true
     * @return always {@code true}
     */
    public boolean iterate(Double item, Boolean ThisBool) {
      boolean conditionHolds = ThisBool != null && ThisBool;
      if (item == null || !conditionHolds) {
        // Row does not qualify: nothing to add.
        return true;
      }
      accumulate(item);
      return true;
    }

    /**
     * Returns the partial result for this evaluator.
     *
     * @return the running sum so far, or {@code null} if nothing was accumulated
     */
    public Double terminatePartial() {
      return s;
    }

    /**
     * Folds a partial sum into the running sum.
     *
     * @param s2 the partial sum to add; a null partial leaves the sum untouched
     * @return always {@code true}
     */
    public boolean merge(Double s2) {
      if (s2 == null) {
        return true;
      }
      accumulate(s2);
      return true;
    }

    /**
     * Returns the final result of the aggregation.
     *
     * @return the accumulated sum, or {@code null} if nothing was accumulated
     */
    public Double terminate() {
      return s;
    }

    /** Adds a non-null value to the running sum, seeding the sum on first use. */
    private void accumulate(Double addend) {
      if (s != null) {
        s = s + addend;
      } else {
        s = addend;
      }
    }
  }
}