package com.facebook.hive.udf; import org.apache.hadoop.hive.ql.exec.UDAF; import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; /** * Compute a COUNT of row items in which a condition is true; * "true" means "not false and also not null." * * This sounds like it is the same as COUNT(1) with a WHERE or GROUP BY, but * it allows for multiple columns to be tracked separately within a single * aggregation. This is faster and cleaner than * SUM(CAST(example = foo AS INT)), and also appropriately returns zero * when the item in question is NULL, unlike COUNT(1) which doesn't know what * is and isn't NULL. */ public final class UDAFCountWhere extends UDAF { /** * The actual class for doing the aggregation. Hive will automatically * look for all internal classes of the UDAF that implements * UDAFEvaluator. */ public static class UDAFCountWhereEvaluator implements UDAFEvaluator { Integer c = 0; public UDAFCountWhereEvaluator() { super(); init(); } /** * Reset the state of the aggregation. */ public void init() { c = 0; } /** * Iterate through one row of original data. * * The number and type of arguments need to the same as we call this UDAF * from Hive command line. * * This function should always return true. */ public boolean iterate(Boolean ThisBool) { if (ThisBool != null && ThisBool) { c++; } return true; } /** * Terminate a partial aggregation and return the state. If the state is a * primitive, just return primitive Java classes like Integer or String. */ public Integer terminatePartial() { return c; } /** * Merge with a partial aggregation. * * This function should always have a single argument which has the same * type as the return value of terminatePartial(). */ public boolean merge(Integer c1) { c += c1; return true; } /** * Terminates the aggregation and return the final result. */ public Integer terminate() { return c; } } }