package org.araqne.logdb.query.aggregator;

import java.util.List;

import org.araqne.logdb.Row;
import org.araqne.logdb.query.command.NumberUtil;
import org.araqne.logdb.query.expr.Expression;

/**
 * "var" aggregation function.
 *
 * <p>
 * The running state is the triple (mean {@code m}, sum of squared deviations
 * {@code s2}, sample count {@code c}). {@link #eval()} returns {@code s2 / c},
 * i.e. the variance divided by the number of samples (not by {@code c - 1}).
 *
 * <p>
 * {@link #canBeDistributed()} returns {@code true}; {@link #mapper(List)} and
 * {@link #reducer(List)} hand out a {@link VarianceMapper} that emits the state
 * triple and a {@link VarianceReducer} that combines such triples.
 */
public class Variance extends AbstractAggregationFunction {
    /** Running mean; {@code null} until the first numeric sample is applied. */
    private Double m;
    /** Running sum of squared deviations from the mean; {@code null} until the first numeric sample. */
    private Double s2;
    /** Number of numeric samples applied so far. */
    private int c;

    public Variance(List<Expression> exprs) {
        super(exprs);
    }

    /**
     * Tells whether a partial state carries no samples and must therefore be
     * ignored when combining. Combining with such a state would otherwise unbox
     * a {@code null} or divide by a zero count.
     */
    private static boolean isEmptyPartial(Double mean, Double sumSq, int count) {
        return mean == null || sumSq == null || count <= 0;
    }

    /**
     * Combines two non-empty partial states using the combined-variance formula
     * (http://www.emathzone.com/tutorials/basic-statistics/combined-variance.html).
     *
     * @param m1  mean of the first partial
     * @param ss1 sum of squared deviations of the first partial
     * @param c1  sample count of the first partial, must be positive
     * @param m2  mean of the second partial
     * @param ss2 sum of squared deviations of the second partial
     * @param c2  sample count of the second partial, must be positive
     * @return a two element array: {@code [0]} the combined mean, {@code [1]}
     *         the combined sum of squared deviations; the combined count is
     *         {@code c1 + c2}
     */
    private static double[] combine(double m1, double ss1, int c1, double m2, double ss2, int c2) {
        int total = c1 + c2;
        double mean = (m1 * c1 + m2 * c2) / total;
        double variance = (c1 * (ss1 / c1 + Math.pow(m1 - mean, 2)) + c2 * (ss2 / c2 + Math.pow(m2 - mean, 2))) / total;
        return new double[] { mean, variance * total };
    }

    @Override
    public String getName() {
        return "var";
    }

    /**
     * Feeds one row into the running state. Rows whose first expression does
     * not evaluate to a {@link Number} are ignored.
     */
    @Override
    public void apply(Row map) {
        Expression expr = exprs.get(0);
        Object obj = expr.eval(map);
        if (!(obj instanceof Number))
            return;

        c++;
        // Incremental (Welford-style) update of mean and sum of squared deviations.
        // NOTE(review): on the first sample m and s2 are still null, so this relies on
        // NumberUtil.sub/add accepting a null operand - confirm against NumberUtil.
        Number delta = NumberUtil.sub(obj, m);
        m = NumberUtil.add(m, delta.doubleValue() / c).doubleValue();
        s2 = NumberUtil.add(s2, NumberUtil.mul(delta, NumberUtil.sub(obj, m))).doubleValue();
    }

    /**
     * @return {@code null} when no numeric sample was seen, {@code 0.0} for
     *         fewer than two samples, otherwise {@code s2 / c}
     */
    @Override
    public Object eval() {
        if (s2 == null)
            return null;
        if (c < 2)
            return 0.0;
        return s2 / c;
    }

    @Override
    public void clean() {
        m = null;
        s2 = null;
        c = 0;
    }

    @Override
    public AggregationFunction clone() {
        Variance f = new Variance(exprs);
        f.m = m;
        f.s2 = s2;
        f.c = c;
        return f;
    }

    /** @return the state as {@code [m, s2, c]} */
    @Override
    public Object[] serialize() {
        return new Object[] { m, s2, c };
    }

    /** Restores the state from the {@code [m, s2, c]} layout written by {@link #serialize()}. */
    @Override
    public void deserialize(Object[] values) {
        this.m = (Double) values[0];
        this.s2 = (Double) values[1];
        this.c = (Integer) values[2];
    }

    /**
     * Folds the state of another {@link Variance} into this one. An
     * {@code other} that has not seen any sample is ignored instead of failing
     * with a {@link NullPointerException}.
     */
    @Override
    public void merge(AggregationFunction func) {
        Variance other = (Variance) func;
        if (isEmptyPartial(other.m, other.s2, other.c))
            return;

        if (this.s2 == null) {
            this.s2 = other.s2;
            this.m = other.m;
            this.c = other.c;
            return;
        }

        double[] merged = combine(this.m, this.s2, this.c, other.m, other.s2, other.c);
        this.c = this.c + other.c;
        this.m = merged[0];
        this.s2 = merged[1];
    }

    @Override
    public String toString() {
        return "var(" + exprs.get(0) + ")";
    }

    @Override
    public boolean canBeDistributed() {
        return true;
    }

    @Override
    public AggregationFunction mapper(List<Expression> exprs) {
        return new VarianceMapper(exprs);
    }

    @Override
    public AggregationFunction reducer(List<Expression> exprs) {
        return new VarianceReducer(exprs);
    }

    /**
     * Map-side half of the distributed "var" aggregation: accumulates samples
     * like {@link Variance} but evaluates to the raw state triple
     * {@code [m, s2, c]} instead of the final variance.
     */
    public static class VarianceMapper extends AbstractAggregationFunction {
        /** Running mean; {@code null} until the first numeric sample is applied. */
        private Double m;
        /** Running sum of squared deviations; {@code null} until the first numeric sample. */
        private Double s2;
        /** Number of numeric samples applied so far. */
        private int c;

        public VarianceMapper(List<Expression> exprs) {
            super(exprs);
        }

        @Override
        public String getName() {
            return "varMapper";
        }

        /**
         * @return the state as {@code [m, s2, c]}; {@code m} and {@code s2} are
         *         {@code null} and {@code c} is 0 when no numeric sample was seen
         */
        @Override
        public Object eval() {
            return new Object[] { m, s2, c };
        }

        @Override
        public boolean canBeDistributed() {
            return false;
        }

        @Override
        public AggregationFunction mapper(List<Expression> exprs) {
            return null;
        }

        @Override
        public AggregationFunction reducer(List<Expression> exprs) {
            return null;
        }

        @Override
        public String toString() {
            return "varMapper(" + exprs.get(0) + ")";
        }

        /**
         * Feeds one row into the running state. Rows whose first expression
         * does not evaluate to a {@link Number} are ignored.
         */
        @Override
        public void apply(Row map) {
            Expression expr = exprs.get(0);
            Object obj = expr.eval(map);
            if (!(obj instanceof Number))
                return;

            c++;
            // NOTE(review): relies on NumberUtil.sub/add accepting the null m/s2 of the
            // first sample - confirm against NumberUtil.
            Number delta = NumberUtil.sub(obj, m);
            m = NumberUtil.add(m, delta.doubleValue() / c).doubleValue();
            s2 = NumberUtil.add(s2, NumberUtil.mul(delta, NumberUtil.sub(obj, m))).doubleValue();
        }

        /**
         * Folds the state of another {@link VarianceMapper} into this one. An
         * {@code other} without samples is ignored.
         */
        @Override
        public void merge(AggregationFunction func) {
            VarianceMapper other = (VarianceMapper) func;
            if (isEmptyPartial(other.m, other.s2, other.c))
                return;

            if (this.s2 == null) {
                this.s2 = other.s2;
                this.m = other.m;
                this.c = other.c;
                return;
            }

            double[] merged = combine(this.m, this.s2, this.c, other.m, other.s2, other.c);
            this.c = this.c + other.c;
            this.m = merged[0];
            this.s2 = merged[1];
        }

        /** @return the state as {@code [m, s2, c]} */
        @Override
        public Object[] serialize() {
            return new Object[] { m, s2, c };
        }

        /** Restores the state from the {@code [m, s2, c]} layout written by {@link #serialize()}. */
        @Override
        public void deserialize(Object[] values) {
            this.m = (Double) values[0];
            this.s2 = (Double) values[1];
            this.c = (Integer) values[2];
        }

        @Override
        public void clean() {
            m = null;
            s2 = null;
            c = 0;
        }

        @Override
        public AggregationFunction clone() {
            VarianceMapper f = new VarianceMapper(exprs);
            f.m = m;
            f.s2 = s2;
            f.c = c;
            return f;
        }
    }

    /**
     * Reduce-side half of the distributed "var" aggregation: each applied row
     * is expected to carry an {@code Object[]} state triple {@code [m, s2, c]}
     * as produced by {@link VarianceMapper#eval()}; the triples are combined
     * and {@link #eval()} yields the final variance.
     */
    public static class VarianceReducer extends AbstractAggregationFunction {
        /** Combined mean; {@code null} until the first non-empty partial arrives. */
        private Double m;
        /** Combined sum of squared deviations; {@code null} until the first non-empty partial. */
        private Double s2;
        /** Combined sample count. */
        private int c;

        public VarianceReducer(List<Expression> exprs) {
            super(exprs);
        }

        @Override
        public String getName() {
            return "varReducer";
        }

        /**
         * Combines one partial state into this reducer. Values that are not an
         * {@code Object[]} are ignored, and so are partials that carry no
         * samples (a mapper that saw no numeric value emits
         * {@code [null, null, 0]}), which previously caused a
         * {@link NullPointerException} once this reducer already held data.
         */
        @Override
        public void apply(Row map) {
            Expression expr = exprs.get(0);
            Object obj = expr.eval(map);
            if (!(obj instanceof Object[]))
                return;

            Object[] values = (Object[]) obj;
            Double partialMean = (Double) values[0];
            Double partialS2 = (Double) values[1];
            Integer partialCount = (Integer) values[2];
            if (partialCount == null || isEmptyPartial(partialMean, partialS2, partialCount))
                return;

            if (this.s2 == null) {
                this.s2 = partialS2;
                this.m = partialMean;
                this.c = partialCount;
                return;
            }

            double[] merged = combine(this.m, this.s2, this.c, partialMean, partialS2, partialCount);
            this.c = this.c + partialCount;
            this.m = merged[0];
            this.s2 = merged[1];
        }

        /**
         * @return {@code null} when no sample was combined, {@code 0.0} for
         *         fewer than two samples, otherwise {@code s2 / c}
         */
        @Override
        public Object eval() {
            if (s2 == null)
                return null;
            if (c < 2)
                return 0.0;
            return s2 / c;
        }

        /**
         * Folds the state of another {@link VarianceReducer} into this one. An
         * {@code other} without samples is ignored.
         */
        @Override
        public void merge(AggregationFunction func) {
            VarianceReducer other = (VarianceReducer) func;
            if (isEmptyPartial(other.m, other.s2, other.c))
                return;

            if (this.s2 == null) {
                this.s2 = other.s2;
                this.m = other.m;
                this.c = other.c;
                return;
            }

            double[] merged = combine(this.m, this.s2, this.c, other.m, other.s2, other.c);
            this.c = this.c + other.c;
            this.m = merged[0];
            this.s2 = merged[1];
        }

        /** @return the state as {@code [m, s2, c]} */
        @Override
        public Object[] serialize() {
            return new Object[] { m, s2, c };
        }

        /** Restores the state from the {@code [m, s2, c]} layout written by {@link #serialize()}. */
        @Override
        public void deserialize(Object[] values) {
            this.m = (Double) values[0];
            this.s2 = (Double) values[1];
            this.c = (Integer) values[2];
        }

        @Override
        public void clean() {
            m = null;
            s2 = null;
            c = 0;
        }

        @Override
        public AggregationFunction clone() {
            VarianceReducer result = new VarianceReducer(exprs);
            result.m = this.m;
            result.s2 = this.s2;
            result.c = this.c;
            return result;
        }

        @Override
        public boolean canBeDistributed() {
            return false;
        }

        @Override
        public AggregationFunction mapper(List<Expression> exprs) {
            return null;
        }

        @Override
        public AggregationFunction reducer(List<Expression> exprs) {
            return null;
        }

        @Override
        public String toString() {
            return "varReducer(" + exprs.get(0) + ")";
        }
    }
}