/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.data.util; import com.addthis.codec.annotations.FieldConfig; import com.addthis.codec.codables.SuperCodable; import com.yammer.metrics.stats.CodableUniformSample; import com.yammer.metrics.stats.Snapshot; import static java.lang.Math.sqrt; /** * Maintains a percentile distribution for a given key. * <p/> * This code heavily borrows heavily from CodaHale's Histogram in the metrics project. * <p/> * See https://github.com/codahale/metrics/blob/master/metrics-core/src/main/java/com/yammer/metrics/core/Histogram.java * <p/> * for the original source. The main difference is that this version uses many codable * fields so that we can persist and re-hydrate the object using Codec. * <p/> * Note: Currently this class only supports uniform sampling. The value input should be a long */ public class KeyPercentileDistribution implements SuperCodable { @FieldConfig(codable = true) private long min = Long.MAX_VALUE; @FieldConfig(codable = true) private long max = Long.MIN_VALUE; @FieldConfig(codable = true) private long sum = 0; // redundant with the count stored in sample @FieldConfig(codable = true) private long count = 0; @FieldConfig(codable = true) private CodableUniformSample sample; // these three fields aren't really used @FieldConfig(codable = true) private double[] arrayCacheValue; @FieldConfig(codable = true) private double[] varianceValues; @FieldConfig(codable = true) private int sampleSize; // These are for the Welford algorithm for calculating running variance // without floating-point doom. private transient double m; private transient double s; // for CodecBin2 public KeyPercentileDistribution() {} public KeyPercentileDistribution(int sampleSize) { this.sampleSize = sampleSize; } public KeyPercentileDistribution init() { sample = new CodableUniformSample().init(sampleSize); return this; } @Override public void postDecode() { if (sample == null) { sample = new CodableUniformSample().init(sampleSize); } // these fields aren't helpful, so null them out to reduce their cost arrayCacheValue = null; varianceValues = null; } @Override public void preEncode() {} /** Adds a recorded value. */ public void update(int value) { update((long) value); } /** Adds a recorded value. */ public void update(long value) { count += 1; sample.update(value); setMax(value); setMin(value); sum += value; updateVariance(value); } /** Returns the number of values recorded. */ public long count() { return count; } /* (non-Javadoc) * @see com.yammer.metrics.core.Summarizable#max() */ public long max() { if (count() > 0) { return max; } return 0; } /* (non-Javadoc) * @see com.yammer.metrics.core.Summarizable#min() */ public long min() { if (count() > 0) { return min; } return 0; } /* (non-Javadoc) * @see com.yammer.metrics.core.Summarizable#mean() */ public double mean() { if (count() > 0) { return (double) (sum / count()); } return 0; } /* (non-Javadoc) * @see com.yammer.metrics.core.Summarizable#stdDev() */ public double stdDev() { if (count() > 0) { return sqrt(variance()); } return 0.0; } public Snapshot getSnapshot() { return sample.getSnapshot(); } private double variance() { if (count() <= 1) { return 0.0; } return s / (double) (count() - 1); } private void setMax(long potentialMax) { max = Math.max(max, potentialMax); } private void setMin(long potentialMin) { min = Math.min(min, potentialMin); } private void updateVariance(long value) { final double oldM = m; final double oldS = s; if (oldM == -1) { m = value; s = 0; } else { final double newM = oldM + ((value - oldM) / count()); final double newS = oldS + ((value - oldM) * (value - newM)); m = newM; s = newS; } } }