KeyPercentileDistribution.java example

Explorer
hydra-master
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.addthis.hydra.data.util;

import com.addthis.codec.annotations.FieldConfig;
import com.addthis.codec.codables.SuperCodable;
import com.yammer.metrics.stats.CodableUniformSample;
import com.yammer.metrics.stats.Snapshot;

import static java.lang.Math.sqrt;

/**
 * Maintains a percentile distribution for a given key.
 * <p/>
 * This code borrows heavily from CodaHale's Histogram in the metrics project.
 * <p/>
 * See https://github.com/codahale/metrics/blob/master/metrics-core/src/main/java/com/yammer/metrics/core/Histogram.java
 * <p/>
 * for the original source.  The main difference is that this version uses many codable
 * fields so that we can persist and re-hydrate the object using Codec.
 * <p/>
 * Note:  Currently this class only supports uniform sampling.  The value input should be a long
 */
public class KeyPercentileDistribution implements SuperCodable {

    /** Smallest value recorded so far; stays at {@code Long.MAX_VALUE} until the first update. */
    @FieldConfig(codable = true)
    private long min = Long.MAX_VALUE;
    /** Largest value recorded so far; stays at {@code Long.MIN_VALUE} until the first update. */
    @FieldConfig(codable = true)
    private long max = Long.MIN_VALUE;
    /** Running total of every recorded value (plain long arithmetic, not overflow-checked). */
    @FieldConfig(codable = true)
    private long sum = 0;

    // redundant with the count stored in sample
    @FieldConfig(codable = true)
    private long count = 0;

    /** Backing sample; created by {@link #init()} or, if absent after decoding, by {@link #postDecode()}. */
    @FieldConfig(codable = true)
    private CodableUniformSample sample;

    // These two arrays are never read by this class; they are kept as codable
    // fields and are nulled out in postDecode().
    @FieldConfig(codable = true)
    private double[] arrayCacheValue;
    @FieldConfig(codable = true)
    private double[] varianceValues;

    // Size passed to the sample when it is created in init() / postDecode().
    @FieldConfig(codable = true)
    private int sampleSize;

    // These are for the Welford algorithm for calculating running variance
    // without floating-point doom.
    // m is the running mean, s the running sum of squared deviations from it.
    // Both are transient, so they are not persisted with the object.
    private transient double m;
    private transient double s;

    // for CodecBin2
    public KeyPercentileDistribution() {}

    /**
     * Creates a distribution that will use the given sample size.
     * The sample itself is not created here; call {@link #init()} before updating.
     *
     * @param sampleSize size handed to the backing sample when it is initialized
     */
    public KeyPercentileDistribution(int sampleSize) {
        this.sampleSize = sampleSize;
    }

    /**
     * Creates the backing sample using the configured sample size.
     *
     * @return this instance, to allow call chaining
     */
    public KeyPercentileDistribution init() {
        sample = new CodableUniformSample().init(sampleSize);
        return this;
    }

    /**
     * Restores the state that is not carried by the codable fields: creates the sample
     * if none was decoded, drops the unused arrays, and re-seeds the running mean.
     */
    @Override
    public void postDecode() {
        if (sample == null) {
            sample = new CodableUniformSample().init(sampleSize);
        }
        // these fields aren't helpful, so null them out to reduce their cost
        arrayCacheValue = null;
        varianceValues = null;
        // m is transient, so it would otherwise restart at 0 and the next update would
        // measure its deviation against a bogus mean. The Welford running mean is
        // sum / count, which can be rebuilt from the persisted fields. The squared
        // deviation sum (s) cannot be rebuilt, so the variance contributed by values
        // recorded before encoding is still lost.
        if (count > 0) {
            m = (double) sum / count;
        }
    }

    @Override public void preEncode() {}

    /**
     * Adds a recorded value.
     *
     * @param value the value to record
     */
    public void update(int value) {
        update((long) value);
    }

    /**
     * Adds a recorded value, updating count, sample, min, max, sum and running variance.
     *
     * @param value the value to record
     */
    public void update(long value) {
        // count must be incremented first: updateVariance() divides by the new count
        count += 1;
        sample.update(value);
        setMax(value);
        setMin(value);
        sum += value;
        updateVariance(value);
    }

    /**
     * Returns the number of values recorded.
     *
     * @return the number of values recorded
     */
    public long count() {
        return count;
    }

    /**
     * Returns the largest recorded value.
     *
     * @return the maximum, or 0 if nothing has been recorded
     * @see com.yammer.metrics.core.Summarizable#max()
     */
    public long max() {
        if (count() > 0) {
            return max;
        }
        return 0;
    }

    /**
     * Returns the smallest recorded value.
     *
     * @return the minimum, or 0 if nothing has been recorded
     * @see com.yammer.metrics.core.Summarizable#min()
     */
    public long min() {
        if (count() > 0) {
            return min;
        }
        return 0;
    }

    /**
     * Returns the arithmetic mean of the recorded values.
     *
     * @return {@code sum / count} as a double, or 0 if nothing has been recorded
     * @see com.yammer.metrics.core.Summarizable#mean()
     */
    public double mean() {
        if (count() > 0) {
            // widen before dividing; long / long would truncate the fractional part
            return (double) sum / count();
        }
        return 0;
    }

    /**
     * Returns the sample standard deviation of the recorded values.
     *
     * @return the square root of the running variance, or 0.0 if nothing has been recorded
     * @see com.yammer.metrics.core.Summarizable#stdDev()
     */
    public double stdDev() {
        if (count() > 0) {
            return sqrt(variance());
        }
        return 0.0;
    }

    /**
     * Returns a snapshot of the backing sample.
     *
     * @return the snapshot produced by the sample
     */
    public Snapshot getSnapshot() {
        return sample.getSnapshot();
    }

    /** Returns {@code s / (count - 1)}, or 0.0 when fewer than two values are recorded. */
    private double variance() {
        if (count() <= 1) {
            return 0.0;
        }
        return s / (double) (count() - 1);
    }

    private void setMax(long potentialMax) {
        max = Math.max(max, potentialMax);
    }

    private void setMin(long potentialMin) {
        min = Math.min(min, potentialMin);
    }

    /**
     * Folds one value into the Welford running mean and squared-deviation sum.
     * Must be called after {@code count} has been incremented for this value.
     */
    private void updateVariance(long value) {
        if (count == 1) {
            // First value: the mean is the value itself and there is no spread yet.
            // Keyed on count rather than on a sentinel mean, so a running mean that
            // happens to equal the sentinel can never reset the accumulated variance.
            m = value;
            s = 0;
            return;
        }
        final double oldM = m;
        final double newM = oldM + ((value - oldM) / count);
        s += (value - oldM) * (value - newM);
        m = newM;
    }
}