// AdaptiveHistogram.java example
//
// Explorer
// jagger-master
/*
 * Copyright (c) 2010-2012 Grid Dynamics Consulting Services, Inc, All Rights Reserved
 * http://www.griddynamics.com
 *
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the Apache License; either
 * version 2.0 of the License, or any later version.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package com.griddynamics.jagger.util.statistics.percentiles;

import java.util.ArrayList;

// This class is a derivation of hist4j https://github.com/flaptor/hist4j

/**
 * This class implements a histogram that adapts to an unknown data distribution.
 * It keeps a more or less constant resolution throughout the data range by increasing
 * the resolution where the data is more dense.  For example, if the data has
 * such a distribution that most of the values lie in the 0-5 range and only a few are
 * in the 5-10 range, the histogram would adapt and assign more counting buckets to
 * the 0-5 range and fewer to the 5-10 range.
 * This implementation provides a method to obtain the accumulative density function
 * for a given data point, and a method to obtain the data point that splits the
 * data set at a given percentile.
 * @author Jorge Handl
 */
public class AdaptiveHistogram {

    /** Divisor applied to the total count to derive the per-bucket limit. */
    private static final int NODE_LIMIT_DIVISOR = 10;

    private long totalCount;     // total number of data points added since the last reset
    private HistogramNode root;  // root of the node tree; null while the histogram is empty

    /**
     * Creates an empty histogram.
     */
    public AdaptiveHistogram() {
        // Initialise the fields directly instead of calling the overridable
        // reset(): a subclass override must never run against a partially
        // constructed object. The resulting state is the same as after reset().
        root = null;
        totalCount = 0;
    }

    /**
     * Erases all data from the histogram.
     * The current root node (if any) is reset and dropped, and the total count
     * goes back to zero.
     */
    public void reset() {
        if (null != root) {
            root.reset();
            root = null;
        }
        totalCount = 0;
    }

    /**
     * Adds a data point to the histogram.
     * @param value the data point to add.
     */
    public void addValue(double value) {
        // The total is incremented before delegating, so the node sees the
        // updated count if it calls back into this histogram
        // (presumably through getCountPerNodeLimit() -- confirm in HistogramNode).
        totalCount++;
        if (null == root) {
            root = new HistogramDataNode();
        }
        // The node returns the node that must be used as root from now on.
        root = root.addValue(this, value);
    }

    /**
     * Returns the number of data points stored in the same bucket as a given value.
     * @param value the reference data point.
     * @return the number of data points stored in the same bucket as the reference point,
     *         or 0 if the histogram is empty.
     */
    public long getCount(double value) {
        if (null == root) {
            return 0;
        }
        return root.getCount(value);
    }

    /**
     * Returns the accumulated count for a given data point, as reported by the root node.
     * @param value the reference data point.
     * @return the accumulated count for the reference point, or 0 if the histogram is empty.
     */
    public long getAccumCount(double value) {
        if (null == root) {
            return 0;
        }
        return root.getAccumCount(value);
    }

    /**
     * Returns the data point that splits the data set at a given percentile.
     * @param percentile the percentile at which the data set is split. It is not validated
     *                   here; values are expected in the 0-100 range.
     * @return the data point that splits the data set at the given percentile,
     *         or 0 if the histogram is empty.
     */
    public Double getValueForPercentile(double percentile) {
        if (null == root) {
            return 0.0;
        }
        // Number of data points that must lie at or below the requested value;
        // the fractional part is truncated by the cast.
        long targetAccumCount = (long) (totalCount * percentile / 100);
        // The array is handed to the node tree as {0, target}
        // (presumably a running accumulator followed by the target -- confirm in HistogramNode).
        double value = root.getValueForAccumCount(new long[]{0, targetAccumCount});
        return value;
    }

    /**
     * This method is used by the internal data structure of the histogram to get the
     * limit of data points that should be counted at one bucket.
     * The limit is one tenth of the total count, never less than 1 and never more
     * than {@link Integer#MAX_VALUE}.
     * @return the limit of data points to store in one bucket.
     */
    protected int getCountPerNodeLimit() {
        // Divide in long arithmetic and clamp before narrowing: a plain (int) cast
        // would wrap around once totalCount exceeds 10 * Integer.MAX_VALUE.
        long limit = totalCount / NODE_LIMIT_DIVISOR;
        if (limit <= 0) {
            return 1;
        }
        return (int) Math.min(limit, (long) Integer.MAX_VALUE);
    }

    /**
     * Auxiliary interface for inline functor object.
     */
    protected interface ValueConversion {
        /**
         * This method should implement the conversion function.
         * @param value the input value.
         * @return the resulting converted value.
         */
        double convertValue(double value);
    }

    /**
     * Return a table representing the data in this histogram.
     * Each element is a table cell containing the range limit values and the count for that range.
     * @return a newly created list filled by the node tree; empty if the histogram is empty.
     */
    public ArrayList<Cell> toTable() {
        ArrayList<Cell> table = new ArrayList<Cell>();
        if (null != root) {
            root.toTable(table);
        }
        return table;
    }

}