/* * Copyright (c) 2010-2012 Grid Dynamics Consulting Services, Inc, All Rights Reserved * http://www.griddynamics.com * * This library is free software; you can redistribute it and/or modify it under the terms of * the Apache License; either * version 2.0 of the License, or any later version. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package com.griddynamics.jagger.util.statistics.percentiles; import java.util.ArrayList; // This class is a derivation of hist4j https://github.com/flaptor/hist4j /** * This class implements a histogram that adapts to an unknown data distribution. * It keeps a more or less constant resolution throughout the data range by increasing * the resolution where the data is more dense. For example, if the data has such * such a distribution that most of the values lie in the 0-5 range and only a few are * in the 5-10 range, the histogram would adapt and assign more counting buckets to * the 0-5 range and less to the 5-10 range. * This implementation provides a method to obtain the accumulative density function * for a given data point, and a method to obtain the data point that splits the * data set at a given percentile. * @author Jorge Handl */ public class AdaptiveHistogram { private long totalCount; // total number of data points private HistogramNode root; // root of the tree /** * Class constructor. */ public AdaptiveHistogram() { root = null; reset(); } /** * Erases all data from the histogram. */ public void reset() { if (null != root) { root.reset(); root = null; } totalCount = 0; } /** * Adds a data point to the histogram. * @param value the data point to add. */ public void addValue(double value) { totalCount++; if (null == root) { root = new HistogramDataNode(); } root = root.addValue(this, value); } /** * Returns the number of data points stored in the same bucket as a given value. * @param value the reference data point. * @return the number of data points stored in the same bucket as the reference point. */ public long getCount(double value) { long count = 0; if (null != root) { count = root.getCount(value); } return count; } /** * Returns the cumulative density function for a given data point. * @param value the reference data point. * @return the cumulative density function for the reference point. */ public long getAccumCount(double value) { long count = 0; if (null != root) { count = root.getAccumCount(value); } return count; } /** * Returns the data point that splits the data set at a given percentile. * @param percentile the percentile at which the data set is split. * @return the data point that splits the data set at the given percentile. */ public Double getValueForPercentile(double percentile) { long targetAccumCount = (long)(totalCount * percentile/100); double value = 0; if (null != root) { value = root.getValueForAccumCount(new long[]{0, targetAccumCount}); } return value; } /** * This method is used by the internal data structure of the histogram to get the * limit of data points that should be counted at one bucket. * @return the limit of data points to store a one bucket. */ protected int getCountPerNodeLimit() { int limit = (int) (totalCount / 10); if (0 == limit) { limit = 1; } return limit; } /** * Auxiliary interface for inline functor object. */ protected interface ValueConversion { /** * This method should implement the conversion function. * @param value the input value. * @return the resulting converted value. */ double convertValue(double value); } /** * Return a table representing the data in this histogram. * Each element is a table cell containing the range limit values and the count for that range. */ public ArrayList<Cell> toTable() { ArrayList<Cell> table = new ArrayList<Cell>(); if (null != root) { root.toTable(table); } return table; } }