// HistogramDataNode.java example
//
// Explorer
// jagger-master
/*
 * Copyright (c) 2010-2012 Grid Dynamics Consulting Services, Inc, All Rights Reserved
 * http://www.griddynamics.com
 *
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the Apache License; either
 * version 2.0 of the License, or any later version.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package com.griddynamics.jagger.util.statistics.percentiles;

import java.util.ArrayList;

// This class is a derivation of hist4j https://github.com/flaptor/hist4j

/**
 * The HistogramDataNode stores the histogram data for a range of values.
 * It knows the minimum and maximum values for which it counts the number of instances.
 * When a value arrives and the count has already reached the allowed limit, the node is
 * replaced by a fork node with two data nodes below it, increasing the histogram
 * resolution for this range.
 * @author Jorge Handl
 */
public class HistogramDataNode implements HistogramNode {

    // The single cell (count, minValue, maxValue) that holds this node's data.
    // The reference never changes; only the cell's fields are mutated.
    private final Cell cell = new Cell();

    /**
     * Creates an empty data node.
     */
    public HistogramDataNode () {
        reset();
    }

    /**
     * Creates a data node for the given range with the given instance count.
     * @param count the number of data instances in the given range.
     * @param minValue the start of the range of counted values.
     * @param maxValue the end of the range of counted values.
     */
    public HistogramDataNode (long count, double minValue, double maxValue) {
        reset();
        cell.count = count;
        cell.minValue = minValue;
        cell.maxValue = maxValue;
    }

    /**
     * Clears the data node: the count becomes zero and the range becomes empty
     * (minValue above maxValue), so that any value added next lies outside the range.
     */
    public void reset () {
        cell.count = 0;
        cell.minValue = Double.MAX_VALUE;
        cell.maxValue = -Double.MAX_VALUE;
    }

    /**
     * Adds a value to the data node.<p>
     * If the value falls inside of the node's range and the count is below the imposed limit
     * (or the range has zero width), it simply increments the count.<br>
     * If the value falls inside of the node's range and the count has reached the limit,
     * it splits in two assuming uniform distribution inside the node.<br>
     * If the value falls outside of the node's range and the count is below the limit,
     * it increments the count and expands the range.<br>
     * If the value falls outside of the node's range AND the count has reached the limit,
     * it creates a new sibling node for that value.
     * @param root a reference to the adaptive histogram instance that uses this structure.
     * @param value the value for which the count is to be incremented.
     * @return A reference to itself if no structural change happened, or a reference to the new fork node
     *         that must replace this node in the tree.
     */
    public HistogramNode addValue (AdaptiveHistogram root, double value) {
        boolean insideRange = value >= cell.minValue && value <= cell.maxValue;
        boolean hasRoom = cell.count < root.getCountPerNodeLimit();

        if (insideRange) {
            // A zero-width range cannot be split, so it keeps absorbing values even past the limit.
            if (hasRoom || cell.minValue == cell.maxValue) {
                cell.count++;
                return this;
            }
            return splitAroundMidpoint(value);
        }

        if (hasRoom) {
            cell.count++;
            // Extend the range of this node, assuming that the tree structure above correctly directed
            // the given value to this node and therefore it lies at one of the borders of the tree.
            if (value < cell.minValue) {
                cell.minValue = value;
            }
            if (value > cell.maxValue) {
                cell.maxValue = value;
            }
            return this;
        }
        return forkWithSibling(value);
    }

    // Replaces this full node by a fork over two halves of its range, sharing the count evenly
    // (uniform distribution assumed) and crediting the new value to the half it falls in.
    private HistogramNode splitAroundMidpoint (double value) {
        double splitValue = (cell.minValue + cell.maxValue) / 2;
        long rightCount = cell.count / 2;
        long leftCount = rightCount;
        boolean countWasOdd = (leftCount + rightCount < cell.count);
        // The new value goes to its own side; if the old count was odd, the leftover
        // item goes to the other side to keep the two halves balanced.
        if (value > splitValue) {
            rightCount++;
            leftCount += (countWasOdd ? 1 : 0);
        } else {
            leftCount++;
            rightCount += (countWasOdd ? 1 : 0);
        }
        HistogramNode leftNode = new HistogramDataNode(leftCount, cell.minValue, splitValue);
        HistogramNode rightNode = new HistogramDataNode(rightCount, splitValue, cell.maxValue);
        return new HistogramForkNode(splitValue, leftNode, rightNode);
    }

    // Keeps this full node as one child of a new fork and creates a one-item sibling holding
    // the out-of-range value; the boundary of this node is moved so that both ranges touch.
    private HistogramNode forkWithSibling (double value) {
        if (value < cell.minValue) {
            cell.minValue = Math.min(cell.minValue, (value + cell.maxValue) / 2);
            return new HistogramForkNode(cell.minValue, new HistogramDataNode(1, value, cell.minValue), this);
        }
        cell.maxValue = Math.max(cell.maxValue, (cell.minValue + value) / 2);
        return new HistogramForkNode(cell.maxValue, this, new HistogramDataNode(1, cell.maxValue, value));
    }

    /**
     * Returns the number of data points stored in the same bucket as a given value.
     * @param value the reference data point.
     * @return the count of this node if the value lies within its range (bounds included), zero otherwise.
     */
    public long getCount (double value) {
        boolean insideRange = value >= cell.minValue && value <= cell.maxValue;
        return insideRange ? cell.count : 0;
    }

    /**
     * Returns this node's contribution to the cumulative count for a given data point.
     * The contribution is all-or-nothing: no interpolation is done inside the node's range.
     * @param value the reference data point.
     * @return the whole count of this node if the value is at or above the node's minimum, zero otherwise.
     */
    public long getAccumCount (double value) {
        return (value >= cell.minValue) ? cell.count : 0;
    }

    // Linear interpolation for double values: the y on the line through (x0,y0) and (x1,y1) at x.
    // The result is not finite when x0 equals x1; callers must avoid that case.
    private double interpolate (double x0, double y0, double x1, double y1, double x) {
        return y0 + ((x - x0) * (y1 - y0)) / (x1 - x0);
    }

    /**
     * Returns the data point where the running cumulative count reaches the target cumulative count.
     * It uses linear interpolation over the range of the node to get a better estimate of the true value.
     * The running cumulative count is always advanced by this node's count, whether or not
     * the target was found here.
     * @param accumCount an array containing:<br>
     *      - accumCount[0] the running cumulative count (updated in place). <br>
     *      - accumCount[1] the target cumulative count.
     * @return the data point where the running cumulative count reaches the target cumulative count,
     *         or null if the target is not reached inside this node (an empty node never matches).
     */
    public Double getValueForAccumCount (long[] accumCount) {
        long runningAccumCount = accumCount[0];
        long targetAccumCount = accumCount[1];
        Double res = null;
        // An empty node holds no data point to report. Without the count check it would pass the
        // range test when running == target and the interpolation would divide zero by zero (NaN).
        if (cell.count > 0
                && runningAccumCount <= targetAccumCount
                && runningAccumCount + cell.count >= targetAccumCount) {
            double val = interpolate((double) runningAccumCount, cell.minValue,
                    (double) (runningAccumCount + cell.count), cell.maxValue,
                    (double) targetAccumCount);
            res = Double.valueOf(val);
        }
        accumCount[0] += cell.count;
        return res;
    }

    /**
     * Applies a conversion function to the range bounds stored in this node.
     * The count is left untouched.
     * @param valueConversion a class that defines a function to convert the value.
     */
    public void apply (AdaptiveHistogram.ValueConversion valueConversion) {
        cell.minValue = valueConversion.convertValue(cell.minValue);
        cell.maxValue = valueConversion.convertValue(cell.maxValue);
    }

    /**
     * Build the table representing the histogram data adding this node's cell to it.
     * The cell object itself is appended, not a copy.
     * @param table the list that receives this node's cell.
     */
    public void toTable (ArrayList<Cell> table) {
        table.add(cell);
    }

}