/*
* Copyright (c) 2010-2012 Grid Dynamics Consulting Services, Inc, All Rights Reserved
* http://www.griddynamics.com
*
* This library is free software; you can redistribute it and/or modify it under the terms of
* the Apache License; either
* version 2.0 of the License, or any later version.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.griddynamics.jagger.util.statistics.percentiles;
import java.util.ArrayList;
// This class is a derivation of hist4j https://github.com/flaptor/hist4j
/**
* The HistogramDataNode stores the histogram data for a range of values.
* It knows the minimum and maximum values for which it counts the number of instances.
* When the count exceeds the allowed limit it splits itself in two, increasing the
* histogram resolution for this range.
* @author Jorge Handl
*/
public class HistogramDataNode implements HistogramNode {

    // The single histogram cell (count, minValue, maxValue) held by this data node.
    // The reference never changes; only the cell's fields are mutated.
    private final Cell cell = new Cell();

    /**
     * Creates an empty data node.
     */
    public HistogramDataNode () {
        reset();
    }

    /**
     * Creates a data node for the given range with the given instance count.
     * @param count the number of data instances in the given range.
     * @param minValue the start of the range of counted values.
     * @param maxValue the end of the range of counted values.
     */
    public HistogramDataNode (long count, double minValue, double maxValue) {
        reset();
        cell.count = count;
        cell.minValue = minValue;
        cell.maxValue = maxValue;
    }

    /**
     * Clears the data node.<p>
     * The count is set to zero and the range is inverted (minValue = Double.MAX_VALUE,
     * maxValue = -Double.MAX_VALUE), so no value falls inside the range until one has been added.
     */
    public void reset () {
        cell.count = 0;
        cell.minValue = Double.MAX_VALUE;
        cell.maxValue = -Double.MAX_VALUE;
    }

    /**
     * Adds a value to the data node.<p>
     * If the value falls inside of the node's range and the count does not exceed the imposed limit, it simply increments the count.<br>
     * If the value falls outside of the node's range, it expands the range.<br>
     * If the count exceeds the limit, it splits in two assuming uniform distribution inside the node.<br>
     * If the value falls outside of the node's range AND the count exceeds the limit, it creates a new node for that value.
     * @param root a reference to the adaptive histogram instance that uses this structure.
     * @param value the value for which the count is to be incremented.
     * @return A reference to itself if no structural change happened, or a reference to the new fork node if this node was split.
     */
    public HistogramNode addValue (AdaptiveHistogram root, double value) {
        // "self" is what is returned to the caller. If this node needs to be replaced by a fork node,
        // this variable will hold the new fork node and it will be returned to the caller.
        // Otherwise, the node returned will be this, in which case nothing changes.
        HistogramNode self = this;
        if (value >= cell.minValue && value <= cell.maxValue) {
            // the value falls within this node's range
            if (cell.count < root.getCountPerNodeLimit()  // there is enough room in this node for the new value
                    || cell.minValue == cell.maxValue) {  // or the node defines a zero-width range so it can't be split
                cell.count++;
            } else {
                // not enough room, distribute the value count among the new nodes, assuming uniform distribution
                double splitValue = (cell.minValue + cell.maxValue) / 2;
                long rightCount = cell.count / 2;
                long leftCount = rightCount;
                boolean countWasOdd = (leftCount + rightCount < cell.count);
                // assign the new value to the corresponding side.
                // If the count is odd, add the extra item to the other side to keep balance
                if (value > splitValue) {
                    rightCount++;
                    leftCount += (countWasOdd ? 1 : 0);
                } else {
                    leftCount++;
                    rightCount += (countWasOdd ? 1 : 0);
                }
                // create a new subtree that will replace this node
                HistogramNode leftNode = new HistogramDataNode(leftCount, cell.minValue, splitValue);
                HistogramNode rightNode = new HistogramDataNode(rightCount, splitValue, cell.maxValue);
                self = new HistogramForkNode(splitValue, leftNode, rightNode);
            }
        } else {
            // the value falls outside of this node's range
            if (cell.count < root.getCountPerNodeLimit()) {
                // there is enough room in this node for the new value
                cell.count++;
                // extend the range of this node, assuming that the tree structure above correctly directed
                // the given value to this node and therefore it lies at one of the borders of the tree.
                if (value < cell.minValue) cell.minValue = value;
                if (value > cell.maxValue) cell.maxValue = value;
            } else {
                // not enough room, create a new sibling node for the new value and put both under a new fork node.
                // This node's border facing the new value is moved to the midpoint only if that widens this node;
                // the sibling then covers the remaining gap up to the new value.
                if (value < cell.minValue) {
                    cell.minValue = Math.min(cell.minValue, (value + cell.maxValue) / 2);
                    self = new HistogramForkNode(cell.minValue, new HistogramDataNode(1, value, cell.minValue), this);
                } else {
                    cell.maxValue = Math.max(cell.maxValue, (cell.minValue + value) / 2);
                    self = new HistogramForkNode(cell.maxValue, this, new HistogramDataNode(1, cell.maxValue, value));
                }
            }
        }
        return self;
    }

    /**
     * Returns the number of data points stored in the same bucket as a given value.
     * @param value the reference data point.
     * @return the count of this node if the value lies within its range (borders included), zero otherwise.
     */
    public long getCount (double value) {
        long res = 0;
        if (value >= cell.minValue && value <= cell.maxValue) {
            res = cell.count;
        }
        return res;
    }

    /**
     * Returns this node's contribution to the cumulative count for a given data point.
     * @param value the reference data point.
     * @return the full count of this node if the value is at or above the node's minimum value, zero otherwise.
     */
    public long getAccumCount (double value) {
        long res = 0;
        if (value >= cell.minValue) {
            res = cell.count;
        }
        return res;
    }

    // Linear interpolation for double values: the y on the line through (x0,y0) and (x1,y1) at position x.
    // The caller must ensure x1 != x0.
    private static double interpolate (double x0, double y0, double x1, double y1, double x) {
        return y0 + ((x - x0) * (y1 - y0)) / (x1 - x0);
    }

    /**
     * Returns the data point where the running cumulative count reaches the target cumulative count.
     * It uses linear interpolation over the range of the node to get a better estimate of the true value.<p>
     * The running cumulative count in accumCount[0] is always advanced by this node's count,
     * whether or not the target was reached inside this node.
     * @param accumCount an array containing:<br>
     *      - accumCount[0] the running cumulative count. <br>
     *      - accumCount[1] the target cumulative count.
     * @return the data point where the running cumulative count reaches the target cumulative count,
     *         or null if the target is not reached within this node (an empty node never reaches it).
     */
    public Double getValueForAccumCount (long[] accumCount) {
        Double res = null;
        long runningAccumCount = accumCount[0];
        long targetAccumCount = accumCount[1];
        long reachedAccumCount = runningAccumCount + cell.count;
        // An empty node holds no data point to report; interpolating over it would divide by zero
        // and yield NaN, so it is treated like any other node in which the target is not reached.
        if (cell.count > 0
                && runningAccumCount <= targetAccumCount
                && reachedAccumCount >= targetAccumCount) {
            double val = interpolate((double) runningAccumCount, cell.minValue,
                                     (double) reachedAccumCount, cell.maxValue,
                                     (double) targetAccumCount);
            res = Double.valueOf(val);
        }
        accumCount[0] += cell.count;
        return res;
    }

    /**
     * Applies a conversion function to the values stored in the histogram.
     * Both borders of this node's range are converted; the count is left untouched.
     * @param valueConversion a class that defines a function to convert the value.
     */
    public void apply (AdaptiveHistogram.ValueConversion valueConversion) {
        cell.minValue = valueConversion.convertValue(cell.minValue);
        cell.maxValue = valueConversion.convertValue(cell.maxValue);
    }

    /**
     * Build the table representing the histogram data adding this node's cell to it.
     * The cell instance itself (not a copy) is appended to the table.
     * @param table the table to which this node's cell is appended.
     */
    public void toTable (ArrayList<Cell> table) {
        table.add(cell);
    }
}