package mil.nga.giat.geowave.core.store.adapter.statistics.histogram;

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.nio.ByteBuffer;

import mil.nga.giat.geowave.core.index.FloatCompareUtils;

/**
 * Histogram with a fixed number of equal-width bins. Unless a range is
 * supplied at construction time, the covered range expands dynamically as
 * values arrive, redistributing the existing counts into the wider bins.
 *
 * The advantage of constraining the range is that values outside of it, such
 * as erroneous values, are ignored. Erroneous values force extremes in the
 * histogram. For example, if the expected values fall between 0 and 1 and a
 * value of 10000 occurs, a single bin ends up holding the entire population
 * between 0 and 1 and another bin holds the single value 10000.
 *
 * Note that the "constrained" flag is not part of the binary form written by
 * {@link #toBinary(ByteBuffer)}; {@link #fromBinary(ByteBuffer)} restores only
 * the counts and the current minimum/maximum.
 */
public class FixedBinNumericHistogram implements NumericHistogram {

    /** Per-bin counts; bin i covers [min + i * binSize, min + (i + 1) * binSize). */
    private long[] count = new long[32];

    /** Sum of all amounts accepted by {@link #add(long, double)}. */
    private long totalCount = 0;

    /** Lower bound of the covered range; a sentinel until the first value arrives. */
    private double minValue = Double.MAX_VALUE;

    /**
     * Upper bound of the covered range; a sentinel until the first value
     * arrives. This must be the most negative finite double.
     * Double.MIN_VALUE is the smallest POSITIVE double and would win
     * every Math.max against negative data.
     */
    private double maxValue = -Double.MAX_VALUE;

    /** True when the range was fixed by the constructor and must never move. */
    private boolean constrainedRange = false;

    /**
     * Creates a new histogram with the default number of bins (32) and a
     * dynamically expanding range.
     */
    public FixedBinNumericHistogram() {}

    /**
     * Creates a new histogram with a dynamically expanding range.
     *
     * @param size
     *            the number of bins
     */
    public FixedBinNumericHistogram(
            final int size ) {
        count = new long[size];
    }

    /**
     * Creates a new histogram over a fixed range. Values outside of
     * [minValue, maxValue] are ignored by {@link #add(long, double)}.
     *
     * @param bins
     *            the number of bins
     * @param minValue
     *            inclusive lower bound of the accepted range
     * @param maxValue
     *            inclusive upper bound of the accepted range
     */
    public FixedBinNumericHistogram(
            final int bins,
            final double minValue,
            final double maxValue ) {
        count = new long[bins];
        this.minValue = minValue;
        this.maxValue = maxValue;
        constrainedRange = true;
    }

    /**
     * Computes evenly spaced quantiles.
     *
     * @param bins
     *            the number of quantiles to compute
     * @return an array whose i-th entry is the quantile at (i + 1) / bins
     */
    public double[] quantile(
            final int bins ) {
        final double[] result = new double[bins];
        final double binSize = 1.0 / bins;
        for (int bin = 0; bin < bins; bin++) {
            result[bin] = quantile(binSize * (bin + 1));
        }
        return result;
    }

    /**
     * Estimates the fraction of the population at or below the given value.
     *
     * @param val
     *            the value to evaluate
     * @return the estimated cumulative fraction; 0.0 for an empty histogram
     */
    public double cdf(
            final double val ) {
        if (totalCount == 0) {
            // avoid 0.0 / 0 == NaN when nothing has been added yet
            return 0.0;
        }
        return sum(
                val,
                false) / totalCount;
    }

    /**
     * Estimates the number of values consumed up to the provided value,
     * interpolating linearly inside the bin that contains it.
     *
     * @param val
     *            the value to evaluate
     * @param inclusive
     *            when true, an estimate that is not positive is reported as
     *            1.0 instead
     * @return the number of estimated points
     */
    public double sum(
            final double val,
            boolean inclusive ) {
        if (val < this.minValue) {
            return 0.0;
        }
        final double range = maxValue - minValue;
        if ((range <= 0.0) || (totalCount == 0)) {
            return totalCount;
        }

        final int bin = binIndex(val);

        double c = 0;
        final double perBinSize = binSize();
        for (int i = 0; i < bin; i++) {
            c += count[i];
        }
        // linear share of the bin holding val, capped at the whole bin
        final double percentageOfLastBin = Math.min(
                1.0,
                (val - ((perBinSize * (bin)) + minValue)) / perBinSize);
        c += (percentageOfLastBin * count[bin]);
        return c > 0 ? c : (inclusive ? 1.0 : c);
    }

    /**
     * @return the width of one bin, or 1.0 when the range is (nearly) empty so
     *         that callers never divide by zero
     */
    private double binSize() {
        final double v = (maxValue - minValue) / count.length;
        return (FloatCompareUtils.checkDoublesEqual(
                v,
                0.0)) ? 1.0 : v;
    }

    /**
     * Maps a value inside the current range to its bin.
     *
     * @param num
     *            a value not smaller than the current minimum
     * @return the bin index, capped at the last bin; 0 for an empty range
     */
    private int binIndex(
            final double num ) {
        final double range = maxValue - minValue;
        if (range <= 0.0) {
            return 0;
        }
        final double b = (((num - minValue) / range) * count.length);
        return Math.min(
                (int) Math.floor(b),
                count.length - 1);
    }

    /**
     * Estimates the value below which the given fraction of the population
     * falls.
     *
     * @param percentage
     *            the fraction of the population, between 0 and 1
     * @return the estimated value; the current minimum when the fraction is
     *         not positive or the histogram is empty
     */
    public double quantile(
            final double percentage ) {
        final double fractionOfTotal = percentage * totalCount;
        double countThisFar = 0;
        int bin = 0;

        for (; (bin < count.length) && (countThisFar < fractionOfTotal); bin++) {
            countThisFar += count[bin];
        }
        if (bin == 0) {
            return minValue;
        }

        // the loop leaves 'bin' one past the bin that holds the quantile
        final int lastBin = bin - 1;
        final double perBinSize = binSize();
        final double countUptoLastBin = countThisFar - count[lastBin];
        final double estimate = minValue + ((perBinSize * lastBin)
                + (perBinSize * ((fractionOfTotal - countUptoLastBin) / count[lastBin])));
        // never report a value beyond the observed maximum (also absorbs the
        // division by an empty last bin when the scan ran out of bins)
        return Math.min(
                estimate,
                maxValue);
    }

    /**
     * @param start
     *            lower end of the interval
     * @param stop
     *            upper end of the interval
     * @return the estimated fraction of the population between start and stop
     */
    public double percentPopulationOverRange(
            final double start,
            final double stop ) {
        return cdf(stop) - cdf(start);
    }

    /**
     * @return the total number of consumed values
     */
    public long totalSampleSize() {
        return totalCount;
    }

    /**
     * Re-samples the histogram into the requested number of equal-width
     * windows over the current range.
     *
     * @param bins
     *            the number of windows
     * @return the estimated count per window
     */
    public long[] count(
            final int bins ) {
        final long[] result = new long[bins];
        final double increment = (maxValue - minValue) / bins;
        // evaluate the cumulative estimate at the upper edge of each window
        double upperEdge = minValue + increment;
        long last = 0;
        for (int bin = 0; bin < bins; bin++, upperEdge += increment) {
            final long aggSum = (long) Math.ceil(sum(
                    upperEdge,
                    false));
            result[bin] = aggSum - last;
            last = aggSum;
        }
        return result;
    }

    /**
     * Folds another histogram into this one. Both are re-binned over the
     * union of their ranges before the counts are added. The argument itself
     * is not modified.
     *
     * @param mergeable
     *            the histogram to merge; it is cast to
     *            {@link FixedBinNumericHistogram}
     */
    public void merge(
            final NumericHistogram mergeable ) {
        final FixedBinNumericHistogram other = (FixedBinNumericHistogram) mergeable;
        final double newMinValue = Math.min(
                minValue,
                other.minValue);
        final double newMaxValue = Math.max(
                maxValue,
                other.maxValue);
        this.redistribute(
                newMinValue,
                newMaxValue);

        // redistribute() rewrites the counts of the instance it runs on, so
        // run it on a scratch copy rather than on the caller's histogram
        final FixedBinNumericHistogram scratch = new FixedBinNumericHistogram();
        scratch.count = other.count.clone();
        scratch.minValue = other.minValue;
        scratch.maxValue = other.maxValue;
        scratch.redistribute(
                newMinValue,
                newMaxValue);

        for (int i = 0; i < count.length; i++) {
            count[i] += scratch.count[i];
        }
        maxValue = newMaxValue;
        minValue = newMinValue;
        totalCount += other.totalCount;
    }

    /**
     * @return the number of bytes written by {@link #toBinary(ByteBuffer)}:
     *         8 (total count) + 8 (minimum) + 8 (maximum) + 4 (bin count) plus
     *         8 per bin
     */
    public int bufferSize() {
        return 28 + (8 * count.length);
    }

    /**
     * Writes the total count, minimum, maximum, number of bins and each bin
     * count to the buffer, in that order.
     *
     * @param buffer
     *            the destination buffer
     */
    public void toBinary(
            ByteBuffer buffer ) {
        buffer.putLong(totalCount);
        buffer.putDouble(minValue);
        buffer.putDouble(maxValue);
        buffer.putInt(count.length);
        for (int i = 0; i < count.length; i++) {
            buffer.putLong(count[i]);
        }
    }

    /**
     * Restores the state written by {@link #toBinary(ByteBuffer)}.
     *
     * @param buffer
     *            the source buffer
     */
    public void fromBinary(
            ByteBuffer buffer ) {
        totalCount = buffer.getLong();
        minValue = buffer.getDouble();
        maxValue = buffer.getDouble();
        final int s = buffer.getInt();
        count = new long[s];
        for (int i = 0; i < s; i++) {
            count[i] = buffer.getLong();
        }
    }

    /**
     * @return the total number of consumed values
     */
    public long getTotalCount() {
        return totalCount;
    }

    /**
     * @return the number of bins used
     */
    public int getNumBins() {
        return count.length;
    }

    /**
     * Adds a single occurrence of a value.
     *
     * @param num
     *            the value to add
     */
    public void add(
            final double num ) {
        add(
                1L,
                num);
    }

    /**
     * Adds a number of occurrences of a value. When the range is constrained,
     * values outside of it are ignored; otherwise the range grows to include
     * the value and existing counts are redistributed.
     *
     * @param amount
     *            the number of occurrences
     * @param num
     *            the value to add
     */
    public void add(
            final long amount,
            final double num ) {
        if (constrainedRange && ((num < minValue) || (num > maxValue))) {
            return;
        }

        if (constrainedRange) {
            // the range is fixed: never move the bounds, just bin the value
            count[binIndex(num)] += amount;
        }
        // first entry: the range collapses onto the single value
        else if (totalCount == 0L) {
            count[0] += amount;
            minValue = num;
            maxValue = num;
        }
        // entry of the same value as the current minimum
        else if (FloatCompareUtils.checkDoublesEqual(
                minValue,
                num)) {
            count[0] += amount;
            minValue = num;
            maxValue = Math.max(
                    num,
                    maxValue);
        }
        // so far a single distinct value was seen and this one differs: the
        // old value and the new one become the two ends of the range
        else if (FloatCompareUtils.checkDoublesEqual(
                maxValue,
                minValue)) {
            final int lastBin = count.length - 1;
            if (num < minValue) {
                // move the existing population to the top bin; accumulate so
                // that nothing is lost when there is only one bin
                final long existing = count[0];
                count[0] = 0;
                count[lastBin] += existing;
                count[0] += amount;
                minValue = num;
            }
            else if (num > maxValue) {
                // count[0] keeps the existing population
                count[lastBin] += amount;
                maxValue = num;
            }
        }
        else {
            if (num < minValue) {
                redistribute(
                        num,
                        maxValue);
                minValue = num;
            }
            else if (num > maxValue) {
                redistribute(
                        minValue,
                        num);
                maxValue = num;
            }
            count[binIndex(num)] += amount;
        }

        totalCount += amount;
    }

    /**
     * Re-bins the current counts over a new range, replacing the count array.
     * The minimum and maximum fields are left for the caller to update.
     */
    private void redistribute(
            final double newMinValue,
            final double newMaxValue ) {
        redistribute(
                new long[count.length],
                newMinValue,
                newMaxValue);
    }

    /**
     * Moves the counts of every current bin into the bins of the new range,
     * splitting a source bin proportionally when it straddles destination
     * bins. The source counts are consumed in the process and the count array
     * is replaced by newCount.
     *
     * @param newCount
     *            the destination array, same length as the current one
     * @param newMinValue
     *            lower bound of the new range
     * @param newMaxValue
     *            upper bound of the new range
     */
    private void redistribute(
            final long[] newCount,
            final double newMinValue,
            final double newMaxValue ) {
        final double perBinSize = binSize();
        final double newRange = (newMaxValue - newMinValue);
        final double newPerBinsSize = newRange / count.length;
        double currentWindowStart = minValue;
        double currentWindowStop = minValue + perBinSize;
        for (int bin = 0; bin < count.length; bin++) {
            long distributionCount = 0;
            // first destination bin touched by the current source window
            int destinationBin = Math.min(
                    (int) Math.floor((((currentWindowStart - newMinValue) / newRange) * count.length)),
                    count.length - 1);
            double destinationWindowStart = newMinValue + (destinationBin * newPerBinsSize);
            double destinationWindowStop = destinationWindowStart + newPerBinsSize;
            while (count[bin] > 0) {
                if (currentWindowStart < destinationWindowStart) {
                    // take whatever is left over
                    distributionCount = count[bin];
                }
                else {
                    // portion of the source window that extends past the
                    // destination window stays behind for the next one
                    final double diff = Math.min(
                            Math.max(
                                    currentWindowStop - destinationWindowStop,
                                    0.0),
                            perBinSize);
                    distributionCount = Math.round(count[bin] * (1.0 - (diff / perBinSize)));
                }
                newCount[destinationBin] += distributionCount;
                count[bin] -= distributionCount;
                if (destinationWindowStop < currentWindowStop) {
                    destinationWindowStart = destinationWindowStop;
                    destinationWindowStop += newPerBinsSize;
                    destinationBin += 1;
                    if ((destinationBin == count.length) && (count[bin] > 0)) {
                        // ran out of destination bins with counts remaining
                        newCount[bin] += count[bin];
                        count[bin] = 0;
                    }
                }
            }
            currentWindowStart = currentWindowStop;
            currentWindowStop += perBinSize;
        }
        count = newCount;
    }

    /**
     * @return the upper bound of the covered range
     */
    public double getMaxValue() {
        return maxValue;
    }

    /**
     * @return the lower bound of the covered range
     */
    public double getMinValue() {
        return minValue;
    }

    /**
     * Factory producing {@link FixedBinNumericHistogram} instances.
     */
    public static class FixedBinNumericHistogramFactory implements NumericHistogramFactory {

        @Override
        public NumericHistogram create(
                int bins ) {
            return new FixedBinNumericHistogram(
                    bins);
        }

        @Override
        public NumericHistogram create(
                int bins,
                double minValue,
                double maxValue ) {
            return new FixedBinNumericHistogram(
                    bins,
                    minValue,
                    maxValue);
        }
    }
}