// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.statistics.numeric.histogram;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * Fixed-width histogram accumulator over the closed interval {@code [min, max]}.
 * <p>
 * Typical use: call {@link #setParameters(double, double, int)} to define the interval and the number of bins, feed
 * values with {@link #add(double)}, then read the per-bin counts with {@link #getHistogram()}. Values outside the
 * interval are never binned; they are tallied separately and exposed through {@link #getCountBelowMin()} and
 * {@link #getCountAboveMax()}.
 *
 * @author zhao
 *
 */
public class HistogramStatistics {

    /** Number of decimal places kept by the divisions that produce the bin width and a value's bin position. */
    private static final int DIVISION_SCALE = 10;

    private double min, max;

    private int numBins;

    private long countBelowMin, countAboveMax;

    // One counter per bin; empty until setParameters() allocates it.
    private long[] result = new long[0];

    // Width of a single bin; null until setParameters() has been called.
    private BigDecimal binSize;

    // Not read anywhere in this class; left untouched because it is package-visible.
    int scale = 1000;

    /**
     * Set the parameters of the statistics and start a fresh histogram: all bin counters and both out-of-range
     * tallies are reset to zero. <br>
     * Note that max must be greater than min. <br>
     * If this method throws, the previous configuration and counts are left unchanged.
     *
     * @param max upper bound of the histogram (inclusive)
     * @param min lower bound of the histogram (inclusive)
     * @param numBins number of bins, it must be a strictly positive integer.
     * @throws IllegalArgumentException if {@code max <= min} or {@code numBins <= 0}
     * @throws NumberFormatException if {@code max - min} is NaN or infinite (raised by
     * {@link BigDecimal#valueOf(double)})
     */
    public void setParameters(double max, double min, int numBins) {
        if (max <= min) {
            throw new IllegalArgumentException("max must be greater than min");
        }
        if (numBins <= 0) {
            throw new IllegalArgumentException("invalid numBins value :" + numBins + " , numBins must be a none zero integer");
        }
        // Everything that can fail is done before any field is assigned, so a rejected call cannot leave numBins
        // out of step with the counter array.
        // Rounding UP keeps the width strictly positive and at least as large as the exact (max - min) / numBins.
        BigDecimal newBinSize = BigDecimal.valueOf(max - min).divide(BigDecimal.valueOf(numBins), DIVISION_SCALE,
                RoundingMode.UP);
        long[] newResult = new long[numBins];

        this.max = max;
        this.min = min;
        this.numBins = numBins;
        this.binSize = newBinSize;
        this.result = newResult;
        // The bins start from zero again, so the out-of-range tallies must as well.
        this.countBelowMin = 0;
        this.countAboveMax = 0;
    }

    /**
     * Record one value. A value inside {@code [min, max]} increments the counter of its bin (a value equal to max is
     * counted in the last bin); a smaller value increments the below-min tally and a larger one the above-max
     * tally. Infinite values are treated as out of range.
     *
     * @param d the value to record
     * @throws IllegalStateException if {@link #setParameters(double, double, int)} has not been called yet
     * @throws NumberFormatException if {@code d} is NaN (raised by {@link BigDecimal#valueOf(double)})
     */
    public void add(double d) {
        if (binSize == null) {
            throw new IllegalStateException("setParameters must be called before add");
        }
        // Range membership is decided on the raw value. Deciding it on the rounded quotient would let a value
        // slightly above max slip into the last bin, because binSize itself was rounded up.
        if (d < min) {
            countBelowMin++;
            return;
        }
        if (d > max) {
            countAboveMax++;
            return;
        }
        // Position of d measured in bins, rounded away from zero at DIVISION_SCALE decimals. For min <= d <= max
        // this lies in [0, numBins].
        double bin = BigDecimal.valueOf(d - min).divide(binSize, DIVISION_SCALE, RoundingMode.UP).doubleValue();
        // A position of exactly numBins is the upper boundary; it belongs to the last bin.
        int index = Math.min((int) bin, numBins - 1);
        result[index] += 1;
    }

    /**
     * Get histograms as a map
     *
     * @return a new map, in ascending bin order, where key is the range and value is the frequency. It is empty if
     * {@link #setParameters(double, double, int)} has not been called. <br>
     * Note that the returned ranges are in pattern of [Min,
     * Min+binSize),[Min+binSize,Min+binSize*2)...[Max-binSize,Max<b>]</b>
     */
    public Map<Range, Long> getHistogram() {
        Map<Range, Long> histogramMap = new LinkedHashMap<Range, Long>();
        if (binSize == null) {
            return histogramMap;
        }
        final double width = binSize.doubleValue();
        double lower = min;
        for (int i = 0; i < numBins; i++) {
            double next = lower + width;
            // The last range is closed exactly on max rather than on the accumulated boundary.
            double upper = (i + 1 == numBins) ? max : next;
            histogramMap.put(new Range(lower, upper), result[i]);
            lower = next;
        }
        return histogramMap;
    }

    /**
     * @return true when every value added so far fell inside [min, max], i.e. both out-of-range tallies are zero
     */
    public boolean isComplete() {
        return countBelowMin == 0 && countAboveMax == 0;
    }

    /**
     * @return number of added values that were smaller than min
     */
    public long getCountBelowMin() {
        return countBelowMin;
    }

    /**
     * @return number of added values that were greater than max
     */
    public long getCountAboveMax() {
        return countAboveMax;
    }
}