package mil.nga.giat.geowave.core.store.adapter.statistics;
import java.nio.ByteBuffer;
import java.text.MessageFormat;
import net.sf.json.JSONArray;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.index.Mergeable;
import mil.nga.giat.geowave.core.store.adapter.statistics.histogram.FixedBinNumericHistogram.FixedBinNumericHistogramFactory;
import mil.nga.giat.geowave.core.store.base.DataStoreEntryInfo;
import mil.nga.giat.geowave.core.store.adapter.statistics.histogram.NumericHistogram;
import mil.nga.giat.geowave.core.store.adapter.statistics.histogram.NumericHistogramFactory;
/**
*
* Fixed number of bins for a histogram. Unless configured, the range will
* expand dynamically, redistributing the data as necessary into the wider bins.
*
* The advantage of constraining the range of the statistic is to ignore values
* outside the range, such as erroneous values. Erroneous values force extremes
* in the histogram. For example, if the expected range of values falls between
* 0 and 1 and a value of 10000 occurs, then one bin contains the entire
* population between 0 and 1 while another bin holds only the single value of
* 10000. If there are extremes in the data, then use
* {@link FeatureNumericHistogramStatistics} instead.
*
*
* The default number of bins is 1024.
*
*/
public abstract class FixedBinNumericStatistics<T> extends
        AbstractDataStatistics<T>
{
    public static final ByteArrayId STATS_TYPE = new ByteArrayId(
            "FIXED_BIN_NUMERIC_HISTOGRAM");

    /** Bin count used when a constructor does not specify one. */
    private static final int DEFAULT_BINS = 1024;

    /**
     * Number of bins requested from the histogram when producing the
     * human-readable ({@link #toString()}) and JSON ({@link #toJSONObject()})
     * summaries.
     */
    private static final int SUMMARY_BINS = 10;

    // private static final NumericHistogramFactory HistFactory = new
    // MinimalBinDistanceHistogramFactory();
    private static final NumericHistogramFactory HistFactory = new FixedBinNumericHistogramFactory();

    /**
     * Backing histogram. Initialized with {@link #DEFAULT_BINS} bins and
     * replaced by the constructors that accept an explicit configuration.
     */
    NumericHistogram histogram = HistFactory.create(DEFAULT_BINS);

    /**
     * No-argument constructor; keeps the default histogram. Presumably used
     * when an instance is re-created before {@link #fromBinary(byte[])} is
     * called -- confirm against callers.
     */
    protected FixedBinNumericStatistics() {
        super();
    }

    /**
     * Creates statistics backed by the default histogram.
     *
     * @param adapterId
     *            identifier passed to the parent statistics class
     * @param statisticsId
     *            identifier passed to the parent statistics class
     */
    public FixedBinNumericStatistics(
            final ByteArrayId adapterId,
            final ByteArrayId statisticsId ) {
        super(
                adapterId,
                statisticsId);
    }

    /**
     * Creates statistics backed by a histogram built for the given number of
     * bins.
     *
     * @param adapterId
     *            identifier passed to the parent statistics class
     * @param statisticsId
     *            identifier passed to the parent statistics class
     * @param bins
     *            bin count handed to the histogram factory
     */
    public FixedBinNumericStatistics(
            final ByteArrayId adapterId,
            final ByteArrayId statisticsId,
            final int bins ) {
        super(
                adapterId,
                statisticsId);
        histogram = HistFactory.create(bins);
    }

    /**
     * Creates statistics backed by a histogram built from a bin count and a
     * minimum/maximum value.
     *
     * @param adapterId
     *            identifier passed to the parent statistics class
     * @param statisticsId
     *            identifier passed to the parent statistics class
     * @param bins
     *            bin count handed to the histogram factory
     * @param minValue
     *            minimum value handed to the histogram factory (presumably the
     *            lower bound of the tracked range -- confirm in the factory)
     * @param maxValue
     *            maximum value handed to the histogram factory (presumably the
     *            upper bound of the tracked range -- confirm in the factory)
     */
    public FixedBinNumericStatistics(
            final ByteArrayId adapterId,
            final ByteArrayId statisticsId,
            final int bins,
            final double minValue,
            final double maxValue ) {
        super(
                adapterId,
                statisticsId);
        histogram = HistFactory.create(
                bins,
                minValue,
                maxValue);
    }

    /**
     * Delegates to the backing histogram's {@code quantile(int)}.
     *
     * @param bins
     *            value forwarded to the histogram
     * @return the array produced by the histogram
     */
    public double[] quantile(
            final int bins ) {
        return histogram.quantile(bins);
    }

    /**
     * Delegates to the backing histogram's {@code cdf(double)}.
     *
     * @param val
     *            value forwarded to the histogram
     * @return the result produced by the histogram
     */
    public double cdf(
            final double val ) {
        return histogram.cdf(val);
    }

    /**
     * Delegates to the backing histogram's {@code quantile(double)}.
     *
     * @param percentage
     *            value forwarded to the histogram
     * @return the result produced by the histogram
     */
    public double quantile(
            final double percentage ) {
        return histogram.quantile(percentage);
    }

    /**
     * Computes {@code cdf(stop) - cdf(start)}.
     *
     * @param start
     *            start of the range
     * @param stop
     *            end of the range
     * @return the difference between the two {@link #cdf(double)} results
     */
    public double percentPopulationOverRange(
            final double start,
            final double stop ) {
        return cdf(stop) - cdf(start);
    }

    /**
     * @return the total count reported by the backing histogram
     */
    public long totalSampleSize() {
        return histogram.getTotalCount();
    }

    /**
     * Delegates to the backing histogram's {@code count(int)}.
     *
     * @param binSize
     *            value forwarded to the histogram
     * @return the array produced by the histogram
     */
    public long[] count(
            final int binSize ) {
        return histogram.count(binSize);
    }

    /**
     * Merges the histogram of another {@code FixedBinNumericStatistics} into
     * this one. Any other kind of {@link Mergeable} is ignored.
     *
     * @param mergeable
     *            the statistics to merge in
     */
    @Override
    public void merge(
            final Mergeable mergeable ) {
        if (mergeable instanceof FixedBinNumericStatistics) {
            // The element type is irrelevant here; only the histogram is used.
            final FixedBinNumericStatistics<?> other = (FixedBinNumericStatistics<?>) mergeable;
            histogram.merge(other.histogram);
        }
    }

    /**
     * Serializes this statistic: obtains a buffer from the parent sized for
     * the histogram, lets the histogram write itself, and returns the bytes
     * written so far.
     *
     * @return the serialized form
     */
    @Override
    public byte[] toBinary() {
        final ByteBuffer buffer = super.binaryBuffer(histogram.bufferSize());
        histogram.toBinary(buffer);
        // Only the bytes up to the current position have been written.
        final byte[] result = new byte[buffer.position()];
        buffer.rewind();
        buffer.get(result);
        return result;
    }

    /**
     * Restores this statistic from bytes: the parent wraps the bytes in a
     * buffer and the histogram reads its state from it.
     *
     * @param bytes
     *            the serialized form
     */
    @Override
    public void fromBinary(
            final byte[] bytes ) {
        final ByteBuffer buffer = super.binaryBuffer(bytes);
        histogram.fromBinary(buffer);
    }

    @Override
    public abstract void entryIngested(
            final DataStoreEntryInfo entryInfo,
            final T entry );

    /**
     * Forwards a value to the backing histogram.
     *
     * @param amount
     *            first argument of the histogram's {@code add} (presumably the
     *            number of occurrences -- confirm in {@code NumericHistogram})
     * @param num
     *            the numeric value to record
     */
    protected void add(
            final long amount,
            final double num ) {
        this.histogram.add(
                amount,
                num);
    }

    /**
     * @return the field identifier reported in {@link #toString()} and
     *         {@link #toJSONObject()}
     */
    public abstract String getFieldIdentifier();

    /**
     * Builds a one-line summary containing the adapter id, the field
     * identifier, the histogram's min/max values, and the results of
     * {@code quantile(10)} and {@code count(10)}.
     */
    @Override
    public String toString() {
        final StringBuilder buffer = new StringBuilder();
        buffer.append(
                "histogram[adapter=").append(
                super.getDataAdapterId().getString());
        buffer.append(
                ", identifier=").append(
                getFieldIdentifier());
        final MessageFormat mf = new MessageFormat(
                "{0,number,#.######}");
        buffer.append(", range={");
        buffer.append(
                formatNumber(
                        mf,
                        Double.valueOf(histogram.getMinValue()))).append(
                ' ');
        buffer.append(formatNumber(
                mf,
                Double.valueOf(histogram.getMaxValue())));
        buffer.append("}, bins={");
        appendSpaceSeparated(
                buffer,
                mf,
                this.quantile(SUMMARY_BINS));
        buffer.append("}, counts={");
        appendSpaceSeparated(
                buffer,
                mf,
                count(SUMMARY_BINS));
        buffer.append("}]");
        return buffer.toString();
    }

    /** Formats a single number with the given message format. */
    private static String formatNumber(
            final MessageFormat mf,
            final Number value ) {
        return mf.format(new Object[] {
            value
        });
    }

    /**
     * Appends the formatted values separated by single spaces. The separator
     * is written between elements, so an empty array appends nothing (rather
     * than requiring a trailing character to be trimmed afterwards).
     */
    private static void appendSpaceSeparated(
            final StringBuilder target,
            final MessageFormat mf,
            final double[] values ) {
        for (int i = 0; i < values.length; i++) {
            if (i > 0) {
                target.append(' ');
            }
            target.append(formatNumber(
                    mf,
                    Double.valueOf(values[i])));
        }
    }

    /**
     * Appends the formatted values separated by single spaces. The separator
     * is written between elements, so an empty array appends nothing (rather
     * than requiring a trailing character to be trimmed afterwards).
     */
    private static void appendSpaceSeparated(
            final StringBuilder target,
            final MessageFormat mf,
            final long[] values ) {
        for (int i = 0; i < values.length; i++) {
            if (i > 0) {
                target.append(' ');
            }
            target.append(formatNumber(
                    mf,
                    Long.valueOf(values[i])));
        }
    }

    /**
     * Convert Fixed Bin Numeric statistics to a JSON object.
     *
     * The object contains the keys {@code type}, {@code field_identifier},
     * {@code range_min}, {@code range_max}, {@code bins} (the result of
     * {@code quantile(10)}) and {@code counts} (the result of
     * {@code count(10)}).
     *
     * @return the JSON representation
     * @throws JSONException
     *             if the JSON library rejects a value
     */
    public JSONObject toJSONObject()
            throws JSONException {
        final JSONObject jo = new JSONObject();
        jo.put(
                "type",
                STATS_TYPE.getString());
        jo.put(
                "field_identifier",
                getFieldIdentifier());
        jo.put(
                "range_min",
                histogram.getMinValue());
        jo.put(
                "range_max",
                histogram.getMaxValue());

        final JSONArray binsArray = new JSONArray();
        for (final double v : this.quantile(SUMMARY_BINS)) {
            binsArray.add(v);
        }
        jo.put(
                "bins",
                binsArray);

        final JSONArray countsArray = new JSONArray();
        for (final long v : count(SUMMARY_BINS)) {
            countsArray.add(v);
        }
        jo.put(
                "counts",
                countsArray);
        return jo;
    }
}