package mil.nga.giat.geowave.adapter.vector.stats; import java.nio.ByteBuffer; import java.text.MessageFormat; import java.util.Date; import java.util.zip.DataFormatException; import mil.nga.giat.geowave.core.index.ByteArrayId; import mil.nga.giat.geowave.core.index.Mergeable; import mil.nga.giat.geowave.core.store.adapter.statistics.AbstractDataStatistics; import mil.nga.giat.geowave.core.store.adapter.statistics.DataStatistics; import mil.nga.giat.geowave.core.store.base.DataStoreEntryInfo; import org.HdrHistogram.AbstractHistogram; import org.HdrHistogram.DoubleHistogram; import org.HdrHistogram.Histogram; import org.opengis.feature.simple.SimpleFeature; /** * Dynamic histogram provide very high accuracy for CDF and quantiles over the a * numeric attribute. * */ public class FeatureNumericHistogramStatistics extends AbstractDataStatistics<SimpleFeature> implements FeatureStatistic { public static final ByteArrayId STATS_TYPE = new ByteArrayId( "ATT_HISTOGRAM"); private DoubleHistogram positiveHistogram = new LocalDoubleHistogram(); private DoubleHistogram negativeHistogram = null; // Max value is determined by the level of accuracy required, using a // formula provided // HdrHistogram private double maxValue = Math.pow( 2, 63) / Math.pow( 2, 14) - 1; private double minValue = -(maxValue); protected FeatureNumericHistogramStatistics() { super(); } public FeatureNumericHistogramStatistics( final ByteArrayId dataAdapterId, final String fieldName ) { super( dataAdapterId, composeId( STATS_TYPE.getString(), fieldName)); } public static final ByteArrayId composeId( final String fieldName ) { return composeId( STATS_TYPE.getString(), fieldName); } @Override public String getFieldName() { return decomposeNameFromId(getStatisticsId()); } @Override public DataStatistics<SimpleFeature> duplicate() { return new FeatureNumericHistogramStatistics( dataAdapterId, getFieldName()); } private double percentageNegative() { final long nc = negativeHistogram == null ? 0 : negativeHistogram.getTotalCount(); final long tc = positiveHistogram.getTotalCount() + nc; return (double) nc / (double) tc; } public double[] quantile( final int bins ) { final double[] result = new double[bins]; final double binSize = 1.0 / bins; for (int bin = 0; bin < bins; bin++) { result[bin] = quantile(binSize * (bin + 1)); } return result; } public double cdf( final double val ) { final double percentageNegative = percentageNegative(); if (val < 0 || (1.0 - percentageNegative) < 0.000000001) { // subtract one from percentage since negative is negated so // percentage is inverted return (percentageNegative > 0) ? percentageNegative * (1.0 - (negativeHistogram.getPercentileAtOrBelowValue(-val) / 100.0)) : 0.0; } else { return percentageNegative + (1.0 - percentageNegative) * (positiveHistogram.getPercentileAtOrBelowValue(val) / 100.0); } } public double quantile( final double percentage ) { final double percentageNegative = percentageNegative(); if (percentage < percentageNegative) { // subtract one from percentage since negative is negated so // percentage is inverted return -negativeHistogram.getValueAtPercentile((1.0 - (percentage / percentageNegative)) * 100.0); } else { return positiveHistogram.getValueAtPercentile((percentage / (1.0 - percentageNegative)) * 100.0); } } public double percentPopulationOverRange( final double start, final double stop ) { return cdf(stop) - cdf(start); } public long totalSampleSize() { return positiveHistogram.getTotalCount() + (negativeHistogram == null ? 0 : negativeHistogram.getTotalCount()); } public long[] count( final int bins ) { final long[] result = new long[bins]; final double max = positiveHistogram.getMaxValue(); final double min = negativeHistogram == null ? positiveHistogram.getMinValue() : -negativeHistogram .getMaxValue(); final double binSize = (max - min) / (bins); long last = 0; final long tc = totalSampleSize(); for (int bin = 0; bin < bins; bin++) { final double val = cdf(min + ((bin + 1.0) * binSize)) * tc; final long next = (long) val - last; result[bin] = next; last += next; } return result; } @Override public void merge( final Mergeable mergeable ) { if (mergeable instanceof FeatureNumericHistogramStatistics) { positiveHistogram.add(((FeatureNumericHistogramStatistics) mergeable).positiveHistogram); if (((FeatureNumericHistogramStatistics) mergeable).negativeHistogram != null) { if (negativeHistogram != null) { negativeHistogram.add(((FeatureNumericHistogramStatistics) mergeable).negativeHistogram); } else { negativeHistogram = ((FeatureNumericHistogramStatistics) mergeable).negativeHistogram; } } } } @Override public byte[] toBinary() { final int positiveBytes = positiveHistogram.getEstimatedFootprintInBytes(); final int bytesNeeded = positiveBytes + (negativeHistogram == null ? 0 : negativeHistogram.getEstimatedFootprintInBytes()); final ByteBuffer buffer = super.binaryBuffer(bytesNeeded + 5); final int startPosition = buffer.position(); buffer.putInt(startPosition); // buffer out an int positiveHistogram.encodeIntoCompressedByteBuffer(buffer); final int endPosition = buffer.position(); buffer.position(startPosition); buffer.putInt(endPosition); buffer.position(endPosition); if (negativeHistogram != null) { buffer.put((byte) 0x01); negativeHistogram.encodeIntoCompressedByteBuffer(buffer); } else { buffer.put((byte) 0x00); } final byte result[] = new byte[buffer.position() + 1]; buffer.rewind(); buffer.get(result); return result; } @Override public void fromBinary( final byte[] bytes ) { final ByteBuffer buffer = super.binaryBuffer(bytes); final int endPosition = buffer.getInt(); try { positiveHistogram = DoubleHistogram.decodeFromCompressedByteBuffer( buffer, LocalInternalHistogram.class, 0); buffer.position(endPosition); positiveHistogram.setAutoResize(true); if (buffer.get() == (byte) 0x01) { negativeHistogram = DoubleHistogram.decodeFromCompressedByteBuffer( buffer, LocalInternalHistogram.class, 0); negativeHistogram.setAutoResize(true); } } catch (final DataFormatException e) { throw new RuntimeException( "Cannot decode statistic", e); } } @Override public void entryIngested( final DataStoreEntryInfo entryInfo, final SimpleFeature entry ) { final Object o = entry.getAttribute(getFieldName()); if (o == null) { return; } if (o instanceof Date) add(((Date) o).getTime()); else if (o instanceof Number) add(((Number) o).doubleValue()); } protected void add( double num ) { if (num < minValue || num > maxValue || Double.isNaN(num)) return; if (num >= 0) positiveHistogram.recordValue(num); else { getNegativeHistogram().recordValue( -num); } } public String toString() { StringBuffer buffer = new StringBuffer(); buffer.append( "histogram[adapter=").append( super.getDataAdapterId().getString()); buffer.append( ", field=").append( getFieldName()); buffer.append(", bins={"); final MessageFormat mf = new MessageFormat( "{0,number,#.######}"); for (double v : this.quantile(10)) { buffer.append( mf.format(new Object[] { Double.valueOf(v) })).append( ' '); } buffer.deleteCharAt(buffer.length() - 1); buffer.append(", counts={"); for (long v : this.count(10)) { buffer.append( mf.format(new Object[] { Long.valueOf(v) })).append( ' '); } buffer.deleteCharAt(buffer.length() - 1); buffer.append("}]"); return buffer.toString(); } private DoubleHistogram getNegativeHistogram() { if (this.negativeHistogram == null) negativeHistogram = new LocalDoubleHistogram(); return negativeHistogram; } public static class LocalDoubleHistogram extends DoubleHistogram { public LocalDoubleHistogram() { super( 2, 4, LocalInternalHistogram.class); super.setAutoResize(true); } /** * */ private static final long serialVersionUID = 5504684423053828467L; } @edu.umd.cs.findbugs.annotations.SuppressFBWarnings(value = { "HE_INHERITS_EQUALS_USE_HASHCODE" }) public static class LocalInternalHistogram extends Histogram { /** * */ private static final long serialVersionUID = 4369054277576423915L; public LocalInternalHistogram( AbstractHistogram source ) { super( source); source.setAutoResize(true); super.setAutoResize(true); } public LocalInternalHistogram( int numberOfSignificantValueDigits ) { super( numberOfSignificantValueDigits); super.setAutoResize(true); } public LocalInternalHistogram( long highestTrackableValue, int numberOfSignificantValueDigits ) { super( highestTrackableValue, numberOfSignificantValueDigits); super.setAutoResize(true); } public LocalInternalHistogram( long lowestDiscernibleValue, long highestTrackableValue, int numberOfSignificantValueDigits ) { super( lowestDiscernibleValue, highestTrackableValue, numberOfSignificantValueDigits); super.setAutoResize(true); } } public static class FeatureNumericHistogramConfig implements StatsConfig<SimpleFeature> { /** * */ private static final long serialVersionUID = 6309383518148391565L; @Override public DataStatistics<SimpleFeature> create( final ByteArrayId dataAdapterId, final String fieldName ) { return new FeatureNumericHistogramStatistics( dataAdapterId, fieldName); } } }