package mil.nga.giat.geowave.adapter.vector.stats;
import java.nio.ByteBuffer;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.index.Mergeable;
import mil.nga.giat.geowave.core.store.adapter.statistics.AbstractDataStatistics;
import mil.nga.giat.geowave.core.store.adapter.statistics.DataStatistics;
import mil.nga.giat.geowave.core.store.base.DataStoreEntryInfo;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;
import org.opengis.feature.simple.SimpleFeature;
import com.clearspring.analytics.stream.frequency.CountMinSketch;
import com.clearspring.analytics.stream.frequency.FrequencyMergeException;
/**
*
* Maintains an estimate of how may of each attribute value occurs in a set of
* data.
*
* Default values:
*
* Error factor of 0.001 with probability 0.98 of retrieving a correct estimate.
* The Algorithm does not under-state the estimate.
*
*/
public class FeatureCountMinSketchStatistics extends
AbstractDataStatistics<SimpleFeature> implements
FeatureStatistic
{
public static final ByteArrayId STATS_TYPE = new ByteArrayId(
"ATT_SKETCH");
private CountMinSketch sketch = null;
protected FeatureCountMinSketchStatistics() {
super();
sketch = new CountMinSketch(
0.001,
0.98,
7364181);
}
public FeatureCountMinSketchStatistics(
final ByteArrayId dataAdapterId,
final String statisticsId ) {
super(
dataAdapterId,
composeId(
STATS_TYPE.getString(),
statisticsId));
sketch = new CountMinSketch(
0.001,
0.98,
7364181);
}
public FeatureCountMinSketchStatistics(
final ByteArrayId dataAdapterId,
final String statisticsId,
final double errorFactor,
final double probabilityOfCorrectness ) {
super(
dataAdapterId,
composeId(
STATS_TYPE.getString(),
statisticsId));
sketch = new CountMinSketch(
errorFactor,
probabilityOfCorrectness,
7364181);
}
public static final ByteArrayId composeId(
final String fieldName ) {
return composeId(
STATS_TYPE.getString(),
fieldName);
}
@Override
public String getFieldName() {
return decomposeNameFromId(getStatisticsId());
}
@Override
public DataStatistics<SimpleFeature> duplicate() {
return new FeatureCountMinSketchStatistics(
dataAdapterId,
getFieldName());
}
public long totalSampleSize() {
return sketch.size();
}
public long count(
String item ) {
return sketch.estimateCount(item);
}
@Override
public void merge(
final Mergeable mergeable ) {
if (mergeable instanceof FeatureCountMinSketchStatistics) {
try {
sketch = CountMinSketch.merge(
sketch,
((FeatureCountMinSketchStatistics) mergeable).sketch);
}
catch (FrequencyMergeException e) {
throw new RuntimeException(
"Unable to merge sketches",
e);
}
}
}
@Override
public byte[] toBinary() {
byte[] data = CountMinSketch.serialize(sketch);
final ByteBuffer buffer = super.binaryBuffer(4 + data.length);
buffer.putInt(data.length);
buffer.put(data);
return buffer.array();
}
@Override
public void fromBinary(
final byte[] bytes ) {
final ByteBuffer buffer = super.binaryBuffer(bytes);
final byte[] data = new byte[buffer.getInt()];
buffer.get(data);
sketch = CountMinSketch.deserialize(data);
}
@Override
public void entryIngested(
final DataStoreEntryInfo entryInfo,
final SimpleFeature entry ) {
final Object o = entry.getAttribute(getFieldName());
if (o == null) {
return;
}
sketch.add(
o.toString(),
1);
}
public String toString() {
StringBuffer buffer = new StringBuffer();
buffer.append(
"sketch[adapter=").append(
super.getDataAdapterId().getString());
buffer.append(
", field=").append(
getFieldName());
buffer.append(
", size=").append(
sketch.size());
buffer.append("]");
return buffer.toString();
}
/**
* Convert FeatureCountMinSketch statistics to a JSON object
*/
public JSONObject toJSONObject()
throws JSONException {
JSONObject jo = new JSONObject();
jo.put(
"type",
STATS_TYPE.getString());
jo.put(
"statisticsID",
statisticsId.getString());
jo.put(
"field_identifier",
getFieldName());
jo.put(
"size",
sketch.size());
return jo;
}
public static class FeatureCountMinSketchConfig implements
StatsConfig<SimpleFeature>
{
/**
*
*/
private static final long serialVersionUID = 6309383518148391565L;
private double errorFactor;
private double probabilityOfCorrectness;
public FeatureCountMinSketchConfig() {
}
public FeatureCountMinSketchConfig(
double errorFactor,
double probabilityOfCorrectness ) {
super();
this.errorFactor = errorFactor;
this.probabilityOfCorrectness = probabilityOfCorrectness;
}
public void setErrorFactor(
double errorFactor ) {
this.errorFactor = errorFactor;
}
public void setProbabilityOfCorrectness(
double probabilityOfCorrectness ) {
this.probabilityOfCorrectness = probabilityOfCorrectness;
}
public double getErrorFactor() {
return errorFactor;
}
public double getProbabilityOfCorrectness() {
return probabilityOfCorrectness;
}
@Override
public DataStatistics<SimpleFeature> create(
final ByteArrayId dataAdapterId,
final String fieldName ) {
return new FeatureCountMinSketchStatistics(
dataAdapterId,
fieldName,
errorFactor,
probabilityOfCorrectness);
}
}
}