package mil.nga.giat.geowave.adapter.vector.stats;
import java.io.IOException;
import java.nio.ByteBuffer;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.index.Mergeable;
import mil.nga.giat.geowave.core.store.adapter.statistics.AbstractDataStatistics;
import mil.nga.giat.geowave.core.store.adapter.statistics.DataStatistics;
import mil.nga.giat.geowave.core.store.base.DataStoreEntryInfo;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.opengis.feature.simple.SimpleFeature;
import com.clearspring.analytics.stream.cardinality.CardinalityMergeException;
import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;
/**
 * Statistic that estimates the number of distinct values of a single feature
 * attribute using a {@link HyperLogLogPlus} sketch.
 *
 * Values are offered to the sketch by their {@code toString()} form; null
 * attribute values are ignored.
 */
public class FeatureHyperLogLogStatistics extends
        AbstractDataStatistics<SimpleFeature> implements
        FeatureStatistic
{
    private static final Logger LOGGER = LoggerFactory.getLogger(FeatureHyperLogLogStatistics.class);

    public static final ByteArrayId STATS_TYPE = new ByteArrayId(
            "ATT_HYPERLLP");

    /** Size in bytes of an int as written to a {@link ByteBuffer}. */
    private static final int INT_BYTES = 4;

    private HyperLogLogPlus loglog;

    // Not final: restored by fromBinary() when the serialized form carries it.
    private int precision;

    /**
     * No-argument constructor; leaves the sketch unset until
     * {@link #fromBinary(byte[])} populates it.
     */
    protected FeatureHyperLogLogStatistics() {
        super();
    }

    /**
     *
     * @param dataAdapterId
     *            id of the data adapter this statistic belongs to
     * @param statisticsId
     *            name that is combined with {@link #STATS_TYPE} to form the
     *            statistics id; {@link #getFieldName()} later recovers it and
     *            uses it as the attribute name to read from each feature
     * @param precision
     *            passed unchanged to {@link HyperLogLogPlus#HyperLogLogPlus(int)}.
     *            NOTE(review): an earlier comment here claimed
     *            {@code 1 <= p <= 32}; the accepted range is whatever the
     *            HyperLogLogPlus constructor enforces - confirm against the
     *            stream-lib documentation.
     */
    public FeatureHyperLogLogStatistics(
            final ByteArrayId dataAdapterId,
            final String statisticsId,
            int precision ) {
        super(
                dataAdapterId,
                composeId(
                        STATS_TYPE.getString(),
                        statisticsId));
        loglog = new HyperLogLogPlus(
                precision);
        this.precision = precision;
    }

    /**
     * Builds the statistics id for the given name by prefixing it with
     * {@link #STATS_TYPE}.
     *
     * @param statisticsId
     *            the name to combine with the statistics type
     * @return the composed statistics id
     */
    public static final ByteArrayId composeId(
            final String statisticsId ) {
        return composeId(
                STATS_TYPE.getString(),
                statisticsId);
    }

    /**
     * @return the name portion decomposed from this statistic's id
     */
    @Override
    public String getFieldName() {
        return decomposeNameFromId(getStatisticsId());
    }

    /**
     * Creates a new, empty statistic with the same adapter id, field name and
     * precision as this one. The sketch contents are NOT copied.
     *
     * Note: instances deserialized from data written before the precision was
     * part of the binary form still have {@code precision == 0} here.
     *
     * @return a fresh statistic configured like this one
     */
    @Override
    public DataStatistics<SimpleFeature> duplicate() {
        return new FeatureHyperLogLogStatistics(
                dataAdapterId,
                getFieldName(),
                precision);
    }

    /**
     * @return the sketch's current cardinality estimate
     */
    public long cardinality() {
        return loglog.cardinality();
    }

    /**
     * Merges another {@link FeatureHyperLogLogStatistics} into this one by
     * replacing this sketch with the result of merging the two sketches.
     * Any other {@link Mergeable} type is silently ignored.
     *
     * @param mergeable
     *            the statistic to merge in
     * @throws RuntimeException
     *             wrapping the {@link CardinalityMergeException} if the
     *             sketches cannot be merged
     */
    @Override
    public void merge(
            final Mergeable mergeable ) {
        if (mergeable instanceof FeatureHyperLogLogStatistics) {
            final FeatureHyperLogLogStatistics other = (FeatureHyperLogLogStatistics) mergeable;
            try {
                loglog = (HyperLogLogPlus) other.loglog.merge(loglog);
            }
            catch (final CardinalityMergeException e) {
                throw new RuntimeException(
                        "Unable to merge counters",
                        e);
            }
        }
    }

    /**
     * Serializes this statistic. After whatever header the superclass buffer
     * helper writes, the layout is:
     * {@code [int sketchLength][sketch bytes][int precision]}.
     *
     * The trailing precision lets {@link #fromBinary(byte[])} restore the
     * configured precision, which {@link #duplicate()} and
     * {@link #toJSONObject()} depend on.
     *
     * @return the serialized form, or an empty array if the sketch could not
     *         be serialized (the failure is logged)
     */
    @Override
    public byte[] toBinary() {
        try {
            final byte[] data = loglog.getBytes();
            // assumes super.binaryBuffer(n) leaves exactly n bytes of room
            // after its own header - same assumption the original code made
            final ByteBuffer buffer = super.binaryBuffer(INT_BYTES + data.length + INT_BYTES);
            buffer.putInt(data.length);
            buffer.put(data);
            buffer.putInt(precision);
            return buffer.array();
        }
        catch (final IOException e) {
            LOGGER.error(
                    "Exception while writing statistic",
                    e);
        }
        return new byte[0];
    }

    /**
     * Restores this statistic from the form produced by {@link #toBinary()}.
     *
     * Payloads written without the trailing precision are still accepted; in
     * that case the precision field is left untouched. If the sketch bytes
     * cannot be decoded the failure is logged and the current sketch is left
     * as it was.
     *
     * @param bytes
     *            the serialized statistic
     */
    @Override
    public void fromBinary(
            final byte[] bytes ) {
        final ByteBuffer buffer = super.binaryBuffer(bytes);
        final byte[] data = new byte[buffer.getInt()];
        buffer.get(data);
        // The precision is optional so older payloads remain readable.
        if (buffer.remaining() >= INT_BYTES) {
            precision = buffer.getInt();
        }
        try {
            loglog = HyperLogLogPlus.Builder.build(data);
        }
        catch (final IOException e) {
            LOGGER.error(
                    "Exception while reading statistic",
                    e);
        }
    }

    /**
     * Offers the string form of the configured attribute's value to the
     * sketch. Features whose attribute value is null are skipped.
     *
     * @param entryInfo
     *            ingest metadata for the entry (unused here)
     * @param entry
     *            the ingested feature
     */
    @Override
    public void entryIngested(
            final DataStoreEntryInfo entryInfo,
            final SimpleFeature entry ) {
        final Object o = entry.getAttribute(getFieldName());
        if (o == null) {
            return;
        }
        loglog.offer(o.toString());
    }

    /**
     * @return a one-line description containing the adapter id, the field
     *         name and the current cardinality estimate
     */
    @Override
    public String toString() {
        // StringBuilder: the buffer never leaves this method, so no
        // synchronization is needed.
        final StringBuilder buffer = new StringBuilder();
        buffer.append(
                "hyperloglog[adapter=").append(
                super.getDataAdapterId().getString());
        buffer.append(
                ", field=").append(
                getFieldName());
        buffer.append(
                ", cardinality=").append(
                loglog.cardinality());
        buffer.append("]");
        return buffer.toString();
    }

    /**
     * Convert FeatureHyperLogLog statistics to a JSON object containing the
     * statistic type, statistics id, field name, cardinality estimate and
     * precision.
     */
    public JSONObject toJSONObject()
            throws JSONException {
        final JSONObject jo = new JSONObject();
        jo.put(
                "type",
                STATS_TYPE.getString());
        jo.put(
                "statisticsID",
                statisticsId.getString());
        jo.put(
                "field_identifier",
                getFieldName());
        jo.put(
                "cardinality",
                loglog.cardinality());
        jo.put(
                "precision",
                precision);
        return jo;
    }

    /**
     * Configuration that creates {@link FeatureHyperLogLogStatistics}
     * instances with a configurable precision (16 unless set otherwise).
     */
    public static class FeatureHyperLogLogConfig implements
            StatsConfig<SimpleFeature>
    {
        private static final long serialVersionUID = 6309383518148391565L;

        private int precision = 16;

        /** Creates a configuration using the default precision. */
        public FeatureHyperLogLogConfig() {
        }

        /**
         * @param precision
         *            the precision handed to each created statistic
         */
        public FeatureHyperLogLogConfig(
                int precision ) {
            this.precision = precision;
        }

        /**
         * @return the precision handed to each created statistic
         */
        public int getPrecision() {
            return precision;
        }

        /**
         * @param precision
         *            the precision handed to each created statistic
         */
        public void setPrecision(
                int precision ) {
            this.precision = precision;
        }

        /**
         * Creates a statistic for the given adapter and field using this
         * configuration's precision.
         *
         * @param dataAdapterId
         *            id of the data adapter the statistic belongs to
         * @param fieldName
         *            name of the attribute to track
         * @return a new, empty {@link FeatureHyperLogLogStatistics}
         */
        @Override
        public DataStatistics<SimpleFeature> create(
                final ByteArrayId dataAdapterId,
                final String fieldName ) {
            return new FeatureHyperLogLogStatistics(
                    dataAdapterId,
                    fieldName,
                    precision);
        }
    }
}