// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.statistics.cardinality;
import java.util.List;
import org.apache.commons.lang.NotImplementedException;
import org.talend.dataquality.common.inference.Analyzer;
import org.talend.dataquality.common.inference.ResizableList;
import com.clearspring.analytics.stream.cardinality.HyperLogLog;
/**
* Using hyperloglog estimating cardinalities (distinct count)<br/>
* Parameter <code>rsd</code> can be set in order to have a better balance between precision and space. by {
* {@link #setRelativeStandardDeviation(int)}<br/>
* See more description about this parameter at <a href=
* "https://github.com/addthis/stream-lib/blob/master/src/main/java/com/clearspring/analytics/stream/cardinality/HyperLogLog.java#L93"
* >Hyper log log parameter</a>
*
* @author zhao
*/
public class CardinalityHLLAnalyzer implements Analyzer<CardinalityHLLStatistics> {
private static final long serialVersionUID = -5813206492367921798L;
int rsd = 20; // relative standard deviation
private ResizableList<CardinalityHLLStatistics> cardinalityStatistics = null;
@Override
public void init() {
cardinalityStatistics = new ResizableList<>(CardinalityHLLStatistics.class);
}
/**
* Set the hyper log log parameter
*
* @param rsd
*/
public void setRelativeStandardDeviation(int rsd) {
this.rsd = rsd;
}
@Override
public boolean analyze(String... record) {
if (record == null) {
return true;
}
cardinalityStatistics.resize(record.length);
for (int i = 0; i < record.length; i++) {
final CardinalityHLLStatistics cardStats = cardinalityStatistics.get(i);
if (cardStats.getHyperLogLog() == null) {
cardStats.setHyperLogLog(new HyperLogLog(rsd));
}
cardStats.add(record[i]);
cardStats.incrementCount();
}
return true;
}
@Override
public void end() {
}
@Override
public Analyzer<CardinalityHLLStatistics> merge(Analyzer<CardinalityHLLStatistics> another) {
throw new NotImplementedException();
}
@Override
public List<CardinalityHLLStatistics> getResult() {
return cardinalityStatistics;
}
@Override
public void close() throws Exception {
}
}