/*
* Copyright 2004-2014 H2 Group. Multiple-Licensed under the MPL 2.0,
* and the EPL 1.0 (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.expression;
import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.util.IntIntHashMap;
import org.h2.value.Value;
import org.h2.value.ValueInt;
/**
 * Data stored while calculating a SELECTIVITY aggregate.
 * <p>
 * The aggregate estimates, for an expression (most often a single column),
 * the percentage of distinct values relative to the total number of rows.
 * A higher selectivity means fewer duplicate values, which is more favorable
 * when choosing an index.
 */
class AggregateDataSelectivity extends AggregateData {

    /** Number of values passed to {@link #add}. */
    private long count;

    /** Hash codes of the values seen since the map was last replaced. */
    private IntIntHashMap distinctHashes;

    /** Accumulated sizes of the hash maps that were already discarded. */
    private double m2;

    /**
     * Records one value for the selectivity estimate.
     * <p>
     * Only the hash code of the value is remembered. Once the current map
     * holds more than {@code Constants.SELECTIVITY_DISTINCT_COUNT} entries,
     * its size is added to the running total and a fresh map is started.
     * According to the original note, that constant (10000 by default) should
     * not be changed because it strongly affects the statistics: a larger
     * value usually gives a more precise estimate at the cost of more memory.
     *
     * @param database the database (not used here)
     * @param dataType the data type (not used here)
     * @param distinct whether DISTINCT was specified (not used here)
     * @param v the value to record
     */
    @Override
    void add(Database database, int dataType, boolean distinct, Value v) {
        count++;
        if (distinctHashes == null) {
            distinctHashes = new IntIntHashMap();
        }
        int size = distinctHashes.size();
        if (size > Constants.SELECTIVITY_DISTINCT_COUNT) {
            // bound memory usage: fold the current map into the total
            // and start over with an empty one
            distinctHashes = new IntIntHashMap();
            m2 += size;
        }
        int hash = v.hashCode();
        // only the key matters; the value -1 is not supported
        distinctHashes.put(hash, 1);
    }

    /**
     * Computes the selectivity from the values recorded so far.
     * <p>
     * The result is 0 if DISTINCT was specified or if no value was added;
     * otherwise it is the estimated percentage of distinct values, limited
     * to the range 1 to 100. This method does not modify the accumulated
     * state, so calling it repeatedly yields the same result.
     *
     * @param database the database (not used here)
     * @param dataType the data type the result is converted to
     * @param distinct whether DISTINCT was specified
     * @return the selectivity, converted to the requested data type
     */
    @Override
    Value getValue(Database database, int dataType, boolean distinct) {
        // With DISTINCT there are no duplicate values to account for, so the
        // selectivity is reported as 0, the same as when nothing was added.
        if (distinct || count == 0) {
            return ValueInt.get(0).convertTo(dataType);
        }
        // Work on local copies so that the fields stay intact.
        double distinctEstimate = m2 + distinctHashes.size();
        int s = (int) (100 * distinctEstimate / count);
        // at least one value was added, so never report less than 1
        s = Math.max(1, Math.min(100, s));
        return ValueInt.get(s).convertTo(dataType);
    }
}