package com.alimama.mdrill.distinct;
import gnu.trove.set.hash.TIntHashSet;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.WeakHashMap;
import java.util.zip.CRC32;
import org.apache.hadoop.io.Writable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Approximate distinct counter backed by a sampled set of 32-bit CRC32 hashes.
 *
 * <p>Every value offered through one of the {@code set(...)} methods is hashed
 * with CRC32 and truncated to an {@code int}. A hash is retained only when it is
 * divisible by the current sampling factor ({@code currentTimes}). When the
 * retained set grows beyond {@code maxUniqSize}, the sampling factor is
 * multiplied by {@code TimesStep} and the set is re-filtered, so the set stays
 * bounded while {@link #getValue()} scales the set size back up by the factor.
 *
 * <p>Instances can be serialized through the Hadoop {@link Writable} contract or
 * through {@link #toBytes()} / {@link #DistinctCount(byte[])}. No
 * synchronization is performed anywhere in this class.
 */
public class DistinctCount implements Writable {
    public static Logger LOG = LoggerFactory.getLogger(DistinctCount.class);

    /** Retained hashes; every element is divisible by {@link #currentTimes}. */
    private TIntHashSet uniq = DistinctCount.createmap();
    /** Size threshold above which the sampling factor is increased. */
    private int maxUniqSize = 10000;
    /** Current sampling factor; only hashes divisible by it are retained. */
    private int currentTimes = 1;
    /** Multiplier applied to {@link #currentTimes} on every coarsening step. */
    private int TimesStep = 2;
    /** Optional coordinator that rebalances {@link #maxUniqSize} across counters. */
    private DistinctCountAutoAjuest autoAjust = null;
    /** Result of the most recent non-cached {@link #getIncreateTimes(boolean)} call. */
    int last_increateTime = 1;

    /** Creates an empty counter with the default limits. */
    public DistinctCount() {
    }

    /**
     * Restores a counter from the bytes produced by {@link #toBytes()}.
     *
     * <p>A {@code null} or empty array yields an empty counter. Deserialization
     * is best-effort: on malformed input the failure is logged and the counter
     * keeps whatever state had been read up to that point.
     *
     * @param zipdata serialized counter, may be {@code null} or empty
     */
    public DistinctCount(byte[] zipdata) {
        if (zipdata == null || zipdata.length <= 0) {
            return;
        }
        try {
            // In-memory streams hold no external resources, so no close() is needed.
            this.readFields(new DataInputStream(new ByteArrayInputStream(zipdata)));
        } catch (Exception e) {
            LOG.warn("failed to deserialize DistinctCount from " + zipdata.length + " bytes", e);
        }
    }

    /**
     * Attaches the coordinator that is consulted whenever this counter overflows.
     *
     * @param autoAjust coordinator, or {@code null} to detach
     */
    public void setAutoAjust(DistinctCountAutoAjuest autoAjust) {
        this.autoAjust = autoAjust;
    }

    /** Creates the backing set (initial capacity 10, load factor 0.75, no-entry value 0). */
    private static TIntHashSet createmap() {
        return new TIntHashSet(10, 0.75f, 0);
    }

    /**
     * Reads the state written by {@link #write(DataOutput)}: max size, sampling
     * factor, step, element count, then the elements.
     *
     * @throws IOException on I/O failure, or when the header holds values that
     *         would later cause division by zero or a non-terminating loop
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        int newMaxUniqSize = in.readInt();
        int newCurrentTimes = in.readInt();
        int newTimesStep = in.readInt();
        int usize = in.readInt();
        // Validate before touching any state so a corrupt header cannot poison the counter.
        if (newCurrentTimes < 1 || newTimesStep < 2 || usize < 0) {
            throw new IOException("corrupt DistinctCount header: currentTimes=" + newCurrentTimes
                    + ", timesStep=" + newTimesStep + ", size=" + usize);
        }
        this.maxUniqSize = newMaxUniqSize;
        this.currentTimes = newCurrentTimes;
        this.TimesStep = newTimesStep;
        this.uniq.clear();
        for (int i = 0; i < usize; i++) {
            this.uniq.add(in.readInt());
        }
    }

    /**
     * Writes max size, sampling factor, step, element count and all retained
     * hashes as 32-bit integers.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(this.maxUniqSize);
        out.writeInt(this.currentTimes);
        out.writeInt(this.TimesStep);
        int[] values = this.uniq.toArray();
        out.writeInt(values.length);
        for (int value : values) {
            out.writeInt(value);
        }
    }

    /**
     * Serializes this counter with {@link #write(DataOutput)}.
     *
     * @return the serialized bytes, or an empty array if serialization failed
     */
    public byte[] toBytes() {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try {
            this.write(new DataOutputStream(bos));
            return bos.toByteArray();
        } catch (IOException e) {
            LOG.error("failed to serialize DistinctCount", e);
            return new byte[0];
        }
    }

    /**
     * Offers a string value to the counter.
     *
     * @param item value to count; must not be {@code null}
     */
    public void set(String item) {
        // NOTE(review): getBytes() uses the platform default charset, so hashes of
        // non-ASCII strings may differ between hosts with different defaults. Kept
        // as-is so existing serialized hashes remain comparable; confirm whether a
        // fixed charset should be used instead.
        this.add(crc32Of(item.getBytes()));
    }

    /** Offers a double value to the counter. */
    public void set(double item) {
        this.add(crc32Of(ByteUtil.getBytes(item)));
    }

    /** Offers an int value to the counter. */
    public void set(int item) {
        this.add(crc32Of(ByteUtil.getBytes(item)));
    }

    /** Returns the CRC32 checksum of {@code bytes}, truncated to its low 32 bits. */
    private static int crc32Of(byte[] bytes) {
        CRC32 crc32 = new CRC32();
        crc32.update(bytes);
        return (int) crc32.getValue();
    }

    /**
     * @return the estimate: number of retained hashes multiplied by the current
     *         sampling factor
     */
    public Long getValue() {
        return (long) this.uniq.size() * currentTimes;
    }

    /**
     * Returns one plus the number of times the sampling factor can be divided by
     * the step before it drops to 1 or below.
     *
     * @param fromcache when {@code true}, returns the value computed by the most
     *        recent call with {@code fromcache == false} (initially 1) without
     *        recomputing
     */
    public int getIncreateTimes(boolean fromcache) {
        if (fromcache) {
            return this.last_increateTime;
        }
        int increateTime = 1;
        for (int times = this.currentTimes; times > 1; times = times / TimesStep) {
            increateTime++;
        }
        this.last_increateTime = increateTime;
        return increateTime;
    }

    /**
     * Retains {@code crc} if it passes the current sampling filter. When the set
     * overflows, the coordinator (if any) is asked to rebalance first, and if the
     * set is still over its limit the sampling factor is raised by one step.
     */
    private void add(int crc) {
        if (!this.isallow(crc) || !this.uniq.add(crc)) {
            return;
        }
        if (this.uniq.size() <= getMaxUniqSize()) {
            return;
        }
        if (autoAjust != null) {
            // May change this counter's limit and may already shrink the set.
            autoAjust.ajust();
        }
        if (this.uniq.size() > getMaxUniqSize()) {
            this.coarsen();
        }
    }

    /**
     * Raises the sampling factor step by step until the retained set fits within
     * the current limit, or until the factor cannot be raised any further.
     */
    public void reAjuest() {
        while (this.uniq.size() > getMaxUniqSize()) {
            if (!this.coarsen()) {
                break;
            }
        }
    }

    /**
     * Multiplies the sampling factor by the step and re-filters the set.
     *
     * @return {@code false} when the multiplication would overflow {@code int};
     *         the counter is left untouched in that case
     */
    private boolean coarsen() {
        // An overflowed factor would become zero or negative and break the modulo filter.
        if (this.TimesStep < 2 || this.currentTimes > Integer.MAX_VALUE / this.TimesStep) {
            return false;
        }
        this.reFilter(this.currentTimes * this.TimesStep);
        return true;
    }

    /** Returns whether {@code crc} is divisible by the current sampling factor. */
    private boolean isallow(int crc) {
        return crc % this.currentTimes == 0;
    }

    /**
     * Switches to sampling factor {@code times} and drops every retained hash
     * that is not divisible by it. Does nothing when the factor is unchanged.
     */
    private void reFilter(int times) {
        if (this.currentTimes == times) {
            return;
        }
        this.currentTimes = times;
        TIntHashSet kept = DistinctCount.createmap();
        for (int value : this.uniq.toArray()) {
            if (this.isallow(value)) {
                kept.add(value);
            }
        }
        this.uniq = kept;
    }

    /** @return the size limit above which the sampling factor is raised */
    public Integer getMaxUniqSize() {
        return maxUniqSize;
    }

    /**
     * Sets the size limit. The retained set is not shrunk by this call; use
     * {@link #reAjuest()} for that.
     *
     * @param maxUniqSize new limit; must not be {@code null}
     */
    public void setMaxUniqSize(Integer maxUniqSize) {
        this.maxUniqSize = maxUniqSize;
    }

    /**
     * Merges the hashes retained by {@code dc} into this counter.
     *
     * <p>Both counters are first re-filtered to the larger of the two sampling
     * factors, so {@code dc} itself is modified as a side effect.
     *
     * @param dc counter to merge in; must not be {@code null}
     */
    public void merge(DistinctCount dc) {
        int newtimes = Math.max(dc.currentTimes, this.currentTimes);
        this.reFilter(newtimes);
        dc.reFilter(newtimes);
        for (int value : dc.uniq.toArray()) {
            this.add(value);
        }
    }

    /**
     * Coordinator that shares a global hash budget ({@code size}) between all
     * registered counters by adjusting each counter's {@code maxUniqSize}.
     *
     * <p>Counters are held in a {@link WeakHashMap} keyed by a caller-supplied
     * object, so an entry disappears once its key is no longer strongly
     * reachable. No synchronization is performed.
     */
    public static class DistinctCountAutoAjuest {
        private WeakHashMap<Object, DistinctCount> ajust = new WeakHashMap<Object, DistinctCount>();
        /** Total number of hashes all registered counters may retain together. */
        int size = 100000;
        /** Per-counter budget used by the most recent rebalance. */
        int last_persize = 0;

        /**
         * @param size total hash budget shared by all registered counters
         */
        public DistinctCountAutoAjuest(int size) {
            this.size = size;
        }

        /**
         * Creates a new counter, registers it under {@code key} and attaches this
         * coordinator to it.
         */
        public DistinctCount create(Object key) {
            DistinctCount dist = new DistinctCount();
            ajust.put(key, dist);
            this.autoAjust();
            dist.setAutoAjust(this);
            return dist;
        }

        /**
         * Registers an existing counter under {@code key} and attaches this
         * coordinator to it.
         *
         * @return {@code dist}, or {@code null} when {@code dist} is {@code null}
         */
        public DistinctCount put(Object key, DistinctCount dist) {
            if (dist == null) {
                return null;
            }
            ajust.put(key, dist);
            this.autoAjust();
            dist.setAutoAjust(this);
            return dist;
        }

        /**
         * Unregisters the counter stored under {@code key}.
         *
         * @return the removed counter, or {@code null} when {@code key} is
         *         {@code null} or not registered
         */
        public DistinctCount remove(Object key) {
            if (key == null) {
                return null;
            }
            DistinctCount rtn = ajust.remove(key);
            this.autoAjust();
            return rtn;
        }

        /** Unconditionally rebalances the limits of all registered counters. */
        public void ajust() {
            int zjustsize = Math.max(ajust.size(), 1);
            int persize = Math.max(size / zjustsize, 20);
            this.last_persize = persize;
            this._ajust(zjustsize, persize);
        }

        /**
         * Clamps a counter's level into {@code [min, max]} (bounds truncated to
         * {@code int}) and never returns less than 1.
         */
        private static int clampLevel(int level, double min, double max) {
            int clamped = level;
            if (clamped > max) {
                clamped = (int) max;
            } else if (clamped < min) {
                clamped = (int) min;
            }
            return Math.max(clamped, 1);
        }

        /**
         * Distributes the budget: each counter receives {@code persize} scaled by
         * its clamped level relative to the average clamped level, bounded to
         * at least 20 and at most {@code size}. Counters whose limit shrinks are
         * re-filtered immediately.
         *
         * @param zjustsize number of registered counters (at least 1)
         * @param persize average per-counter budget
         */
        private void _ajust(int zjustsize, int persize) {
            // Snapshot sized by the actual number of counters, not by the hash budget.
            ArrayList<DistinctCount> list = new ArrayList<DistinctCount>(this.ajust.values());

            // Pass 1: average raw level; this also refreshes each counter's cached level.
            long totalsize = 0L;
            for (DistinctCount d : list) {
                totalsize += Math.max(d.getIncreateTimes(false), 1);
            }
            double pre_uniqsize = Math.max((totalsize * 1.0 / zjustsize), 1d);
            double pre_uniqsize_max = pre_uniqsize * 3;
            double pre_uniqsize_min = pre_uniqsize / 2;

            // Pass 2: average of the clamped levels, used as the scaling reference.
            totalsize = 0L;
            for (DistinctCount d : list) {
                totalsize += clampLevel(d.getIncreateTimes(true), pre_uniqsize_min, pre_uniqsize_max);
            }
            pre_uniqsize = Math.max((totalsize * 1.0 / zjustsize), 1);

            // Pass 3: assign the new limits.
            int allowSize = 0;
            try {
                for (DistinctCount d : list) {
                    int t = clampLevel(d.getIncreateTimes(true), pre_uniqsize_min, pre_uniqsize_max);
                    double times = t / pre_uniqsize;
                    int uniqsize = (int) (persize * times);
                    if (uniqsize < 20) {
                        uniqsize = 20;
                    }
                    if (uniqsize > this.size) {
                        uniqsize = this.size;
                    }
                    allowSize += uniqsize;
                    int lastuniqsize = d.getMaxUniqSize();
                    d.setMaxUniqSize(uniqsize);
                    if (lastuniqsize > uniqsize) {
                        d.reAjuest();
                    }
                }
            } catch (RuntimeException e) {
                // Rebalancing is best-effort: keep going, but do not hide the failure.
                LOG.warn("autoAjust _ajust failed", e);
            }
            LOG.info("autoAjust _ajust:"+persize+",size:"+size+",zjustsize:"+zjustsize+",avg:"+pre_uniqsize+",max:"+pre_uniqsize_max+",min:"+pre_uniqsize_min+",allowSize:"+allowSize);
        }

        /**
         * Rebalances only when the budget implied by the last per-counter size has
         * drifted from {@code size} by at least 102400 and the per-counter size
         * actually changed.
         */
        private void autoAjust() {
            int zjustsize = Math.max(ajust.size(), 1);
            int persize = size / zjustsize;
            // Computed in long so a large counter count cannot overflow the product.
            long diff = Math.abs(((long) zjustsize * this.last_persize) - size);
            if (diff < 102400) {
                return;
            }
            if (persize < 20) {
                persize = 20;
            }
            if (persize == this.last_persize) {
                return;
            }
            this.last_persize = persize;
            this._ajust(zjustsize, persize);
        }
    }
}