package tr.gov.ulakbim.jDenetX.clusterers.streamkm;
/**
* @author Marcel R. Ackermann, Christiane Lammersen, Marcus Maertens, Christoph Raupach,
* Christian Sohler, Kamil Swierkot
*/
public class BucketManager {
protected class Bucket {
int cursize;
Point[] points;
Point[] spillover;
public Bucket(int d, int maxsize) {
this.cursize = 0;
this.points = new Point[maxsize];
this.spillover = new Point[maxsize];
for (int i = 0; i < maxsize; i++) {
this.points[i] = new Point(d);
this.spillover[i] = new Point(d);
}
}
}
protected int numberOfBuckets;
protected int maxBucketsize;
protected Bucket[] buckets;
protected MTRandom clustererRandom;
protected TreeCoreset treeCoreset;
/**
* initializes a bucketmanager for n points with bucketsize maxsize and dimension d
*/
public BucketManager(int n, int d, int maxsize, MTRandom random) {
this.clustererRandom = random;
this.numberOfBuckets = (int) Math.ceil(Math.log((double) n / (double) maxsize) / Math.log(2)) + 2;
this.maxBucketsize = maxsize;
this.buckets = new Bucket[this.numberOfBuckets + 1];
for (int i = 0; i < this.numberOfBuckets + 1; i++) {
this.buckets[i] = new Bucket(d, maxsize);
}
this.treeCoreset = new TreeCoreset();
//printf("Created manager with %d buckets of dimension %d \n",this.numberOfBuckets,d);
}
/**
* inserts a single point into the bucketmanager
*/
void insertPoint(Point p) {
//check if there is enough space in the first bucket
int cursize = this.buckets[0].cursize;
if (cursize >= this.maxBucketsize) {
//printf("Bucket 0 full \n");
//start spillover process
int curbucket = 0;
int nextbucket = 1;
//check if the next bucket is empty
if (this.buckets[nextbucket].cursize == 0) {
//copy the bucket
int i;
for (i = 0; i < this.maxBucketsize; i++) {
this.buckets[nextbucket].points[i] = this.buckets[curbucket].points[i].clone();
//copyPointWithoutInit: we should not copy coordinates?
}
//bucket is now full
this.buckets[nextbucket].cursize = this.maxBucketsize;
//first bucket is now empty
this.buckets[curbucket].cursize = 0;
cursize = 0;
} else {
//printf("Bucket %d full \n",nextbucket);
//copy bucket to spillover and continue
int i;
for (i = 0; i < this.maxBucketsize; i++) {
this.buckets[nextbucket].spillover[i] = this.buckets[curbucket].points[i].clone();
//copyPointWithoutInit: we should not copy coordinates?
}
this.buckets[0].cursize = 0;
cursize = 0;
curbucket++;
nextbucket++;
/*
as long as the next bucket is full output the coreset to the spillover of the next bucket
*/
while (nextbucket < this.numberOfBuckets && this.buckets[nextbucket].cursize == this.maxBucketsize) {
//printf("Bucket %d full \n",nextbucket);
this.treeCoreset.unionTreeCoreset(this.maxBucketsize, this.maxBucketsize,
this.maxBucketsize, p.dimension,
this.buckets[curbucket].points, this.buckets[curbucket].spillover,
this.buckets[nextbucket].spillover, this.clustererRandom);
//bucket now empty
this.buckets[curbucket].cursize = 0;
curbucket++;
nextbucket++;
}
this.treeCoreset.unionTreeCoreset(this.maxBucketsize, this.maxBucketsize,
this.maxBucketsize, p.dimension,
this.buckets[curbucket].points, this.buckets[curbucket].spillover,
this.buckets[nextbucket].points, this.clustererRandom);
this.buckets[curbucket].cursize = 0;
this.buckets[nextbucket].cursize = this.maxBucketsize;
}
}
//insert point into the first bucket
this.buckets[0].points[cursize] = p.clone();
//copyPointWithoutInit: we should not copy coordinates?
this.buckets[0].cursize++;
}
/**
* It may happen that the manager is not full (since n is not always a power of 2). In this case we extract the coreset
* from the manager by computing a coreset of all nonempty buckets
* <p/>
* Case 1: the last bucket is full
* => n is a power of 2 and we return the contents of the last bucket
* <p/>
* Case2: the last bucket is not full
* => we compute a coreset of all nonempty buckets
* <p/>
* this operation should only be called after the streaming process is finished
*/
Point[] getCoresetFromManager(int d) {
Point[] coreset = new Point[d];
int i = 0;
if (this.buckets[this.numberOfBuckets - 1].cursize == this.maxBucketsize) {
coreset = this.buckets[this.numberOfBuckets - 1].points;
} else {
//find the first nonempty bucket
for (i = 0; i < this.numberOfBuckets; i++) {
if (this.buckets[i].cursize != 0) {
coreset = this.buckets[i].points;
break;
}
}
//as long as there is a nonempty bucket compute a coreset
int j;
for (j = i + 1; j < this.numberOfBuckets; j++) {
if (this.buckets[j].cursize != 0) {
//output the coreset into the spillover of bucket j
this.treeCoreset.unionTreeCoreset(this.maxBucketsize, this.maxBucketsize,
this.maxBucketsize, d,
this.buckets[j].points, coreset,
this.buckets[j].spillover, this.clustererRandom);
coreset = this.buckets[j].spillover;
}
}
}
return coreset;
}
}