package org.streaminer.stream.quantile.rss;
import org.streaminer.stream.quantile.ExactQuantiles;
import org.streaminer.stream.quantile.QuantilesException;
import java.io.Serializable;
import java.util.LinkedList;
/**
* Buckets are the main ingredient for sliding windows.
* They hold a specified number of elements, and join them as one.
*
* @author Carsten Przyluczky
*/
public class Bucket implements Serializable {
private static final long serialVersionUID = 1L;
double epsilon = 0.0f;
double delta = 0.0f;
int elementCount = 0;
int maxValue;
int subsetCount = 0;
int elementsInGroupCount = 0;
int groupCount = 0;
SubsetTree subsetTrees[];
// ---------------------------------------------------------------------------------- constructor
public Bucket(float epsilon, float delta, int maxValue){
this.delta = delta;
this.epsilon = epsilon;
this.maxValue = maxValue;
calculateSubsetCount();
createNewSubsets();
}
// ---------------------------------------------------------------------------------- main functions
/**
* This method creates the needed count of {@link SubsetTree}'s
*/
void createNewSubsets(){
subsetTrees = new SubsetTree[subsetCount];
for(int i = 0; i < subsetCount;i++){
subsetTrees[i] = new SubsetTree((float)epsilon,(float)delta, maxValue);
}
}
/**
* This method adds the item to all of its subsetTrees
*
* @param item this item will be processed i.e. added to the data structure
*/
public void process(double item){
elementCount++;
for(SubsetTree subsetTree : subsetTrees){
subsetTree.addElement(item);
}
}
/**
* This method calculates the needed subset count, to satisfy the needs
* for the maximum.
*/
void calculateSubsetCount(){
double logU = log2(maxValue);
double term1 = 3.0 * log2(logU / delta);
double term2 = 8.0 * logU / (epsilon * epsilon);
elementsInGroupCount = (int)term2 / 100;
groupCount = (int)term1;
subsetCount = groupCount * elementsInGroupCount ;
}
/**
* This method receives a list of intervals, and estimates their element count
* based on the rss technique.
*
* @param intervals the list of intervals that should be estimated
* @return the final estimation for the bucket
*/
public Double estimateIntervals(LinkedList<Interval> intervals ) throws QuantilesException {
int subsetTreeIndex = 0;
ExactQuantiles exactQuantiles = new ExactQuantiles();
// build the averages of all groups
for(int g = 0; g < groupCount; g++){
double groupAverage = 0.0;
for(int e = 0; e < elementsInGroupCount;e++){
double groupEstimation = 0.0f;
for(Interval interval : intervals){
double elementEstimation = subsetTrees[subsetTreeIndex].estimateIntervall(interval);
if(elementEstimation != RSSQuantiles.CANT_ESTIMATE){
groupEstimation += elementEstimation;
}
}
groupAverage += groupEstimation;
subsetTreeIndex++;
if(subsetTreeIndex > subsetTrees.length){
System.out.println(subsetTrees.length + " index = "+ subsetTreeIndex + " count " + subsetCount);
}
}
groupAverage /= (double)elementsInGroupCount;
exactQuantiles.offer(groupAverage);
}
// the median of all group results is the answer
return exactQuantiles.getQuantile(0.5f);
}
/**
* returns true if the bucket exceeded its capacity
* @return true if the bucket exceeded its capacity
*/
public boolean IsFull(){
return elementCount >= RSSQuantiles.ELEMENTS_PER_BUCKET;
}
// ---------------------------------------------------------------------------------- getter and setter
public int getElementCount(){
return elementCount;
}
public static double log2(double number){
return Math.log10(number)/Math.log10(2d);
}
}