GKQuantiles.java example

Explorer

streaminer-master
- src
  - main
    - java
      - org
        streaminer
        stream
        avg
        ExponentialMovingAverage.java
        IAverage.java
        MovingAverage.java
        SimpleEWMA.java
        TEWMA.java
        VariableEWMA.java
        cardinality
        AdaptiveCounting.java
        BJKST.java
        CardinalityMergeException.java
        CountThenEstimate.java
        FlajoletMartin.java
        HyperLogLog.java
        HyperLogLogPlus.java
        IBaseCardinality.java
        IRichCardinality.java
        KMinValues.java
        LinearCounting.java
        LogLog.java
        RegisterSet.java
        change
        AbsoluteChange.java
        AbstractChange.java
        AbstractCusum.java
        CusumNP.java
        RelativeChange.java
        VariationalChange.java
        classifier
        AROWClassifier.java
        AbstractClassifier.java
        AbstractMultiClassifier.java
        BWinnowClassifier.java
        Classifier.java
        MajorityClass.java
        MultiClassPAClassifier.java
        MultiRandomClassifier.java
        PAClassifier.java
        Perceptron.java
        RandomClassifier.java
        SimpleClassifier.java
        WinnowClassifier.java
        bayes
        BoundedBayes.java
        LossyBayes.java
        MultiBayes.java
        MultiLossyBayes.java
        MultiTopkBayes.java
        NaiveBayes.java
        NaiveBayesWOP.java
        StickySamplingBayes.java
        TopKBayes.java
        tree
        BTreeNode.java
        BinaryTreeNode.java
        ChernoffSplitCriterion.java
        ChernoffStatistics.java
        HoeffdingTree.java
        HoeffdingTreeModel.java
        HoeffdingTreeNode.java
        InnerNode.java
        LeafNode.java
        LinearRegression.java
        ModelTree.java
        NodeInfo.java
        QualityCriterion.java
        RTree.java
        RTreeStatistics.java
        Range.java
        RegressionTree.java
        RegressionTreeModel.java
        RegressionTreeNode.java
        RegressionTreeStatistics.java
        SplitCriterion.java
        TreeNode.java
        Visitor.java
        clustering
        IClusterer.java
        KMeans.java
        birch
        CFEntry.java
        CFEntryPair.java
        CFNode.java
        CFTree.java
        data
        Data.java
        DataImpl.java
        DataUtils.java
        entropy
        EntropyExact.java
        EntropySketch.java
        IEntropy.java
        eval
        ConfusionMatrix.java
        TableOfConfusion.java
        frequency
        AMSSketch.java
        BaseFrequency.java
        CCFCSketch.java
        CGT.java
        CountMinSketch.java
        CountMinSketchAlt.java
        CountSketch.java
        FrequencyException.java
        IBaseFrequency.java
        IFrequencyList.java
        IRichFrequency.java
        ISimpleFrequency.java
        ITimeDecayFrequency.java
        LossyCounting.java
        Majority.java
        MisraGries.java
        RealCounting.java
        SimpleTopKCounting.java
        SpaceSaving.java
        StickySampling.java
        TimeDecayCountMinSketch.java
        TimeDecayRealCounting.java
        decay
        DecayFormula.java
        ExpDecayFormula.java
        LinDecayFormula.java
        LogDecayFormula.java
        Quantity.java
        topk
        ConcurrentStreamSummary.java
        Frequent.java
        ITopK.java
        StochasticTopper.java
        StreamSummary.java
        util
        CountEntry.java
        CountEntryWithMaxError.java
        Counter.java
        ISampleSet.java
        SampleSet.java
        ScoredItem.java
        histogram
        spdt
        ArrayBinReservoir.java
        ArrayCategoricalTarget.java
        Bin.java
        BinReservoir.java
        BinUpdateException.java
        CategoricalTarget.java
        Gap.java
        GroupTarget.java
        Histogram.java
        MapCategoricalTarget.java
        MixedInsertException.java
        NumericTarget.java
        SimpleTarget.java
        SumOutOfRangeException.java
        SumResult.java
        Target.java
        TreeBinReservoir.java
        learner
        AbstractRegressor.java
        Learner.java
        LearnerUtils.java
        Regressor.java
        mapper
        AddMax.java
        AddMinMax.java
        AttributeFilter.java
        ExtractDouble.java
        IMapper.java
        Identity.java
        KeyMapper.java
        Normalization.java
        NumericalBinning.java
        SelectFeaturesMapper.java
        membership
        AbstractFilter.java
        BloomCalculations.java
        BloomFilter.java
        BloomFilterAlt.java
        CountingBloomFilter.java
        CuckooFilter.java
        DLeftCountingBloomFilter.java
        DynamicBloomFilter.java
        HashFunction.java
        IFilter.java
        Key.java
        ODTDBloomFilter.java
        RemoveScheme.java
        RetouchedBloomFilter.java
        StableBloomFilter.java
        TimingBloomFilter.java
        VarCountingBloomFilter.java
        misc
        MovingWindowDelta.java
        Rate.java
        Significance.java
        Statistics.java
        model
        DescriptionModel.java
        Distribution.java
        HyperplaneModel.java
        Model.java
        NominalDistributionModel.java
        NumericalDistributionModel.java
        PredictionModel.java
        SelectiveDescriptionModel.java
        SlidingWindow.java
        StreamDistribution.java
        norm
        StableSketch.java
        quantile
        CKMSQuantiles.java
        EnsembleQuantiles.java
        ExactQuantiles.java
        ExactQuantilesAll.java
        Frugal2U.java
        GKQuantiles.java
        GroupTree.java
        IQuantiles.java
        MPQuantiles.java
        QDigest.java
        QuantilesException.java
        SimpleQuantiles.java
        SumQuantiles.java
        TDigest.java
        WindowSketchQuantiles.java
        rss
        Bucket.java
        Interval.java
        RSSQuantiles.java
        Subset.java
        SubsetTree.java
        sampler
        BernoulliSampler.java
        ChainSampler.java
        FrequentSampler.java
        ISampleList.java
        ISampler.java
        L0Sampler.java
        ReservoirSampler.java
        SpaceSavingSampler.java
        SystematicSampler.java
        WRSampler.java
        WeightedRandomSampler.java
        gamma
        GammaFunction.java
        R.java
        Z.java
        sre
        OneSparseRecoveryEstimator.java
        SSparseRecoveryEstimator.java
        wavelet
        HaarWaveletTransform.java
        IWavelet.java
        util
        AbstractIterator.java
        ArrayUtils.java
        Bits.java
        ByteArrayTable.java
        ByteUtil.java
        DoublyLinkedList.java
        ExternalizableUtil.java
        IBuilder.java
        ListNode2.java
        Lists.java
        Pair.java
        Preconditions.java
        QuotedStringTokenizer.java
        SizeOf.java
        UnsignedIntComparator.java
        Varint.java
        distance
        CosineDistance.java
        Kernel.java
        LinearDistance.java
        Radius.java
        SquaredDistance.java
        distribution
        ZipfDistribution.java
        hash
        Hash.java
        HashUtils.java
        JenkinsHash.java
        Lookup3Hash.java
        MurmurHash.java
        MurmurHash3.java
        SpookyHash.java
        SpookyHash32.java
        SpookyHash64.java
        factory
        HashFunctionFactory.java
        SimpleHashFactory.java
        TwoUniversalHashFactory.java
        function
        HashFunction.java
        JenkinsHashFunction.java
        MurmurHashFunction.java
        SimpleHashFunction.java
        TwoUniversalHashFunction.java
        math
        MathHelper.java
        MathUtil.java
        NumberUtil.java
        PowerOfTwo.java
        Prime.java
  - test
    - java
      - org
        streaminer
        stream
        avg
        TEWMATest.java
        benchmark
        CountMinSketchBenchmark.java
        cardinality
        BJKSTTest.java
        FlajoletMartinTest.java
        KMinValuesTest.java
        RegisterSetTest.java
        TestAdaptiveCounting.java
        TestCountThenEstimate.java
        TestHyperLogLog.java
        TestHyperLogLogPlus.java
        TestICardinality.java
        TestLinearCounting.java
        TestLogLog.java
        change
        AbsoluteChangeTest.java
        ExactSolution.java
        StreamGenerator.java
        classifier
        bayes
        NaiveBayesTest.java
        entropy
        EntropySketchTest.java
        frequency
        AMSSketchTest.java
        CCFCSketchTest.java
        CGTTest.java
        CountMinSketchAltTest.java
        SimpleTopKCountingTest.java
        SpaceSavingTest.java
        StreamGenerator.java
        TimeDecayCountMinSketchTest.java
        topk
        TestConcurrentStreamSummary.java
        TestStochasticTopper.java
        TestStreamSummary.java
        util
        TestSampleSet.java
        membership
        BloomFilterAltTest.java
        BloomFilterTest.java
        CountingBloomFilterTest.java
        CuckooFilterTest.java
        DLeftCountingBloomFilterTest.java
        ODTDBloomFilterTest.java
        StableBloomFilterTest.java
        TimingBloomFilterTest.java
        VarCountingBloomFilterTest.java
        model
        NominalDistributionModelTest.java
        norm
        StableSketchTest.java
        quantile
        CKMSQuantilesTest.java
        Frugal2UTest.java
        GroupTreeTest.java
        MPQuantilesTest.java
        QDigestTest.java
        TDigestTest.java
        sampler
        ChainSamplerTest.java
        util
        TestDoublyLinkedList.java
        hash
        Lookup3HashTest.java
        MurmurHashTest.java
        SpookyHash32Test.java
        SpookyHash64Test.java

package org.streaminer.stream.quantile;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

/**
 * This class is an implementation of the Greenwald-Khanna algorithm for computing
 * epsilon-approximate quantiles of large data sets. In its pure form it is an offline
 * algorithm. But it is used as a black box by many online algorithms for computing
 * epsilon-approximate quantiles on data streams.<br>
 * Our implementation widely adapts the original idea published by <i>Michael Greenwald
 * </i> and <i>Sanjeev Khanna</i> in their paper <i>"Space-Efficient Online Computation
 * of Quantile Summaries"</i>. Contrary to their idea this implementation uses a list 
 * rather than a tree structure to maintain the elements.
 * 
 * @author Markus Kokott, Carsten Przyluczky
 *
 */
public class GKQuantiles implements IQuantiles<Double> {
    /** The quantile summary: tuples of (value, offset, range); re-created by {@link #setEpsilon(double)}. */
    private List<Tuple> summary;
    /** Smallest value stored so far; returned directly by some quantile queries. */
    private double minimum;
    /** Largest value stored so far; returned directly by some quantile queries. */
    private double maximum;
    /** Number of offered elements between two compressions; derived from epsilon (about 1 / (2 * epsilon)). */
    private int stepsUntilMerge;
    
    /**
     * <code>true</code> until <code>count</code> reaches <code>stepsUntilMerge</code>. GK needs about
     * 1 / (2 * epsilon) elements to complete its initial phase; during that phase new tuples get a
     * range of zero and no compression takes place.
     */
    private boolean initialPhase;
    /** Number of elements offered since the last reset. */
    private Integer count;
    
    /**
     * This value specifies the error bound.
     */
    protected double epsilon;
    
    /**
     * Creates a new GKQuantiles object with an error bound of 0.05.
     */
    public GKQuantiles() {
        this(0.05);
    }

    /**
     * Creates a new GKQuantiles object that computes epsilon-approximate quantiles.
     *
     * @param epsilon The maximum error bound for quantile estimation; must be strictly
     *                greater than 0 and strictly smaller than 1.
     * @throws IllegalArgumentException if <code>epsilon</code> is NaN or outside of (0, 1)
     */
    public GKQuantiles(double epsilon) {
        // Phrased as a negated range check so that NaN (for which every comparison is
        // false) is rejected as well.
        if (!(epsilon > 0 && epsilon < 1)) {
            throw new IllegalArgumentException("An appropriate epsilon value must lay between 0 and 1.");
        }

        setEpsilon(epsilon);
    }
    
    /**
     * Sets the error bound and resets this instance to its empty state: the summary is
     * discarded, the element count is set back to zero and the initial phase starts again.
     *
     * @param epsilon the new error bound
     */
    public void setEpsilon(double epsilon) {
        this.epsilon = epsilon;

        // "Nothing seen yet" sentinels. Double.MIN_VALUE is the smallest POSITIVE double,
        // so it cannot serve as the initial maximum: negative values would never be
        // recognised as a new maximum.
        this.minimum = Double.POSITIVE_INFINITY;
        this.maximum = Double.NEGATIVE_INFINITY;

        // floor(1 / (2 * epsilon)) is 0 for epsilon > 0.5; the step is used as a modulus
        // in offer(), so it has to be at least 1.
        this.stepsUntilMerge = Math.max(1, (int) Math.floor(1.0 / (2.0 * epsilon)));

        this.summary = new CopyOnWriteArrayList<Tuple>();
        this.count = 0;
        this.initialPhase = true;
    }
    
    /**
     * Adds a new element to the summary and compresses the summary every
     * <code>stepsUntilMerge</code> elements once the initial phase is over.
     *
     * @param value the element to add
     */
    @Override
    public void offer(Double value) {
        insertItem(value);
        incrementCount();

        // No compression while still in the initial phase. A step of 0 (possible for
        // epsilon > 0.5) would make the modulo below throw an ArithmeticException, and
        // with fewer than three tuples there is no inner tuple that could be merged.
        if (initialPhase || stepsUntilMerge <= 0 || summary.size() < 3) {
            return;
        }

        if (count % stepsUntilMerge == 0) {
            compress();
        }
    }

    /**
     * Estimates the value at quantile <code>q</code> such that the epsilon accuracy holds.
     *
     * @param q the requested quantile, a value in [0, 1]
     * @return the estimated quantile. {@link Double#NaN} is returned if <code>q</code> is NaN
     * or outside of [0, 1], or if this instance of <code>GKQuantiles</code> is empty.
     */
    @Override
    public Double getQuantile(double q) {
        // The negated range check also rejects NaN, for which every comparison is false.
        if (count == 0 || !(q >= 0 && q <= 1)) {
            return Double.NaN;
        }

        // Queries in a very early state are answered from the extreme values directly.
        if (count == 1) {
            return minimum;
        }
        if (count == 2) {
            return (q < 0.5) ? minimum : maximum;
        }

        // double instead of float: a float cannot represent every count above 2^24.
        final double total = count.doubleValue();
        final int wantedRank = (int) (q * total);
        final double tolerance = epsilon * total;

        // If the wanted rank is at most epsilon * count ranks away from the highest rank,
        // the maximum is always an appropriate estimate ...
        if (wantedRank > total - tolerance) {
            return maximum;
        }
        // ... and likewise the minimum close to the lowest rank.
        if (wantedRank < tolerance) {
            return minimum;
        }

        // Work on a snapshot so the scan is not affected by a summary that is replaced meanwhile.
        final Tuple[] snapshot = summary.toArray(new Tuple[0]);
        if (snapshot.length == 0) {
            return Double.NaN;
        }

        int minRank = 0;
        Tuple fallback = snapshot[0];

        // Usually a tuple whose rank interval lies within the tolerance around the wanted
        // rank is found in this loop; its value is returned.
        for (Tuple current : snapshot) {
            minRank += current.getOffset();
            final int maxRank = minRank + current.getRange();

            if (maxRank - wantedRank <= tolerance) {
                fallback = current;
                if (wantedRank - minRank <= tolerance) {
                    return current.getValue();
                }
            }
        }

        // No tuple satisfied both bounds: use the last one that satisfied the upper bound.
        return fallback.getValue();
    }
    
    
    
    /**
     * Checks whether <code>item</code> is a new extreme value (i.e. minimum or maximum) or lies
     * between those values and calls the appropriate insert method. The very first element
     * becomes minimum and maximum at the same time.
     *
     * @param item {@link Double} value of current element
     */
    private void insertItem(Double item) {
        // NaN compares false against everything and therefore has no position in the
        // ordered summary; it is not stored.
        if (item.isNaN()) {
            return;
        }

        // First element: it is both extremes. Without this the maximum would stay at its
        // sentinel until some later element happens to take the "new maximum" path, which
        // can leave the summary unsorted (e.g. for the input 5, 3, 4).
        if (summary.isEmpty()) {
            minimum = item;
            maximum = item;
            summary.add(new Tuple(item, 1, 0));
            return;
        }

        if (item < minimum) {
            insertAsNewMinimum(item);
            return;
        }

        if (item >= maximum) {
            insertAsNewMaximum(item);
            return;
        }

        insertInBetween(item);
    }

    /**
     * This method will be called every time an element arrives whose value is smaller than the value
     * of the current minimum. Contrary to "normal" elements, the minimum's range has to be zero.
     *
     * @param item - new element with a {@link Double} value smaller than the current minimum of the summary.
     */
    private void insertAsNewMinimum(Double item) {
        minimum = item;
        summary.add(0, new Tuple(item, 1, 0));
    }

    /**
     * This method will be called every time an element arrives whose value is greater than or equal
     * to the value of the current maximum. A strictly greater element becomes the new maximum with a
     * range of zero; an element equal to the maximum is placed directly in front of the maximum tuple.
     *
     * @param item - new element with a {@link Double} value not smaller than the current maximum of the summary.
     */
    private void insertAsNewMaximum(Double item) {
        if (item.doubleValue() == maximum) {
            // The maximum tuple is the last one and must stay last. The duplicate goes to
            // the position of the maximum (size - 1), which shifts the maximum one slot to
            // the right. Using size - 2 would put it in front of a smaller value, or at
            // index -1 when the summary holds a single tuple.
            final int maxIndex = summary.size() - 1;
            final Tuple maxTuple = summary.get(maxIndex);
            summary.add(maxIndex, new Tuple(item, 1, computeRangeForNewTuple(maxTuple)));
        } else {
            maximum = item;
            summary.add(new Tuple(item, 1, 0));
        }
    }

    /**
     * Inserts an element whose value lies between the current minimum and maximum. The
     * summary is scanned for the first pair of neighbours that encloses the value and the
     * new tuple is placed between them. During the initial phase the new tuple keeps a
     * range of zero; afterwards its range is derived from its right neighbour. If no
     * enclosing pair exists the summary is left unchanged.
     *
     * @param item - a newly arrived element represented by a {@link Double} value.
     */
    private void insertInBetween(Double item) {
        final Tuple inserted = new Tuple(item, 1, 0);

        for (int pos = 1; pos < summary.size(); pos++) {
            final Tuple lower = summary.get(pos - 1);
            final Tuple upper = summary.get(pos);

            final boolean enclosed = item >= lower.getValue() && item < upper.getValue();
            if (!enclosed) {
                continue;
            }

            // the range stays at zero for as long as the initial phase lasts
            if (!initialPhase) {
                inserted.setRange(computeRangeForNewTuple(upper));
            }

            summary.add(pos, inserted);
            return;
        }
    }

    /**
     * Counts one more processed element. The initial phase is switched off at the moment
     * the counter equals <code>stepsUntilMerge</code>.
     */
    private void incrementCount() {
        count = count + 1;
        // stays as it is unless the threshold is hit exactly
        initialPhase = initialPhase && count.intValue() != stepsUntilMerge;
    }
    
    /**
     * Compresses the summary to save space: the summary is split into partitions, every
     * partition except the first and the last one is merged, and the concatenated result
     * is sorted and installed as the new summary.
     */
    private void compress() {
        final List<List<Tuple>> parts = getPartitionsOfSummary();
        final int lastIdx = parts.size() - 1;

        List<Tuple> rebuilt = new CopyOnWriteArrayList<Tuple>();

        // the last partition is taken over untouched
        rebuilt.addAll(parts.get(lastIdx));

        // inner partitions, walked from the back towards the front, are merged one by one
        int idx = lastIdx - 1;
        while (idx > 0) {
            rebuilt.addAll(mergeWorkingSet(parts.get(idx)));
            idx--;
        }

        // the first partition is taken over untouched as well
        rebuilt.addAll(parts.get(0));

        summary = sortWorkingSet(rebuilt);
    }

    /**
     * Merges the tuples of one partition and therefore saves space.
     * <p>
     * The first element's band is taken as the band of the "children". The method then walks
     * to the right until it finds an element with a different band (the "parent") or reaches
     * the last element. Children are merged into the parent from right to left while the
     * parent has capacity left; whatever cannot be merged into the parent is handed to
     * {@link #mergeSiblings(LinkedList)}, and the rest of the partition is processed recursively.
     * <p>
     * The returned list is not necessarily in ascending order of values; {@link #compress()}
     * sorts the concatenated result afterwards.
     * 
     * @param workingSet a partition (created by {@link #getPartitionsOfSummary()}) or parts of it
     * @return the merged working set. If <code>workingSet</code> holds fewer than two elements
     * the very same list is returned, otherwise a new {@link LinkedList} of {@link Tuple}.
     */
    private List<Tuple> mergeWorkingSet(List<Tuple> workingSet) {
        // recursion stops here
        if (workingSet.size() < 2) {
            return workingSet;
        }

        LinkedList<Tuple> mergedWorkingSet = new LinkedList<Tuple>();			// resulting working set
        LinkedList<Tuple> currentWorkingSet = new LinkedList<Tuple>();			// elements for this step of recursion
        LinkedList<Tuple> remainingWorkingSet = new LinkedList<Tuple>();		// remaining elements after this step of recursion
        remainingWorkingSet.addAll(workingSet);

        int index = 1;
        int bandOfChildren = computeBandOfTuple(workingSet.get(0));
        int bandOfParent = computeBandOfTuple(workingSet.get(index));
        // the first element always belongs to the current step
        currentWorkingSet.add(workingSet.get(0));
        remainingWorkingSet.removeFirst();

        // we are looking for the next tuple that has a different band than the first element because that
        // element will be the limit for the first element to get merged into
        while (bandOfChildren == bandOfParent && workingSet.size() - 1 > index) {
            // the working set will be partitioned into a working set for the current step of recursion and
            // a partition that contains all elements that have to be processed in later steps
            currentWorkingSet.add(workingSet.get(index));
            remainingWorkingSet.remove(workingSet.get(index));

            index++;
            bandOfParent = computeBandOfTuple(workingSet.get(index));
        }
        // either the first element with a different band or, if there is none, the last element
        Tuple parent = workingSet.get(index);

        // there is no real parent. all elements have the same band
        if (bandOfParent == bandOfChildren) {
            currentWorkingSet.add(parent);
            mergedWorkingSet.addAll(mergeSiblings(currentWorkingSet));
            return mergedWorkingSet;
        }

        int capacityOfParent = computeCapacityOfTuple(parent);

        // children are merged into the parent from right to left for as long as the parent's remaining
        // capacity exceeds the child's offset; the loop always leaves at least one child behind
        while (capacityOfParent > currentWorkingSet.getLast().getOffset() && currentWorkingSet.size() > 1) {
            merge(currentWorkingSet.getLast(), parent);
            currentWorkingSet.removeLast();
            capacityOfParent = computeCapacityOfTuple(parent);
        }

        // checking whether all children were merged into parent or some were left over
        // NOTE(review): the loop above stops while one child is still in currentWorkingSet, so this
        // branch looks unreachable - confirm whether the first child was meant to be mergeable too
        if (currentWorkingSet.isEmpty()) {
            mergedWorkingSet.addAll(mergeWorkingSet(remainingWorkingSet));
        }
        // if there are some children left, some of them can probably be merged into siblings.
        // if there is any child left over, parent can't be merged into any other tuple, so it must be removed
        // from the elements in the remaining working set.
        else {
            remainingWorkingSet.remove(parent);
            mergedWorkingSet.addAll(mergeSiblings(currentWorkingSet));
            mergedWorkingSet.add(parent);
            mergedWorkingSet.addAll(mergeWorkingSet(remainingWorkingSet));
        }

        return mergedWorkingSet;
    }

    /**
     * Merges elements that share a band. Working from the right end, the rightmost
     * element absorbs its left neighbours for as long as {@link #areMergeable(Tuple, Tuple)}
     * allows it; the first neighbour that cannot be absorbed becomes the next absorber.
     * The given list is consumed in the process.
     *
     * @param workingSet - a {@link LinkedList} of {@link Tuple}
     * @return the surviving tuples, listed from the rightmost survivor to the leftmost one.
     * If <code>workingSet</code> holds fewer than two elements it is returned as is.
     */
    private LinkedList<Tuple> mergeSiblings(LinkedList<Tuple> workingSet) {
        // nothing to merge
        if (workingSet.size() < 2) {
            return workingSet;
        }

        final LinkedList<Tuple> survivors = new LinkedList<Tuple>();

        while (workingSet.size() >= 2) {
            // an element can only be merged into its direct neighbour to the right
            final Tuple absorber = workingSet.removeLast();

            while (!workingSet.isEmpty() && areMergeable(workingSet.getLast(), absorber)) {
                merge(workingSet.getLast(), absorber);
                workingSet.removeLast();
            }

            survivors.add(absorber);
        }

        // at most one element is left; it has no left neighbour to absorb
        survivors.addAll(workingSet);

        return survivors;
    }

    /**
     * Merges the element <code>left</code> into the element <code>right</code> by adding
     * <code>left</code>'s offset to <code>right</code>'s offset. Removing <code>left</code>
     * from its list is the caller's job. Only elements with a smaller value and a band
     * not greater than <code>right</code>'s should be passed as <code>left</code>.
     *
     * @param left - element that is dropped by the caller after merging
     * @param right - element that holds the sum of both offsets after merging
     */
    private void merge(Tuple left, Tuple right) {
        final int combinedOffset = right.getOffset() + left.getOffset();
        right.setOffset(combinedOffset);
    }
    
    /**
     * Computes the range of a tuple that is about to be inserted directly in front of
     * <code>successor</code>. During the initial phase the range is always zero. Afterwards
     * it is the successor's range plus offset minus one (the variant from the paper's section
     * "empirical measurements"); only if that were negative, floor(2 * epsilon * count) from
     * the paper's algorithm description is used instead.
     *
     * @param successor the tuple that will follow the new tuple
     * @return range of the new element as {@link Integer} value
     */
    private Integer computeRangeForNewTuple(Tuple successor) {
        if (initialPhase) {
            return 0;
        }

        final int inherited = successor.getRange() + successor.getOffset() - 1;
        if (inherited >= 0) {
            return inherited;
        }

        return (int) Math.floor(2.0 * epsilon * count.doubleValue());
    }

    /**
     * Partitions the summary into lists of {@link Tuple}, so that bands of elements in a
     * single list are monotonically increasing. The summary itself is not modified; the
     * partitioning works on a copy.
     * <p>
     * Layout of the result: the first partition holds only the minimum tuple, the last
     * partition holds only the maximum tuple, and the partitions in between cover the inner
     * tuples, ordered from the right end of the summary to the left end. A summary with
     * fewer than two tuples yields its content as the first partition followed by two empty
     * partitions.
     *
     * @return a {@link List} of partitions of {@link #summary}; every tuple of the summary
     * appears in exactly one partition
     */
    private List<List<Tuple>> getPartitionsOfSummary() {
        List<List<Tuple>> partitions = new LinkedList<List<Tuple>>();

        // private copy: the live summary stays intact until compress() replaces it
        LinkedList<Tuple> workingSet = new LinkedList<Tuple>(summary);

        // degenerate summary: there is no separate minimum and maximum
        if (workingSet.size() < 2) {
            partitions.add(workingSet);
            partitions.add(new LinkedList<Tuple>());
            partitions.add(new LinkedList<Tuple>());
            return partitions;
        }

        // assuring that the minimum and maximum won't appear in a partition with other elements
        LinkedList<Tuple> minPartition = new LinkedList<Tuple>();
        minPartition.add(workingSet.removeFirst());
        LinkedList<Tuple> maxPartition = new LinkedList<Tuple>();
        maxPartition.add(workingSet.removeLast());

        // the minimum is always the first partition
        partitions.add(minPartition);

        // nothing left to partition
        if (workingSet.size() < 2) {
            partitions.add(workingSet);
            partitions.add(maxPartition);
            return partitions;
        }

        // we process the working set from the very last element to the very first one
        LinkedList<Tuple> currentPartition = new LinkedList<Tuple>();
        while (workingSet.size() >= 2) {
            Tuple lastTuple = workingSet.removeLast();
            Tuple lastButOneTuple = workingSet.getLast();
            currentPartition.addFirst(lastTuple);

            // every time the preceding element has a greater band than the current one, the current
            // partition ends here and a new one is started
            if (isPartitionBorder(lastButOneTuple, lastTuple)) {
                partitions.add(currentPartition);
                currentPartition = new LinkedList<Tuple>();
            }
        }

        // Exactly one tuple - the leftmost inner one - is left. It belongs to the partition
        // under construction, which is a fresh one if a border was crossed in the last step.
        // It must be added in both cases, otherwise the tuple and its offset would be lost.
        currentPartition.addFirst(workingSet.getFirst());
        partitions.add(currentPartition);

        // the maximum is a partition of its own at the very last position
        partitions.add(maxPartition);

        return partitions;
    }

    /**
     * Returns the current capacity of an element, i.e. floor(2 * epsilon * count) minus
     * the element's offset.
     *
     * @param tuple - a {@link Tuple}
     * @return {@link Integer} value representing the <code>tuple</code>'s capacity
     */
    private Integer computeCapacityOfTuple(Tuple tuple) {
        final int usedByTuple = tuple.getOffset();
        final int maxCapacity = (int) Math.floor(2.0 * epsilon * count);
        return maxCapacity - usedByTuple;
    }

    /**
     * Computes the band of a tuple from the number of seen elements (<code>count</code>)
     * and the tuple's range. With p = floor(2 * epsilon * count) the checks are made in
     * this order:
     * <ul>
     * <li>if p and the range are logarithmically equal (see
     * {@link #areLogarithmicallyEqual(Double, Double)}) the band is 0,</li>
     * <li>otherwise, if the range is 0 (tuples from the initial phase) the band is -1,</li>
     * <li>otherwise the band is the first alpha &gt;= 1 for which the range lies between
     * <i>p - 2^alpha - (p mod 2^alpha)</i> and <i>p - 2^(alpha-1) - (p mod 2^(alpha-1))</i>;
     * if no such alpha is found, the last alpha that was tried is returned.</li>
     * </ul>
     * Please refer to the paper if you are interested in the formula for computing bands.
     *
     * @param tuple - a {@link Tuple}
     * @return {@link Integer} value specifying <code>tuple</code>'s band
     */
    private Integer computeBandOfTuple(Tuple tuple) {
        final double p = Math.floor(2 * epsilon * count);
        final int range = tuple.getRange();

        // this will be true for new tuples
        if (areLogarithmicallyEqual(p, (double) range)) {
            return 0;
        }

        // initial phase
        if (range == 0) {
            return -1;
        }

        final double alphaLimit = Math.log(p) / Math.log(2);
        int alpha = 0;

        while (alpha < alphaLimit) {
            alpha++;

            final double width = Math.pow(2, alpha);
            final double lowerBound = p - width - (p % width);
            if (lowerBound > range) {
                continue;
            }

            final double halfWidth = Math.pow(2, alpha - 1);
            final double upperBound = p - halfWidth - (p % halfWidth);
            if (upperBound >= range) {
                return alpha;
            }
        }

        return alpha;
    }

    /**
     * Checks if two given values are logarithmically equal, i.e. the floored natural
     * logarithm of <code>valueOne</code> equals the floored natural logarithm of
     * <code>valueTwo</code>.
     *
     * @param valueOne - the first {@link Double} to compare
     * @param valueTwo - the second {@link Double} to compare
     * @return <code>true</code> if both values are logarithmically equal
     */
    private boolean areLogarithmicallyEqual(Double valueOne, Double valueTwo) {
        final double magnitudeOne = Math.floor(Math.log(valueOne));
        final double magnitudeTwo = Math.floor(Math.log(valueTwo));
        return magnitudeOne == magnitudeTwo;
    }
    /**
     * Decides whether <code>tuple</code> may be merged into <code>parent</code>. This is the
     * case if the parent's capacity is greater than the tuple's offset and the tuple's band
     * is not greater than the parent's band.
     *
     * @param tuple The element that will be deleted after merging.
     * @param parent The element that will absorb <code>tuple</code> during merge.
     * @return <code>true</code> if given elements are mergeable or <code>false</code> else.
     */
    private boolean areMergeable(Tuple tuple, Tuple parent) {
        // the parent has to have room for the tuple's offset ...
        if (computeCapacityOfTuple(parent) <= tuple.getOffset()) {
            return false;
        }

        // ... and must not sit in a lower band than the tuple
        final int parentBand = computeBandOfTuple(parent);
        final int tupleBand = computeBandOfTuple(tuple);
        return parentBand >= tupleBand;
    }

    /**
     * Bands of elements in a partition are monotonically increasing from the first to the last
     * element, so a partition border lies between two neighbours if the preceding element has a
     * greater band than the current one.
     *
     * @param left preceding element.
     * @param right current element.
     * @return <code>true</code> if a partition border exists between the given elements or
     * <code>false</code> else.
     */
    private boolean isPartitionBorder(Tuple left, Tuple right) {
        final int leftBand = computeBandOfTuple(left);
        final int rightBand = computeBandOfTuple(right);
        return leftBand > rightBand;
    }

    /**
     * Sorts a list of {@link Tuple} in ascending order of their values. The sort is stable:
     * tuples with equal values keep their relative order. The given list is left unmodified.
     *
     * @param workingSet - the tuples to sort; may be empty.
     * @return a new {@link CopyOnWriteArrayList} holding the tuples in ascending order.
     */
    private List<Tuple> sortWorkingSet(List<Tuple> workingSet){
        // Sort a plain copy: every single write to a CopyOnWriteArrayList copies the whole
        // backing array, so the sorted data is wrapped only once at the end.
        final List<Tuple> sorted = new ArrayList<Tuple>(workingSet);

        Collections.sort(sorted, new Comparator<Tuple>() {
            @Override
            public int compare(Tuple first, Tuple second) {
                // numeric comparison, so that 0.0 and -0.0 count as equal values
                final double a = first.getValue();
                final double b = second.getValue();
                if (a < b) {
                    return -1;
                }
                return (a > b) ? 1 : 0;
            }
        });

        return new CopyOnWriteArrayList<Tuple>(sorted);
    }

    /**
     * @return the number of elements offered since the last reset
     */
    public Integer getCount() {
        return count;
    }


    /**
     * @return the canonical class name followed by the configured epsilon
     */
    @Override
    public String toString() {
        return new StringBuilder()
                .append(getClass().getCanonicalName())
                .append(" {")
                .append(" epsilon=")
                .append(epsilon)
                .append(" }")
                .toString();
    }
    
    /**
     * This is just a wrapper class to hold all needed informations of an element. It contains the following
     * informations:
     * <ul>
     * <li><b>value</b>: the value of the element</li>
     * <li><b>offset</b>: the difference between the least rank of this element and the rank of the preceding
     * element.</li>
     * <li><b>range</b>: the span between this elements least and most rank</li>
     * <ul>
     *
     */
    private class Tuple implements Serializable {
        private static final long serialVersionUID = 1L;
        private Double  value;
        private Integer offset;
        private Integer range;

        public Tuple(Double value, Integer offset, Integer range) {
            this.value = value;
            this.offset = offset;
            this.range = range;
        }

        public Double getValue() {
            return value;
        }
        public Integer getOffset() {
            return offset;
        }
        public void setOffset(Integer offset) {
            this.offset = offset;
        }
        public Integer getRange() {
            return range;
        }
        public void setRange(Integer range) {
            this.range = range;
        }

        @Override
        public String toString() {
            String out = "( " + value + ", " + offset + ", " + range + " )";
            return out;
        }
    }
    
}