package org.streaminer.stream.quantile; import java.util.Collections; import java.util.Iterator; import java.util.LinkedList; import java.util.concurrent.CopyOnWriteArrayList; /** * * This implementation computes approximate phi-quantiles that differ at most epsilon*N ranks from the element * that is the real quantile. epsilon is known a priori and affects N, which is the minimum size of the set of * observations. To get a hunch how many examples are need I'm providing some examples: * <ul> * <li> epsilon = 0.25 - buffer size = 48</li> * <li> epsilon = 0.10 - buffer size = 100</li> * <li> epsilon = 0.05 - buffer size = 190</li> * <li> epsilon = 0.01 - buffer size = 910</li> * </ul> * <b>You can use this class to estimate quantiles, as long as the distribution of observations doesn't have a * heavy tail.</b> * * @author Markus Kokott * */ public class SimpleQuantiles implements IQuantiles<Double> { private boolean initialPhase; private Integer bufferSize; private CopyOnWriteArrayList<Double> buffer; /** * This value specifies the error bound. */ private double epsilon; /** * Standard constructor... * * @param epsilon This quantile estimator determines a phi-quantile that differs at most epsilon*N positions * from the real quantile */ public SimpleQuantiles (double epsilon) { if (epsilon <= 0 || epsilon >= 1) { throw new RuntimeException("An appropriate epsilon value must lay between 0 and 1."); } this.epsilon = epsilon; this.bufferSize = this.computeBufferSize(); this.initialPhase = true; this.buffer = new CopyOnWriteArrayList<Double>(); } @Override public void offer(Double value) { this.buffer.add(value); if (this.buffer.size() >= this.bufferSize) { this.initialPhase = false; this.buffer.remove(0); } } @Override public Double getQuantile(double q) throws QuantilesException { if (this.buffer.size() != this.bufferSize) { if (this.bufferSize == 0) { return Double.NaN; } return this.buffer.get((int) Math.floor(q*this.buffer.size())); } Integer lowerBound = this.computeLowerBound(q); Integer upperBound = this.computeUpperBound(q); LinkedList<Double> copyOfBuffer = new LinkedList<Double>(); Iterator<Double> tempBuffer = this.buffer.iterator(); while (tempBuffer.hasNext()) { copyOfBuffer.add(tempBuffer.next()); } Collections.sort(copyOfBuffer); return copyOfBuffer.get((int) Math.floor(lowerBound + upperBound) / 2); } /** * Before a quantile can be computed, the first buffer has to be filled. During that initial phase * every call of {@link #getQuantile(double)} will return {@link Double#NaN}. You can call this method * to check whether the initial phase has ended or not. * * @return <code>true</code> if no quantiles can be computed at the moment. */ public boolean isInInitialPhase(){ return this.initialPhase; } /** * The size of the buffer depends on the given epsilon. This method computes an appropriate size * for the buffer. * * @return an <code>int</code> value that serves as the maximum length for the buffer */ private int computeBufferSize() { Double temp = Math.ceil(9/(epsilon*(1-epsilon))); return temp.intValue(); } /** * Because the quantile lays in an interval there have to be bounds on the ranks. This method computes the lower one. * * @param phi The specific quantile. * @return A lower bound for the rank of the wanted quantile. */ private Integer computeLowerBound(double phi){ double p = 1 - (1-this.epsilon) / 2; if (epsilon < 0.5){ p = 1 - this.epsilon / 2; } double t = Math.sqrt(-2*Math.log(1-p)); double gaussianPQuantile = t - (2.515517 + 0.802853*t + 0.010328*Math.pow(t, 2)) / (1 + 1.432788*t + 0.189269*Math.pow(t, 2) + 0.001308*Math.pow(t, 3)); Double bound = 1d; bound = Math.ceil(this.bufferSize * phi - gaussianPQuantile * Math.sqrt(this.bufferSize * phi * (1-phi))); if (bound <= 0) { bound = 1d; } return bound.intValue(); } /** * Because the quantile lays in an interval there have to be bounds on the rank. This method computes the upper one. * * @param phi The specific quantile. * @return An upper bound for the rank of the wanted quantile. */ private Integer computeUpperBound(double phi){ double p = 1 - (1-this.epsilon) / 2; if (epsilon < 0.5) { p = 1 - this.epsilon / 2; } double t = Math.sqrt(-2*Math.log(1-p)); double gaussianPQuantile = t - (2.515517 + 0.802853*t + 0.010328*Math.pow(t, 2)) / (1 + 1.432788*t + 0.189269*Math.pow(t, 2) + 0.001308*Math.pow(t, 3)); Double bound = 1d; bound = Math.ceil(this.bufferSize * phi + gaussianPQuantile * Math.sqrt(this.bufferSize * phi * (1-phi))); if (bound >= this.bufferSize) { bound = 1d; } return bound.intValue(); } }