/**
* Copyright 2011 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.streaminer.stream.quantile;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* An implementation of the Munro-Paterson one-pass quantile estimation
* algorithm, available via:
* http://scholar.google.com/scholar?q=munro+paterson
*
* <p>This implementation follows the implementation in the szl compiler:
* http://code.google.com/p/szl/source/browse/trunk/src/emitters/szlquantile.cc
*
*/
public class MPQuantiles implements IQuantiles<Double> {
private static final long MAX_TOT_ELEMS = 1024L * 1024L * 1024L * 1024L;
private final List<List<Double>> buffer = new ArrayList<List<Double>>();
private final int maxElementsPerBuffer;
private final int numQuantiles;
private int totalElements;
private double min;
private double max;
public MPQuantiles(int numQuantiles) {
this.numQuantiles = Math.max(2, numQuantiles);
this.maxElementsPerBuffer = computeMaxElementsPerBuffer();
}
@Override
public void offer(Double value) {
if (totalElements == 0 || value < min) {
min = value;
}
if (totalElements == 0 || max < value) {
max = value;
}
if (totalElements > 0 && totalElements % (2 * maxElementsPerBuffer) == 0) {
Collections.sort(buffer.get(0));
Collections.sort(buffer.get(1));
recursiveCollapse(buffer.get(0), 1);
}
ensureBuffer(0);
ensureBuffer(1);
int index = buffer.get(0).size() < maxElementsPerBuffer ? 0 : 1;
buffer.get(index).add(value);
totalElements++;
}
@Override
public Double getQuantile(double q) throws QuantilesException {
double quantileKey = 0.0;
for (double quantileValue : getQuantiles()) {
if (round(quantileKey) == q)
return quantileValue;
quantileKey += 1.0/(this.numQuantiles-1);
}
return 0.0;
}
public void clear() {
buffer.clear();
totalElements = 0;
}
public Map<Double, Double> getFullQuantiles() {
Map<Double, Double> quantiles = new HashMap<Double, Double>();
double quantileKey = 0.0;
for (double quantileValue : getQuantiles()) {
quantiles.put(round(quantileKey), quantileValue);
quantileKey += 1.0/(this.numQuantiles-1);
}
return quantiles;
}
public List<Double> getQuantiles() {
List<Double> quantiles = new ArrayList<Double>();
quantiles.add(min);
if (buffer.get(0) != null) {
Collections.sort(buffer.get(0));
}
if (buffer.get(1) != null) {
Collections.sort(buffer.get(1));
}
int[] index = new int[buffer.size()];
long S = 0;
for (int i = 1; i <= numQuantiles - 2; i++) {
long targetS = (long) Math.ceil(i * (totalElements / (numQuantiles - 1.0)));
while (true) {
double smallest = max;
int minBufferId = -1;
for (int j = 0; j < buffer.size(); j++) {
if (buffer.get(j) != null && index[j] < buffer.get(j).size()) {
if (!(smallest < buffer.get(j).get(index[j]))) {
smallest = buffer.get(j).get(index[j]);
minBufferId = j;
}
}
}
long incrementS = minBufferId <= 1 ? 1L : (0x1L << (minBufferId - 1));
if (S + incrementS >= targetS) {
quantiles.add(smallest);
break;
} else {
index[minBufferId]++;
S += incrementS;
}
}
}
quantiles.add(max);
return quantiles;
}
private int computeMaxElementsPerBuffer() {
double epsilon = 1.0 / (numQuantiles - 1.0);
int b = 2;
while ((b - 2) * (0x1L << (b - 2)) + 0.5 <= epsilon * MAX_TOT_ELEMS) {
++b;
}
return (int) (MAX_TOT_ELEMS / (0x1L << (b - 1)));
}
private void ensureBuffer(int level) {
while (buffer.size() < level + 1) {
buffer.add(null);
}
if (buffer.get(level) == null) {
buffer.set(level, new ArrayList<Double>());
}
}
private void collapse(List<Double> a, List<Double> b, List<Double> out) {
int indexA = 0, indexB = 0, count = 0;
Double smaller = null;
while (indexA < maxElementsPerBuffer || indexB < maxElementsPerBuffer) {
if (indexA >= maxElementsPerBuffer ||
(indexB < maxElementsPerBuffer && a.get(indexA) >= b.get(indexB))) {
smaller = b.get(indexB++);
} else {
smaller = a.get(indexA++);
}
if (count++ % 2 == 0) {
out.add(smaller);
}
}
a.clear();
b.clear();
}
private void recursiveCollapse(List<Double> buf, int level) {
ensureBuffer(level + 1);
List<Double> merged;
if (buffer.get(level + 1).isEmpty()) {
merged = buffer.get(level + 1);
} else {
merged = new ArrayList<Double>(maxElementsPerBuffer);
}
collapse(buffer.get(level), buf, merged);
if (buffer.get(level + 1) != merged) {
recursiveCollapse(merged, level + 1);
}
}
private static double round(double d) {
return Math.round(d*100000.0)/100000.0;
}
}