/** * Copyright 2011 LiveRamp * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.liveramp.hank.partition_server; import java.text.DecimalFormat; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Random; import com.google.common.collect.Lists; import com.liveramp.hank.generated.LatencySampleSummary; public class DoublePopulationStatisticsAggregator { private static int POPULATION_RANDOM_SAMPLE_MAX_SIZE = 100; private static DecimalFormat format3 = new DecimalFormat("#.###"); private static DecimalFormat format1 = new DecimalFormat("#.#"); private static DecimalFormat format0 = new DecimalFormat("#"); private double minimum; private double maximum; private long numValues; private double total; private final ReservoirSample reservoirSample = new ReservoirSample(POPULATION_RANDOM_SAMPLE_MAX_SIZE); private final Random random = new Random(); public DoublePopulationStatisticsAggregator() { clear(); } public DoublePopulationStatisticsAggregator(double minimum, double maximum, long numValues, double total, double[] randomSample) { this.minimum = minimum; this.maximum = maximum; this.numValues = numValues; this.total = total; this.reservoirSample.sample(randomSample, random); } public void clear() { minimum = Double.MAX_VALUE; maximum = Double.MIN_VALUE; numValues = 0; total = 0.0; reservoirSample.clear(); } public void aggregate(double minimum, double maximum, long numValues, double total, double[] values) { if (maximum > this.maximum) { this.maximum = maximum; } if (minimum < this.minimum) { this.minimum = minimum; } this.numValues += numValues; this.total += total; this.reservoirSample.sample(values, random); } public static DoublePopulationStatisticsAggregator combine(Collection<DoublePopulationStatisticsAggregator> aggregators) { Random random = new Random(); DoublePopulationStatisticsAggregator result = new DoublePopulationStatisticsAggregator(); for (DoublePopulationStatisticsAggregator aggregator : aggregators) { if (aggregator.maximum > result.maximum) { result.maximum = aggregator.maximum; } if (aggregator.minimum < result.minimum) { result.minimum = aggregator.minimum; } result.numValues += aggregator.numValues; result.total += aggregator.total; } // Now we can create a stream of values based on a // random sample of the aggregators reservoirs but // with the number of values in their streams a weight // and reservoir sample that. This assumes that all given reservoirs // have the same size. for (DoublePopulationStatisticsAggregator aggregator : aggregators) { double ratio = (double) aggregator.numValues / (double) result.numValues; for (double value : aggregator.reservoirSample.getReservoir()) { if ((Math.abs(random.nextDouble()) / Double.MAX_VALUE) < ratio) { result.reservoirSample.sample(value, random); } } } return result; } public Double getMaximum() { if (maximum == Double.MIN_VALUE) { return null; } return maximum; } public Double getMinimum() { if (minimum == Double.MAX_VALUE) { return null; } return minimum; } public double getMean() { if (numValues != 0) { return total / numValues; } else { return 0; } } public double[] computeDeciles() { double[] result = new double[9]; Arrays.fill(result, 0.0); if (reservoirSample.getSize() > 0) { // Sort valid reservoir values first Arrays.sort(reservoirSample.getReservoir(), 0, reservoirSample.getSize()); // Compute deciles for (int i = 0; i < 9; ++i) { result[i] = getSortedPopulationDecile(reservoirSample.getReservoir(), i + 1, reservoirSample.getSize()); } } return result; } public LatencySampleSummary getStatistics(){ LatencySampleSummary summary = new LatencySampleSummary(); summary.set_minimum(minimum); summary.set_maximum(maximum); summary.set_num_values(numValues); summary.set_total(total); List<Double> deciles = Lists.newArrayList(); for (double v : computeDeciles()) { deciles.add(v); } summary.set_deciles(deciles); return summary; } public static String toString(DoublePopulationStatisticsAggregator populationStatistics) { StringBuilder result = new StringBuilder(); result.append(populationStatistics.minimum); result.append(' '); result.append(populationStatistics.maximum); result.append(' '); result.append(populationStatistics.numValues); result.append(' '); result.append(populationStatistics.total); for (int i = 0; i < populationStatistics.reservoirSample.getSize(); ++i) { result.append(' '); result.append(populationStatistics.reservoirSample.getReservoir()[i]); } return result.toString(); } public static String formatDouble(double value) { if (value < 1) { return format3.format(value); } else if (value < 100) { return format1.format(value); } else { return format0.format(value); } } public String format() { double[] deciles = computeDeciles(); StringBuilder result = new StringBuilder(); // Compute median double median = numValues == 0 ? 0 : deciles[4]; // Compute 90% percentile double ninetiethPercentile = numValues == 0 ? 0 : deciles[8]; result.append(formatDouble(getMean())); result.append(" / "); result.append(formatDouble(median)); result.append(" / "); result.append(formatDouble(ninetiethPercentile)); result.append(" ms"); return result.toString(); } public class ReservoirSample { private final double[] reservoir; private int size; private int count; public ReservoirSample(int reservoirMaxSize) { reservoir = new double[reservoirMaxSize]; clear(); } public void sample(double[] values, Random random) { for (double value : values) { sample(value, random); } } public void sample(double value, Random random) { if (count < reservoir.length) { reservoir[size++] = value; } else { if (random.nextInt(count) < reservoir.length) { reservoir[random.nextInt(reservoir.length)] = value; } } ++count; } public void sample(ReservoirSample other, Random random) { for (int i = 0; i < other.getSize(); ++i) { sample(other.getReservoir()[i], random); } } public double[] getReservoir() { return reservoir; } public int getSize() { return size; } public void clear() { size = 0; count = 0; } } public static double getSortedPopulationDecile(double[] population, int decile, int endIndex) { return getInterpolatedValueAtIndex(population, getDecileIndex(endIndex, decile)); } public static double getSortedPopulationDecile(long[] population, int decile, int endIndex) { return getInterpolatedValueAtIndex(population, getDecileIndex(endIndex, decile)); } public static double getSortedPopulationDecile(double[] population, int decile) { return getInterpolatedValueAtIndex(population, getDecileIndex(population.length, decile)); } public static double getSortedPopulationDecile(long[] population, int decile) { return getInterpolatedValueAtIndex(population, getDecileIndex(population.length, decile)); } public static double getDecileIndex(long size, int decile) { if (decile < 1 || decile > 9) { throw new RuntimeException("Invalid decile: " + decile); } return ((size - 1) / 10.0) * decile; } public static double getInterpolatedValueAtIndex(double[] population, double rank) { double rankFloored = Math.floor(rank); double remainder = rank - rankFloored; if (remainder == 0 || ((int) rankFloored) == population.length - 1) { return population[(int) rankFloored]; } // Return interpolated value at index return population[(int) rankFloored] + (remainder * (population[(int) rankFloored + 1] - population[(int) rankFloored])); } public static double getInterpolatedValueAtIndex(long[] population, double rank) { double rankFloored = Math.floor(rank); double remainder = rank - rankFloored; if (remainder == 0 || ((int) rankFloored) == population.length - 1) { return population[(int) rankFloored]; } // Return interpolated value at index return population[(int) rankFloored] + (remainder * (population[(int) rankFloored + 1] - population[(int) rankFloored])); } }