/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.cassandra.utils; import java.util.Arrays; /** * Simple class for constructing an EsimtatedHistogram from a set of predetermined values */ public class HistogramBuilder { public static final long[] EMPTY_LONG_ARRAY = new long[]{}; public static final long[] ZERO = new long[]{ 0 }; public HistogramBuilder() {} public HistogramBuilder(long[] values) { for (long value : values) { add(value); } } private long[] values = new long[10]; int count = 0; public void add(long value) { if (count == values.length) values = Arrays.copyOf(values, values.length << 1); values[count++] = value; } /** * See {@link #buildWithStdevRangesAroundMean(int)} * @return buildWithStdevRangesAroundMean(3) */ public EstimatedHistogram buildWithStdevRangesAroundMean() { return buildWithStdevRangesAroundMean(3); } /** * Calculate the min, mean, max and standard deviation of the items in the builder, and * generate an EstimatedHistogram with upto <code>maxdev</code> stdev size ranges either * side of the mean, until min/max are hit; if either min/max are not reached a further range is * inserted at the relevant ends. e.g., with a <code>maxdevs</code> of 3, there may be <i>up to</i> 8 ranges * (between 9 boundaries, the middle being the mean); the middle 6 will have the same size (stdev) * with the outermost two stretching out to min and max. * * @param maxdevs * @return */ public EstimatedHistogram buildWithStdevRangesAroundMean(int maxdevs) { if (maxdevs < 0) throw new IllegalArgumentException("maxdevs must be greater than or equal to zero"); final int count = this.count; final long[] values = this.values; if (count == 0) return new EstimatedHistogram(EMPTY_LONG_ARRAY, ZERO); long min = Long.MAX_VALUE, max = Long.MIN_VALUE; double sum = 0, sumsq = 0; for (int i = 0 ; i < count ; i++) { final long value = values[i]; sum += value; sumsq += value * value; if (value < min) min = value; if (value > max) max = value; } final long mean = Math.round(sum / count); final double stdev = Math.sqrt((sumsq / count) - (mean * (double) mean)); // build the ranges either side of the mean final long[] lowhalf = buildRange(mean, min, true, stdev, maxdevs); final long[] highhalf = buildRange(mean, max, false, stdev, maxdevs); // combine the ranges final long[] ranges = new long[lowhalf.length + highhalf.length + 1]; System.arraycopy(lowhalf, 0, ranges, 0, lowhalf.length); ranges[lowhalf.length] = mean; System.arraycopy(highhalf, 0, ranges, lowhalf.length + 1, highhalf.length); final EstimatedHistogram hist = new EstimatedHistogram(ranges, new long[ranges.length + 1]); for (int i = 0 ; i < count ; i++) hist.add(values[i]); return hist; } private static long[] buildRange(long mean, long minormax, boolean ismin, double stdev, int maxdevs) { if (minormax == mean) // minormax == mean we have no range to produce, but given the exclusive starts // that begin at zero by default (or -Inf) in EstimatedHistogram we have to generate a min range // to indicate where we start from return ismin ? new long[] { mean - 1 } : EMPTY_LONG_ARRAY; if (stdev < 1) { // deal with stdevs too small to generate sensible ranges return ismin ? new long[] { minormax - 1, mean - 1 } : new long[] { minormax }; } long larger, smaller; if (ismin) { larger = mean; smaller = minormax; } else { larger = minormax; smaller = mean; } double stdevsTo = (larger - smaller) / stdev; if (stdevsTo > 0 && stdevsTo < 1) // always round up if there's just one non-empty range stdevsTo = 1; else // otherwise round to the nearest half stdev, to avoid tiny ranges at the start/end stdevsTo = Math.round(stdevsTo); // limit to 4 stdev ranges - last range will contain everything to boundary final int len = Math.min(maxdevs + 1, (int) stdevsTo); final long[] range = new long[len]; long next = ismin ? minormax - 1 : minormax; for (int i = 0 ; i < range.length ; i++) { long delta = (range.length - (i + 1)) * (long) stdev; if (ismin) { range[i] = next; next = mean - delta; } else { range[len - 1 - i] = next; next = mean + delta; } } return range; } }