/* * ModeShape (http://www.modeshape.org) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.modeshape.common.statistic; import java.math.BigDecimal; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import org.modeshape.common.annotation.NotThreadSafe; import org.modeshape.common.math.MathOperations; import org.modeshape.common.util.HashCode; import org.modeshape.common.util.StringUtil; /** * A representation of a histogram of values. * * @param <T> the type of value */ @NotThreadSafe public class Histogram<T extends Number> { public static final int DEFAULT_BUCKET_COUNT = 10; public static final int DEFAULT_SIGNIFICANT_FIGURES = 4; protected final MathOperations<T> math; protected final List<T> values; private int bucketCount = DEFAULT_BUCKET_COUNT; private int significantFigures = DEFAULT_SIGNIFICANT_FIGURES; private BigDecimal bucketWidth; private LinkedList<Bucket> buckets; private BucketingStrategy actualValueStrategy = new DefaultBucketingStrategy(); private BucketingStrategy bucketingStrategy = actualValueStrategy; public Histogram( MathOperations<T> operations, List<T> values ) { this.math = operations; this.values = new LinkedList<T>(values); this.buckets = new LinkedList<Bucket>(); this.bucketWidth = null; // Sort the data using natural order ... Collections.sort(this.values, this.math.getComparator()); } @SafeVarargs public Histogram( MathOperations<T> operations, T... values ) { this(operations, Arrays.asList(values)); } public BucketingStrategy getStrategy() { return this.bucketingStrategy; } public MathOperations<T> getMathOperations() { return this.math; } /** * Set the histogram to use the standard deviation to determine the bucket sizes. * * @param median * @param standardDeviation * @param sigma */ public void setStrategy( double median, double standardDeviation, int sigma ) { this.bucketingStrategy = new StandardDeviationBucketingStrategy(median, standardDeviation, sigma); this.bucketWidth = null; } /** * Set the histogram to use the supplied minimum and maximum values to determine the bucket size. * * @param minimum * @param maximum */ public void setStrategy( T minimum, T maximum ) { this.bucketingStrategy = new ExplicitBucketingStrategy(minimum, maximum); this.bucketWidth = null; } /** * Set the histogram to use the actual minimum and maximum values to determine the bucket sizes. */ public void setStrategyToDefault() { this.bucketingStrategy = this.actualValueStrategy; this.bucketWidth = null; } public int getSignificantFigures() { return significantFigures; } /** * Set the number of significant figures used in the calculation of the bucket widths. * * @param significantFigures the number of significant figures for the bucket widths * @return this histogram, useful for method-chaining * @see #DEFAULT_SIGNIFICANT_FIGURES */ public Histogram<T> setSignificantFigures( int significantFigures ) { if (significantFigures != this.significantFigures) { this.significantFigures = significantFigures; this.bucketWidth = null; this.buckets.clear(); } return this; } /** * Return the number of buckets in this histogram. * * @return the number of buckets. */ public int getBucketCount() { return bucketCount; } /** * Set the number of buckets that this histogram will use. * * @param count the number of buckets * @return this histogram, useful for method-chaining * @see #DEFAULT_BUCKET_COUNT */ public Histogram<T> setBucketCount( int count ) { if (count != this.bucketCount) { this.bucketCount = count; this.bucketWidth = null; this.buckets.clear(); } return this; } /** * Get the buckets in this histogram. If the histogram has not yet been computed, this method will cause it to be generated. * The resulting list should not be modified. * * @return the histogram buckets. */ public List<Bucket> getBuckets() { compute(); return this.buckets; } protected void compute() { // Only compute if there is not already a histogram ... if (this.bucketWidth != null) return; // Find the lower and upper bounds of the histogram using the strategy ... T lowerBound = this.bucketingStrategy.getLowerBound(); T upperBound = this.bucketingStrategy.getUpperBound(); // Find the actual minimum and maximum values ... T actualMinimum = this.actualValueStrategy.getLowerBound(); T actualMaximum = this.actualValueStrategy.getUpperBound(); // Create the buckets ... List<T> boundaries = getBucketBoundaries(this.math, lowerBound, upperBound, actualMinimum, actualMaximum, this.bucketCount, this.significantFigures); this.buckets.clear(); int numBuckets = boundaries.isEmpty() ? 0 : boundaries.size() - 1; for (int i = 0; i != numBuckets; ++i) { this.buckets.add(new Bucket(boundaries.get(i), boundaries.get(i + 1))); } // Create the histogram by adding values to each range ... Iterator<Bucket> intervalIterator = this.buckets.iterator(); Bucket currentInterval = null; for (T value : this.values) { while (currentInterval == null || currentInterval.checkValue(value, !intervalIterator.hasNext()) > 0) { if (!intervalIterator.hasNext()) break; currentInterval = intervalIterator.next(); } if (currentInterval != null) currentInterval.addValue(value); } } /** * Return the total number of values that have gone into this histogram. * * @return the total number of values * @see Bucket#getPercentageOfValues() */ public long getTotalNumberOfValues() { return this.values.size(); } protected float getMaximumPercentage() { float maxPercentage = 0.0f; for (Bucket bucket : this.buckets) { maxPercentage = Math.max(maxPercentage, bucket.getPercentageOfValues()); } return maxPercentage; } protected long getMaximumCount() { long maxCount = 0l; for (Bucket bucket : this.buckets) { maxCount = Math.max(maxCount, bucket.getNumberOfValues()); } return maxCount; } /** * Generate a textual (horizontal) bar graph of this histogram. * * @param maxBarLength the maximum bar length, or 0 if the bar length is to represent actual counts * @return the strings that make up the histogram */ public List<String> getTextGraph( int maxBarLength ) { compute(); if (maxBarLength < 1) maxBarLength = (int)this.getMaximumCount(); final float barLengthForHundredPercent = this.buckets.isEmpty() ? maxBarLength : 100.0f * maxBarLength / getMaximumPercentage(); final String fullLengthBar = StringUtil.createString('*', (int)barLengthForHundredPercent); List<String> result = new LinkedList<String>(); // First calculate the labels and the max length ... int maxLowerBoundLength = 0; int maxUpperBoundLength = 0; for (Bucket bucket : this.buckets) { maxLowerBoundLength = Math.max(bucket.getLowerBound().toString().length(), maxLowerBoundLength); maxUpperBoundLength = Math.max(bucket.getUpperBound().toString().length(), maxUpperBoundLength); } // Create the header ... int rangeWidth = 1 + maxLowerBoundLength + 3 + maxUpperBoundLength + 1; int barWidth = maxBarLength + 20; result.add(StringUtil.justifyLeft("Ranges", rangeWidth, ' ') + " Distribution"); result.add(StringUtil.createString('-', rangeWidth) + ' ' + StringUtil.createString('-', barWidth)); for (Bucket bucket : this.buckets) { float percent = bucket.getPercentageOfValues(); long number = bucket.getNumberOfValues(); StringBuilder sb = new StringBuilder(); sb.append("["); sb.append(StringUtil.justifyLeft(bucket.getLowerBound().toString(), maxLowerBoundLength, ' ')); sb.append(" - "); sb.append(StringUtil.justifyLeft(bucket.getUpperBound().toString(), maxUpperBoundLength, ' ')); sb.append("] "); int barLength = Math.max((int)(barLengthForHundredPercent * percent / 100.0f), 0); if (barLength == 0 && number != 0) barLength = 1; // make sure there is a bar for all non-zero buckets sb.append(fullLengthBar.substring(0, barLength)); if (number != 0) { sb.append(" "); sb.append(number); sb.append(" ("); sb.append(new DecimalFormat("###.#").format(percent)); sb.append("%)"); } result.add(sb.toString()); } return result; } protected static <T> List<T> getBucketBoundaries( MathOperations<T> math, T lowerBound, T upperBound, T actualMinimum, T actualMaximum, int bucketCount, int bucketWidthSigFigs ) { lowerBound = math.compare(lowerBound, actualMinimum) < 0 ? actualMinimum : lowerBound; upperBound = math.compare(actualMaximum, upperBound) < 0 ? actualMaximum : upperBound; if (math.compare(lowerBound, upperBound) == 0) { List<T> boundaries = new ArrayList<T>(); boundaries.add(lowerBound); boundaries.add(upperBound); return boundaries; } final boolean extraLowerBucketNeeded = math.compare(lowerBound, actualMinimum) > 0; final boolean extraUpperBucketNeeded = math.compare(actualMaximum, upperBound) > 0; if (extraLowerBucketNeeded) --bucketCount; if (extraUpperBucketNeeded) --bucketCount; // Compute the delta between the lower and upper bound ... T totalWidth = math.subtract(upperBound, lowerBound); int totalWidthScale = math.getExponentInScientificNotation(totalWidth); // Modify the lower bound by rounding down to the next lower meaningful value, // using the scale of the totalWidth to determine how to round down. T roundedLowerBound = math.roundDown(lowerBound, -totalWidthScale); T roundedUpperBound = math.roundUp(upperBound, -totalWidthScale); // Create the ranges ... double finalLowerBound = math.doubleValue(roundedLowerBound); double finalUpperBound = math.doubleValue(roundedUpperBound); double finalBucketCount = bucketCount; double bucketWidth = (finalUpperBound - finalLowerBound) / finalBucketCount; // DoubleOperations doubleOps = new DoubleOperations(); // bucketWidth = doubleOps.keepSignificantFigures(bucketWidth,bucketWidthSigFigs); List<T> boundaries = new ArrayList<T>(); if (bucketWidth > 0.0d) { if (extraLowerBucketNeeded) boundaries.add(actualMinimum); double nextBoundary = finalLowerBound; for (int i = 0; i != bucketCount; ++i) { boundaries.add(math.create(nextBoundary)); nextBoundary = nextBoundary + bucketWidth; // nextBoundary = doubleOps.roundUp(nextBoundary + bucketWidth, bucketWidthSigFigs ); } boundaries.add(roundedUpperBound); if (extraUpperBucketNeeded) boundaries.add(actualMaximum); } return boundaries; } /** * Represents a bucket in a histogram. */ public class Bucket implements Comparable<Bucket> { private final T lowerBound; private final T upperBound; private final T width; private long numValues; protected Bucket( T lowerBound, T upperBound ) { this.lowerBound = lowerBound; this.upperBound = upperBound; this.width = Histogram.this.math.subtract(upperBound, lowerBound); } /** * Get the lower bound of this bucket. * * @return the lower bound */ public T getLowerBound() { return lowerBound; } /** * Get the upper bound of this bucket. * * @return the upper bound */ public T getUpperBound() { return upperBound; } /** * Get the width of this bucket. * * @return the width */ public T getWidth() { return this.width; } /** * Return the percentage of values in the histogram that appear in this bucket. * * @return the percentage of all values in the histogram that appear in this bucket. */ public float getPercentageOfValues() { float total = Histogram.this.getTotalNumberOfValues(); if (total == 0.0f) return 0.0f; float numValuesFloat = this.numValues; return 100.0f * numValuesFloat / total; } /** * Add a value to this bucket * * @param value */ protected void addValue( T value ) { ++this.numValues; } /** * Get the number of values in this bucket. * * @return the number of values */ public long getNumberOfValues() { return this.numValues; } /** * Check whether the value fits in this bucket. * * @param value the value to check * @param isLast * @return 0 if the value fits in this bucket, -1 if the value fits in a prior bucket, or 1 if the value fits in a later * bucket */ public int checkValue( T value, boolean isLast ) { if (Histogram.this.math.compare(this.lowerBound, value) > 0) return -1; if (isLast) { if (Histogram.this.math.compare(value, this.upperBound) > 0) return 1; } else { if (Histogram.this.math.compare(value, this.upperBound) >= 0) return 1; } return 0; } @Override public int compareTo( Bucket that ) { // This is lower if 'that' has a lowerBound that is greater than 'this' lower bound ... if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) < 0) return -1; if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) > 0) return 1; // The lower bounds are the same, so 'this' is lower if 'that' has an upperBound that is greater than 'this' lower // bound ... if (Histogram.this.math.compare(this.upperBound, that.upperBound) < 0) return -1; if (Histogram.this.math.compare(this.upperBound, that.upperBound) > 0) return 1; return 0; } protected Class<T> getNumberClass() { return Histogram.this.math.getOperandClass(); } @Override public int hashCode() { // Equals asserts that two buckets are equal when all values are equal ... return HashCode.compute(lowerBound, upperBound, width); } @SuppressWarnings( "unchecked" ) @Override public boolean equals( Object obj ) { if (obj instanceof Histogram.Bucket) { Bucket that = (Bucket)obj; if (this.getNumberClass().isAssignableFrom(that.getNumberClass())) { if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) != 0) return false; if (Histogram.this.math.compare(this.upperBound, that.upperBound) != 0) return false; if (Histogram.this.math.compare(this.width, that.width) != 0) return false; return true; } } return false; } @Override public String toString() { return "[" + this.lowerBound + "," + this.upperBound + ")"; } } public abstract class BucketingStrategy { public List<T> getValues() { return Histogram.this.values; } public abstract T getLowerBound(); public abstract T getUpperBound(); } public class DefaultBucketingStrategy extends BucketingStrategy { @Override public T getLowerBound() { if (getValues().isEmpty()) return Histogram.this.math.createZeroValue(); return getValues().get(0); } @Override public T getUpperBound() { if (getValues().isEmpty()) return Histogram.this.math.createZeroValue(); return getValues().get(getValues().size() - 1); } } public class ExplicitBucketingStrategy extends BucketingStrategy { private final T lowerBound; private final T upperBound; protected ExplicitBucketingStrategy( T lowerBound, T upperBound ) { this.lowerBound = lowerBound; this.upperBound = upperBound; } @Override public T getLowerBound() { return this.lowerBound; } @Override public T getUpperBound() { return this.upperBound; } } public class StandardDeviationBucketingStrategy extends BucketingStrategy { private final double median; private final double standardDeviation; private final int numberOfDeviationsAboveAndBelow; protected StandardDeviationBucketingStrategy( double median, double standardDeviation, int numDeviationsAboveAndBelow ) { this.median = median; this.standardDeviation = Math.abs(standardDeviation); this.numberOfDeviationsAboveAndBelow = Math.abs(numDeviationsAboveAndBelow); } @Override public T getLowerBound() { double lower = this.median - (standardDeviation * numberOfDeviationsAboveAndBelow); return Histogram.this.math.create(lower); } @Override public T getUpperBound() { double upper = this.median + (standardDeviation * numberOfDeviationsAboveAndBelow); return Histogram.this.math.create(upper); } } }