/*
 * ModeShape (http://www.modeshape.org)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.modeshape.common.statistic;

import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.locks.Lock;
import org.modeshape.common.annotation.ThreadSafe;
import org.modeshape.common.math.MathOperations;
import org.modeshape.common.text.Inflector;
import org.modeshape.common.util.StringUtil;

/**
 * Encapsulation of the statistics for a series of values to which new values are frequently added. The statistics include the
 * {@link #getMinimum() minimum}, {@link #getMaximum() maximum}, {@link #getTotal() total (aggregate sum)},
 * {@link #getMean() mean (average)}, {@link #getMedian() median}, {@link #getStandardDeviation() standard deviation} and the
 * {@link #getHistogram() histogram} of the values.
 * <p>
 * This class uses an efficient running calculation of the mean and standard deviation that is not as susceptible to roundoff
 * errors as other traditional algorithms. The recursive algorithm is as follows, where M is the mean value, sigma is the
 * standard deviation, and S is a variable used in the calculation of sigma:
 * </p>
 *
 * <pre>
 *   M(1) = x(1)
 *   S(1) = 0
 *   M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
 *   S(k) = S(k-1) + ( x(k) - M(k-1) ) * (x(k) - M(k))
 * </pre>
 *
 * Then, the standard deviation for n values in x is
 *
 * <pre>
 *   sigma = sqrt(S(n) / n)
 * </pre>
 *
 * <p>
 * Unlike the other quantities, the median value (the value at which half of the values are greater and half the values are lower)
 * cannot be calculated incrementally. Therefore, this class does record the values so that the median can be properly calculated.
 * This fact should be kept in mind when performing statistics on large numbers of values.
 * </p>
 * <p>
 * This class is threadsafe.
 * </p>
 *
 * @param <T> the number type for these statistics
 */
@ThreadSafe
public class DetailedStatistics<T extends Number> extends SimpleStatistics<T> {

    /** The cached median as an instance of the operand type; refreshed together with {@link #medianValue}. */
    private T median;
    /** The cached median, or null when values were added since the median was last computed. */
    private Double medianValue;
    private double s = 0.0d; // used in the calculation of standard deviation (sigma)
    private double sigma = 0.0d;
    /** The recorded values, kept in insertion order. */
    private final List<T> values = new LinkedList<T>();
    private final List<T> unmodifiableValues = Collections.unmodifiableList(this.values);
    /** The histogram most recently built by {@link #getHistogram(int)}. */
    private Histogram<T> histogram;

    /**
     * Create an empty set of statistics.
     *
     * @param operations the math operations for the number type
     */
    public DetailedStatistics( MathOperations<T> operations ) {
        super(operations);
        this.medianValue = 0.0d;
        this.median = this.math.createZeroValue();
    }

    /**
     * Get the values that have been recorded in these statistics. The contents of this list may change if new values are
     * {@link #add(Number) added} in another thread.
     *
     * @return the unmodifiable collection of values, in insertion order
     */
    public List<T> getValues() {
        return this.unmodifiableValues;
    }

    /**
     * Record the supplied value, invalidate the cached median, and update the running sum of squared differences and the
     * standard deviation. Null values are ignored.
     * <p>
     * NOTE(review): no lock is taken here; presumably the superclass invokes this method while holding the write lock -
     * confirm against {@code SimpleStatistics}.
     * </p>
     *
     * @param value the value to add; ignored if null
     */
    @Override
    protected void doAddValue( T value ) {
        if (value == null) {
            return;
        }
        // The recurrence needs the mean as it was before this value is added ...
        double previousMean = this.getMeanValue();
        super.doAddValue(value);
        this.values.add(value);
        // The median is recomputed lazily by getMedianValue() ...
        this.medianValue = null;

        // Calculate the mean and standard deviation ...
        int count = getCount();
        if (count == 1) {
            this.s = 0.0d;
            this.sigma = 0.0d;
        } else {
            double dValue = value.doubleValue();
            double dCount = count;
            // M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
            double meanValue = previousMean + ((dValue - previousMean) / dCount);
            // S(k) = S(k-1) + ( x(k) - M(k-1) ) * ( x(k) - M(k) )
            this.s = this.s + (dValue - previousMean) * (dValue - meanValue);
            // sigma = sqrt( S(n) / n )
            this.sigma = Math.sqrt(this.s / dCount);
        }
    }

    /**
     * Return the approximate median value represented as an instance of the operand type. Note that this may truncate if the
     * operand type is not able to have the required precision. For the accurate median, see {@link #getMedianValue()}.
     *
     * @return the median, or the zero value if the {@link #getCount() count} is 0
     */
    public T getMedian() {
        // Hold the write lock across both steps so the returned instance matches the median that was just computed.
        // getMedianValue() acquires the same lock again, exactly as it does when called from getHistogram(int).
        Lock lock = this.getLock().writeLock();
        lock.lock();
        try {
            getMedianValue();
            return this.median;
        } finally {
            lock.unlock();
        }
    }

    /**
     * Return the median value. The median is computed lazily from a sorted copy of the recorded values and is cached until
     * another value is added, so the order of the list returned by {@link #getValues()} is never disturbed.
     *
     * @return the median value, or 0.0 if the {@link #getCount() count} is 0
     * @see #getMedian()
     */
    public double getMedianValue() {
        Lock lock = this.getLock().writeLock();
        // Acquire the lock before entering the try block so that 'unlock()' only runs if the lock is actually held ...
        lock.lock();
        try {
            int count = this.values.size();
            if (count == 0) {
                return 0.0d;
            }
            if (this.medianValue == null) {
                // Sort a copy of the values in numerical order, leaving the recorded values in insertion order ...
                Comparator<T> comparator = this.math.getComparator();
                List<T> sortedValues = new LinkedList<T>(this.values);
                Collections.sort(sortedValues, comparator);

                double newMedian;
                if (count % 2 != 0) {
                    // There is an odd number of values (possibly just one), so use the value in the middle ...
                    newMedian = sortedValues.get(count / 2).doubleValue();
                } else {
                    // Otherwise, there is an even number of values, so find the average of the middle two values ...
                    int upperMiddleValueIndex = count / 2;
                    int lowerMiddleValueIndex = upperMiddleValueIndex - 1;
                    double lowerValue = sortedValues.get(lowerMiddleValueIndex).doubleValue();
                    double upperValue = sortedValues.get(upperMiddleValueIndex).doubleValue();
                    newMedian = (lowerValue + upperValue) / 2.0d;
                }
                this.medianValue = newMedian;
                this.median = this.math.create(newMedian);
                this.histogram = null;
            }
            // Read the cached value while still holding the lock, so it cannot be invalidated before it is unboxed ...
            return this.medianValue;
        } finally {
            lock.unlock();
        }
    }

    /**
     * Return the standard deviation. The standard deviation is a measure of the variation in a series of values. A series of
     * values with a lower standard deviation has less variance than a series of values with a higher standard deviation.
     *
     * @return the standard deviation, or 0.0 if the {@link #getCount() count} is 0 or if all of the values are the same.
     */
    public double getStandardDeviation() {
        Lock lock = this.getLock().readLock();
        lock.lock();
        try {
            return this.sigma;
        } finally {
            lock.unlock();
        }
    }

    /**
     * Return the histogram of the {@link #getValues() values}. This method returns a histogram where all of the buckets have
     * the same width, which is the result of calling {@link #getHistogram(int)} with 'numSigmas' set to 0. For other
     * variations, see {@link #getHistogram(int)}.
     *
     * @return the histogram
     * @see #getHistogram(int)
     */
    public Histogram<T> getHistogram() {
        return getHistogram(0);
    }

    /**
     * Return the histogram of the {@link #getValues() values}. This method is capable of creating two kinds of histograms. The
     * first kind is a histogram where all of the buckets have the same width. In this case, the 'numSigmas' should be set to
     * 0. See {@link #getHistogram()}.
     * <p>
     * The second kind of histogram is more useful when most of the data is clustered near one value. This histogram is focused
     * around the values that are up to 'numSigmas' above and below the {@link #getMedian() median}, and all values outside of
     * this range are placed in the first and last bucket.
     * </p>
     *
     * @param numSigmas the number of standard deviations from the {@link #getMedian() median}, or 0 if the buckets of the
     *        histogram should be evenly distributed
     * @return the histogram
     * @see #getHistogram()
     */
    public Histogram<T> getHistogram( int numSigmas ) {
        Lock lock = this.getLock().writeLock();
        lock.lock();
        try {
            Histogram<T> hist = new Histogram<T>(this.math, this.values);
            if (numSigmas > 0) {
                // The 'getMedianValue()' method may clear the cached histogram, so only cache the new one afterward ...
                hist.setStrategy(this.getMedianValue(), this.getStandardDeviation(), numSigmas);
            }
            this.histogram = hist;
            return this.histogram;
        } finally {
            lock.unlock();
        }
    }

    /**
     * Reset these statistics to the empty state: no recorded values, a zero median, a zero standard deviation, and no cached
     * histogram.
     */
    @Override
    protected void doReset() {
        super.doReset();
        this.medianValue = 0.0d;
        this.median = this.math.createZeroValue();
        this.s = 0.0d;
        this.sigma = 0.0d;
        this.values.clear();
        // Do not keep a histogram that was built from the values that were just discarded ...
        this.histogram = null;
    }

    /**
     * Return a one-line summary containing the sample count, minimum, mean, median, standard deviation and maximum.
     *
     * @return the summary string
     */
    @Override
    public String toString() {
        int count = this.getCount();
        String samples = Inflector.getInstance().pluralize("sample", count);
        return StringUtil.createString("{0} {1}: min={2}; avg={3}; median={4}; stddev={5}; max={6}",
                                       count,
                                       samples,
                                       this.getMinimum(),
                                       this.getMean(),
                                       this.getMedian(),
                                       this.getStandardDeviation(),
                                       this.getMaximum());
    }
}