/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.math;

import static com.google.common.base.Preconditions.checkState;
import static com.google.common.math.DoubleUtils.ensureNonNegative;
import static com.google.common.primitives.Doubles.isFinite;
import static java.lang.Double.NaN;
import static java.lang.Double.isNaN;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import java.util.Iterator;

/**
 * A mutable accumulator of {@code double} values that maintains a handful of basic statistics
 * (count, mean, sum of squared deltas, minimum and maximum) over everything added so far. Values
 * can be fed in one at a time or in bulk. Instances are not thread safe.
 *
 * @author Pete Gillin
 * @author Kevin Bourrillion
 * @since 20.0
 */
@Beta
@GwtIncompatible
public final class StatsAccumulator {

  // The state below has to be acceptable both to the Stats constructor (see snapshot()) and to
  // the statistic methods of this class.

  /** Number of values added so far. */
  private long count = 0;

  /**
   * Running mean. While the accumulator is empty any finite value would do, because the only use
   * of it in that state is being multiplied by a zero count in {@link #sum()}.
   */
  private double mean = 0.0;

  /** Running sum of squared deltas from the mean; NaN once any non-finite value is involved. */
  private double sumOfSquaresOfDeltas = 0.0;

  /** Running minimum; the initial value is irrelevant because the first value overwrites it. */
  private double min = NaN;

  /** Running maximum; the initial value is irrelevant because the first value overwrites it. */
  private double max = NaN;

  /**
   * Adds a single value to the dataset.
   *
   * @param value the value to add
   */
  public void add(double value) {
    if (count == 0) {
      // First value: it defines every statistic on its own.
      count = 1;
      mean = value;
      min = value;
      max = value;
      if (!isFinite(value)) {
        sumOfSquaresOfDeltas = NaN;
      }
      return;
    }

    count++;
    if (!isFinite(value) || !isFinite(mean)) {
      // The incremental formula below is meaningless once a non-finite number is involved.
      mean = calculateNewMeanNonFinite(mean, value);
      sumOfSquaresOfDeltas = NaN;
    } else {
      // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
      double deltaFromOldMean = value - mean;
      mean += deltaFromOldMean / count;
      sumOfSquaresOfDeltas += deltaFromOldMean * (value - mean);
    }
    min = Math.min(min, value);
    max = Math.max(max, value);
  }

  /**
   * Adds every value produced by the given iterable to the dataset.
   *
   * @param values a series of values, which will be converted to {@code double} values (this may
   *     cause loss of precision)
   */
  public void addAll(Iterable<? extends Number> values) {
    addAll(values.iterator());
  }

  /**
   * Adds every remaining value of the given iterator to the dataset.
   *
   * @param values a series of values, which will be converted to {@code double} values (this may
   *     cause loss of precision)
   */
  public void addAll(Iterator<? extends Number> values) {
    while (values.hasNext()) {
      Number next = values.next();
      add(next.doubleValue());
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values
   */
  public void addAll(double... values) {
    for (int i = 0; i < values.length; i++) {
      add(values[i]);
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values
   */
  public void addAll(int... values) {
    for (int i = 0; i < values.length; i++) {
      add(values[i]);
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values, which will be converted to {@code double} values (this may
   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
   */
  public void addAll(long... values) {
    for (int i = 0; i < values.length; i++) {
      add(values[i]);
    }
  }

  /**
   * Folds the given statistics into the dataset, as if the individual values they were computed
   * from had been added directly.
   *
   * @param values the statistics to merge in; an empty {@code Stats} leaves this accumulator
   *     untouched
   */
  public void addAll(Stats values) {
    long otherCount = values.count();
    if (otherCount == 0) {
      return;
    }
    double otherMean = values.mean();
    double otherSumOfSquaresOfDeltas = values.sumOfSquaresOfDeltas();
    double otherMin = values.min();
    double otherMax = values.max();

    if (count == 0) {
      // Nothing accumulated yet: simply adopt the incoming statistics.
      count = otherCount;
      mean = otherMean;
      sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas;
      min = otherMin;
      max = otherMax;
      return;
    }

    count += otherCount;
    if (!isFinite(mean) || !isFinite(otherMean)) {
      mean = calculateNewMeanNonFinite(mean, otherMean);
      sumOfSquaresOfDeltas = NaN;
    } else {
      // This is a generalized version of the calculation in add(double) above.
      double deltaBetweenMeans = otherMean - mean;
      mean += deltaBetweenMeans * otherCount / count;
      sumOfSquaresOfDeltas +=
          otherSumOfSquaresOfDeltas + deltaBetweenMeans * (otherMean - mean) * otherCount;
    }
    min = Math.min(min, otherMin);
    max = Math.max(max, otherMax);
  }

  /**
   * Returns an immutable snapshot of the current statistics.
   */
  public Stats snapshot() {
    return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
  }

  /**
   * Returns the number of values.
   */
  public long count() {
    return count;
  }

  /**
   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
   * values. The count must be non-zero.
   *
   * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
   * the arithmetic mean of the population.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>The result is {@link Double#NaN} if the dataset contains {@link Double#NaN}, or if it
   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY}. The
   * result is {@link Double#POSITIVE_INFINITY} if the dataset contains
   * {@link Double#POSITIVE_INFINITY} either alone or together with finite values only. Likewise,
   * the result is {@link Double#NEGATIVE_INFINITY} if the dataset contains
   * {@link Double#NEGATIVE_INFINITY} either alone or together with finite values only.
   *
   * @throws IllegalStateException if the dataset is empty
   */
  public double mean() {
    checkNotEmpty();
    return mean;
  }

  /**
   * Returns the sum of the values.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>The result is {@link Double#NaN} if the dataset contains {@link Double#NaN}, or if it
   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY}. The
   * result is {@link Double#POSITIVE_INFINITY} if the dataset contains
   * {@link Double#POSITIVE_INFINITY} either alone or together with finite values only. Likewise,
   * the result is {@link Double#NEGATIVE_INFINITY} if the dataset contains
   * {@link Double#NEGATIVE_INFINITY} either alone or together with finite values only.
   */
  public final double sum() {
    return mean * count;
  }

  /**
   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
   * variance</a> of the values. The count must be non-zero.
   *
   * <p>The result is guaranteed to be zero when the dataset holds exactly one finite value. Because
   * of numerical error it is not guaranteed to be zero when the dataset is the same value repeated
   * several times, but it is guaranteed never to be negative.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
   * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
   *
   * @throws IllegalStateException if the dataset is empty
   */
  public final double populationVariance() {
    checkNotEmpty();
    if (isNaN(sumOfSquaresOfDeltas)) {
      return NaN;
    }
    return (count == 1) ? 0.0 : ensureNonNegative(sumOfSquaresOfDeltas) / count;
  }

  /**
   * Returns the
   * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
   * population standard deviation</a> of the values. The count must be non-zero.
   *
   * <p>The result is guaranteed to be zero when the dataset holds exactly one finite value. Because
   * of numerical error it is not guaranteed to be zero when the dataset is the same value repeated
   * several times, but it is guaranteed never to be negative.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
   * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
   *
   * @throws IllegalStateException if the dataset is empty
   */
  public final double populationStandardDeviation() {
    double variance = populationVariance();
    return Math.sqrt(variance);
  }

  /**
   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
   * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
   * unbiased estimator of the population variance of the population. The count must be greater than
   * one.
   *
   * <p>Because of numerical error the result is not guaranteed to be zero when the dataset is the
   * same value repeated several times, but it is guaranteed never to be negative.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
   * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
   *
   * @throws IllegalStateException if the dataset is empty or contains a single value
   */
  public final double sampleVariance() {
    checkState(count > 1);
    return isNaN(sumOfSquaresOfDeltas)
        ? NaN
        : ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
  }

  /**
   * Returns the
   * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
   * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
   * population, this is an estimator of the population standard deviation of the population which
   * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
   * the distribution). The count must be greater than one.
   *
   * <p>Because of numerical error the result is not guaranteed to be zero when the dataset is the
   * same value repeated several times, but it is guaranteed never to be negative.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
   * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
   *
   * @throws IllegalStateException if the dataset is empty or contains a single value
   */
  public final double sampleStandardDeviation() {
    double variance = sampleVariance();
    return Math.sqrt(variance);
  }

  /**
   * Returns the lowest value in the dataset. The count must be non-zero.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is
   * {@link Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite
   * values only then the result is the lowest finite value. If it contains
   * {@link Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
   *
   * @throws IllegalStateException if the dataset is empty
   */
  public double min() {
    checkNotEmpty();
    return min;
  }

  /**
   * Returns the highest value in the dataset. The count must be non-zero.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is
   * {@link Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite
   * values only then the result is the highest finite value. If it contains
   * {@link Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
   *
   * @throws IllegalStateException if the dataset is empty
   */
  public double max() {
    checkNotEmpty();
    return max;
  }

  /** Returns the raw running sum of squared deltas from the mean, without any precondition. */
  double sumOfSquaresOfDeltas() {
    return sumOfSquaresOfDeltas;
  }

  /** Throws {@link IllegalStateException} unless at least one value has been added. */
  private void checkNotEmpty() {
    checkState(count != 0);
  }

  /**
   * Calculates the new value for the accumulated mean when a value is added, in the case where at
   * least one of the previous mean and the value is non-finite.
   *
   * @param previousMean the mean accumulated before the new value
   * @param value the value (or mean of a batch of values) being added
   * @return the mean to store after the addition
   */
  static double calculateNewMeanNonFinite(double previousMean, double value) {
    /*
     * The goal is to reproduce what the naive "sum divided by count" formula would give. The
     * incremental update formula cannot be used here because it may subtract infinities in
     * situations where the naive formula would add them.
     *
     * That leads to the following cases:
     * 1. Finite previous mean, non-finite new value: the new mean is that value (NaN or an
     *    infinity).
     * 2. Non-finite previous mean, finite new value: the mean stays as it is (NaN or an infinity).
     * 3. Both non-finite, and...
     * 3a. ...at least one is NaN (so mean != value): the new mean is NaN.
     * 3b. ...both are the same infinity (so mean == value): the mean stays as it is.
     * 3c. ...they are opposite infinities (so mean != value): the new mean is NaN.
     */
    if (isFinite(previousMean)) {
      // Case 1.
      return value;
    }
    if (isFinite(value) || previousMean == value) {
      // Case 2 or 3b.
      return previousMean;
    }
    // Case 3a or 3c.
    return NaN;
  }
}