/*
* GeoTools - The Open Source Java GIS Toolkit
* http://geotools.org
*
* (C) 2001-2008, Open Source Geospatial Foundation (OSGeo)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation;
* version 2.1 of the License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*/
package org.geotools.math;
import java.io.Serializable;
import java.util.Locale;
import org.geotools.io.TableWriter;
import org.geotools.resources.i18n.Descriptions;
import org.geotools.resources.i18n.DescriptionKeys;
import org.opengis.util.Cloneable;
/**
 * Holds some statistics about a series of sample values. Given a series of sample values
 * <var>s<sub>0</sub></var>, <var>s<sub>1</sub></var>, <var>s<sub>2</sub></var>,
 * <var>s<sub>3</sub></var>..., this class computes {@linkplain #minimum minimum},
 * {@linkplain #maximum maximum}, {@linkplain #mean mean}, {@linkplain #rms root mean square}
 * and {@linkplain #standardDeviation standard deviation}. Statistics are computed on the fly;
 * the sample values are never stored in memory.
 * <p>
 * An instance of {@code Statistics} is initially empty (i.e. all statistical values are set
 * to {@link Double#NaN NaN}). The statistics are updated every time an {@link #add(double)}
 * method is invoked with a non-{@linkplain Double#NaN NaN} value. A typical usage of this
 * class is:
 *
 * <blockquote><pre>
 * double[] data = new double[1000];
 * // (Compute some data values here...)
 *
 * Statistics stats = new Statistics();
 * for (int i=0; i&lt;data.length; i++) {
 *     stats.add(data[i]);
 * }
 * System.out.println(stats);
 * </pre></blockquote>
 *
 * @since 2.0
 * @source $URL$
 * @version $Id$
 * @author Martin Desruisseaux (IRD)
 */
public class Statistics implements Cloneable, Serializable {
    /**
     * Serial number for compatibility with different versions.
     */
    private static final long serialVersionUID = -22884277805533726L;

    /**
     * The smallest value given to an {@code add(...)} method.
     * Initialized to {@link Double#NaN NaN} at construction time.
     */
    private double min = Double.NaN;

    /**
     * The largest value given to an {@code add(...)} method.
     * Initialized to {@link Double#NaN NaN} at construction time.
     */
    private double max = Double.NaN;

    /**
     * The sum of all values given to an {@code add(...)} method.
     * Initialized to 0 at construction time.
     */
    private double sum = 0;

    /**
     * The sum of the squares of all values given to an {@code add(...)} method.
     * Initialized to 0 at construction time.
     */
    private double sum2 = 0;

    /**
     * The number of non-{@link Double#NaN NaN} values given to an {@code add(...)} method.
     * Initialized to 0 at construction time.
     */
    private int n = 0;

    /**
     * The number of {@link Double#NaN NaN} values given to {@link #add(double)}. Those values
     * are ignored by the statistical computations but are counted anyway. Initialized to 0 at
     * construction time.
     * <p>
     * {@link Delta} sets this field to -1 on its delta statistics as a marker meaning "no sample
     * received yet", so that the {@code NaN} difference produced by the very first sample is
     * not counted. {@link #countNaN()} hides that negative value from callers.
     */
    private int nNaN = 0;

    /**
     * Constructs an initially empty set of statistics.
     * All statistical values are initialized to {@link Double#NaN}.
     */
    public Statistics() {
    }

    /**
     * Resets the statistics to their initial {@link Double#NaN NaN} values.
     * This method resets this object state as if it was just created.
     */
    public void reset() {
        min  = Double.NaN;
        max  = Double.NaN;
        sum  = 0;
        sum2 = 0;
        n    = 0;
        nNaN = 0;
    }

    /**
     * Updates statistics for the specified sample. This {@code add}
     * method is usually invoked inside a {@code for} loop.
     *
     * @param sample The sample value. {@link Double#NaN NaN} values are ignored
     *        (they are only counted, see {@link #countNaN()}).
     *
     * @see #add(long)
     * @see #add(Statistics)
     */
    public void add(final double sample) {
        if (Double.isNaN(sample)) {
            nNaN++;
            return;
        }
        // The comparisons are written !(a <= b) instead of (a > b) on purpose:
        // they must evaluate to true while 'min' or 'max' is still NaN.
        if (!(min <= sample)) {
            min = sample;
        }
        if (!(max >= sample)) {
            max = sample;
        }
        sum2 += (sample * sample);
        sum  += sample;
        n++;
    }

    /**
     * Updates statistics for the specified sample. This {@code add}
     * method is usually invoked inside a {@code for} loop.
     *
     * @param sample The sample value.
     *
     * @see #add(double)
     * @see #add(Statistics)
     */
    public void add(final long sample) {
        final double fdatum = sample;
        // Same NaN-aware comparisons as in add(double).
        if (!(min <= fdatum)) {
            min = fdatum;
        }
        if (!(max >= fdatum)) {
            max = fdatum;
        }
        sum2 += (fdatum * fdatum);
        sum  += fdatum;
        n++;
    }

    /**
     * Updates statistics with all samples from the specified {@code stats}. Invoking this
     * method is equivalent (except for rounding errors) to invoking {@link #add(double) add}
     * for all samples that were added to {@code stats}.
     *
     * @param stats The statistics to be added to {@code this}, or {@code null} if none.
     */
    public void add(final Statistics stats) {
        if (stats != null) {
            // The explicit isNaN test is needed because "stats.min < min" is false when 'min' is NaN.
            if (Double.isNaN(min) || stats.min < min) {
                min = stats.min;
            }
            if (Double.isNaN(max) || stats.max > max) {
                max = stats.max;
            }
            sum2 += stats.sum2;
            sum  += stats.sum;
            n    += stats.n;
            // A still-empty delta statistics carries the -1 marker (see the 'nNaN' field);
            // it must contribute zero NaN rather than decrement our own counter.
            nNaN += Math.max(stats.nNaN, 0);
        }
    }

    /**
     * Returns the number of {@link Double#NaN NaN} samples. {@code NaN} samples are
     * ignored in all other statistical computations. This method counts them for
     * information purposes only.
     *
     * @return The number of {@code NaN} samples, never negative.
     */
    public int countNaN() {
        // The internal counter may be -1 for an empty delta statistics; report it as 0.
        return Math.max(nNaN, 0);
    }

    /**
     * Returns the number of samples, excluding {@link Double#NaN NaN} values.
     */
    public int count() {
        return n;
    }

    /**
     * Returns the minimum sample value, or {@link Double#NaN NaN} if none.
     *
     * @see #maximum
     */
    public double minimum() {
        return min;
    }

    /**
     * Returns the maximum sample value, or {@link Double#NaN NaN} if none.
     *
     * @see #minimum
     */
    public double maximum() {
        return max;
    }

    /**
     * Returns the range of sample values. This is equivalent to <code>{@link #maximum maximum} -
     * {@link #minimum minimum}</code>, except for rounding error. If no samples were added,
     * then returns {@link Double#NaN NaN}.
     *
     * @see #minimum
     * @see #maximum
     */
    public double range() {
        return max - min;
    }

    /**
     * Returns the mean value, or {@link Double#NaN NaN} if none.
     */
    public double mean() {
        return sum / n;
    }

    /**
     * Returns the root mean square, or {@link Double#NaN NaN} if none.
     */
    public double rms() {
        return Math.sqrt(sum2 / n);
    }

    /**
     * Returns the standard deviation of the samples around the mean. If the values given to
     * the {@code add(...)} methods follow a normal (Gaussian) distribution, then about 68.3%
     * of the values lie within one standard deviation on either side of the mean. The table
     * below gives, for a normal distribution, the approximate fraction of values that are
     * lower than <var>mean</var> + <var>k</var> &times; <var>standard deviation</var>
     * (one-sided cumulative probability) for a few values of <var>k</var>:
     *
     * <table align=center>
     *   <tr><td>&nbsp; 0.5 &nbsp;</td><td>&nbsp; 69.1% &nbsp;</td></tr>
     *   <tr><td>&nbsp; 1.0 &nbsp;</td><td>&nbsp; 84.1% &nbsp;</td></tr>
     *   <tr><td>&nbsp; 1.5 &nbsp;</td><td>&nbsp; 93.3% &nbsp;</td></tr>
     *   <tr><td>&nbsp; 2.0 &nbsp;</td><td>&nbsp; 97.7% &nbsp;</td></tr>
     *   <tr><td>&nbsp; 3.0 &nbsp;</td><td>&nbsp; 99.9% &nbsp;</td></tr>
     * </table>
     *
     * @param allPopulation {@code true} if the values given to the {@code add(...)} methods
     *        are the whole population (the sum of squared deviations is divided by
     *        <var>n</var>), or {@code false} if they are only a sample of the population
     *        (division by <var>n</var>-1), which is usually the case. For a large number of
     *        values, {@code true} and {@code false} give approximately the same result.
     * @return The standard deviation, or {@link Double#NaN NaN} if it can not be computed
     *         (no sample, or a single sample with {@code allPopulation} set to {@code false}).
     */
    public double standardDeviation(final boolean allPopulation) {
        // Mathematically this quantity is never negative, but rounding errors can make it
        // slightly negative for (nearly) constant samples, which would turn the square root
        // into NaN. Clamp it to zero; Math.max still propagates NaN for the empty case.
        final double squaredDeviations = Math.max(0, sum2 - sum * sum / n);
        return Math.sqrt(squaredDeviations / (allPopulation ? n : n - 1));
    }

    /**
     * Returns a clone of this statistics.
     */
    @Override
    public Statistics clone() {
        try {
            return (Statistics) super.clone();
        } catch (CloneNotSupportedException exception) {
            // Should not happen since we are cloneable
            throw new AssertionError(exception);
        }
    }

    /**
     * Tests this statistics with the specified object for equality. Two statistics are equal
     * if they are of the same class and have the same count, minimum, maximum, sum and sum of
     * squares. The number of {@code NaN} samples is not part of the comparison.
     */
    @Override
    public boolean equals(final Object obj) {
        if (obj != null && getClass().equals(obj.getClass())) {
            final Statistics cast = (Statistics) obj;
            return n == cast.n &&
                   Double.doubleToLongBits(min ) == Double.doubleToLongBits(cast.min ) &&
                   Double.doubleToLongBits(max ) == Double.doubleToLongBits(cast.max ) &&
                   Double.doubleToLongBits(sum ) == Double.doubleToLongBits(cast.sum ) &&
                   Double.doubleToLongBits(sum2) == Double.doubleToLongBits(cast.sum2);
        }
        return false;
    }

    /**
     * Returns a hash code value for this statistics, computed from the same
     * fields as the ones compared by {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        final long code = (Double.doubleToLongBits(min) +
                      37*(Double.doubleToLongBits(max) +
                      37*(Double.doubleToLongBits(sum) +
                      37*(Double.doubleToLongBits(sum2)))));
        return (int) code ^ (int) (code >>> 32) ^ n;
    }

    /**
     * Returns a string representation of this statistics. This method invokes
     * {@link #toString(Locale, boolean)} with a {@code null} locale and spaces
     * as the separator.
     */
    @Override
    public final String toString() {
        return toString(null, false);
    }

    /**
     * Returns a localized string representation of this statistics. This string
     * will span multiple lines, one for each statistical value. For example:
     *
     * <blockquote><pre>
     *     Compte:      8726
     *     Minimum:    6.853
     *     Maximum:    8.259
     *     Moyenne:    7.421
     *     RMS:        7.846
     *     &Eacute;cart-type: 6.489
     * </pre></blockquote>
     *
     * If {@code tabulations} is true, then labels (e.g. "Minimum") and values
     * (e.g. "6.853") are separated by tabulations. Otherwise, they are separated
     * by spaces.
     *
     * @param locale The locale given to the resource bundle lookup. {@link #toString()}
     *        passes {@code null} here.
     * @param tabulations {@code true} to return the formatted resource text as is,
     *        or {@code false} to align it in columns with a {@link TableWriter}.
     * @return The string representation. The standard deviation shown is the one
     *         computed by {@code standardDeviation(false)}.
     */
    public String toString(final Locale locale, final boolean tabulations) {
        String text = Descriptions.getResources(locale).getString(
                DescriptionKeys.STATISTICS_TO_STRING_$6, new Number[] {
                    count(), minimum(), maximum(), mean(), rms(), standardDeviation(false)
                });
        if (!tabulations) {
            // Let TableWriter lay the text out in columns, values right-aligned.
            final TableWriter tmp = new TableWriter(null, 1);
            tmp.write(text);
            tmp.setColumnAlignment(1, TableWriter.ALIGN_RIGHT);
            text = tmp.toString();
        }
        return text;
    }

    /**
     * Holds some statistics about a series of sample values and the difference between them.
     * Given a series of sample values <var>s<sub>0</sub></var>, <var>s<sub>1</sub></var>,
     * <var>s<sub>2</sub></var>, <var>s<sub>3</sub></var>..., this class computes statistics
     * in the same way as {@link Statistics} and additionally computes statistics for
     * <var>s<sub>1</sub></var>-<var>s<sub>0</sub></var>,
     * <var>s<sub>2</sub></var>-<var>s<sub>1</sub></var>,
     * <var>s<sub>3</sub></var>-<var>s<sub>2</sub></var>...,
     * which are stored in a {@link #getDeltaStatistics delta} statistics object.
     *
     * @version $Id$
     * @author Martin Desruisseaux (IRD)
     */
    public static class Delta extends Statistics {
        /**
         * Serial number for compatibility with different versions.
         */
        private static final long serialVersionUID = 3464306833883333219L;

        /**
         * Statistics about the differences between consecutive sample values.
         * Its {@code nNaN} counter is -1 as long as no sample has been received.
         */
        private Statistics delta;

        /**
         * Last value given to an {@link #add(double) add} method as
         * a {@code double}, or {@link Double#NaN NaN} if none.
         */
        private double last = Double.NaN;

        /**
         * Last value given to an {@link #add(long) add}
         * method as a {@code long}, or 0 if none.
         */
        private long lastAsLong;

        /**
         * Constructs an initially empty set of statistics.
         * All statistical values are initialized to {@link Double#NaN}.
         */
        public Delta() {
            delta = new Statistics();
            delta.nNaN = -1; // Do not count the first NaN, which will always be the first value.
        }

        /**
         * Constructs an initially empty set of statistics using the specified
         * object for {@link #getDeltaStatistics delta} statistics. This method
         * allows chaining different kind of statistics objects. For example, one
         * could write:
         *
         * <blockquote><pre>
         * new Statistics.Delta(new Statistics.Delta());
         * </pre></blockquote>
         *
         * Which would compute statistics of sample values, statistics of difference between
         * consecutive sample values, and statistics of difference of difference between
         * consecutive sample values. Other kinds of {@link Statistics} object could be
         * chained as well.
         *
         * @param delta The object which will hold the delta statistics. It is stored by
         *        reference (not copied) and is {@linkplain Statistics#reset reset} by
         *        this constructor.
         */
        public Delta(final Statistics delta) {
            this.delta = delta;
            delta.reset();
            delta.nNaN = -1; // Do not count the first NaN, which will always be the first value.
        }

        /**
         * Returns the statistics about difference between consecutive values.
         * Given a series of sample values <var>s<sub>0</sub></var>, <var>s<sub>1</sub></var>,
         * <var>s<sub>2</sub></var>, <var>s<sub>3</sub></var>..., this is statistics for
         * <var>s<sub>1</sub></var>-<var>s<sub>0</sub></var>,
         * <var>s<sub>2</sub></var>-<var>s<sub>1</sub></var>,
         * <var>s<sub>3</sub></var>-<var>s<sub>2</sub></var>...,
         *
         * @return The delta statistics object itself (not a copy).
         */
        public Statistics getDeltaStatistics() {
            return delta;
        }

        /**
         * Resets the statistics to their initial {@link Double#NaN NaN} values.
         * This method resets this object state as if it was just created.
         */
        @Override
        public void reset() {
            super.reset();
            delta.reset();
            delta.nNaN = -1; // Do not count the first NaN, which will always be the first value.
            last       = Double.NaN;
            lastAsLong = 0;
        }

        /**
         * Updates statistics for the specified sample. The {@link #getDeltaStatistics delta}
         * statistics are updated with <code>sample - sample<sub>last</sub></code> value,
         * where <code>sample<sub>last</sub></code> is the last value given to the previous
         * call of an {@code add(...)} method.
         */
        @Override
        public void add(final double sample) {
            super.add(sample);
            // For the very first sample 'last' is NaN, so the difference is NaN and only
            // moves the delta NaN counter from its -1 marker to 0.
            delta.add(sample - last);
            last       = sample;
            lastAsLong = (long) sample;
        }

        /**
         * Updates statistics for the specified sample. The {@link #getDeltaStatistics delta}
         * statistics are updated with <code>sample - sample<sub>last</sub></code> value,
         * where <code>sample<sub>last</sub></code> is the last value given to the previous
         * call of an {@code add(...)} method.
         */
        @Override
        public void add(final long sample) {
            super.add(sample);
            final long difference = sample - lastAsLong;
            // The subtraction overflowed if the operands have different signs and the sign
            // of the result differs from the sign of 'sample'.
            final boolean overflow = ((sample ^ lastAsLong) & (sample ^ difference)) < 0;
            if (last == (double) lastAsLong && !overflow) {
                // 'lastAsLong' may have more precision than 'last' since the cast to the
                // 'double' type may loose some digits. Invoke the 'delta.add(long)' version.
                delta.add(difference);
            } else {
                // The last value was either fractional, outside 'long' range, infinity or
                // NaN, or the exact difference does not fit in a 'long'.
                // Invoke the 'delta.add(double)' version.
                delta.add(sample - last);
            }
            last       = sample;
            lastAsLong = sample;
        }

        /**
         * Updates statistics with all samples from the specified {@code stats}. Invoking this
         * method is equivalent (except for rounding errors) to invoking {@link #add(double) add}
         * for all samples that were added to {@code stats}, except that the difference between
         * the last sample of {@code this} and the first sample of {@code stats} is not added to
         * the delta statistics. The {@code stats} argument must be an instance of
         * {@code Statistics.Delta}. A {@code stats} which has not received any sample is ignored.
         *
         * @param  stats The statistics to be added to {@code this},
         *         or {@code null} if none.
         * @throws ClassCastException If {@code stats} is not an instance of
         *         {@code Statistics.Delta}.
         */
        @Override
        public void add(final Statistics stats) throws ClassCastException {
            if (stats != null) {
                final Delta toAdd = (Delta) stats;
                if (toAdd.delta.nNaN >= 0) {
                    // If this object has not received any sample yet, its delta NaN counter
                    // still holds the -1 marker. Summing the counters would then leave it one
                    // too low (possibly still -1, which would make this object look empty).
                    final boolean wasEmpty = (delta.nNaN < 0);
                    delta.add(toAdd.delta);
                    if (wasEmpty) {
                        delta.nNaN++;
                    }
                    last       = toAdd.last;
                    lastAsLong = toAdd.lastAsLong;
                    super.add(stats);
                }
            }
        }

        /**
         * Returns a clone of this statistics. The delta statistics are cloned too.
         */
        @Override
        public Delta clone() {
            Delta copy = (Delta) super.clone();
            copy.delta = copy.delta.clone();
            return copy;
        }

        /**
         * Tests this statistics with the specified object for equality.
         * The delta statistics are compared as well.
         */
        @Override
        public boolean equals(final Object obj) {
            // The cast is safe: super.equals(obj) is true only for an object of the same class.
            return super.equals(obj) && delta.equals(((Delta) obj).delta);
        }

        /**
         * Returns a hash code value for this statistics.
         */
        @Override
        public int hashCode() {
            return super.hashCode() + 37*delta.hashCode();
        }
    }
}