/* * GeoTools - The Open Source Java GIS Toolkit * http://geotools.org * * (C) 2001-2008, Open Source Geospatial Foundation (OSGeo) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; * version 2.1 of the License. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. */ package org.geotools.math; import java.io.Serializable; import java.util.Locale; import org.geotools.io.TableWriter; import org.geotools.resources.i18n.Descriptions; import org.geotools.resources.i18n.DescriptionKeys; import org.opengis.util.Cloneable; /** * Holds some statistics about a series of sample values. Given a series of sample values * <var>s<sub>0</sub></var>, <var>s<sub>1</sub></var>, <var>s<sub>2</sub></var>, * <var>s<sub>3</sub></var>..., this class computes {@linkplain #minimum minimum}, * {@linkplain #maximum maximum}, {@linkplain #mean mean}, {@linkplain #rms root mean square} * and {@linkplain #standardDeviation standard deviation}. Statistics are computed on the fly; * the sample values are never stored in memory. * <p> * An instance of {@code Statistics} is initially empty (i.e. all statistical values are set * to {@link Double#NaN NaN}). The statistics are updated every time an {@link #add(double)} * method is invoked with a non-{@linkplain Double#NaN NaN} value. A typical usage of this * class is: * * <blockquote><pre> * double[] data = new double[1000]; * // (Compute some data values here...) * * Statistics stats = new Statistics(); * for (int i=0; i<data.length; i++) { * stats.add(data[i]); * } * System.out.println(stats); * </pre></blockquote> * * @since 2.0 * @source $URL$ * @version $Id$ * @author Martin Desruisseaux (IRD) */ public class Statistics implements Cloneable, Serializable { /** * Serial number for compatibility with different versions. */ private static final long serialVersionUID = -22884277805533726L; /** * Valeur minimale qui aie été transmise à la méthode * {@link #add(double)}. Lors de la construction, ce * champs est initialisé à NaN. */ private double min = Double.NaN; /** * Valeur maximale qui aie été transmise à la méthode * {@link #add(double)}. Lors de la construction, ce * champs est initialisé à NaN. */ private double max = Double.NaN; /** * Somme de toutes les valeurs qui ont été transmises à * la méthode {@link #add(double)}. Lors de la construction, * ce champs est initialisé à 0. */ private double sum = 0; /** * Somme des carrés de toutes les valeurs qui ont été * transmises à la méthode {@link #add(double)}. Lors * de la construction, ce champs est initialisé à 0. */ private double sum2 = 0; /** * Nombre de données autres que NaN qui ont été transmises * à la méthode {@link #add(double)}. Lors de la construction, * ce champs est initialisé à 0. */ private int n = 0; /** * Nombre de données égales à NaN qui ont été transmises à * la méthode {@link #add(double)}. Les NaN sont ingorés lors * du calcul des statistiques, mais on les compte quand même * au passage. Lors de la construction ce champs est initialisé à 0. */ private int nNaN = 0; /** * Constructs an initially empty set of statistics. * All statistical values are initialized to {@link Double#NaN}. */ public Statistics() { } /** * Resets the statistics to their initial {@link Double#NaN NaN} values. * This method reset this object state as if it was just created. */ public void reset() { min = Double.NaN; max = Double.NaN; sum = 0; sum2 = 0; n = 0; nNaN = 0; } /** * Updates statistics for the specified sample. This {@code add} * method is usually invoked inside a {@code for} loop. * * @param sample The sample value. {@link Double#NaN NaN} values are ignored. * * @see #add(long) * @see #add(Statistics) */ public void add(final double sample) { if (!Double.isNaN(sample)) { /* * Les deux prochaines lignes utilisent !(a>=b) au * lieu de (a<b) afin de prendre en compte les NaN. */ if (!(min<=sample)) min=sample; if (!(max>=sample)) max=sample; sum2 += (sample*sample); sum += sample; n++; } else { nNaN++; } } /** * Updates statistics for the specified sample. This {@code add} * method is usually invoked inside a {@code for} loop. * * @param sample The sample value. * * @see #add(double) * @see #add(Statistics) */ public void add(final long sample) { final double fdatum = sample; if (!(min<=fdatum)) min=fdatum; if (!(max>=fdatum)) max=fdatum; sum2 += (fdatum*fdatum); sum += fdatum; n++; } /** * Updates statistics with all samples from the specified {@code stats}. Invoking this * method is equivalent (except for rounding errors) to invoking {@link #add(double) add} * for all samples that were added to {@code stats}. * * @param stats The statistics to be added to {@code this}, or {@code null} if none. */ public void add(final Statistics stats) { if (stats != null) { // "if (a<b)" équivaut à "if (!isNaN(a) && a<b)". if (Double.isNaN(min) || stats.min<min) min=stats.min; if (Double.isNaN(max) || stats.max>max) max=stats.max; sum2 += stats.sum2; sum += stats.sum; n += stats.n; nNaN += stats.nNaN; } } /** * Returns the number of {@link Double#NaN NaN} samples. {@code NaN} samples are * ignored in all other statitical computation. This method count them for information * purpose only. */ public int countNaN() { return Math.max(nNaN, 0); } /** * Returns the number of samples, excluding {@link Double#NaN NaN} values. */ public int count() { return n; } /** * Returns the minimum sample value, or {@link Double#NaN NaN} if none. * * @see #maximum */ public double minimum() { return min; } /** * Returns the maximum sample value, or {@link Double#NaN NaN} if none. * * @see #minimum */ public double maximum() { return max; } /** * Returns the range of sample values. This is equivalent to <code>{@link #maximum maximum} - * {@link #minimum minimum}</code>, except for rounding error. If no samples were added, * then returns {@link Double#NaN NaN}. * * @see #minimum * @see #maximum */ public double range() { return max-min; } /** * Returns the mean value, or {@link Double#NaN NaN} if none. */ public double mean() { return sum/n; } /** * Returns the root mean square, or {@link Double#NaN NaN} if none. */ public double rms() { return Math.sqrt(sum2/n); } /** * Retourne l'écart type des échantillons par rapport à la moyenne. Si les données * fournies aux différentes méthodes {@code add(...)} se distribuent selon une * loi normale, alors l'écart type est la distance de part et d'autre de la moyenne * dans lequel se trouveraient environ 84% des données. Le tableau ci-dessous donne * le pourcentage approximatif des données que l'on trouve de part et d'autre de la * moyenne à des distances telles que 2 ou 3 fois l'écart-type. * * <table align=center> * <tr><td> 0.5 </td><td> 69.1% </td></tr> * <tr><td> 1.0 </td><td> 84.2% </td></tr> * <tr><td> 1.5 </td><td> 93.3% </td></tr> * <tr><td> 2.0 </td><td> 97.7% </td></tr> * <tr><td> 3.0 </td><td> 99.9% </td></tr> * </table> * * @param allPopulation La valeur {@code true} indique que les données fournies * aux différentes méthodes {@code add(...)} représentent l'ensemble de * la polulation. La valeur {@code false} indique que ces données ne * représentent qu'un échantillon de la population, ce qui est généralement le * cas. Si le nombre de données est élevé, alors les valeurs {@code true} * et {@code false} donneront sensiblement les mêmes résultats. */ public double standardDeviation(final boolean allPopulation) { return Math.sqrt((sum2 - sum*sum/n) / (allPopulation ? n : n-1)); } /** * Returns a clone of this statistics. */ @Override public Statistics clone() { try { return (Statistics) super.clone(); } catch (CloneNotSupportedException exception) { // Should not happen since we are cloneable throw new AssertionError(exception); } } /** * Tests this statistics with the specified object for equality. */ @Override public boolean equals(final Object obj) { if (obj!=null && getClass().equals(obj.getClass())) { final Statistics cast = (Statistics) obj; return n==cast.n && Double.doubleToLongBits(min ) == Double.doubleToLongBits(cast.min) && Double.doubleToLongBits(max ) == Double.doubleToLongBits(cast.max) && Double.doubleToLongBits(sum ) == Double.doubleToLongBits(cast.sum) && Double.doubleToLongBits(sum2) == Double.doubleToLongBits(cast.sum2); } return false; } /** * Returns a hash code value for this statistics. */ @Override public int hashCode() { final long code = (Double.doubleToLongBits(min) + 37*(Double.doubleToLongBits(max) + 37*(Double.doubleToLongBits(sum) + 37*(Double.doubleToLongBits(sum2))))); return (int) code ^ (int) (code >>> 32) ^ n; } /** * Returns a string representation of this statistics. This method invokes * {@link #toString(Locale, boolean)} using the default locale and spaces * separator. */ @Override public final String toString() { return toString(null, false); } /** * Returns a localized string representation of this statistics. This string * will span multiple lines, one for each statistical value. For example: * * <blockquote><pre> * Compte: 8726 * Minimum: 6.853 * Maximum: 8.259 * Moyenne: 7.421 * RMS: 7.846 * Écart-type: 6.489 * </pre></blockquote> * * If {@code tabulations} is true, then labels (e.g. "Minimum") and values * (e.g. "6.853") are separated by tabulations. Otherwise, they are separated * by spaces. */ public String toString(final Locale locale, final boolean tabulations) { String text = Descriptions.getResources(locale).getString( DescriptionKeys.STATISTICS_TO_STRING_$6, new Number[] { count(), minimum(), maximum(), mean(), rms(), standardDeviation(false) }); if (!tabulations) { final TableWriter tmp = new TableWriter(null, 1); tmp.write(text); tmp.setColumnAlignment(1, TableWriter.ALIGN_RIGHT); text = tmp.toString(); } return text; } /** * Holds some statistics about a series of sample values and the difference between them. * Given a series of sample values <var>s<sub>0</sub></var>, <var>s<sub>1</sub></var>, * <var>s<sub>2</sub></var>, <var>s<sub>3</sub></var>..., this class computes statistics * in the same way than {@link Statistics} and additionnaly computes statistics for * <var>s<sub>1</sub></var>-<var>s<sub>0</sub></var>, * <var>s<sub>2</sub></var>-<var>s<sub>1</sub></var>, * <var>s<sub>3</sub></var>-<var>s<sub>2</sub></var>..., * which are stored in a {@link #getDeltaStatistics delta} statistics object. * * @version $Id$ * @author Martin Desruisseaux (IRD) */ public static class Delta extends Statistics { /** * Serial number for compatibility with different versions. */ private static final long serialVersionUID = 3464306833883333219L; /** * Statistics about the differences between consecutive sample values. */ private Statistics delta; /** * Last value given to an {@link #add(double) add} method as * a {@code double}, or {@link Double#NaN NaN} if none. */ private double last = Double.NaN; /** * Last value given to an {@link #add(long) add} * method as a {@code long}, or 0 if none. */ private long lastAsLong; /** * Constructs an initially empty set of statistics. * All statistical values are initialized to {@link Double#NaN}. */ public Delta() { delta = new Statistics(); delta.nNaN = -1; // Do not count the first NaN, which will always be the first value. } /** * Constructs an initially empty set of statistics using the specified * object for {@link #getDeltaStatistics delta} statistics. This method * allows chaining different kind of statistics objects. For example, one * could write: * * <blockquote><pre> * new Statistics.Delta(new Statistics.Delta()); * </pre></blockquote> * * Which would compute statistics of sample values, statistics of difference between * consecutive sample values, and statistics of difference of difference between * consecutive sample values. Other kinds of {@link Statistics} object could be * chained as well. */ public Delta(final Statistics delta) { this.delta = delta; delta.reset(); delta.nNaN = -1; // Do not count the first NaN, which will always be the first value. } /** * Returns the statistics about difference between consecutives values. * Given a series of sample values <var>s<sub>0</sub></var>, <var>s<sub>1</sub></var>, * <var>s<sub>2</sub></var>, <var>s<sub>3</sub></var>..., this is statistics for * <var>s<sub>1</sub></var>-<var>s<sub>0</sub></var>, * <var>s<sub>2</sub></var>-<var>s<sub>1</sub></var>, * <var>s<sub>3</sub></var>-<var>s<sub>2</sub></var>..., */ public Statistics getDeltaStatistics() { return delta; } /** * Resets the statistics to their initial {@link Double#NaN NaN} values. * This method reset this object state as if it was just created. */ @Override public void reset() { super.reset(); delta.reset(); delta.nNaN = -1; // Do not count the first NaN, which will always be the first value. last = Double.NaN; lastAsLong = 0; } /** * Updates statistics for the specified sample. The {@link #getDeltaStatistics delta} * statistics are updated with <code>sample - sample<sub>last</sub></code> value, * where <code>sample<sub>last</sub></code> is the last value given to the previous * call of an {@code add(...)} method. */ @Override public void add(final double sample) { super.add(sample); delta.add(sample - last); last = sample; lastAsLong = (long)sample; } /** * Updates statistics for the specified sample. The {@link #getDeltaStatistics delta} * statistics are updated with <code>sample - sample<sub>last</sub></code> value, * where <code>sample<sub>last</sub></code> is the last value given to the previous * call of an {@code add(...)} method. */ @Override public void add(final long sample) { super.add(sample); if (last == (double) lastAsLong) { // 'lastAsLong' may have more precision than 'last' since the cast to the // 'double' type may loose some digits. Invoke the 'delta.add(long)' version. delta.add(sample - lastAsLong); } else { // The sample value is either fractional, outside 'long' range, // infinity or NaN. Invoke the 'delta.add(double)' version. delta.add(sample - last); } last = sample; lastAsLong = sample; } /** * Update statistics with all samples from the specified {@code stats}. Invoking this * method is equivalent (except for rounding errors) to invoking {@link #add(double) add} * for all samples that were added to {@code stats}. The {@code stats} argument * must be an instance of {@code Statistics.Delta}. * * @param stats The statistics to be added to {@code this}, * or {@code null} if none. * @throws ClassCastException If {@code stats} is not an instance of * {@code Statistics.Delta}. */ @Override public void add(final Statistics stats) throws ClassCastException { if (stats != null) { final Delta toAdd = (Delta) stats; if (toAdd.delta.nNaN >= 0) { delta.add(toAdd.delta); last = toAdd.last; lastAsLong = toAdd.lastAsLong; super.add(stats); } } } /** * Returns a clone of this statistics. */ @Override public Delta clone() { Delta copy = (Delta) super.clone(); copy.delta = copy.delta.clone(); return copy; } /** * Tests this statistics with the specified object for equality. */ @Override public boolean equals(final Object obj) { return super.equals(obj) && delta.equals(((Delta) obj).delta); } /** * Returns a hash code value for this statistics. */ @Override public int hashCode() { return super.hashCode() + 37*delta.hashCode(); } } }