package hep.aida.ref.dataset; /** * * @author The FreeHEP team @ SLAC * * This class is meant to calculate the statistics associated with a * n-dimensional data set. In particular it calculates the following quantities for * "fillable" data objects like Histograms, Clouds, Profiles, Tuples for each * of its coordinates: * - mean * - rootMeanSquared * and the global quantities: * - entries * - sum of weights * - equivalent entries * These quantities for a data set {xi} with weights {wi} are defined as: * - mean = sum(xi*wi)/sumOfWeights * - rootMeanSquared = sqrt( sum(xi*xi*wi)*sum(wi) - sum(xi*wi)*sum(xi*wi) )/sum(wi) * - sumOfWeights = sum(wi) * - entries = sum(1) * - equivalent entries = sum(wi)*sum(wi)/sum(wi*wi) * * Internally it uses the class MeanAndRmsStatistics to calculate the mean and rms for each of the * coordinates. * */ public class DataStatistics { /** * Internally we keep track of the following additive quantities: * - sw = sum(wi) * - sws = sum(wi*wi) * - en = sum(1) * * The equivalent bin entries are calculated as: * * equivBinEntries = sw*sw/sws * */ private double sw, sws; private int en; private MeanAndRmsStatistics[] stats; private int dimension; /** * Creates a new instance of DataSetStatistics. * @param descriptions Is an array specifying the description for each * of the coordinates of this data set. */ public DataStatistics(String[] descriptions) { this( descriptions.length ); setDescription( descriptions ); } /** * Creates a new instance of DataSetStatistics. * @param dimension The dimension, i.e. the number of coordinates * of this data set. * */ public DataStatistics(int dimension) { this.dimension = dimension; this.stats = new MeanAndRmsStatistics[ dimension ]; for ( int i = 0; i < dimension; i++ ) stats[i] = new MeanAndRmsStatistics(String.valueOf(i)); reset(); } /** * Get the dimension of the DataStatistics. * @return The dimension of the DataStatistics. * */ public int dimension() { return dimension; } /** * Add a weighted entry to this DataSetStatistics. * The statistical information is updated. * @param x The coordinates of the added entry. * @param w The corresponding weight. * */ public void addEntry( double x[], double w ) { if ( w < 0 ) throw new IllegalArgumentException("Cannot accept an entry with negative weight "+w); for ( int i = 0; i < dimension; i++ ) stats[i].addEntry( x[i], w ); sw += w; sws += w*w; en++; } /** * Add a new entry to this DataSetStatistics with unit weight. * @param x The coordinates of the added entry. * */ public void addEntry( double[] x ) { addEntry( x, 1. ); } /** * Remove a weighted entry from this DataSetStatistics. * The statistical information is updated. * @param x The coordinates of the removed entry. * @param w The corresponding weight. * */ public void removeEntry( double x[], double w ) { if ( w < 0 ) throw new IllegalArgumentException("Cannot accept an entry with negative weight "+w); for ( int i = 0; i < dimension; i++ ) stats[i].removeEntry( x[i], w ); sw -= w; sws -= w*w; en--; } /** * Remove an entry from this DataSetStatistics with unit weight. * @param x The coordinates of the entry to remove. * */ public void removeEntry( double[] x ) { removeEntry( x, 1. ); } /** * Add a set of weighted entries to this DataSetStatistics. * The statistical information is updated. * @param mean The mean of the entries to be added. * @param rms The rms of the entries to be added. * @param sumw The sum of weights of the entries to be added. * @param sumw2 The sum of weights squared of the entries to be added. * @param entries The number of the entries to be added. * */ public void addEntries( double[] mean, double[] rms, double sumw, double sumw2, int entries ) { for ( int i = 0; i < dimension; i++ ) stats[i].addEntries( mean[i], rms[i], sumw ); sw += sumw; sws += sumw2; en += entries; } /** * Add a set of entries to this DataSetStatistics. * The statistical information is updated. * @param mean The mean of the entries to be added. * @param rms The rms of the entries to be added. * @param entries The number of the entries to be added. * */ public void addEntries( double[] mean, double[] rms, int entries ) { addEntries(mean, rms, entries, entries, entries); } /** * Remove the information corresponding to a set of weighted entries. * @param mean The mean of the entries to be removed. * @param rms The rms of the entries to be removed. * @param sumw The sum of weights of the entries to be removed. * @param sumw2 The sum of weights squared of the entries to be removed. * @param entries The number of the entries to be added. * */ public void removeEntries( double[] mean, double[] rms, double sumw, double sumw2, int entries ) { for ( int i = 0; i < dimension; i++ ) stats[i].removeEntries( mean[i], rms[i], sumw ); sw -= sumw; sws -= sumw2; en -= entries; } /** * Remove the information corresponding to a set of weighted entries. * @param mean The mean of the entries to be removed. * @param rms The rms of the entries to be removed. * @param entries The number of the entries to be added. * */ public void removeEntries( double[] mean, double[] rms, int entries ) { removeEntries( mean, rms, entries, entries, entries ); } /** * Get the mean for a given coordinate. * @param coord The index of the coordinate. * @return The mean of the coordinate coord. * */ public double mean(int coord) { return stats[coord].mean(); } /** * Get the rms for a given coordinate. * @param coord The index of the coordinage; * @return The rms of the coordinate coord. * */ public double rms(int coord) { return stats[coord].rms(); } /** * Get the sum of weights for this data set. * @return The sum of weights. * */ public double sumOfWeights() { return sw; } /** * Get the equivalent entries for this data set. * @return The equivalent entries for this data set. * */ public double equivalentEntries() { if ( en > 0 ) return sw*sw/sws; return 0; } /** * Get the number of entries in this data set * @return The number of entries. * */ public int entries() { return en; } /** * Scale the statistics by a give scaleFactor * Rescaling is equivalent to multiplying all the weights by the scale factor. * @param scaleFactor The scaleFactor. * */ public void scale( double scaleFactor ) { if ( scaleFactor > 0 ) { for ( int i = 0; i < dimension; i++ ) stats[i].scale( scaleFactor ); sw *= scaleFactor; sws *= scaleFactor*scaleFactor; } else throw new IllegalArgumentException("Invalid scale factor "+scaleFactor+". It must be positive"); } /** * Reset all the statistics quantities to zero. * */ public void reset() { sw = 0; sws = 0; en = 0; for ( int i = 0; i < dimension; i++ ) stats[i].reset(); } /** * Set the description for all the coordinates. * @param descriptions The array containing the description for each coordinate. * */ public void setDescription( String[] descriptions ) { if ( descriptions.length != dimension ) throw new IllegalArgumentException("Illegal dimension "+descriptions.length+" for the array of descriptions. It has to be equal to the dimension of the data set: "+dimension); for ( int i = 0; i < dimension; i++ ) setDescription( i, descriptions[i] ); } /** * Set the description for a given coordinate * @param coord The index of the coordinate. * @param description The description for the coordinate * */ public void setDescription( int coord, String description ) { if ( coord < 0 || coord >= dimension ) throw new IllegalArgumentException("Illegal coordinate "+coord+". It must be between 0 and "+dimension); stats[coord].setDescription(description); } /** * Get the description corresponding to a coordinate. * @param coord The coordinate. * @return The description corresponding to that coordinate. * */ public String description( int coord ) { return stats[coord].description(); } }