/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.Preprocess.Missing_Values.EventCovering.Stat;

/**
 * A {@link Sample} that additionally counts its values in a fixed number of
 * equal-width bins.
 *
 * Bin <code>j</code> covers the half-open interval
 * <code>[base + j*binsize, base + (j+1)*binsize)</code>. Every value passed to
 * {@link #add(double)} is forwarded to the superclass; only values that fall
 * inside the binned range are counted in a bin.
 */
public class Histogram extends Sample {

    /**
     * Creates an empty histogram.
     *
     * @param bins    number of bins
     * @param binsize width of each bin
     * @param base    lower edge of the first bin
     */
    public Histogram(int bins, double binsize, double base) {
        this.base = base;
        this.bins = bins;
        this.binsize = binsize;
        bin = new int[bins];
        cml = null;
        maxbin = -1;
    }

    /**
     * Creates an empty histogram whose first bin starts at 0.
     *
     * @param bins    number of bins
     * @param binsize width of each bin
     */
    public Histogram(int bins, double binsize) {
        this(bins, binsize, 0);
    }

    /**
     * Adds a value to the sample and, when it lies inside the binned range,
     * to the corresponding bin.
     *
     * @param X the value to add
     */
    public void add(double X) {
        super.add(X);

        // The range check is done on the double, before the cast: in Java
        // (int) NaN evaluates to 0, so casting first would count a NaN value
        // (or a 0/0 produced by binsize == 0) in the first bin.
        double slot = Math.floor((X - base) / binsize);
        if (slot >= 0 && slot < bins) {
            int j = (int) slot;
            bin[j]++;
            // maxbin is a bin INDEX, so the new count has to be compared with
            // the count stored in that bin, not with the index itself.
            // Strict '>' keeps the earlier bin on ties.
            if (maxbin == -1 || bin[j] > bin[maxbin]) {
                maxbin = j;
            }
        }

        cml = null; // cumulative distribution is not valid anymore
    }

    /**
     * Resets the superclass state and clears all bin counts.
     */
    public void reset() {
        super.reset();
        bin = new int[bins];
        cml = null;
        maxbin = -1;
    }

    /**
     * Returns the count stored in one bin.
     *
     * @param i bin index
     * @return the count of bin <code>i</code>, or 0 when <code>i</code> is
     *         outside <code>[0, bins)</code>
     */
    public int getBin(int i) {
        if (i >= 0 && i < bins) {
            return bin[i];
        }
        return 0;
    }

    /**
     * Returns the bin counts.
     *
     * @return the internal count array itself (not a copy)
     */
    public int[] getBins() {
        return bin;
    }

    /**
     * @return the width of each bin
     */
    public double getBinsize() {
        return binsize;
    }

    /**
     * @return the lower edge of the first bin
     */
    public double getBase() {
        return base;
    }

    /**
     * @return the number of bins
     */
    public int getNumBins() {
        return bins;
    }

    /**
     * Recomputes the cumulative distribution over the bins.
     *
     * Entry <code>j</code> is the sum of the counts of bins
     * <code>0..j</code> divided by {@link #getSampleSize()}. The divisor is
     * the superclass sample size, which presumably also counts values that
     * fell outside the binned range (add() forwards every value to the
     * superclass), so the last entry can be below 1. When the sample size is
     * 0 the entries are NaN.
     *
     * @return the freshly computed cumulative array, also kept internally
     *         for {@link #getPercentile(double)}
     */
    public double[] getCummulative() {
        cml = new double[bins];
        double sum = 0;
        int n = getSampleSize();
        for (int j = 0; j < bins; j++) {
            sum += bin[j];
            cml[j] = sum / n;
        }
        return cml;
    }

    /**
     * Estimates a percentile from the binned data.
     *
     * @param p the requested cumulative fraction (e.g. 0.5 for the median)
     * @return the lower edge of the first bin whose cumulative fraction is
     *         at least <code>p</code>; <code>base + bins*binsize</code> when
     *         no bin reaches <code>p</code>
     */
    public double getPercentile(double p) {
        if (cml == null) {
            getCummulative();
        }
        for (int j = 0; j < bins; j++) {
            if (cml[j] >= p) {
                return base + j * binsize;
            }
        }
        return base + bins * binsize;
    }

    /**
     * @return the 0.5 percentile as computed by {@link #getPercentile(double)}
     */
    public double getMedian() {
        return getPercentile(0.5);
    }

    /**
     * @return the 0.25 percentile as computed by {@link #getPercentile(double)}
     */
    public double getLowerQuartile() {
        return getPercentile(0.25);
    }

    /**
     * @return the 0.75 percentile as computed by {@link #getPercentile(double)}
     */
    public double getUpperQuartile() {
        return getPercentile(0.75);
    }

    /**
     * @return the upper quartile minus the lower quartile
     */
    public double getInterQuartileRange() {
        return getUpperQuartile() - getLowerQuartile();
    }

    /**
     * Returns the lower edge of the fullest bin.
     *
     * @return <code>base + maxbin*binsize</code>; when no value has been
     *         binned yet maxbin is -1, so the result is
     *         <code>base - binsize</code>
     */
    public double getMode() {
        return base + maxbin * binsize;
    }

    /**
     * Builds a multi-line text report of the sample statistics.
     *
     * @return one "label value" line per statistic, preceded by a two-line
     *         title
     */
    public String summary() {
        StringBuilder s = new StringBuilder("Sample Summary\n");
        s.append("====================================\n");
        s.append("Sample size ").append(getSampleSize()).append("\n");
        s.append("Mean ").append(getMean()).append("\n");
        s.append("Median ").append(getMedian()).append("\n");
        s.append("Mode ").append(getMode()).append("\n");
        s.append("Variance ").append(getVariance()).append("\n");
        s.append("Std. dev. ").append(getStandardDeviation()).append("\n");
        s.append("Minimum ").append(getMin()).append("\n");
        s.append("Maximum ").append(getMax()).append("\n");
        s.append("Range ").append(getRange()).append("\n");
        s.append("Lower Quartile ").append(getLowerQuartile()).append("\n");
        s.append("Upper Quartile ").append(getUpperQuartile()).append("\n");
        s.append("interquartile range ").append(getInterQuartileRange()).append("\n");
        s.append("Coeff. of variation ").append(getCoefficientOfVariation()).append("\n");
        s.append("Sum ").append(getSum()).append("\n");
        return s.toString();
    }

    /**
     * Returns the five numbers needed to draw a box plot.
     *
     * @return median, lower quartile, upper quartile, 0.05 percentile and
     *         0.95 percentile, in that order, separated by single spaces
     */
    public String boxplot() {
        String s = getMedian() + " " + getLowerQuartile()
                + " " + getUpperQuartile()
                + " " + getPercentile(0.05) + " " + getPercentile(0.95);
        return s;
    }

    /** Lower edge of the first bin. */
    private double base = 0;
    /** Number of bins. */
    private int bins = 100;
    /** Width of each bin. */
    private double binsize = 1;
    /** Per-bin counts. */
    private int[] bin;
    /** Cached cumulative distribution; null when it must be recomputed. */
    private double[] cml;
    /** Index of the bin with the highest count, or -1 when nothing is binned. */
    private int maxbin;
}