/* Copyright (C) 2001 Kyle Siegrist, Dawn Duehring This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ package distributions; /**This class defines a simple implementation of an interval data distribution. The data distribution is based on a specified domain (that is, a partition of an interval). When values are added, frequency counts for the subintervals are computed and various statistic updated.*/ public class IntervalData{ //Variables private int size, maxFreq; private double value, minValue, maxValue, mean, meanSquare, mode; private int[] freq; //Objects private Domain domain; private String name; /**This general constructor creates a new data distribution with a specified domain and a specified name*/ public IntervalData(Domain d, String n){ name = n; setDomain(d); } /**This general constructor creates a new data distribution with a specified domain and a specified name.*/ public IntervalData(double a, double b, double w, String n){ this(new Domain(a, b, w), n); } /**This special constructor creates a new data distribution with a specified domain and the default name "X".*/ public IntervalData(Domain d){ this(d, "X"); } /**This spcial constructor creates a new data distribution with a specified domain and the name "X"*/ public IntervalData(double a, double b, double w){ this(a, b, w, "X"); } /**This default constructor creates a new data distribution on the interval [0, 1] with subintervals of length 0.1, and the default name "X".*/ public IntervalData(){ this(0, 1, 0.1); } /**This method sets the domain of the data set.*/ public void setDomain(Domain d){ domain = d; reset(); } /**This method returns the domain.*/ public Domain getDomain(){ return domain; } /**This method sets the name of the data set.*/ public void setName(String n){ name = n; } /**This method gets the name of the data set.*/ public String getName(){ return name; } /**This method resets the data set*/ public void reset(){ freq = new int[domain.getSize()]; size = 0; minValue = domain.getUpperBound(); maxValue = domain.getLowerBound(); maxFreq = 0; } /**This method adds a new number to the data set and re-compute the mean, mean square, minimum and maximum values, the frequency distribution, and the mode*/ public void setValue(double x){ value = x; //Update the size of the data set: size++; //Re-compute mean and mean square mean = ((double)(size - 1) / size) * mean + value / size; meanSquare = ((double)(size - 1) / size) * meanSquare + value * value / size; //Recompute minimum and maximum values if (value < minValue) minValue = value; if (value > maxValue) maxValue = value; //Update frequency distribution int i = domain.getIndex(x); if (i >= 0 & i < domain.getSize()){ freq[i]++; //Re-compute mode if (freq[i] > maxFreq){ maxFreq = freq[i]; mode = domain.getValue(i); } else if (freq[i] == maxFreq) mode = Double.NaN; //There are two or more modes } } /**This method returns the current value of the data set*/ public double getValue(){ return value; } /**This method returns the domain value (midpoint) closest to given value of x*/ public double getDomainValue(double x){ return domain.getValue(domain.getIndex(x)); } /**This method returns the frequency of the class containing a given value of x.*/ public int getFreq(double x){ int i = domain.getIndex(x); if (i < 0 | i >= domain.getSize()) return 0; else return freq[i]; } /**This method returns the relative frequency of the class containing a given value.*/ public double getRelFreq(double x){ if (size > 0) return (double)(getFreq(x)) / size; else return 0; } /**This method returns the getDensity for a given value*/ public double getDensity(double x){ return getRelFreq(x) / domain.getWidth(); } /**This method returns the mean of the data set.*/ public double getMean(){ return mean; } /**This method returns the mean of the frequency distribution. The interval mean is an approximation to the true mean of the data set.*/ public double getIntervalMean(){ double sum = 0; for (int i = 0; i < domain.getSize(); i++) sum = sum + domain.getValue(i) * freq[i]; return sum / size; } /**This method returns the population variance*/ public double getVarianceP(){ double var = meanSquare - mean * mean; if (var < 0) var = 0; return var; } /**This method returns the population standard deviation.*/ public double getSDP(){ return Math.sqrt(getVarianceP()); } /**This method returns the sample variance.*/ public double getVariance(){ return ((double)size / (size - 1)) * getVarianceP(); } /**This method returns the sample standard deviation.*/ public double getSD(){ return Math.sqrt(getVariance()); } /**This method returns the interval variance.*/ public double getIntervalVariance(){ double m = getIntervalMean(), sum = 0, x; for (int i = 0; i < domain.getSize(); i++){ x = domain.getValue(i); sum = sum + (x - m) * (x - m) * freq[i]; } return sum / size; } /**This method returns the interval standard deviation.*/ public double getIntervalSD(){ return Math.sqrt(getIntervalVariance()); } /**This method returns the minimum value of the data set*/ public double getMinValue(){ return minValue; } /**This method returns the maximum value of the data set*/ public double getMaxValue(){ return maxValue; } /**This method computes the median of the values in the data set between two specified values*/ public double getMedian(double a, double b){ int sumFreq = 0, numValues = 0, lRank, uRank; double lValue = a - 1, uValue = b + 1, w = domain.getWidth(); //Compute sum of frequencies between a and b for (double x = a; x <= b + 0.5 * w; x = x + w) numValues = numValues + getFreq(x); //Determine parity and ranks if (2 * (numValues / 2) == numValues) { lRank = numValues / 2; uRank = lRank + 1; } else { lRank = (numValues + 1) / 2; uRank = lRank; } //Determine values for (double x = a; x <= b + 0.5 * w; x = x + w) { sumFreq = sumFreq + getFreq(x); if ((lValue == a - 1) & (sumFreq >= lRank)) lValue = x; if ((uValue == b + 1) & (sumFreq >= uRank)) uValue = x; } //Return average of upper and lower values return (uValue + lValue) / 2; } /**This method computes the median of the entire data set*/ public double getMedian(){ return getMedian(domain.getLowerValue(), domain.getUpperValue()); } /**This method returns the quartiles of the data set.*/ public double getQuartile(int i){ if (i < 1) i = 1; else if (i > 3) i = 3; if (i == 1) return getMedian(domain.getLowerValue(), getMedian()); else if (i == 2) return getMedian(); else return getMedian(getMedian(), domain.getUpperValue()); } /**This method computes the mean absoulte deviation*/ public double getMAD(){ double mad = 0, x; double m = getMedian(); for (int i = 0; i < domain.getSize(); i++){ x = domain.getValue(i); mad = mad + getRelFreq(x) * Math.abs(x - m); } return mad; } /**This method returns the number of pointCount in the data set*/ public int getSize(){ return size; } /**This method returns the maximum frequency*/ public int getMaxFreq(){ return maxFreq; } /**This method returns the maximum relative frequency.*/ public double getMaxRelFreq(){ if (size > 0) return (double)maxFreq / size; else return 0; } /**This method returns the maximum getDensity.*/ public double getMaxDensity(){ return getMaxRelFreq() / domain.getWidth(); } /**This method returns the mode of the distribution. The mode may not exist*/ public double getMode(){ return mode; } }