/*
This file is part of Cyclos (www.cyclos.org).
A project of the Social Trade Organisation (www.socialtrade.org).
Cyclos is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Cyclos is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Cyclos; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package nl.strohalm.cyclos.utils.statistics;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import nl.strohalm.cyclos.entities.reports.StatisticalNumber;
import nl.strohalm.cyclos.services.stats.StatisticalService;
import JSci.maths.statistics.NormalDistribution;
/**
 * Calculates the median of a population, optionally with a confidence interval. The median is the preferred estimator for the center of a
 * population when the population cannot be assumed to be normally distributed, e.g. when the distribution is likely to be skewed or to contain
 * extreme outliers. In such cases the median is more robust than the average.
 *
 * The class calculates the median from a given list of Numbers or array of doubles. It may be used in a static way through the
 * <code>getMedian(...)</code> helpers, or as an instance when the separate limits of the confidence interval are needed.
 *
 * Note that the constructors taking a list sort that very list in place; no copy is made.
 *
 * @author Rinke
 *
 */
public class Median {

    /*
     * If the relative difference between the upper half and lower half of the confidence interval is less than this, the confidence interval is
     * considered symmetric.
     */
    private static final double CONFIDENCE_SYMMETRY_LIMIT = 0.05;

    /*
     * Orders Numbers by their double value. Double.compare yields a consistent total order, also for numerically equal values of different
     * boxed types (e.g. Integer 1 and Double 1.0) and for NaN, which a plain "<" test combined with equals() does not.
     */
    private static final Comparator<Number> BY_DOUBLE_VALUE = new Comparator<Number>() {
        public int compare(final Number a, final Number b) {
            return Double.compare(a.doubleValue(), b.doubleValue());
        }
    };

    /**
     * Gets the median of a range of numbers. Takes a double[] as input
     * @param data an array of doubles for which the median must be calculated
     * @return a double being the median of the range
     */
    public static double getMedian(final double[] data) {
        return Median.getMedian(ListOperations.arrayToList(data));
    }

    /**
     * Gets the median of a range of numbers, including its confidence interval.
     * @param data an array of doubles for which the median must be calculated
     * @param alpha a double indicating the level of the confidence interval. A level of 0.05 gives a 95% confidence interval
     * @return a StatisticalNumber indicating the median, complete with confidence interval.
     */
    public static StatisticalNumber getMedian(final double[] data, final double alpha) {
        return Median.getMedian(ListOperations.arrayToList(data), alpha);
    }

    /**
     * Static method for retrieving the median. Unless the list has exactly one element, the passed list is sorted in place.
     * @param l the list of Numbers to retrieve the median from
     * @return the median as a simple double.
     */
    public static double getMedian(final List<Number> l) {
        if (l.size() == 1) {
            // nothing to sort or to look up
            return l.get(0).doubleValue();
        }
        return new Median(l).getMedian();
    }

    /**
     * Static method returning the median as a StatisticalNumber, including the confidence interval.
     * @param l the data; it is sorted in place when a median is calculated
     * @param alpha the level. A level of 0.05 brings a 95% confidence interval
     * @return a StatisticalNumber representing the median PLUS a confidence interval around it. If the list is empty or holds less than
     * <code>StatisticalService.MINIMUM_NUMBER_OF_VALUES</code> elements, a StatisticalNumber created with the no-argument constructor is
     * returned. A single element is returned as is, without confidence interval.
     */
    public static StatisticalNumber getMedian(final List<Number> l, final double alpha) {
        if (l.isEmpty() || l.size() < StatisticalService.MINIMUM_NUMBER_OF_VALUES) {
            return new StatisticalNumber();
        }
        if (l.size() == 1) {
            return new StatisticalNumber(l.get(0).doubleValue(), (byte) 2);
        }
        final Median median = new Median(l, alpha);
        final Double halfOfInterval = median.getHalfOfConfidenceInterval();
        if (halfOfInterval == null) {
            // asymmetric interval: report both limits separately
            return new StatisticalNumber(median.getMedian(), median.getLowerLimitConfidenceInterval(), median.getUpperLimitConfidenceInterval(), (byte) 2);
        }
        return new StatisticalNumber(median.getMedian(), halfOfInterval, (byte) 2);
    }

    /**
     * the data, sorted ascending by double value
     */
    private final List<Number> list;

    /**
     * the test level, usually 5% (0.05). This would generate a 95% confidence interval. Stays 0 when a constructor without alpha is used.
     */
    private double alpha;

    /**
     * the result value; null until getMedian() has been called
     */
    private Double median;

    /**
     * the upper limit of the confidence interval; null until calculated
     */
    private Double upper = null;

    /**
     * the lower limit of the confidence interval; null until calculated
     */
    private Double lower = null;

    /**
     * Constructor taking an array as input. No confidence interval can be requested from the resulting instance.
     * @param d the data
     */
    public Median(final double[] d) {
        this(ListOperations.arrayToList(d));
    }

    /**
     * Constructor taking an array as input, to be used in case a confidence interval is requested.
     * @param d the data
     * @param alpha the level for testing. Usually 5% (0.05), thus generating a 95% confidence interval.
     */
    public Median(final double[] d, final double alpha) {
        this(ListOperations.arrayToList(d), alpha);
    }

    /**
     * Constructor taking a list as input. Sorts the passed list immediately and in place, and keeps a reference to it.
     * @param list the data
     */
    public Median(final List<Number> list) {
        Collections.sort(list, BY_DOUBLE_VALUE);
        this.list = list;
    }

    /**
     * This constructor is to be used in case a confidence interval is requested. Sorts the passed list immediately and in place.
     * @param list - the data
     * @param alpha - the level for testing. Usually 5% (0.05), thus generating a 95% confidence interval.
     */
    public Median(final List<Number> list, final double alpha) {
        this(list);
        this.alpha = alpha;
    }

    /**
     * This method gives the half value of the confidence interval width, so that it can be used in this form: 12 +/- 1.2, where the 1.2 would be
     * the half of the confidence interval width. If the confidence interval is NOT symmetric (which can happen with medians), then null is
     * returned. The method tests for this symmetry: if the lower and upper half range differ 5% or more (relative to their mean) the interval
     * is considered a-symmetrical. An interval of zero width is considered symmetric.
     *
     * @return a Double indicating HALF of the confidence interval width. If the confidence interval is NOT symmetric, or if the list holds less
     * than <code>StatisticalService.MINIMUM_NUMBER_OF_VALUES</code> elements, null is returned.
     */
    public Double getHalfOfConfidenceInterval() {
        if (list.size() < StatisticalService.MINIMUM_NUMBER_OF_VALUES) {
            return null;
        }
        if (median == null) {
            this.getMedian();
        }
        if (lower == null) {
            calculateConfidenceInterval();
        }
        final double lowerHalfWidth = median - lower;
        final double upperHalfWidth = upper - median;
        final double halfWidth = (lowerHalfWidth + upperHalfWidth) / 2.0;
        if (halfWidth == 0.0) {
            // both limits coincide with the median; the ratio below would be 0/0 = NaN and wrongly signal asymmetry
            return Double.valueOf(0.0);
        }
        final double asymmetricality = Math.abs((lowerHalfWidth - upperHalfWidth) / halfWidth);
        if (asymmetricality < Median.CONFIDENCE_SYMMETRY_LIMIT) {
            return Double.valueOf(halfWidth);
        }
        return null;
    }

    /**
     * Calculates the lower limit of the confidence interval. The level alpha must be set via the constructor.
     *
     * @return the lower limit of the confidence interval. If the list holds less than <code>StatisticalService.MINIMUM_NUMBER_OF_VALUES</code>
     * elements, the median itself is returned.
     */
    public double getLowerLimitConfidenceInterval() {
        if (list.size() < StatisticalService.MINIMUM_NUMBER_OF_VALUES) {
            return this.getMedian();
        }
        if (lower == null) {
            calculateConfidenceInterval();
        }
        return lower.doubleValue();
    }

    /**
     * This method returns the median, being the element at the (possibly fractional) center index of the sorted list.
     * @return the median.
     *
     */
    public double getMedian() {
        // for an even number of elements the center lies halfway between two indexes; resolving that is left to ListOperations
        final double center = (list.size() - 1.0) / 2.0;
        median = ListOperations.getElementFromIndex(list, center);
        return median;
    }

    /**
     * Calculates the upper limit of the confidence interval. The level alpha must be set via the constructor.
     *
     * @return the upper limit of the confidence interval. If the list holds less than <code>StatisticalService.MINIMUM_NUMBER_OF_VALUES</code>
     * elements, the median itself is returned.
     */
    public double getUpperLimitConfidenceInterval() {
        if (list.size() < StatisticalService.MINIMUM_NUMBER_OF_VALUES) {
            return this.getMedian();
        }
        if (upper == null) {
            calculateConfidenceInterval();
        }
        return upper.doubleValue();
    }

    /*
     * Does the actual calculation of the confidence interval and stores the result in the fields lower and upper. The limits are the elements at
     * the ranks (n + 1) / 2 -/+ z * sqrt(n) / 2, where z is the (1 - alpha / 2) quantile of the standard normal distribution. Ranks falling
     * outside the list are clamped to the first and last element.
     *
     * Throws a NullPointerException if alpha was not set to a positive value. The exception type is kept as is for compatibility with existing
     * callers.
     */
    private void calculateConfidenceInterval() {
        if (alpha <= 0) {
            throw new NullPointerException("Trying to calculate a confidence interval without setting alpha. " + "Use the other constructor of Median.");
        }
        final int n = list.size();
        final double zAlpha = new NormalDistribution().inverse((1.0 - alpha / 2.0));
        // the number of observations below the median follows Binomial(n, 1/2), having a standard deviation of sqrt(n) / 2
        final double halfWidth = zAlpha * Math.sqrt(n) / 2.0;
        // the final -1 is not according to the official formula, but because computer indexes start at 0 in stead of starting at 1
        final double centerIndex = ((n + 1) / 2.0) - 1;
        double lowerIndex = centerIndex - halfWidth;
        double upperIndex = centerIndex + halfWidth;
        if (lowerIndex < 0) {
            lowerIndex = 0;
        }
        if (upperIndex >= n) {
            upperIndex = n - 1;
        }
        lower = ListOperations.getElementFromIndex(list, lowerIndex);
        upper = ListOperations.getElementFromIndex(list, upperIndex);
    }
}