package com.ewjordan.util;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;

/**
 * Stores observations and calculates summary statistics.
 * This class is able to calculate distributional characteristics
 * apart from mean and variance because it stores the observations
 * in addition to the summary statistics.
 *
 * <p>Order statistics (median, quantiles, cumulative proportions) are served
 * from a lazily built sorted copy of the observations; the copy is rebuilt
 * after new values are added.
 *
 * @author eric
 */
public class StoredObservationSummary extends ObservationSummary {

    /** Every observation passed to {@link #add(double)}, in insertion order. */
    protected DoubleList observations;

    /** Ascending copy of {@link #observations}; only valid while {@link #isSorted} is true. */
    private List<Double> sortedObservations;

    /** True when {@link #sortedObservations} reflects the current observations. */
    private boolean isSorted;

    /**
     * Constructor. Creates a summary with no observations.
     */
    public StoredObservationSummary() {
        super();
        observations = new DoubleList();
        isSorted = false;
    }

    /**
     * Records the observation, forwards it to the superclass so the running
     * statistics stay current, and invalidates the sorted cache.
     *
     * @param value the observation to add
     * @see ObservationSummary#add(double)
     */
    @Override
    public void add(double value) {
        observations.add(value);
        super.add(value);
        isSorted = false;
    }

    /**
     * Returns the stored observations.
     *
     * @return the internal list itself, not a copy
     */
    public DoubleList getObservations() {
        return observations;
    }

    /**
     * Ensures {@link #sortedObservations} is an ascending copy of the current
     * observations, rebuilding it only when necessary.
     */
    private void sortObservations() {
        int count = observations.size();
        // The size comparison is a cheap guard against the list having grown
        // or shrunk without add() being called (it is reachable through
        // getObservations() and through the protected field).
        if (isSorted && sortedObservations != null && sortedObservations.size() == count) {
            return;
        }
        List<Double> sorted = new ArrayList<Double>(count);
        for (int i = 0; i < count; ++i) {
            sorted.add(observations.get(i));
        }
        Collections.sort(sorted);
        sortedObservations = sorted;
        isSorted = true;
    }

    /**
     * Return the median value of the distribution.
     *
     * @return the middle observation, the average of the two middle
     *         observations when the count is even, or 0.0 when there are no
     *         observations
     */
    public double getMedian() {
        sortObservations();
        int size = sortedObservations.size();
        if (size == 0) {
            return 0.0;
        }
        int middle = size / 2;
        if (size % 2 == 0) {
            return .5 * (sortedObservations.get(middle) + sortedObservations.get(middle - 1));
        }
        return sortedObservations.get(middle);
    }

    /**
     * Return the proportion of observations that are lower than this value.
     *
     * <p>The comparison is strict: observations equal to {@code value} are not
     * counted, regardless of where in the distribution they fall.
     *
     * @param value the threshold to compare observations against
     * @return the number of observations strictly below {@code value} divided
     *         by the total number of observations, or 0.0 when there are no
     *         observations
     */
    public double getProportionAtValue(double value) {
        sortObservations();
        int size = sortedObservations.size();
        if (size == 0) {
            return 0.0;
        }
        // Lower-bound binary search: on exit, low is the index of the first
        // element that is NOT strictly below value, i.e. the count of elements
        // that are. NaN elements sort last and never compare as "below", and a
        // NaN value compares below nothing, so the predicate stays monotone.
        int low = 0;
        int high = size;
        while (low < high) {
            int mid = (low + high) >>> 1;
            if (sortedObservations.get(mid) < value) {
                low = mid + 1;
            } else {
                high = mid;
            }
        }
        return (double) low / size;
    }

    /**
     * Return the value at the given fractional point of the cumulative distribution.
     * For example, getValueAtProportion(0.5) will give the median of the distribution.
     *
     * <p>The result is interpolated between the two sorted observations that
     * bracket the requested position. Fractions outside [0, 1] are clamped to
     * that range.
     *
     * @param fraction position in the cumulative distribution, nominally in [0, 1]
     * @return the interpolated value, the single observation when there is
     *         exactly one, or 0.0 when there are no observations
     */
    public double getValueAtProportion(double fraction) {
        // Sort before inspecting the cache: it may be null (never built) or
        // stale (built before later calls to add()).
        sortObservations();
        int size = sortedObservations.size();
        if (size == 0) {
            return 0.0;
        }
        if (size == 1) {
            return sortedObservations.get(0);
        }

        // Clamping up front keeps both indices in range and avoids int
        // overflow in lowIndex + 1 for absurdly large fractions. A NaN
        // fraction passes through and yields NaN.
        double clamped = Math.max(0.0, Math.min(1.0, fraction));
        double position = clamped * (size - 1);
        int lowIndex = (int) position;
        int highIndex = Math.min(lowIndex + 1, size - 1);
        double proportion = position - lowIndex;

        double lowValue = sortedObservations.get(lowIndex);
        double highValue = sortedObservations.get(highIndex);
        // NOTE(review): MathUtil.map is presumably a linear re-mapping of
        // proportion from [0, 1] onto [lowValue, highValue] - confirm.
        return MathUtil.map(proportion, 0, 1, lowValue, highValue);
    }

    /**
     * Prints the mean, standard deviation, median and 95% mean confidence
     * half-width to standard output, followed by a 20-bucket text histogram
     * of the observations.
     */
    public void printResults() {
        printSummaryLines();
        buildReportHistogram().printTextRepresentation(80);
    }

    /**
     * Same as {@link #printResults()}, but passes a number format string on to
     * the histogram's text rendering.
     *
     * @param numberFormatString format forwarded to
     *        {@code DoubleHistogram.printTextRepresentation}
     */
    public void printResults(String numberFormatString) {
        printSummaryLines();
        buildReportHistogram().printTextRepresentation(80, numberFormatString);
    }

    /** Prints the scalar summary statistics shared by both printResults overloads. */
    private void printSummaryLines() {
        System.out.println("Mean: " + getArithmeticMean());
        System.out.println("Stdev: " + getStandardDeviation());
        System.out.println("Median: " + getMedian());
        System.out.println("Confidence interval of mean @ 95%: +/- " + getMeanConfidence(0.95));
    }

    /** Builds the titled 20-bucket histogram shared by both printResults overloads. */
    private DoubleHistogram buildReportHistogram() {
        double[] doubles = ArrayUtil.toDoubleArray(observations);
        DoubleHistogram histo = new DoubleHistogram(doubles, 20);
        histo.setTitle("Distribution of observations");
        return histo;
    }

    /**
     * Builds a histogram of the observations.
     *
     * @param nBuckets number of buckets passed to the histogram
     * @return a new histogram over the current observations
     */
    public DoubleHistogram getDoubleHistogram(int nBuckets) {
        double[] doubles = ArrayUtil.toDoubleArray(observations);
        return new DoubleHistogram(doubles, nBuckets);
    }

    /**
     * Builds a histogram of the observations with explicit bounds.
     *
     * @param nBuckets number of buckets passed to the histogram
     * @param minVal lower bound passed to the histogram
     * @param maxVal upper bound passed to the histogram
     * @return a new histogram over the current observations
     */
    public DoubleHistogram getDoubleHistogram(int nBuckets, double minVal, double maxVal) {
        double[] doubles = ArrayUtil.toDoubleArray(observations);
        return new DoubleHistogram(doubles, nBuckets, minVal, maxVal);
    }

    /**
     * Tests. Feeds Gaussian random samples into a summary and prints its
     * statistics and a histogram so they can be checked by eye.
     */
    public static void main(String[] args) {
        StoredObservationSummary summary = new StoredObservationSummary();
        Random random = new Random();
        for (int i = 0; i < 1000001; ++i) {
            summary.add(random.nextGaussian());
        }
        System.out.println("Mean is " + summary.getArithmeticMean());
        System.out.println("Stdev is " + summary.getStandardDeviation());
        System.out.println("Median is " + summary.getMedian());
        System.out.println("which should equal " + summary.getValueAtProportion(0.5));
        System.out.println("Confidence interval of mean @ 95% is +/- " + summary.getMeanConfidence(0.95));
        System.out.println("Distribution value at 95% is " + summary.getValueAtProportion(0.95));
        double[] doubles = ArrayUtil.toDoubleArray(summary.observations);
        DoubleHistogram histo = new DoubleHistogram(doubles, 30);
        histo.printTextRepresentation(80);
    }
}