/*
* Apache License
* Version 2.0, January 2004
* http://www.apache.org/licenses/
*
* Copyright 2013 Aurelian Tutuianu
* Copyright 2014 Aurelian Tutuianu
* Copyright 2015 Aurelian Tutuianu
* Copyright 2016 Aurelian Tutuianu
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package rapaio.core.stat;
/**
* Class which implements core online statistics. This class does not hold
* values used for calculations, just the statistics itself and some additional
* elements required for calculations.
* <p>
* This is an extension over an algorithm presented by John D Cook
* http://www.johndcook.com/skewness_kurtosis.html.
* Which itself it is an extension of a method presented by Donald Knuth's The Art of Programming.
* Which itself it was proposed by B.P Welford.
* Which itself .. joking. This is the end of the recursion.
* <p>
* This class provides online statistics for:
* <ul>
* <ui>min - minimum value</ui>
* <ui>max - maximum value</ui>
* <ui>mean - mean of the values</ui>
* </ul>
*
* @author Aurelian Tutuianu
*/
public class OnlineStat {
public static OnlineStat empty() {
return new OnlineStat();
}
double n; // number of elements
double m1;
double m2;
double m3;
double m4;
double min = 0;
double max = 0;
private OnlineStat() {
clean();
}
public final void clean() {
n = 0;
min = 0;
max = 0;
m1 = 0;
m2 = 0;
m3 = 0;
m4 = 0;
}
/**
* For now implement this method using only positive values for times. It
* may be later modified in order to support negative values for times, with
* the new meaning that we "remove" elements from calculations and as a side
* effect to decrease the value of N;
*
* @param x value to be used to out statistics
*/
public void update(double x) {
double n1 = n;
n++;
double delta = (x - m1);
double delta_n = delta / n;
double delta_n2 = delta_n * delta_n;
double term1 = delta * delta_n * n1;
m1 += delta_n;
m4 += term1 * delta_n2 * (n * n - 3 * n + 3) + 6 * delta_n2 * m2 - 4 * delta_n * m3;
m3 += term1 * delta_n * (n - 2) - 3 * delta_n * m2;
m2 += term1;
min = Math.min(min, x);
max = Math.max(max, x);
}
/**
* @return the number of elements seen so far and used in calculation
*/
public double n() {
return n;
}
public double min() {
return min;
}
public double max() {
return max;
}
public double mean() {
return m1;
}
public double variance() {
return m2 / (n - 1.0);
}
public double sd() {
return Math.sqrt(variance());
}
public double skewness() {
return Math.sqrt(n) * m3 / Math.pow(m2, 1.5);
}
public double kurtosis() {
return n * m4 / (m2 * m2) - 3.0;
}
public void update(OnlineStat a) {
if (a.n == 0)
return;
OnlineStat combined = new OnlineStat();
combined.n += a.n + this.n;
double delta = this.m1 - a.m1;
double delta2 = delta * delta;
double delta3 = delta * delta2;
double delta4 = delta2 * delta2;
combined.m1 = (a.n * a.m1 + this.n * this.m1) / combined.n;
combined.m2 = a.m2 + this.m2 + delta2 * a.n * this.n / combined.n;
combined.m3 = a.m3 + this.m3 + delta3 * a.n * this.n * (a.n - this.n) / (combined.n * combined.n);
combined.m3 += 3.0 * delta * (a.n * this.m2 - this.n * a.m2) / combined.n;
combined.m4 = a.m4 + this.m4 + delta4 * a.n * this.n * (a.n * a.n - a.n * this.n + this.n * this.n) /
(combined.n * combined.n * combined.n);
combined.m4 += 6.0 * delta2 * (a.n * a.n * this.m2 + this.n * this.n * a.m2) / (combined.n * combined.n) +
4.0 * delta * (a.n * this.m3 - this.n * a.m3) / combined.n;
combined.min = Math.min(this.min, a.min);
combined.max = Math.max(this.max, a.max);
n = combined.n;
m1 = combined.m1;
m2 = combined.m2;
m3 = combined.m3;
m4 = combined.m4;
min = combined.min;
max = combined.max;
}
}