/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.tools.math;
import java.awt.Color;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import javax.swing.JEditorPane;
import javax.swing.JLabel;
import com.rapidminer.gui.tools.ExtendedJScrollPane;
import com.rapidminer.gui.tools.SwingTools;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.tools.Tools;
/**
* Determines if the null hypothesis (all actual mean values are the same) holds
* for the given values. This class uses an ANalysis Of VAriances (ANOVA) approach
* to determine the probability of observing differences between the mean values
* at least as large as the given ones if the null hypothesis were true.
*
* @author Ingo Mierswa
* @version $Id: AnovaCalculator.java,v 1.6 2008/05/09 19:23:03 ingomierswa Exp $
*/
public class AnovaCalculator {
public static class AnovaSignificanceTestResult extends SignificanceTestResult {
private static final long serialVersionUID = 9007616378489018565L;
private double sumSquaresBetween = 0.0d;
private double sumSquaresResiduals = 0.0d;
private double meanSquaresBetween = 0.0d;
private double meanSquaresResiduals = 0.0d;
private int df1 = 0;
private int df2 = 0;
private double alpha = 0.05;;
private double fValue = 0.0d;
private double prob = 0.0d;
public AnovaSignificanceTestResult(double sumSquaresBetween, double sumSquaresResiduals, int df1, int df2, double alpha) {
this.sumSquaresBetween = sumSquaresBetween;
this.sumSquaresResiduals = sumSquaresResiduals;
this.df1 = df1;
this.df2 = df2;
this.alpha = alpha;
this.meanSquaresBetween = sumSquaresBetween / df1;
this.meanSquaresResiduals = sumSquaresResiduals / df2;
this.fValue = meanSquaresBetween / meanSquaresResiduals;
FDistribution fDist = new FDistribution(df1, df2);
this.prob = fDist.getProbabilityForValue(this.fValue);
if (this.prob < 0)
this.prob = 1.0d;
else
this.prob = 1.0d - this.prob;
}
public String getName() {
return "Anova Test";
}
public String toString() {
return "ANOVA result (f=" + Tools.formatNumber(fValue) + ", prob=" + Tools.formatNumber(prob) + ", alpha=" + Tools.formatNumber(alpha) + ")";
}
public double getProbability() {
return prob;
}
/**
* Returns a label that displays the {@link #toResultString()} result
* encoded as html.
*/
public java.awt.Component getVisualizationComponent(IOContainer container) {
StringBuffer buffer = new StringBuffer();
Color bgColor = SwingTools.LIGHTEST_YELLOW;
String bgColorString = "#" + Integer.toHexString(bgColor.getRed()) + Integer.toHexString(bgColor.getGreen()) + Integer.toHexString(bgColor.getBlue());
Color headerColor = SwingTools.LIGHTEST_BLUE;
String headerColorString = "#" + Integer.toHexString(headerColor.getRed()) + Integer.toHexString(headerColor.getGreen()) + Integer.toHexString(headerColor.getBlue());
buffer.append("<table bgcolor=\""+bgColorString+"\" border=\"1\">");
buffer.append("<tr bgcolor=\""+headerColorString+"\"><th>Source</th><th>Square Sums</th><th>DF</th><th>Mean Squares</th><th>F</th><th>Prob</th></tr>");
buffer.append("<tr><td>Between</td><td>" + Tools.formatNumber(sumSquaresBetween) + "</td><td>" + df1 + "</td><td>" + Tools.formatNumber(meanSquaresBetween) + "</td><td>" + Tools.formatNumber(fValue) + "</td><td>" + Tools.formatNumber(prob) + "</td></tr>");
buffer.append("<tr><td>Residuals</td><td>" + Tools.formatNumber(sumSquaresResiduals) + "</td><td>" + df2 + "</td><td>" + Tools.formatNumber(meanSquaresResiduals) + "</td><td></td><td></td></tr>");
buffer.append("<tr><td>Total</td><td>" + Tools.formatNumber(sumSquaresBetween + sumSquaresResiduals) + "</td><td>" + (df1 + df2) + "</td><td></td><td></td><td></td></tr>");
buffer.append("</table>");
buffer.append("<br>Probability for random values with the same result: " + Tools.formatNumber(prob) + "<br>");
if (prob < alpha)
buffer.append("Difference between actual mean values is probably significant, since " + Tools.formatNumber(prob) + " < alpha = " + Tools.formatNumber(alpha) + "!");
else
buffer.append("Difference between actual mean values is probably not significant, since " + Tools.formatNumber(prob) + " > alpha = " + Tools.formatNumber(alpha) + "!");
JEditorPane textPane = new JEditorPane("text/html", "<html><h1>" + getName() + "</h1>" + buffer.toString() + "</html>");
textPane.setBackground((new JLabel()).getBackground());
textPane.setBorder(javax.swing.BorderFactory.createEmptyBorder(11, 11, 11, 11));
return new ExtendedJScrollPane(textPane);
}
}
private double alpha = 0.05;
private List<TestGroup> groups = new LinkedList<TestGroup>();
public void setAlpha(double alpha) {
this.alpha = alpha;
}
public void addGroup(TestGroup group) {
groups.add(group);
}
public void addGroup(double numberOfValues, double mean, double variance) {
addGroup(new TestGroup(numberOfValues, mean, variance));
}
public void clearGroups() {
groups.clear();
}
public SignificanceTestResult performSignificanceTest() throws SignificanceCalculationException {
if (groups.size() < 2) {
throw new SignificanceCalculationException("Cannot calculate ANOVA: not enough groups added (current number of groups: " + groups.size() + ", must be at least 2");
}
double meanOfMeans = 0.0d;
Iterator<TestGroup> i = groups.iterator();
while (i.hasNext()) {
TestGroup group = i.next();
meanOfMeans += group.getMean();
}
meanOfMeans /= groups.size();
double sumSquaresBetween = 0.0d;
i = groups.iterator();
while (i.hasNext()) {
TestGroup group = i.next();
double diff = group.getMean() - meanOfMeans;
sumSquaresBetween += group.getNumber() * (diff * diff);
}
double sumSquaresResiduals = 0.0d;
int counterSum = 0;
i = groups.iterator();
while (i.hasNext()) {
TestGroup group = i.next();
sumSquaresResiduals += (group.getNumber() - 1) * group.getVariance();
counterSum += group.getNumber();
}
return new AnovaSignificanceTestResult(sumSquaresBetween, sumSquaresResiduals, groups.size() - 1, counterSum - groups.size(), alpha);
}
}