// AbstractUtility.java example
//
// Explorer
// ComplexRapidMiner-master
// - operator
// - src
/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.igss.utility;

import com.rapidminer.operator.learner.igss.hypothesis.Hypothesis;

/** Abstract superclass for all utility functions.
 *  Stores the class priors and the threshold that selects between the small-sample
 *  bound ({@link #confSmallM(double, double)}) and the normal approximation
 *  ({@code conf(...)}), and provides a numerical inverse of the standard normal
 *  distribution for use by subclasses.
 * 
 *  @author Dirk Dach 
 *  @version $Id: AbstractUtility.java,v 1.3 2008/05/09 19:23:24 ingomierswa Exp $
 */
public abstract class AbstractUtility implements Utility{
	
	/** Coarse step width used by {@link #calculateM(double, double)} before refining in steps of one. */
	private static final double COARSE_STEP = 10000;
	
	/** Numerator coefficients of the rational approximation for the central region. */
	private static final double[] CENTRAL_NUMERATOR = {
			-3.969683028665376e+01,  2.209460984245205e+02,
			-2.759285104469687e+02,  1.383577518672690e+02,
			-3.066479806614716e+01,  2.506628277459239e+00};
	
	/** Denominator coefficients of the rational approximation for the central region. */
	private static final double[] CENTRAL_DENOMINATOR = {
			-5.447609879822406e+01,  1.615858368580409e+02,
			-1.556989798598866e+02,  6.680131188771972e+01,
			-1.328068155288572e+01};
	
	/** Numerator coefficients of the rational approximation for the two tail regions. */
	private static final double[] TAIL_NUMERATOR = {
			-7.784894002430293e-03, -3.223964580411365e-01,
			-2.400758277161838e+00, -2.549732539343734e+00,
			 4.374664141464968e+00,  2.938163982698783e+00};
	
	/** Denominator coefficients of the rational approximation for the two tail regions. */
	private static final double[] TAIL_DENOMINATOR = {
			7.784695709041462e-03,  3.224671290700398e-01,
			2.445134137142996e+00,  3.754408661907416e+00};
	
	/** Probabilities below this break-point are handled by the lower tail approximation. */
	private static final double P_LOW = 0.02425;
	
	/** Probabilities above this break-point are handled by the upper tail approximation. */
	private static final double P_HIGH = 1 - P_LOW;
	
	/** The prior probability of the two classes of the label. */
	protected double[] priors;
	
	/** The number of covered examples before normal approximation is used. */
	protected int large;
	
	/** Constructor for all utilities.
	 *  The given array is copied completely, so later changes to the caller's array
	 *  do not affect this utility.
	 *
	 *  @param priors the prior probabilities of the label classes; must not be null
	 *  @param large  the threshold below which the small-sample bound is used instead of
	 *                the normal approximation
	 */
	public AbstractUtility (double[] priors, int large) {
		// Copy every entry. Copying a fixed count of two would fail for shorter arrays
		// and silently drop entries of longer ones.
		this.priors = priors.clone();
		this.large = large;
	}
	
	/** Calculates the M-value needed for the GSS algorithm.
	 *  Returns the first value i, starting at 1, for which
	 *  {@code conf(i, delta) <= epsilon / 2}. For performance the search first advances
	 *  in steps of 10000, then steps back once and refines in steps of one.
	 *  NOTE(review): termination relies on {@code conf} eventually dropping to
	 *  {@code epsilon / 2} or below; that depends on the subclass implementation.
	 *
	 *  @param delta   passed through unchanged to {@link #conf(double, double)}
	 *  @param epsilon the search stops once {@code conf} is at most half of this value
	 *  @return the M-value found by the search
	 */
	public double calculateM (double delta, double epsilon) {
		double halfEpsilon = epsilon/2.0d;
		double i = 1;
		// performance: start with step=10000
		while (conf(i,delta) > halfEpsilon) {
			i = i + COARSE_STEP;
		}
		if (i > 1) { // the coarse step has been executed at least once, so go back one step
			i = i - COARSE_STEP;
		}

		// refine in steps of one
		while (conf(i,delta) > halfEpsilon) {
			i++;
		}
		return Math.ceil(i);
	}
	
	/** Calculates the unspecific confidence interval.
	 *  Uses {@link #confSmallM(double, double)} if the number of random experiments is
	 *  smaller than {@link #large} and the normal approximation otherwise.
	 *  Considers the number of examples as the number of random experiments. This is
	 *  problematic for g*(p-p0) hypotheses that only cover a small amount of examples;
	 *  no normal approximation should be used in that case.
	 *
	 *  @param totalWeight the total weight of the examples seen
	 *  @param delta       passed through unchanged to the selected bound
	 *  @return the value of the selected bound
	 */
	public double confidenceIntervall (double totalWeight, double delta) {
		if (totalWeight < large) {
			return confSmallM(totalWeight,delta);
		}
		return conf(totalWeight,delta);
	}
	
	/** Calculates the confidence interval for a specific hypothesis.
	 *  Uses {@link #confSmallM(double, double)} if the covered weight of the hypothesis is
	 *  smaller than {@link #large} and the normal approximation otherwise.
	 *  This method is adapted for g*(p-p0) utility types. Every example for which the rule
	 *  is applicable is one random experiment.
	 *  Should be overridden by subclasses if they make a different random experiment.
	 *
	 *  @param totalWeight         the total weight of the examples seen
	 *  @param totalPositiveWeight the total weight of the positive examples seen
	 *  @param hypo                the hypothesis whose covered weight selects the bound
	 *  @param delta               passed through unchanged to the selected bound
	 *  @return the value of the selected bound
	 */
	public double confidenceIntervall (double totalWeight, double totalPositiveWeight, Hypothesis hypo, double delta) {
		if (hypo.getCoveredWeight() < large) {
			// NOTE(review): the threshold is checked on the covered weight, but the
			// small-sample bound is evaluated with totalWeight -- confirm this is intended.
			return confSmallM(totalWeight,delta);
		}
		return conf(totalWeight,totalPositiveWeight,hypo,delta);
	}
	
	/** Calculates the confidence interval for small numbers of examples. */
	public abstract double confSmallM (double totalWeight, double delta);

	/** Calculates the normal approximation of the confidence interval. */
	public abstract double conf(double totalWeight, double delta);
	
	/** Calculates the normal approximation of the confidence interval for a specific hypothesis. */
	public abstract double conf(double totalWeight, double totalPositiveWeight, Hypothesis hypo, double delta);

	/** Calculates the inverse of the normal distribution, e.g. inverseNormal(0.95)==1.64.
	 *  Uses separate rational approximations for the lower tail, the upper tail and the
	 *  central region.
	 *
	 *  @param p the probability
	 *  @return the approximated quantile; {@code Double.NEGATIVE_INFINITY} for {@code p == 0},
	 *          {@code Double.POSITIVE_INFINITY} for {@code p == 1} and {@code Double.NaN}
	 *          if {@code p} is NaN or lies outside of [0,1]
	 */
	public double inverseNormal(double p) {
		// Arguments outside of [0,1] (and NaN) have no quantile.
		if (Double.isNaN(p) || p < 0.0d || p > 1.0d) {
			return Double.NaN;
		}
		// At the boundaries the tail formulas would evaluate to infinity/infinity == NaN,
		// so the limits are returned explicitly.
		if (p == 0.0d) {
			return Double.NEGATIVE_INFINITY;
		}
		if (p == 1.0d) {
			return Double.POSITIVE_INFINITY;
		}

		final double[] a = CENTRAL_NUMERATOR;
		final double[] b = CENTRAL_DENOMINATOR;
		final double[] c = TAIL_NUMERATOR;
		final double[] d = TAIL_DENOMINATOR;

		// Rational approximation for lower region:
		if (p < P_LOW) {
			double q = Math.sqrt(-2*Math.log(p));
			return (((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /
					((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1);
		}

		// Rational approximation for upper region (mirrored lower region):
		if (P_HIGH < p) {
			double q = Math.sqrt(-2*Math.log(1-p));
			return -(((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /
					((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1);
		}

		// Rational approximation for central region:
		double q = p - 0.5;
		double r = q*q;
		return (((((a[0]*r+a[1])*r+a[2])*r+a[3])*r+a[4])*r+a[5])*q /
				(((((b[0]*r+b[1])*r+b[2])*r+b[3])*r+b[4])*r+1);
	}
	
}