AbstractUtility.java example

Explorer
rapidminer-studio-master
- doc
  - doc
- src
/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 * 
 * Complete list of developers available at our web site:
 * 
 * http://rapidminer.com
 * 
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.igss.utility;

import com.rapidminer.operator.learner.igss.hypothesis.Hypothesis;


/**
 * Abstract superclass for all utility functions.
 * <p>
 * Bundles the logic shared by the concrete utilities: the search for the M-value, the switch between
 * the small-sample bound ({@code confSmallM}) and the normal approximation ({@code conf}), and a
 * rational approximation of the inverse of the normal distribution.
 * 
 * @author Dirk Dach
 */
public abstract class AbstractUtility implements Utility {

	/** Step width of the coarse search phase in {@link #calculateM(double, double)}. */
	private static final double COARSE_STEP = 10000;

	// Coefficients of the rational approximations used by inverseNormal(double).
	// NOTE(review): the values appear to match Peter J. Acklam's published algorithm -- confirm
	// before citing it as the source.

	/** Numerator coefficients for the central region. */
	private static final double[] CENTRAL_NUMERATOR = { -3.969683028665376e+01, 2.209460984245205e+02,
			-2.759285104469687e+02, 1.383577518672690e+02, -3.066479806614716e+01, 2.506628277459239e+00 };

	/** Denominator coefficients for the central region. */
	private static final double[] CENTRAL_DENOMINATOR = { -5.447609879822406e+01, 1.615858368580409e+02,
			-1.556989798598866e+02, 6.680131188771972e+01, -1.328068155288572e+01 };

	/** Numerator coefficients for the lower and upper tail regions. */
	private static final double[] TAIL_NUMERATOR = { -7.784894002430293e-03, -3.223964580411365e-01,
			-2.400758277161838e+00, -2.549732539343734e+00, 4.374664141464968e+00, 2.938163982698783e+00 };

	/** Denominator coefficients for the lower and upper tail regions. */
	private static final double[] TAIL_DENOMINATOR = { 7.784695709041462e-03, 3.224671290700398e-01,
			2.445134137142996e+00, 3.754408661907416e+00 };

	/** Probabilities below this break-point are handled by the lower tail approximation. */
	private static final double LOWER_BREAK_POINT = 0.02425;

	/** Probabilities above this break-point are handled by the upper tail approximation. */
	private static final double UPPER_BREAK_POINT = 1 - LOWER_BREAK_POINT;

	/** The prior probability of the two classes of the label. */
	protected double[] priors;

	/** The number of covered examples before normal approximation is used. */
	protected int large;

	/**
	 * Constructor for all utilities.
	 * <p>
	 * Stores a private copy of the complete {@code priors} array, so later changes to the caller's
	 * array do not affect this utility. The copy has the same length as the argument; it is no
	 * longer assumed that the array holds exactly two entries.
	 * 
	 * @param priors
	 *            the prior probabilities of the label classes; must not be {@code null}
	 * @param large
	 *            the number of covered examples from which on the normal approximation is used
	 * @throws NullPointerException
	 *             if {@code priors} is {@code null}
	 */
	public AbstractUtility(double[] priors, int large) {
		// clone() copies every element; the former fixed-length copy of 2 failed for shorter
		// arrays and silently dropped the tail of longer ones.
		this.priors = priors.clone();
		this.large = large;
	}

	/**
	 * Calculates the M-value needed for the GSS algorithm.
	 * <p>
	 * Searches for the first value {@code i}, starting at 1, for which {@code conf(i, delta)} is no
	 * longer greater than {@code epsilon / 2}. For performance the search first advances in steps
	 * of 10000, then goes back one step and continues in steps of 1.
	 * 
	 * @param delta
	 *            passed through unchanged to {@link #conf(double, double)}
	 * @param epsilon
	 *            the search stops once the confidence interval is at most half of this value
	 * @return the value found by the search, rounded up
	 */
	@Override
	public double calculateM(double delta, double epsilon) {
		final double threshold = epsilon / 2.0d;

		// Coarse phase: overshoot the target in large steps.
		double i = 1;
		while (conf(i, delta) > threshold) {
			i = i + COARSE_STEP;
		}
		if (i > 1) { // the coarse step has been executed at least once, so undo the last one.
			i = i - COARSE_STEP;
		}

		// Fine phase: approach the target in unit steps.
		while (conf(i, delta) > threshold) {
			i++;
		}
		return Math.ceil(i);
	}

	/**
	 * Calculates the unspecific confidence interval. Uses Chernoff bounds if the number of random
	 * experiments is too small and normal approximation otherwise. Considers the number of examples
	 * as the number of random experiments. This is problematic for g*(p-p0) hypotheses that only
	 * cover a small amount of examples. No normal approximation should be used in this case.
	 * 
	 * @param totalWeight
	 *            compared against {@link #large} to select the bound, and passed to it
	 * @param delta
	 *            passed through unchanged to the selected bound
	 * @return {@code confSmallM(totalWeight, delta)} if {@code totalWeight < large}, otherwise
	 *         {@code conf(totalWeight, delta)}
	 */
	@Override
	public double confidenceIntervall(double totalWeight, double delta) {
		if (totalWeight < large) {
			return confSmallM(totalWeight, delta);
		} else {
			return conf(totalWeight, delta);
		}
	}

	/**
	 * Calculates the confidence interval for a specific hypothesis. Uses Chernoff bounds if the
	 * number of random experiments is too small and normal approximation otherwise. This method is
	 * adapted for g*(p-p0) utility types. Every example for which the rule is applicable is one
	 * random experiment. Should be overridden by subclasses if they make a different random
	 * experiment.
	 * <p>
	 * NOTE(review): the small-sample branch is selected by the covered weight of the hypothesis but
	 * is evaluated with {@code totalWeight}; confirm that this is intended.
	 * 
	 * @param totalWeight
	 *            passed to the selected bound
	 * @param totalPositiveWeight
	 *            passed to the hypothesis-specific normal approximation only
	 * @param hypo
	 *            the hypothesis; its covered weight is compared against {@link #large}
	 * @param delta
	 *            passed through unchanged to the selected bound
	 * @return {@code confSmallM(totalWeight, delta)} if the covered weight of {@code hypo} is below
	 *         {@link #large}, otherwise {@code conf(totalWeight, totalPositiveWeight, hypo, delta)}
	 */
	@Override
	public double confidenceIntervall(double totalWeight, double totalPositiveWeight, Hypothesis hypo, double delta) {
		if (hypo.getCoveredWeight() < large) {
			return confSmallM(totalWeight, delta);
		} else {
			return conf(totalWeight, totalPositiveWeight, hypo, delta);
		}
	}

	/** Calculates the confidence interval for small numbers of examples. */
	public abstract double confSmallM(double totalWeight, double delta);

	/** Calculates the normal approximation of the confidence interval. */
	public abstract double conf(double totalWeight, double delta);

	/** Calculates the normal approximation of the confidence interval for a specific hypothesis. */
	public abstract double conf(double totalWeight, double totalPositiveWeight, Hypothesis hypo, double delta);

	/**
	 * Calculates the inverse of the normal distribution, e.g. inverseNormal(0.95) is approximately
	 * 1.64.
	 * <p>
	 * Three rational approximations are used, selected by break-points at 0.02425 and 1 - 0.02425:
	 * one for the lower tail, one for the upper tail and one for the central region. No argument
	 * validation is performed: for {@code p <= 0}, {@code p >= 1} or {@code NaN} the floating-point
	 * arithmetic yields {@code NaN}.
	 * 
	 * @param p
	 *            the probability, expected to lie strictly between 0 and 1
	 * @return the approximated quantile belonging to {@code p}
	 */
	public double inverseNormal(double p) {
		final double[] a = CENTRAL_NUMERATOR;
		final double[] b = CENTRAL_DENOMINATOR;
		final double[] c = TAIL_NUMERATOR;
		final double[] d = TAIL_DENOMINATOR;

		// Rational approximation for lower region:
		if (p < LOWER_BREAK_POINT) {
			double q = Math.sqrt(-2 * Math.log(p));
			return (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
					/ ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1);
		}

		// Rational approximation for upper region (mirrored lower region):
		if (UPPER_BREAK_POINT < p) {
			double q = Math.sqrt(-2 * Math.log(1 - p));
			return -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
					/ ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1);
		}

		// Rational approximation for central region:
		double q = p - 0.5;
		double r = q * q;
		return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * q
				/ (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1);
	}

}