AlgoFitLogistic.java example

Explorer
geogebra-master
package org.geogebra.common.kernel.statistics;

/* 
 GeoGebra - Dynamic Mathematics for Everyone
 http://www.geogebra.org

 This file is part of GeoGebra.

 This program is free software; you can redistribute it and/or modify it 
 under the terms of the GNU General Public License as published by 
 the Free Software Foundation.

 */

import java.util.Iterator;
import java.util.TreeSet;

import org.geogebra.common.kernel.Construction;
import org.geogebra.common.kernel.algos.AlgoElement;
import org.geogebra.common.kernel.arithmetic.ExpressionNode;
import org.geogebra.common.kernel.arithmetic.ExpressionValue;
import org.geogebra.common.kernel.arithmetic.Function;
import org.geogebra.common.kernel.arithmetic.FunctionVariable;
import org.geogebra.common.kernel.arithmetic.MyDouble;
import org.geogebra.common.kernel.commands.Commands;
import org.geogebra.common.kernel.geos.GeoElement;
import org.geogebra.common.kernel.geos.GeoFunction;
import org.geogebra.common.kernel.geos.GeoList;
import org.geogebra.common.kernel.geos.GeoPoint;
import org.geogebra.common.plugin.Operation;
import org.geogebra.common.util.debug.Log;

/*******************
 * AlgoFitLogistic * *****************
 * 
 * @author Hans-Petter Ulven
 * @version 22.11.08 20.11: got rid off undefined parts of functions errorMsg()
 *          to call Application.debug() 22.11: Got rid of all testcode except
 *          final outcommented hook: runTest(x[],y[] 08.12: Handling negative
 *          a,b or c...(see findParameters()) Some cleaning up editing. ToDo:
 *          -Better handling of negative a,b and c. (Not sure if anyone needs
 *          that, but still...) -factor out:
 *          error=SortPointList(GeoList,GeoList) (and other GeoList/array
 *          processing) -nice to have:
 *          error=sortPointList2Array(Geolist,x[],y[]);
 * 
 *          Fits c/(1+aexp(-bx)) to a list of points. Adapted from: Nonlinear
 *          regression algorithms are well known, see:
 *          mathworld.wolfram.com/NonlinearLeastSquaresFitting.html
 *          ics.forth.gr/~lourakis/levmar Damping Parameter in Marquardt's
 *          Method, Hans Bruun Nielsen, IMM-Rep 1999-05
 * 
 *          The problem is to find the best initial values for the parameters,
 *          little information available on this problem...
 * 
 *          Bjorn Ove Thue, the norwegian translator and programmer of the
 *          norwegian version of WxMaxima, was kind enough to give me his idea:
 *          Make the assumption that the first and last point are close to the
 *          solution curve. Calculate c and a from those points, with b as
 *          parameter, iterate to a good value for b, and do the final nonlinear
 *          regression iteration with all three parameters.
 * 
 *          Constraints: <List of points> should have at least 3 points. The
 *          first and last datapoint should not be too far from the solution
 *          curve. Negative a,b and c: Asymptotes: Quality of points get even
 *          more important, should not be too close to either vertical or
 *          horisontal asymptotes, should have several "good" points on each
 *          branch of the curve. (Positive a, b and c is quite robust though.
 *          :-) ) Problems: Non-linear regression is difficult, and the choice
 *          of initial values for the parameters are highly critical. The
 *          algorithm here might converge to a local minimum. The algoritm might
 *          diverge, so after MAXITERATIONS the result will be unusable
 * 
 *          Possible future solution: Make more commands where you give both a
 *          list and suggestions for the parameters?
 */

public final class AlgoFitLogistic extends AlgoElement implements FitAlgo {

	// Tuning of noisefilter, Levenberg-Marquardt iteration, debug, rounding off
	// errors
	private final static double LMFACTORDIV = 3.0d;
	private final static double LMFACTORMULT = 2.0d;
	private final static int MAXITERATIONS = 200; // 100 probably enough,
													// nothing is usable after
													// that...
	private final static double EPSILONFIND = 1E-6d;
	private final static double EPSILONREG = 1E-14d;
	private final static double EPSSING = 1E-20d;

	// Properties
	private double a, b, c; // c/(1+a*exp(-bx))
	private double[] xd, yd; // datapoints
	private int size; // of xd and yd
	private int iterations; // LM iterations
	private boolean error = false; // general error flag

	// Flags:
	private boolean allplus, allneg; // flags for y-values, set by
										// getPoints();

	// GeoGebra obligatory:
	private GeoList geolist; // input
	private GeoFunction geofunction; // output

	/**
	 * @param cons
	 *            construction
	 * @param label
	 *            label for output
	 * @param geolist
	 *            input points
	 */
	public AlgoFitLogistic(Construction cons, String label, GeoList geolist) {
		this(cons, geolist);
		geofunction.setLabel(label);
	}// Constructor

	/**
	 * @param cons
	 *            construction
	 * @param geolist
	 *            input points
	 */
	public AlgoFitLogistic(Construction cons, GeoList geolist) {
		super(cons);
		this.geolist = geolist;
		geofunction = new GeoFunction(cons);
		setInputOutput();
		compute();
	}// Constructor

	@Override
	public Commands getClassName() {
		return Commands.FitLogistic;
	}

	@Override
	protected void setInputOutput() {
		input = new GeoElement[1];
		input[0] = geolist;
		setOnlyOutput(geofunction);
		setDependencies();
	}// setInputOutput()

	/**
	 * @return resulting function
	 */
	public GeoFunction getFitLogistic() {
		return geofunction;
	}

	@Override
	public final void compute() {
		size = geolist.size();
		error = false; // General flag
		if (!geolist.isDefined() || (size < 3)) { // Need three points, at the
													// very least...
			geofunction.setUndefined();
			Log.debug(
					"List not properly defined or too small. (3 points needed, but the more points, the better result!)");
			return;
		}

		getPoints(); // sorts the points on x while getting them!
		try {
			doReg();
		} catch (Exception all) {
			error = true;
		} // try-catch
		if (!error) {
			// a=2.0d;c=3.0d;b=0.5d;
			MyDouble A = new MyDouble(kernel, a);
			MyDouble B = new MyDouble(kernel, -b);
			MyDouble C = new MyDouble(kernel, c);
			MyDouble ONE = new MyDouble(kernel, 1.0d);
			FunctionVariable X = new FunctionVariable(kernel);
			ExpressionValue expr = new ExpressionNode(kernel, B,
					Operation.MULTIPLY, X);
			expr = new ExpressionNode(kernel, expr, Operation.EXP, null);
			expr = new ExpressionNode(kernel, A, Operation.MULTIPLY, expr);
			expr = new ExpressionNode(kernel, ONE, Operation.PLUS, expr);
			ExpressionNode node = new ExpressionNode(kernel, C,
					Operation.DIVIDE, expr);
			Function f = new Function(node, X);
			geofunction.setFunction(f);
			geofunction.setDefined(true);
		} else {
			geofunction.setUndefined();
			return;
		} // if error in regression
	}// compute()

	// / ============= IMPLEMENTATION
	// =============================================================///
	private final void doReg() {
		findParameters(); // Find initial parameters a,b,c,d
		Logistic_Reg(); // Run LM nonlinear iteration
	}// doReg()

	private final void findParameters() {
		double err, err_old;
		double lambda = 0.01d; //
		int sign = 1;
		double k = 0.001d; // debug("findParameters():\n================");
		// Remember some values to speed up later calculations:
		x1 = xd[0];
		y1 = yd[0];
		x2 = xd[size - 1];
		y2 = yd[size - 1];
		ymult = y1 * y2;
		e1 = Math.exp(x1);
		e2 = Math.exp(x2);
		emult = e1 * e2;
		ydiff = y1 - y2;

		// Handling negative a,b or c. (To avoid iteration across asymptotic
		// singularity.)
		boolean increasing; // allplus, allneg set by getpoints()
		if (y1 < y2) {
			increasing = true;
		} else {
			increasing = false;
		}

		if (allplus) { // a>0 and c>0
			if (!increasing) {
				sign = -1;
				k = -k;
			} // k<0 else k>0
		} else if (allneg) { // a>0 and c<0
			if (increasing) {
				sign = -1;
				k = -k;
			} // k<0 else k<0
		} else { // a<0: 4 cases to sort out: Last value closer to x-axis than
					// first value=>k<0
			if (Math.abs(y2) < Math.abs(y1)) {
				sign = -1;
				k = -k;
			} // if
		} // if
			// debug("increasing: "+increasing+" allpos: "+allplus+" allneg:
			// "+allneg);

		// / Iterate for best k: ///
		err_old = beta2(k);
		k = k + sign * lambda;
		err = err_old + 1; // to start off the while:
		while (Math.abs(err - err_old) > EPSILONFIND) {
			err = beta2(k);
			// negerr=beta2(xd,yd,-k);
			// if(Math.abs(negerr)<Math.abs(err)){//change to neg k
			// k=-k;sign=-1*sign;err=negerr;
			// }
			if (err < err_old) {
				lambda = lambda * 5; // going right way:-)
				err_old = err;
				err = err + 1; // to keep going...
			} else {
				k = k - sign * lambda; // go back and try again
				lambda = lambda / 5;
			} // if progress
			k += sign * lambda;
			// debug("b, error-error_old: "+k+" , "+diff);
		} // while reduction in error
			// Set params for final iteration:
		b = k; // next routine uses c,a,b...
		a = a(x1, y1, x2, y2, k);
		c = c(x1, y1, x2, y2, k);
		// debug("\nfindParameters()finished with:\n"+a+" b= "+b+" c= "+c);
		// debug("Sum sq. errors: "+beta2(xd,yd,a,b,c)+"\n-------------");
		if (Double.isNaN(a) || Double.isNaN(b) || Double.isNaN(c)) {
			error = true;
			Log.debug("findParameters(): a,b or c undefined");
			return;
		} // 20.11:if one is undefined, everything is undefined
	}// findParameters()

	private final void Logistic_Reg() {

		double lambda = 0.0d; // LM-damping coefficient
		double multfaktor = LMFACTORMULT; // later?: divfaktor=LMFACTORDIV;
		double residual, old_residual = beta2(xd, yd, a, b, c);
		// double diff = -1.0d; //negative to start it off

		double da = EPSILONREG, db = EPSILONREG, dc = EPSILONREG; // Something
																	// larger
																	// than eps,
																	// to get
																	// started...
		double b1, b2, b3; // At*beta
		double m11, m12, m13, m21, m22, m23, m31, m32, m33, // At*A
				n; // singular check
		double x, y;
		double dfa, dfb, dfc, beta, newa, newb, newc;
		iterations = 0;
		// ****checked up to here
		// LM: optimal startlambda
		b1 = b2 = b3 = 0.0d;
		m11 = m22 = m33 = 0.0d;
		for (int i = 0; i < size; i++) {
			x = xd[i];
			y = yd[i];
			beta = beta(x, y, a, b, c);
			dfa = df_a(x, a, b, c);
			dfb = df_b(x, a, b, c);
			dfc = df_c(x, a, b);
			// b=At*beta
			b1 += beta * dfa;
			b2 += beta * dfb;
			b3 += beta * dfc;
			// m=At*A
			m11 += dfa * dfa; // only need diagonal
			m22 += dfb * dfb;
			m33 += dfc * dfc;
		} // for all datapoints

		double startfaktor = Math.max(Math.max(m11, m22), m33);
		lambda = startfaktor * 0.001; // heuristic... (Set to zero if no LM)
										// debug("Startlambda: "+lambda);

		while (Math.abs(da) + Math.abs(db) + Math.abs(dc) > EPSILONREG) {// or
																			// while(Math.abs(diff)>EPSILON)
																			// ?
			iterations++; // debug(""+iterations+" : \n---------------");
			if ((iterations > MAXITERATIONS) || (error)) { // From experience:
															// >200 gives
															// nothing more...
				Log.debug("More than " + MAXITERATIONS
						+ " iterations. Solution is probably not usable.");
				break;
			}
			b1 = b2 = b3 = 0.0d;
			m11 = m12 = m13 = m21 = m22 = m23 = m31 = m32 = m33 = 0.0d;
			for (int i = 0; i < size; i++) {
				x = xd[i];
				y = yd[i];
				beta = beta(x, y, a, b, c);
				dfa = df_a(x, a, b, c);
				dfb = df_b(x, a, b, c);
				dfc = df_c(x, a, b);
				// b=At*beta
				b1 += beta * dfa;
				b2 += beta * dfb;
				b3 += beta * dfc;
				// m=At*A
				m11 += dfa * dfa + lambda;
				m12 += dfa * dfb;
				m13 += dfa * dfc;
				m22 += dfb * dfb + lambda;
				m23 += dfb * dfc;
				m33 += dfc * dfc + lambda;
			} // for all datapoints

			// Symmetry:
			m21 = m12;
			m31 = m13;
			m32 = m23;

			n = RegressionMath.det33(m11, m12, m13, m21, m22, m23, m31, m32,
					m33);

			if (Math.abs(n) < EPSSING) { // Not singular?
				error = true;
				Log.debug("Singular matrix...");
				da = db = dc = 0.0d; // to stop it all...
			} else {
				da = RegressionMath.det33(b1, m12, m13, b2, m22, m23, b3, m32,
						m33) / n;
				db = RegressionMath.det33(m11, b1, m13, m21, b2, m23, m31, b3,
						m33) / n;
				dc = RegressionMath.det33(m11, m12, b1, m21, m22, b2, m31, m32,
						b3) / n;
				newa = a + da;
				newb = b + db;
				newc = c + dc; // remember this and update later if ok
				residual = beta2(xd, yd, newa, newb, newc);
				// diff=residual-old_residual;
				// //debug("Residual difference: "+diff+" lambda: "+lambda);

				if (residual < old_residual) { // (Set to true if no LM)
					lambda = lambda / LMFACTORDIV; // going well :-) but don't
													// overdo it...
					old_residual = residual;
					multfaktor = LMFACTORMULT; // reset this!
					a = newa;
					b = newb;
					c = newc;
				} else {
					lambda = lambda * multfaktor; // not going well :-(
					multfaktor *= 2; // LM drives hard...
				} // if going the right way

			} // if(error)-else
				// debug(""+da+"\t"+db+"\t"+dc+"\n"+a+"\t"+b+"\t"+c);
		} // while(|da|+|db|+|dc|>epsilonreg)

		// 20.11: not wanted:
		// errorMsg("AlgoFitLogistic: Sum Errors Squared= "+beta2(xd,yd,a,b,c));
		// //Info

		if (Double.isNaN(a) || Double.isNaN(b) || Double.isNaN(c)) {
			error = true;
			Log.debug("findParameters(): a,b or c undefined");
			return;
		} // 20.11:if one is undefined, everything is undefined

	}// Logistic_Reg()

	// --- The Logistic Function and its derivates --- //

	// Variables in calcultions that tries to prevent rounding off errors:
	private double x1, y1, x2, y2, ymult, e1, e2, emult, ydiff;

	/** Logistic function f(x)=c/(1+ae^(-bx)) */
	private final static double f(double x, double a1, double b1, double c1) {
		return df_c(x, a1, b1) * c1;
	}// f(x,a,b,c)

	// Adjusted f, used in findParameters(), when a and c are calculated from
	// first and last datapoint
	// Also tries to avoid rounding off errors
	private final double f(double x, double k) { // k=b
		double e1k = Math.pow(e1, k), e2k = Math.pow(e2, k);
		double efrac = Math.pow(emult / Math.exp(x), k);
		return ymult * (e1k - e2k) / (y2 * e1k - y1 * e2k + ydiff * efrac);
	}// f(x,k)

	// df/dc=1/(1+ae^(-bx))
	private final static double df_c(double x, double a1, double b1) {
		return (1.0d / (1.0d + a1 * Math.exp(-b1 * x)));
	}// simple(x,a,b)

	// df/da
	private final static double df_a(double x, double a1, double b1,
			double c1) {
		double df_c = df_c(x, a1, b1);
		return df_c * df_c * Math.exp(-b1 * x) * (-c1);
	}// df_a(x,a,b,c)

	// df/db
	private final static double df_b(double x, double a1, double b1,
			double c1) {
		double df_c = df_c(x, a1, b1);
		return df_c * df_c * Math.exp(-b1 * x) * x * a1 * c1;
	}// df_b(x,a,b,c)

	// / --- Error calculations --- ///
	// beta = yd-f(xd,yd,a,b,c)
	private final static double beta(double x, double y, double a1, double b1,
			double c1) {
		return y - f(x, a1, b1, c1);
	}// beta(x,y,a,b,c)

	// beta = yd-f(x,b) for use in findParameters(). (a and c calculated)
	private final double beta(double x, double y, double b1) {
		return y - f(x, b1);
	}// bet(x,y,b)

	// Sum of squared errors, using last a,b and c
	private final double beta2(double[] x, double[] y, double a1, double b1,
			double c1) {
		double sum = 0.0d, beta;
		for (int i = 0; i < size; i++) {
			beta = beta(x[i], y[i], a1, b1, c1);
			sum += beta * beta;
		} // for all datapoints
			// debug("Sum Squared Errors: "+sum);
		return sum;
	}// beta2(x,y,a,b,c)

	// Sum of squared errors, using b(=k). a and c are calculated from first and
	// last datapoint.
	private final double beta2(double k1) {
		double beta = 0.0d, sum = 0.0d;
		for (int i = 0; i < size; i++) {
			beta = beta(xd[i], yd[i], k1);
			sum += beta * beta;
		} // for all data
		return sum;
	}// beta2(k)

	// / --- Bjorn Ove Thue's trick --- ///
	// c as function of first and last point
	private final static double c(double cx1, double cy1, double cx2,
			double cy2, double cb) {
		return cy1 * cy2 * (Math.exp(cb * cx1) - Math.exp(cb * cx2))
				/ (cy2 * Math.exp(cb * cx1) - cy1 * Math.exp(cb * cx2));
	}// c(x1,y1,x2,y2,k)

	/** a as function of first and last point */
	private final static double a(double ax1, double ay1, double ax2,
			double ay2, double ab) {
		return Math.exp(ab * (ax1 + ax2)) * (ay1 - ay2)
				/ (ay2 * Math.exp(ab * ax1) - ay1 * Math.exp(ab * ax2));
	}// a(x1,y1,x2,y2,b)

	private final void getPoints() {

		// problem bothering the gui: GeoList
		// newlist=k.Sort("tmp_{FitLogistic}",geolist);
		double[] xlist = null, ylist = null;
		double xy[] = new double[2];
		GeoPoint geoelement;
		// This is code duplication of AlgoSort, but for the time being:
		TreeSet<GeoPoint> sortedSet;
		sortedSet = new TreeSet<GeoPoint>(GeoPoint.getComparatorX());
		for (int i = 0; i < size; i++) {
			if (geolist.get(i) instanceof GeoPoint) {
				geoelement = (GeoPoint) geolist.get(i);
				sortedSet.add(geoelement);
			} else {
				error = true;
			} // if point
		} // for all points
		Iterator<GeoPoint> iter = sortedSet.iterator();
		int i = 0;
		allplus = true;
		allneg = true; // Need sign info in findParameters()
		xlist = new double[size];
		ylist = new double[size];
		while (iter.hasNext()) {
			geoelement = iter.next();
			geoelement.getInhomCoords(xy);
			xlist[i] = xy[0];
			ylist[i] = xy[1];
			if (ylist[i] < 0) {
				allplus = false;
			}
			if (ylist[i] > 0) {
				allneg = false;
			}
			i++;
		} // while iterating

		xd = xlist;
		yd = ylist;
		if (error) {
			Log.debug("getPoints(): Wrong list format...");
		}
	}// getPoints()

	@Override
	public double[] getCoeffs() {
		double[] ret = { a, b, c };
		return ret;
	}

}// class AlgoFitLogistic