package org.geogebra.common.kernel.statistics; /* GeoGebra - Dynamic Mathematics for Everyone http://www.geogebra.org This file is part of GeoGebra. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation. */ import java.util.Iterator; import java.util.TreeSet; import org.geogebra.common.kernel.Construction; import org.geogebra.common.kernel.algos.AlgoElement; import org.geogebra.common.kernel.arithmetic.ExpressionNode; import org.geogebra.common.kernel.arithmetic.ExpressionValue; import org.geogebra.common.kernel.arithmetic.Function; import org.geogebra.common.kernel.arithmetic.FunctionVariable; import org.geogebra.common.kernel.arithmetic.MyDouble; import org.geogebra.common.kernel.commands.Commands; import org.geogebra.common.kernel.geos.GeoElement; import org.geogebra.common.kernel.geos.GeoFunction; import org.geogebra.common.kernel.geos.GeoList; import org.geogebra.common.kernel.geos.GeoPoint; import org.geogebra.common.plugin.Operation; import org.geogebra.common.util.debug.Log; /******************* * AlgoFitLogistic * ***************** * * @author Hans-Petter Ulven * @version 22.11.08 20.11: got rid off undefined parts of functions errorMsg() * to call Application.debug() 22.11: Got rid of all testcode except * final outcommented hook: runTest(x[],y[] 08.12: Handling negative * a,b or c...(see findParameters()) Some cleaning up editing. ToDo: * -Better handling of negative a,b and c. (Not sure if anyone needs * that, but still...) -factor out: * error=SortPointList(GeoList,GeoList) (and other GeoList/array * processing) -nice to have: * error=sortPointList2Array(Geolist,x[],y[]); * * Fits c/(1+aexp(-bx)) to a list of points. Adapted from: Nonlinear * regression algorithms are well known, see: * mathworld.wolfram.com/NonlinearLeastSquaresFitting.html * ics.forth.gr/~lourakis/levmar Damping Parameter in Marquardt's * Method, Hans Bruun Nielsen, IMM-Rep 1999-05 * * The problem is to find the best initial values for the parameters, * little information available on this problem... * * Bjorn Ove Thue, the norwegian translator and programmer of the * norwegian version of WxMaxima, was kind enough to give me his idea: * Make the assumption that the first and last point are close to the * solution curve. Calculate c and a from those points, with b as * parameter, iterate to a good value for b, and do the final nonlinear * regression iteration with all three parameters. * * Constraints: <List of points> should have at least 3 points. The * first and last datapoint should not be too far from the solution * curve. Negative a,b and c: Asymptotes: Quality of points get even * more important, should not be too close to either vertical or * horisontal asymptotes, should have several "good" points on each * branch of the curve. (Positive a, b and c is quite robust though. * :-) ) Problems: Non-linear regression is difficult, and the choice * of initial values for the parameters are highly critical. The * algorithm here might converge to a local minimum. The algoritm might * diverge, so after MAXITERATIONS the result will be unusable * * Possible future solution: Make more commands where you give both a * list and suggestions for the parameters? */ public final class AlgoFitLogistic extends AlgoElement implements FitAlgo { // Tuning of noisefilter, Levenberg-Marquardt iteration, debug, rounding off // errors private final static double LMFACTORDIV = 3.0d; private final static double LMFACTORMULT = 2.0d; private final static int MAXITERATIONS = 200; // 100 probably enough, // nothing is usable after // that... private final static double EPSILONFIND = 1E-6d; private final static double EPSILONREG = 1E-14d; private final static double EPSSING = 1E-20d; // Properties private double a, b, c; // c/(1+a*exp(-bx)) private double[] xd, yd; // datapoints private int size; // of xd and yd private int iterations; // LM iterations private boolean error = false; // general error flag // Flags: private boolean allplus, allneg; // flags for y-values, set by // getPoints(); // GeoGebra obligatory: private GeoList geolist; // input private GeoFunction geofunction; // output /** * @param cons * construction * @param label * label for output * @param geolist * input points */ public AlgoFitLogistic(Construction cons, String label, GeoList geolist) { this(cons, geolist); geofunction.setLabel(label); }// Constructor /** * @param cons * construction * @param geolist * input points */ public AlgoFitLogistic(Construction cons, GeoList geolist) { super(cons); this.geolist = geolist; geofunction = new GeoFunction(cons); setInputOutput(); compute(); }// Constructor @Override public Commands getClassName() { return Commands.FitLogistic; } @Override protected void setInputOutput() { input = new GeoElement[1]; input[0] = geolist; setOnlyOutput(geofunction); setDependencies(); }// setInputOutput() /** * @return resulting function */ public GeoFunction getFitLogistic() { return geofunction; } @Override public final void compute() { size = geolist.size(); error = false; // General flag if (!geolist.isDefined() || (size < 3)) { // Need three points, at the // very least... geofunction.setUndefined(); Log.debug( "List not properly defined or too small. (3 points needed, but the more points, the better result!)"); return; } getPoints(); // sorts the points on x while getting them! try { doReg(); } catch (Exception all) { error = true; } // try-catch if (!error) { // a=2.0d;c=3.0d;b=0.5d; MyDouble A = new MyDouble(kernel, a); MyDouble B = new MyDouble(kernel, -b); MyDouble C = new MyDouble(kernel, c); MyDouble ONE = new MyDouble(kernel, 1.0d); FunctionVariable X = new FunctionVariable(kernel); ExpressionValue expr = new ExpressionNode(kernel, B, Operation.MULTIPLY, X); expr = new ExpressionNode(kernel, expr, Operation.EXP, null); expr = new ExpressionNode(kernel, A, Operation.MULTIPLY, expr); expr = new ExpressionNode(kernel, ONE, Operation.PLUS, expr); ExpressionNode node = new ExpressionNode(kernel, C, Operation.DIVIDE, expr); Function f = new Function(node, X); geofunction.setFunction(f); geofunction.setDefined(true); } else { geofunction.setUndefined(); return; } // if error in regression }// compute() // / ============= IMPLEMENTATION // =============================================================/// private final void doReg() { findParameters(); // Find initial parameters a,b,c,d Logistic_Reg(); // Run LM nonlinear iteration }// doReg() private final void findParameters() { double err, err_old; double lambda = 0.01d; // int sign = 1; double k = 0.001d; // debug("findParameters():\n================"); // Remember some values to speed up later calculations: x1 = xd[0]; y1 = yd[0]; x2 = xd[size - 1]; y2 = yd[size - 1]; ymult = y1 * y2; e1 = Math.exp(x1); e2 = Math.exp(x2); emult = e1 * e2; ydiff = y1 - y2; // Handling negative a,b or c. (To avoid iteration across asymptotic // singularity.) boolean increasing; // allplus, allneg set by getpoints() if (y1 < y2) { increasing = true; } else { increasing = false; } if (allplus) { // a>0 and c>0 if (!increasing) { sign = -1; k = -k; } // k<0 else k>0 } else if (allneg) { // a>0 and c<0 if (increasing) { sign = -1; k = -k; } // k<0 else k<0 } else { // a<0: 4 cases to sort out: Last value closer to x-axis than // first value=>k<0 if (Math.abs(y2) < Math.abs(y1)) { sign = -1; k = -k; } // if } // if // debug("increasing: "+increasing+" allpos: "+allplus+" allneg: // "+allneg); // / Iterate for best k: /// err_old = beta2(k); k = k + sign * lambda; err = err_old + 1; // to start off the while: while (Math.abs(err - err_old) > EPSILONFIND) { err = beta2(k); // negerr=beta2(xd,yd,-k); // if(Math.abs(negerr)<Math.abs(err)){//change to neg k // k=-k;sign=-1*sign;err=negerr; // } if (err < err_old) { lambda = lambda * 5; // going right way:-) err_old = err; err = err + 1; // to keep going... } else { k = k - sign * lambda; // go back and try again lambda = lambda / 5; } // if progress k += sign * lambda; // debug("b, error-error_old: "+k+" , "+diff); } // while reduction in error // Set params for final iteration: b = k; // next routine uses c,a,b... a = a(x1, y1, x2, y2, k); c = c(x1, y1, x2, y2, k); // debug("\nfindParameters()finished with:\n"+a+" b= "+b+" c= "+c); // debug("Sum sq. errors: "+beta2(xd,yd,a,b,c)+"\n-------------"); if (Double.isNaN(a) || Double.isNaN(b) || Double.isNaN(c)) { error = true; Log.debug("findParameters(): a,b or c undefined"); return; } // 20.11:if one is undefined, everything is undefined }// findParameters() private final void Logistic_Reg() { double lambda = 0.0d; // LM-damping coefficient double multfaktor = LMFACTORMULT; // later?: divfaktor=LMFACTORDIV; double residual, old_residual = beta2(xd, yd, a, b, c); // double diff = -1.0d; //negative to start it off double da = EPSILONREG, db = EPSILONREG, dc = EPSILONREG; // Something // larger // than eps, // to get // started... double b1, b2, b3; // At*beta double m11, m12, m13, m21, m22, m23, m31, m32, m33, // At*A n; // singular check double x, y; double dfa, dfb, dfc, beta, newa, newb, newc; iterations = 0; // ****checked up to here // LM: optimal startlambda b1 = b2 = b3 = 0.0d; m11 = m22 = m33 = 0.0d; for (int i = 0; i < size; i++) { x = xd[i]; y = yd[i]; beta = beta(x, y, a, b, c); dfa = df_a(x, a, b, c); dfb = df_b(x, a, b, c); dfc = df_c(x, a, b); // b=At*beta b1 += beta * dfa; b2 += beta * dfb; b3 += beta * dfc; // m=At*A m11 += dfa * dfa; // only need diagonal m22 += dfb * dfb; m33 += dfc * dfc; } // for all datapoints double startfaktor = Math.max(Math.max(m11, m22), m33); lambda = startfaktor * 0.001; // heuristic... (Set to zero if no LM) // debug("Startlambda: "+lambda); while (Math.abs(da) + Math.abs(db) + Math.abs(dc) > EPSILONREG) {// or // while(Math.abs(diff)>EPSILON) // ? iterations++; // debug(""+iterations+" : \n---------------"); if ((iterations > MAXITERATIONS) || (error)) { // From experience: // >200 gives // nothing more... Log.debug("More than " + MAXITERATIONS + " iterations. Solution is probably not usable."); break; } b1 = b2 = b3 = 0.0d; m11 = m12 = m13 = m21 = m22 = m23 = m31 = m32 = m33 = 0.0d; for (int i = 0; i < size; i++) { x = xd[i]; y = yd[i]; beta = beta(x, y, a, b, c); dfa = df_a(x, a, b, c); dfb = df_b(x, a, b, c); dfc = df_c(x, a, b); // b=At*beta b1 += beta * dfa; b2 += beta * dfb; b3 += beta * dfc; // m=At*A m11 += dfa * dfa + lambda; m12 += dfa * dfb; m13 += dfa * dfc; m22 += dfb * dfb + lambda; m23 += dfb * dfc; m33 += dfc * dfc + lambda; } // for all datapoints // Symmetry: m21 = m12; m31 = m13; m32 = m23; n = RegressionMath.det33(m11, m12, m13, m21, m22, m23, m31, m32, m33); if (Math.abs(n) < EPSSING) { // Not singular? error = true; Log.debug("Singular matrix..."); da = db = dc = 0.0d; // to stop it all... } else { da = RegressionMath.det33(b1, m12, m13, b2, m22, m23, b3, m32, m33) / n; db = RegressionMath.det33(m11, b1, m13, m21, b2, m23, m31, b3, m33) / n; dc = RegressionMath.det33(m11, m12, b1, m21, m22, b2, m31, m32, b3) / n; newa = a + da; newb = b + db; newc = c + dc; // remember this and update later if ok residual = beta2(xd, yd, newa, newb, newc); // diff=residual-old_residual; // //debug("Residual difference: "+diff+" lambda: "+lambda); if (residual < old_residual) { // (Set to true if no LM) lambda = lambda / LMFACTORDIV; // going well :-) but don't // overdo it... old_residual = residual; multfaktor = LMFACTORMULT; // reset this! a = newa; b = newb; c = newc; } else { lambda = lambda * multfaktor; // not going well :-( multfaktor *= 2; // LM drives hard... } // if going the right way } // if(error)-else // debug(""+da+"\t"+db+"\t"+dc+"\n"+a+"\t"+b+"\t"+c); } // while(|da|+|db|+|dc|>epsilonreg) // 20.11: not wanted: // errorMsg("AlgoFitLogistic: Sum Errors Squared= "+beta2(xd,yd,a,b,c)); // //Info if (Double.isNaN(a) || Double.isNaN(b) || Double.isNaN(c)) { error = true; Log.debug("findParameters(): a,b or c undefined"); return; } // 20.11:if one is undefined, everything is undefined }// Logistic_Reg() // --- The Logistic Function and its derivates --- // // Variables in calcultions that tries to prevent rounding off errors: private double x1, y1, x2, y2, ymult, e1, e2, emult, ydiff; /** Logistic function f(x)=c/(1+ae^(-bx)) */ private final static double f(double x, double a1, double b1, double c1) { return df_c(x, a1, b1) * c1; }// f(x,a,b,c) // Adjusted f, used in findParameters(), when a and c are calculated from // first and last datapoint // Also tries to avoid rounding off errors private final double f(double x, double k) { // k=b double e1k = Math.pow(e1, k), e2k = Math.pow(e2, k); double efrac = Math.pow(emult / Math.exp(x), k); return ymult * (e1k - e2k) / (y2 * e1k - y1 * e2k + ydiff * efrac); }// f(x,k) // df/dc=1/(1+ae^(-bx)) private final static double df_c(double x, double a1, double b1) { return (1.0d / (1.0d + a1 * Math.exp(-b1 * x))); }// simple(x,a,b) // df/da private final static double df_a(double x, double a1, double b1, double c1) { double df_c = df_c(x, a1, b1); return df_c * df_c * Math.exp(-b1 * x) * (-c1); }// df_a(x,a,b,c) // df/db private final static double df_b(double x, double a1, double b1, double c1) { double df_c = df_c(x, a1, b1); return df_c * df_c * Math.exp(-b1 * x) * x * a1 * c1; }// df_b(x,a,b,c) // / --- Error calculations --- /// // beta = yd-f(xd,yd,a,b,c) private final static double beta(double x, double y, double a1, double b1, double c1) { return y - f(x, a1, b1, c1); }// beta(x,y,a,b,c) // beta = yd-f(x,b) for use in findParameters(). (a and c calculated) private final double beta(double x, double y, double b1) { return y - f(x, b1); }// bet(x,y,b) // Sum of squared errors, using last a,b and c private final double beta2(double[] x, double[] y, double a1, double b1, double c1) { double sum = 0.0d, beta; for (int i = 0; i < size; i++) { beta = beta(x[i], y[i], a1, b1, c1); sum += beta * beta; } // for all datapoints // debug("Sum Squared Errors: "+sum); return sum; }// beta2(x,y,a,b,c) // Sum of squared errors, using b(=k). a and c are calculated from first and // last datapoint. private final double beta2(double k1) { double beta = 0.0d, sum = 0.0d; for (int i = 0; i < size; i++) { beta = beta(xd[i], yd[i], k1); sum += beta * beta; } // for all data return sum; }// beta2(k) // / --- Bjorn Ove Thue's trick --- /// // c as function of first and last point private final static double c(double cx1, double cy1, double cx2, double cy2, double cb) { return cy1 * cy2 * (Math.exp(cb * cx1) - Math.exp(cb * cx2)) / (cy2 * Math.exp(cb * cx1) - cy1 * Math.exp(cb * cx2)); }// c(x1,y1,x2,y2,k) /** a as function of first and last point */ private final static double a(double ax1, double ay1, double ax2, double ay2, double ab) { return Math.exp(ab * (ax1 + ax2)) * (ay1 - ay2) / (ay2 * Math.exp(ab * ax1) - ay1 * Math.exp(ab * ax2)); }// a(x1,y1,x2,y2,b) private final void getPoints() { // problem bothering the gui: GeoList // newlist=k.Sort("tmp_{FitLogistic}",geolist); double[] xlist = null, ylist = null; double xy[] = new double[2]; GeoPoint geoelement; // This is code duplication of AlgoSort, but for the time being: TreeSet<GeoPoint> sortedSet; sortedSet = new TreeSet<GeoPoint>(GeoPoint.getComparatorX()); for (int i = 0; i < size; i++) { if (geolist.get(i) instanceof GeoPoint) { geoelement = (GeoPoint) geolist.get(i); sortedSet.add(geoelement); } else { error = true; } // if point } // for all points Iterator<GeoPoint> iter = sortedSet.iterator(); int i = 0; allplus = true; allneg = true; // Need sign info in findParameters() xlist = new double[size]; ylist = new double[size]; while (iter.hasNext()) { geoelement = iter.next(); geoelement.getInhomCoords(xy); xlist[i] = xy[0]; ylist[i] = xy[1]; if (ylist[i] < 0) { allplus = false; } if (ylist[i] > 0) { allneg = false; } i++; } // while iterating xd = xlist; yd = ylist; if (error) { Log.debug("getPoints(): Wrong list format..."); } }// getPoints() @Override public double[] getCoeffs() { double[] ret = { a, b, c }; return ret; } }// class AlgoFitLogistic