AlgoFitSin.java example

Explorer
geogebra-master
package org.geogebra.common.kernel.statistics;

/* 
 GeoGebra - Dynamic Mathematics for Everyone
 http://www.geogebra.org

 This file is part of GeoGebra.

 This program is free software; you can redistribute it and/or modify it 
 under the terms of the GNU General Public License as published by 
 the Free Software Foundation.

 */

import java.util.Iterator;
import java.util.TreeSet;

import org.geogebra.common.kernel.Construction;
import org.geogebra.common.kernel.algos.AlgoElement;
import org.geogebra.common.kernel.arithmetic.ExpressionNode;
import org.geogebra.common.kernel.arithmetic.ExpressionValue;
import org.geogebra.common.kernel.arithmetic.Function;
import org.geogebra.common.kernel.arithmetic.FunctionVariable;
import org.geogebra.common.kernel.arithmetic.MyDouble;
import org.geogebra.common.kernel.commands.Commands;
import org.geogebra.common.kernel.geos.GeoElement;
import org.geogebra.common.kernel.geos.GeoFunction;
import org.geogebra.common.kernel.geos.GeoList;
import org.geogebra.common.kernel.geos.GeoPoint;
import org.geogebra.common.plugin.Operation;
import org.geogebra.common.util.debug.Log;

/**************
 * AlgoFitSin * ************
 * 
 * @author Hans-Petter Ulven
 * @version 22.11.08 (november) 17.11: sorting list, noisekiller (nearmaxmin()
 *          ), 19.11: some more testing, small fixes 20.11:
 *          errorMsg->Application.debug 22.11: got rid of all testcode, except
 *          out-commented hook; runTest(x[],y[]) ?.12: Step II and III in
 *          algorithm changed, using Discrete Fourier Transform instead 17.01:
 *          Changed to minimum 4 datapoints to give fewer undefined, even if
 *          this might not be very useful 14.02.09: Small bug in linje 256.
 *          Undefined if many iterations 04.05.11: Bug/weakness in handling more
 *          than one period Fits a+b*sin(c*x+d) to a list of points. Adapted
 *          from: Nonlinear regression algorithms are well known, see:
 *          mathworld.wolfram.com/NonlinearLeastSquaresFitting.html
 *          ics.forth.gr/~lourakis/levmar Damping Parameter in Marquardt's
 *          Method, Hans Bruun Nielsen, IMM-Rep 1999-05 The problem is more to
 *          find best initial values for parameters, and here I was on my own,
 *          little information available on this problem... Experiments showed
 *          me: c and d are most critical If a,b and c are good, d is not that
 *          critical If c and d are good, a and b are not that critical The
 *          Levenberg-Marquardt method makes c and d significantly less
 *          critical. This led me to this algorithm: I a=average of y-data
 *          b=(maxy-miny)/2 II period=2*|x_first_max - x_first_min| c=2pi/period
 *          The first two extremums are found by my
 *          "direction-changing-algorithm" (Ulven nov-08), using a flank of
 *          three points as an indicator of monotonous increasing or decreasing
 *          function. (Same effort as finding a local max as y1<y2>y3, but
 *          effectively equivalent to using 5 points: y1<y2<y3>y4>y5) III simple
 *          iteration of d in <-pi,pi> to find a good d (Critical if c is a bit
 *          off, so better than pi/2-c*xmax) IV Simplified Levenberg-Marquardt
 *          method. (Could be optimized if/when I am able to understand the
 *          mathematics behind it and be able to check if this is of any value.)
 *          (Perhaps Donald Knuth could have done all this in 50 lines, but this
 *          is the best I can do...) Constraints: <List of points> should have
 *          at least 5 points. There should also be three points on the row
 *          steadily increasing or decreasing(y1<=y2<=y3 or y1>=y2>=y3)on each
 *          side/flank of the first extremums. The points should cover at least
 *          two extremums of the function. The two first extremums should not be
 *          too far from the extremums of the solution curve. If more than one
 *          period, there should, at the very least, be more than 6 points in
 *          each period. If any of these demands are not satisfied, the solution
 *          curve might be unusable. Problems: Non-linear regression is
 *          difficult, and the choice of initial values for the parameters are
 *          highly critical. The algorithm here might converge to a local
 *          minimum. It might also diverge, so MAX_ITERATIONS is set to 200.
 *          (Experience suggests that more than 100 iterations give useless
 *          results.) ToDo: Use Discrete Fourier Transform instead of step II
 *          and III. Experiments show that this adds surprisingly little to the
 *          robustness of my simple algorithm, so this will not be done unless
 *          user feedback indicates a need for more sophistication.
 */

public class AlgoFitSin extends AlgoElement implements FitAlgo {

	// Tuning of noisefilter, Levenberg-Marquardt iteration, debug, rounding off
	// errors
	private final static double NOISEKILLER = 0.2D; // Kill local extremums
													// inside a+/-noisekiller*b;
													// 0.2 seems to be a good
													// value?
	private final static double LMFACTORDIV = 3.0d;
	private final static double LMFACTORMULT = 2.0d;
	private final static int MAXITERATIONS = 200;
	private final static double EPSILON = 1E-14d;
	private final static double EPSSING = 1E-20d;
	private final static double PI = Math.PI;
	private final static double TWO_PI = PI * 2;

	// Properties
	private double a, b, c, d; // a+bsin(cx+d)
	private double[] xd, yd; // datapoints
	private int size; // data arrays
	private int iterations; // LM iterations
	private boolean error = false; // General catch-all

	// / --- GeoGebra obligatory: --- ///

	private GeoList geolist; // input
	private GeoFunction geofunction; // output

	/** Implements AlgoElement */
	public AlgoFitSin(Construction cons, String label, GeoList geolist) {
		this(cons, geolist);
		geofunction.setLabel(label);
	}// Constructor

	public AlgoFitSin(Construction cons, GeoList geolist) {
		super(cons);
		this.geolist = geolist;
		geofunction = new GeoFunction(cons);
		setInputOutput();
		compute();
	}// Constructor

	/** Implements AlgoElement */
	@Override
	public Commands getClassName() {
		return Commands.FitSin;
	}

	/** Implements AlgoElement */
	@Override
	protected void setInputOutput() {
		input = new GeoElement[1];
		input[0] = geolist;
		setOnlyOutput(geofunction);
		setDependencies();
	}// setInputOutput()

	/** Implements AlgoElement */
	public GeoFunction getFitSin() {
		return geofunction;
	}

	/** Implements AlgoElement */
	@Override
	public final void compute() {
		size = geolist.size();
		error = false; // General flag
		if (!geolist.isDefined() || (size < 4)) { // Direction-algo needs two
													// flanks, 3 in each.
			geofunction.setUndefined();
			Log.debug(
					"List not properly defined or too small (4 points needed).");
			return;
		}
		// if error in parameters :
		try {
			getPoints(); // Sorts the points while getting them
			doReg();
		} catch (Exception all) {
			error = true;
		} // try-catch
		if (!error) { // Make function
			MyDouble A = new MyDouble(kernel, a);
			MyDouble B = new MyDouble(kernel, b);
			MyDouble C = new MyDouble(kernel, c);
			MyDouble D = new MyDouble(kernel, d);
			FunctionVariable X = new FunctionVariable(kernel);
			ExpressionValue expr = new ExpressionNode(kernel, C,
					Operation.MULTIPLY, X);
			expr = new ExpressionNode(kernel, expr, Operation.PLUS, D);
			expr = new ExpressionNode(kernel, expr, Operation.SIN, null);
			expr = new ExpressionNode(kernel, B, Operation.MULTIPLY, expr);

			ExpressionNode node = new ExpressionNode(kernel, A, Operation.PLUS,
					expr);
			Function f = new Function(node, X);
			geofunction.setFunction(f);
			geofunction.setDefined(true);
		} else {
			geofunction.setUndefined();
			return;
		} // if error in regression
	}// compute()

	// / ============= IMPLEMENTATION
	// =============================================================///
	/** Does the math part of the regression */
	public final void doReg() {
		findParameters(); // Find initial parameters a,b,c,d
		sinus_Reg(); // Run LM nonlinear iteration
	}// doReg()

	/** Tries to find good initial values for a,b,c,d */
	public final void findParameters() {
		double y;
		double min_max_distance; // Distance between x-values of found(?)
									// extrema
		int numberofhalfperiods = 1; // Between the extrema
		int xmax_abs = 0, xmin_abs = 0; // Update in case changes=0 later
										// (few-data-case)
		size = xd.length;
		double sum = 0.0d, max = -Double.MAX_VALUE, min = Double.MAX_VALUE;
		// Find a and b:
		for (int i = 0; i < size; i++) {
			y = yd[i];
			sum += y;
			if (y > max) {
				max = y;
				xmax_abs = i;
			}
			if (y < min) {
				min = y;
				xmin_abs = i;
			}
		} // for
		a = sum / size;
		b = (max - min) / 2.0d; // System.out.println("a= "+a+" b= "+b+" max=
								// "+max+" min= "+min);

		// Find c:
		// This time first and second local max/min, between rise and fall and
		// vv
		// Last y in a rise or decrease *is* the local extremum!
		int xmax = xmax_abs, xmin = xmin_abs; // Keep absolute xmax/xmin in case
												// changes=0 or 1 later
		int state = 0; // undecided so far...
		int current = 0;
		int changes = 0; // undecided so far...

		for (int i = 2; i < size; i++) {
			y = yd[i];
			current = direction(yd[i - 2], yd[i - 1], y);
			// current=direction5(yd[i-4],yd[i-3],yd[i-2],yd[i-1],yd[i]);
			if ((current == 1) || (current == -1)) { // Steady up or steady down
				// do state bookkeeping
				if (state == 0) { // just started:
					state = current; // set first state
				} else { // we are on our way...
					if (current != state) { // Update
											// eventual
											// change
						if (nearmaxmin(a, b, state, current, max, min)) {// Kill
																			// noise
							changes++;
							state = current;
						} // if near
					} // if change
				} // if steady up or down

				// Two changes enough. (Must check before updating extremums.)
				if (changes >= 2) { // debug("Two changes on "+i);
					// go on counting changes!break;
				} else { // if changes>=2

					// Update extremums so far
					if (current == 1) { // Steady up
						max = y;
						xmax = i; // Last is max so far
					} else if (current == -1) { // Steady down
						min = y;
						xmin = i; // Last is min so far
					} // update extremums
				} // if changes<2
			} else { // Not steady, nothing to do...

			} // if steady up or down
				// debug("i: "+i);
				// debug("state: "+state+" current: "+current+" changes:
				// "+changes+" max: "+max+" min: "+min+" xmax: "+xmax+" xmin:
				// "+xmin);
		} // for all data

		// 09.12: Checking half-period:
		min_max_distance = Math.abs(xd[xmax] - xd[xmin]);
		if (changes <= 1) { // Did not succeed, abs extrema probably best
			xmin = xmin_abs;
			xmax = xmax_abs;
			min_max_distance = Math.abs(xd[xmin] - xd[xmax]); // Update for
																// final c
																// further down
			// min_max_distance might be 1,3,5,... halfperiods...find the one
			// that gives the least sse
			numberofhalfperiods = findNumberOfHalfPeriods(size / 4, xmin, xmax); // At
																					// least
																					// 6
																					// (14.02.09:4)
																					// points
																					// in
																					// a
																					// period,
																					// hopefully
		} // if too few extrema
		c = PI * numberofhalfperiods / min_max_distance; // debug("Final c:
															// "+c);
		// System.out.println("c="+c); //**************
		double c2 = 2 * Math.PI / ((xd[size - 1] - xd[0]) * 2 / changes);
		// System.out.println("or..."+c2);
		// System.out.println("changes: "+changes);
		if (changes > 2)
		 {
			c = (c + c2) / 2; // compromise?
		// System.out.println("final c: "+c);
		}

		// Find d
		// (d=0 might go well, but to be on the safe side...100 iterations
		// should be enough?
		// Could also use pi/2=c*xmax+d, but iteration is more robust in bad
		// cases.
		// If a,b and c are a bit off, d should be good!
		d = -Math.PI;
		double deltad = Math.PI * 2 * 0.01;
		double err = 0.0d;
		double bestd = 0.0d;
		double old_err = beta(xd, yd, a, b, c, d);
		for (int i = 0; i < 100; i++) {
			d += deltad;
			err = beta(xd, yd, a, b, c, d); // Without squaring is ok...
			if (err < old_err) {
				old_err = err;
				bestd = d; // System.out.println("d-iteration: error= "+error+"
							// d: "+d);
			} // if new min
		} // for: d-iteration
		d = bestd; // System.out.println("old routine gave d= "+d);
					// debug("Parameters: a= "+a+" b= "+b+" c= "+c+" d "+d);

	}// findParameters()

	/** Doing LM iteration */
	public final void sinus_Reg() {
		double lambda = 0.0d; // LM-damping coefficient
		double multfaktor = LMFACTORMULT; // later?: divfaktor=LMFACTORDIV;
		double residual, old_residual = beta2(xd, yd, a, b, c, d);

		double da = EPSILON, db = EPSILON, dc = EPSILON, dd = EPSILON; // Something
																		// larger
																		// than
																		// eps,
																		// to
																		// get
																		// started...
		double b1, b2, b3, b4; // At*beta
		double m11, m12, m13, m14, m21, m22, m23, m24, m31, m32, m33, m34, m41,
				m42, m43, m44, // At*A
				n; // singular check
		double x, y;
		double dfa, dfb, dfc, dfd, beta, newa, newb, newc, newd;

		iterations = 0;
		// LM: Optimal startlambda
		b1 = b2 = b3 = b4 = 0.0d;
		m11 = m22 = m33 = m44 = 0.0d;
		for (int i = 0; i < size; i++) {
			x = xd[i];
			y = yd[i];
			beta = beta(x, y, a, b, c, d);
			dfa = df_a();
			dfb = df_b(x, c, d);
			dfc = df_c(x, b, c, d);
			dfd = df_d(x, b, c, d);
			// b=At*beta
			b1 += beta * dfa;
			b2 += beta * dfb;
			b3 += beta * dfc;
			b4 += beta * dfd;
			// m=At*A
			m11 += dfa * dfa; // only need diagonal
			m22 += dfb * dfb;
			m33 += dfc * dfc;
			m44 += dfd * dfd;
		} // for all datapoints
		double startfaktor = Math.max(Math.max(Math.max(m11, m22), m33), m44);
		lambda = startfaktor * 0.001; // Heuristic, suggested by several
										// articles
										// debug("Startlambda: "+lambda);
		while (Math.abs(da) + Math.abs(db) + Math.abs(dc)
				+ Math.abs(dd) > EPSILON) {

			iterations++; // debug(""+iterations+" : ");
			if ((iterations > MAXITERATIONS) || (error)) { // From experience:
															// >100 gives
															// unusable result
				Log.debug("More than " + MAXITERATIONS + " iterations...");
				error = true; // 14.02.09: No use=>undefined!
				break;
			} // if diverging
			b1 = b2 = b3 = b4 = 0.0d;
			m11 = m12 = m13 = m14 = m21 = m22 = m23 = m24 = m31 = m32 = m33 = m34 = m41 = m42 = m43 = m44 = 0.0d;
			for (int i = 0; i < size; i++) { // for all datapoints
				x = xd[i];
				y = yd[i];
				beta = beta(x, y, a, b, c, d);
				dfa = df_a();
				dfb = df_b(x, c, d);
				dfc = df_c(x, b, c, d);
				dfd = df_d(x, b, c, d);
				// b=At*beta
				b1 += beta * dfa;
				b2 += beta * dfb;
				b3 += beta * dfc;
				b4 += beta * dfd;
				// m=At*A
				m11 += dfa * dfa + lambda;
				m12 += dfa * dfb;
				m13 += dfa * dfc;
				m14 += dfa * dfd;
				m22 += dfb * dfb + lambda;
				m23 += dfb * dfc;
				m24 += dfb * dfd;
				m33 += dfc * dfc + lambda;
				m34 += dfc * dfd;
				m44 += dfd * dfd + lambda;
			} // for all datapoints

			// Symmetry:
			m21 = m12;
			m31 = m13;
			m32 = m23;
			m41 = m14;
			m42 = m24;
			m43 = m34;

			n = RegressionMath.det44(m11, m12, m13, m14, m21, m22, m23, m24,
					m31, m32, m33, m34, m41, m42, m43, m44);

			if (Math.abs(n) < EPSSING) { // Sinular matrix?
				error = true;
				Log.debug("Singular matrix...");
				da = db = dc = dd = 0; // To stop it all...
			} else {
				da = RegressionMath.det44(b1, m12, m13, m14, b2, m22, m23, m24,
						b3, m32, m33, m34, b4, m42, m43, m44) / n;
				db = RegressionMath.det44(m11, b1, m13, m14, m21, b2, m23, m24,
						m31, b3, m33, m34, m41, b4, m43, m44) / n;
				dc = RegressionMath.det44(m11, m12, b1, m14, m21, m22, b2, m24,
						m31, m32, b3, m34, m41, m42, b4, m44) / n;
				dd = RegressionMath.det44(m11, m12, m13, b1, m21, m22, m23, b2,
						m31, m32, m33, b3, m41, m42, m43, b4) / n;

				newa = a + da;
				newb = b + db;
				newc = c + dc;
				newd = d + dd; // Remember this, in case we have to go back...
				residual = beta2(xd, yd, newa, newb, newc, newd); // debug("ChiSqError:
																	// +"+residual);
				// diff=residual-old_residual;
				// //debug("Residual difference: "+diff+" lambda: "+lambda);
				if (residual < old_residual) {
					lambda = lambda / LMFACTORDIV; // going well :-) But don't
													// overdo it...
					old_residual = residual;
					multfaktor = LMFACTORMULT; // Reset this!
					a = newa;
					b = newb;
					c = newc;
					d = newd;
				} else {
					lambda = lambda * multfaktor; // Not going well :-(
					multfaktor *= 2; // LM drives hard...
				} // if going the right way
			} // if(error)-else
				// System.out.println( ""+da+"\t"+db+"\t"+dc+"\t"+dd+"\n"+
				// ""+a+"\t"+b+"\t"+c+"\t"+d+"\n" );//out
		} // while(da+db+dc>eps)

		// Reduce d to interval <-pi,pi>
		// d=Rdft.reduce(d);//put here not in rdft!

		double reduction = Math.PI * 2;
		while (Math.abs(d) > Math.PI) {
			if (d > Math.PI) {
				d -= reduction;
			}
			if (d < -Math.PI) {
				d += reduction;
			} // debug("justifying: "+d);
		} // while not in i <-pi,pi>

		// Not wanted in log:
		// System.out.println("AlgoFitSin: Sum Errors Squared=
		// "+beta2(xd,yd,a,b,c,d));
		// //Info
		if (Double.isNaN(a) || Double.isNaN(b) || Double.isNaN(c)
				|| Double.isNaN(d)) {
			error = true;
			Log.debug("findParameters(): a,b or c undefined (NaN).");
			return;
		} // 20.11:if one is undefined, everything is undefined
	}// sinus_Reg()

	/* sin(Cx+D) */
	private final static double sin(double x, double c, double d) {
		return Math.sin(c * x + d);
	}// sin(x,b,c,d)

	/* cos(Cx+D) */
	private final static double cos(double x, double c, double d) {
		return Math.cos(c * x + d);
	}// cos(x,b,c,d)

	/* f(x)=A+Bsin(Cx+D) */
	private final static double f(double x, double a, double b, double c,
			double d) {
		return a + b * sin(x, c, d);
	}// f(x,a,b,c,d)

	/* Partial derivative of f to a */
	private final static double df_a() {
		return 1.0d;
	}// df_a()

	/* Partial derivative of f to b: sin(cx+d) */
	private final static double df_b(double x, double c, double d) {
		return sin(x, c, d);
	}// df_b(x,b,c,d)

	/* Partial derivative of f to c: cos(cx+d)*B*x */
	private final static double df_c(double x, double b, double c, double d) {
		return cos(x, c, d) * b * x;
	}// df_c(x,b,c,d)

	/* Partial derivative of f to d: Bcos(cx+d) */
	private final static double df_d(double x, double b, double c, double d) {
		return cos(x, c, d) * b;
	}// df_d(x,b,c,d)

	/* Difference to be reduced */
	private final static double beta(double x, double y, double a, double b,
			double c, double d) {
		return y - f(x, a, b, c, d);
	}// beta(x,a,b,c,d)

	/* Sum of quadratic errors */
	public final static double beta2(double[] x, double[] y, double a, double b,
			double c, double d) {
		double sum = 0.0d, beta;
		int n = x.length;
		for (int i = 0; i < n; i++) {
			beta = beta(x[i], y[i], a, b, c, d);
			sum += beta * beta;
		} // for all datapoints
		return sum;
	}// beta(x,y,a,b,c,d)

	// Sum of errors (absolute values)
	private final static double beta(double[] x, double[] y, double a, double b,
			double c, double d) {
		double sum = 0.0d;
		int n = x.length;
		for (int i = 0; i < n; i++) {
			sum += Math.abs(beta(x[i], y[i], a, b, c, d));
		} // for all datapoints
		return sum;
	}// beta(x,y,a,b,c,d)

	// 3 yd's on the row: up=1, down=-1, uncertain=0
	private final static int direction(double y1, double y2, double y3) {
		if ((y3 > y2) && (y2 > y1)) { // Rising!
			return 1;
		} else if ((y1 > y2) && (y2 > y3)) { // All under a
			return -1;
		} else { // Some over, som under...
			return 0;
		} // if
	}// direction()

	// Get Points and sort them. (Could find abs max and min as well,
	// that is done in findParameters() which is better for testing only
	// mathematical functionality.)
	private final void getPoints() {
		// Problem bothering the gui: GeoList
		// newlist=k.Sort("tmp_{FitSin}",geolist);
		double[] xlist = null, ylist = null;
		double xy[] = new double[2];
		GeoElement geoelement;
		// GeoList newlist;
		// This is code duplication of AlgoSort, but for the time being:
		TreeSet<GeoPoint> sortedSet;
		sortedSet = new TreeSet<GeoPoint>(GeoPoint.getComparatorX());
		for (int i = 0; i < size; i++) {
			geoelement = geolist.get(i);
			if (geoelement instanceof GeoPoint) {
				sortedSet.add((GeoPoint) geoelement);
			} else {
				error = true;
			} // if point
		} // for all points
		Iterator<GeoPoint> iter = sortedSet.iterator();
		int i = 0;
		xlist = new double[size];
		ylist = new double[size];
		GeoPoint gp;
		while (iter.hasNext()) {
			gp = iter.next();
			gp.getInhomCoords(xy);
			xlist[i] = xy[0];
			ylist[i] = xy[1];
			i++;
		} // while iterating
		xd = xlist;
		yd = ylist;
		if (error) {
			Log.debug("getPoints(): Wrong list format, must be points.");
		}
	}// getPoints()

	// Noisekiller
	private final static boolean nearmaxmin(double a, double b, int state,
			int current, double max, double min) {
		if ((state == 1) && (current == -1)) { // A real max-change?
			if (max > a + NOISEKILLER * b) {
				return true;
			}
			return false;
		} else if ((state == -1) && (current == 1)) { // A real min-change?
			if (min < a - NOISEKILLER * b) {
				return true;
			}
			return false;
		} else {
			return false; // Should not happen...
		} // if
	}// nearmaxmin(y,a,b)

	// Is distance between abs max and abx min 1,3,5,... halfperiodes?
	// To be used if DFT is used in finding good initial values
	// Finds the number of halfperiods between abs max and abs min that gives
	// the least SSE
	private final int findNumberOfHalfPeriods(int k, int xmin, int xmax) {
		double min_error = Double.MAX_VALUE;
		double error1;
		double period, c1;
		int n = 0, best = 0;
		for (int i = 1; i <= k; i++) {
			n = 2 * i - 1; // number of halfperiods
			period = Math.abs(xd[xmax] - xd[xmin]) * 2.0 / n;
			c1 = TWO_PI / period;
			error1 = beta2(xd, yd, a, b, c1, PI / 2.0d - c1 * xd[xmax]); // debug("halfperiod:
																		// n, c,
																		// error:
																		// "+n+"
																		// "+c+"
																		// "+error);
			if (error1 < min_error) {
				min_error = error1;
				best = n;
			} // if better
		} // for all actual frequencies
		return best;
	}// findNumberOfHalfPeriods(int,double)

	// / =============== To comment out when final
	// =============================================== ///

	@Override
	public double[] getCoeffs() {
		double[] ret = { a, b, c, d };
		return ret;
	}

}// class AlgoFitSin