package org.geogebra.common.kernel.statistics;
/*
GeoGebra - Dynamic Mathematics for Everyone
http://www.geogebra.org
This file is part of GeoGebra.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation.
*/
import java.util.Iterator;
import java.util.TreeSet;
import org.geogebra.common.kernel.Construction;
import org.geogebra.common.kernel.algos.AlgoElement;
import org.geogebra.common.kernel.arithmetic.ExpressionNode;
import org.geogebra.common.kernel.arithmetic.ExpressionValue;
import org.geogebra.common.kernel.arithmetic.Function;
import org.geogebra.common.kernel.arithmetic.FunctionVariable;
import org.geogebra.common.kernel.arithmetic.MyDouble;
import org.geogebra.common.kernel.commands.Commands;
import org.geogebra.common.kernel.geos.GeoElement;
import org.geogebra.common.kernel.geos.GeoFunction;
import org.geogebra.common.kernel.geos.GeoList;
import org.geogebra.common.kernel.geos.GeoPoint;
import org.geogebra.common.plugin.Operation;
import org.geogebra.common.util.debug.Log;
/**************
* AlgoFitSin * ************
*
* @author Hans-Petter Ulven
* @version 22.11.08 (november) 17.11: sorting list, noisekiller (nearmaxmin()
* ), 19.11: some more testing, small fixes 20.11:
* errorMsg->Application.debug 22.11: got rid of all testcode, except
* out-commented hook; runTest(x[],y[]) ?.12: Step II and III in
* algorithm changed, using Discrete Fourier Transform instead 17.01:
* Changed to minimum 4 datapoints to give fewer undefined, even if
* this might not be very useful 14.02.09: Small bug in linje 256.
* Undefined if many iterations 04.05.11: Bug/weakness in handling more
* than one period Fits a+b*sin(c*x+d) to a list of points. Adapted
* from: Nonlinear regression algorithms are well known, see:
* mathworld.wolfram.com/NonlinearLeastSquaresFitting.html
* ics.forth.gr/~lourakis/levmar Damping Parameter in Marquardt's
* Method, Hans Bruun Nielsen, IMM-Rep 1999-05 The problem is more to
* find best initial values for parameters, and here I was on my own,
* little information available on this problem... Experiments showed
* me: c and d are most critical If a,b and c are good, d is not that
* critical If c and d are good, a and b are not that critical The
* Levenberg-Marquardt method makes c and d significantly less
* critical. This led me to this algorithm: I a=average of y-data
* b=(maxy-miny)/2 II period=2*|x_first_max - x_first_min| c=2pi/period
* The first two extremums are found by my
* "direction-changing-algorithm" (Ulven nov-08), using a flank of
* three points as an indicator of monotonous increasing or decreasing
* function. (Same effort as finding a local max as y1<y2>y3, but
* effectively equivalent to using 5 points: y1<y2<y3>y4>y5) III simple
* iteration of d in <-pi,pi> to find a good d (Critical if c is a bit
* off, so better than pi/2-c*xmax) IV Simplified Levenberg-Marquardt
* method. (Could be optimized if/when I am able to understand the
* mathematics behind it and be able to check if this is of any value.)
* (Perhaps Donald Knuth could have done all this in 50 lines, but this
* is the best I can do...) Constraints: <List of points> should have
* at least 5 points. There should also be three points on the row
* steadily increasing or decreasing(y1<=y2<=y3 or y1>=y2>=y3)on each
* side/flank of the first extremums. The points should cover at least
* two extremums of the function. The two first extremums should not be
* too far from the extremums of the solution curve. If more than one
* period, there should, at the very least, be more than 6 points in
* each period. If any of these demands are not satisfied, the solution
* curve might be unusable. Problems: Non-linear regression is
* difficult, and the choice of initial values for the parameters are
* highly critical. The algorithm here might converge to a local
* minimum. It might also diverge, so MAX_ITERATIONS is set to 200.
* (Experience suggests that more than 100 iterations give useless
* results.) ToDo: Use Discrete Fourier Transform instead of step II
* and III. Experiments show that this adds surprisingly little to the
* robustness of my simple algorithm, so this will not be done unless
* user feedback indicates a need for more sophistication.
*/
public class AlgoFitSin extends AlgoElement implements FitAlgo {
// Tuning of noisefilter, Levenberg-Marquardt iteration, debug, rounding off
// errors
private final static double NOISEKILLER = 0.2D; // Kill local extremums
// inside a+/-noisekiller*b;
// 0.2 seems to be a good
// value?
private final static double LMFACTORDIV = 3.0d;
private final static double LMFACTORMULT = 2.0d;
private final static int MAXITERATIONS = 200;
private final static double EPSILON = 1E-14d;
private final static double EPSSING = 1E-20d;
private final static double PI = Math.PI;
private final static double TWO_PI = PI * 2;
// Properties
private double a, b, c, d; // a+bsin(cx+d)
private double[] xd, yd; // datapoints
private int size; // data arrays
private int iterations; // LM iterations
private boolean error = false; // General catch-all
// / --- GeoGebra obligatory: --- ///
private GeoList geolist; // input
private GeoFunction geofunction; // output
/** Implements AlgoElement */
public AlgoFitSin(Construction cons, String label, GeoList geolist) {
this(cons, geolist);
geofunction.setLabel(label);
}// Constructor
public AlgoFitSin(Construction cons, GeoList geolist) {
super(cons);
this.geolist = geolist;
geofunction = new GeoFunction(cons);
setInputOutput();
compute();
}// Constructor
/** Implements AlgoElement */
@Override
public Commands getClassName() {
return Commands.FitSin;
}
/** Implements AlgoElement */
@Override
protected void setInputOutput() {
input = new GeoElement[1];
input[0] = geolist;
setOnlyOutput(geofunction);
setDependencies();
}// setInputOutput()
/** Implements AlgoElement */
public GeoFunction getFitSin() {
return geofunction;
}
/** Implements AlgoElement */
@Override
public final void compute() {
size = geolist.size();
error = false; // General flag
if (!geolist.isDefined() || (size < 4)) { // Direction-algo needs two
// flanks, 3 in each.
geofunction.setUndefined();
Log.debug(
"List not properly defined or too small (4 points needed).");
return;
}
// if error in parameters :
try {
getPoints(); // Sorts the points while getting them
doReg();
} catch (Exception all) {
error = true;
} // try-catch
if (!error) { // Make function
MyDouble A = new MyDouble(kernel, a);
MyDouble B = new MyDouble(kernel, b);
MyDouble C = new MyDouble(kernel, c);
MyDouble D = new MyDouble(kernel, d);
FunctionVariable X = new FunctionVariable(kernel);
ExpressionValue expr = new ExpressionNode(kernel, C,
Operation.MULTIPLY, X);
expr = new ExpressionNode(kernel, expr, Operation.PLUS, D);
expr = new ExpressionNode(kernel, expr, Operation.SIN, null);
expr = new ExpressionNode(kernel, B, Operation.MULTIPLY, expr);
ExpressionNode node = new ExpressionNode(kernel, A, Operation.PLUS,
expr);
Function f = new Function(node, X);
geofunction.setFunction(f);
geofunction.setDefined(true);
} else {
geofunction.setUndefined();
return;
} // if error in regression
}// compute()
// / ============= IMPLEMENTATION
// =============================================================///
/** Does the math part of the regression */
public final void doReg() {
findParameters(); // Find initial parameters a,b,c,d
sinus_Reg(); // Run LM nonlinear iteration
}// doReg()
/** Tries to find good initial values for a,b,c,d */
public final void findParameters() {
double y;
double min_max_distance; // Distance between x-values of found(?)
// extrema
int numberofhalfperiods = 1; // Between the extrema
int xmax_abs = 0, xmin_abs = 0; // Update in case changes=0 later
// (few-data-case)
size = xd.length;
double sum = 0.0d, max = -Double.MAX_VALUE, min = Double.MAX_VALUE;
// Find a and b:
for (int i = 0; i < size; i++) {
y = yd[i];
sum += y;
if (y > max) {
max = y;
xmax_abs = i;
}
if (y < min) {
min = y;
xmin_abs = i;
}
} // for
a = sum / size;
b = (max - min) / 2.0d; // System.out.println("a= "+a+" b= "+b+" max=
// "+max+" min= "+min);
// Find c:
// This time first and second local max/min, between rise and fall and
// vv
// Last y in a rise or decrease *is* the local extremum!
int xmax = xmax_abs, xmin = xmin_abs; // Keep absolute xmax/xmin in case
// changes=0 or 1 later
int state = 0; // undecided so far...
int current = 0;
int changes = 0; // undecided so far...
for (int i = 2; i < size; i++) {
y = yd[i];
current = direction(yd[i - 2], yd[i - 1], y);
// current=direction5(yd[i-4],yd[i-3],yd[i-2],yd[i-1],yd[i]);
if ((current == 1) || (current == -1)) { // Steady up or steady down
// do state bookkeeping
if (state == 0) { // just started:
state = current; // set first state
} else { // we are on our way...
if (current != state) { // Update
// eventual
// change
if (nearmaxmin(a, b, state, current, max, min)) {// Kill
// noise
changes++;
state = current;
} // if near
} // if change
} // if steady up or down
// Two changes enough. (Must check before updating extremums.)
if (changes >= 2) { // debug("Two changes on "+i);
// go on counting changes!break;
} else { // if changes>=2
// Update extremums so far
if (current == 1) { // Steady up
max = y;
xmax = i; // Last is max so far
} else if (current == -1) { // Steady down
min = y;
xmin = i; // Last is min so far
} // update extremums
} // if changes<2
} else { // Not steady, nothing to do...
} // if steady up or down
// debug("i: "+i);
// debug("state: "+state+" current: "+current+" changes:
// "+changes+" max: "+max+" min: "+min+" xmax: "+xmax+" xmin:
// "+xmin);
} // for all data
// 09.12: Checking half-period:
min_max_distance = Math.abs(xd[xmax] - xd[xmin]);
if (changes <= 1) { // Did not succeed, abs extrema probably best
xmin = xmin_abs;
xmax = xmax_abs;
min_max_distance = Math.abs(xd[xmin] - xd[xmax]); // Update for
// final c
// further down
// min_max_distance might be 1,3,5,... halfperiods...find the one
// that gives the least sse
numberofhalfperiods = findNumberOfHalfPeriods(size / 4, xmin, xmax); // At
// least
// 6
// (14.02.09:4)
// points
// in
// a
// period,
// hopefully
} // if too few extrema
c = PI * numberofhalfperiods / min_max_distance; // debug("Final c:
// "+c);
// System.out.println("c="+c); //**************
double c2 = 2 * Math.PI / ((xd[size - 1] - xd[0]) * 2 / changes);
// System.out.println("or..."+c2);
// System.out.println("changes: "+changes);
if (changes > 2)
{
c = (c + c2) / 2; // compromise?
// System.out.println("final c: "+c);
}
// Find d
// (d=0 might go well, but to be on the safe side...100 iterations
// should be enough?
// Could also use pi/2=c*xmax+d, but iteration is more robust in bad
// cases.
// If a,b and c are a bit off, d should be good!
d = -Math.PI;
double deltad = Math.PI * 2 * 0.01;
double err = 0.0d;
double bestd = 0.0d;
double old_err = beta(xd, yd, a, b, c, d);
for (int i = 0; i < 100; i++) {
d += deltad;
err = beta(xd, yd, a, b, c, d); // Without squaring is ok...
if (err < old_err) {
old_err = err;
bestd = d; // System.out.println("d-iteration: error= "+error+"
// d: "+d);
} // if new min
} // for: d-iteration
d = bestd; // System.out.println("old routine gave d= "+d);
// debug("Parameters: a= "+a+" b= "+b+" c= "+c+" d "+d);
}// findParameters()
/** Doing LM iteration */
public final void sinus_Reg() {
double lambda = 0.0d; // LM-damping coefficient
double multfaktor = LMFACTORMULT; // later?: divfaktor=LMFACTORDIV;
double residual, old_residual = beta2(xd, yd, a, b, c, d);
double da = EPSILON, db = EPSILON, dc = EPSILON, dd = EPSILON; // Something
// larger
// than
// eps,
// to
// get
// started...
double b1, b2, b3, b4; // At*beta
double m11, m12, m13, m14, m21, m22, m23, m24, m31, m32, m33, m34, m41,
m42, m43, m44, // At*A
n; // singular check
double x, y;
double dfa, dfb, dfc, dfd, beta, newa, newb, newc, newd;
iterations = 0;
// LM: Optimal startlambda
b1 = b2 = b3 = b4 = 0.0d;
m11 = m22 = m33 = m44 = 0.0d;
for (int i = 0; i < size; i++) {
x = xd[i];
y = yd[i];
beta = beta(x, y, a, b, c, d);
dfa = df_a();
dfb = df_b(x, c, d);
dfc = df_c(x, b, c, d);
dfd = df_d(x, b, c, d);
// b=At*beta
b1 += beta * dfa;
b2 += beta * dfb;
b3 += beta * dfc;
b4 += beta * dfd;
// m=At*A
m11 += dfa * dfa; // only need diagonal
m22 += dfb * dfb;
m33 += dfc * dfc;
m44 += dfd * dfd;
} // for all datapoints
double startfaktor = Math.max(Math.max(Math.max(m11, m22), m33), m44);
lambda = startfaktor * 0.001; // Heuristic, suggested by several
// articles
// debug("Startlambda: "+lambda);
while (Math.abs(da) + Math.abs(db) + Math.abs(dc)
+ Math.abs(dd) > EPSILON) {
iterations++; // debug(""+iterations+" : ");
if ((iterations > MAXITERATIONS) || (error)) { // From experience:
// >100 gives
// unusable result
Log.debug("More than " + MAXITERATIONS + " iterations...");
error = true; // 14.02.09: No use=>undefined!
break;
} // if diverging
b1 = b2 = b3 = b4 = 0.0d;
m11 = m12 = m13 = m14 = m21 = m22 = m23 = m24 = m31 = m32 = m33 = m34 = m41 = m42 = m43 = m44 = 0.0d;
for (int i = 0; i < size; i++) { // for all datapoints
x = xd[i];
y = yd[i];
beta = beta(x, y, a, b, c, d);
dfa = df_a();
dfb = df_b(x, c, d);
dfc = df_c(x, b, c, d);
dfd = df_d(x, b, c, d);
// b=At*beta
b1 += beta * dfa;
b2 += beta * dfb;
b3 += beta * dfc;
b4 += beta * dfd;
// m=At*A
m11 += dfa * dfa + lambda;
m12 += dfa * dfb;
m13 += dfa * dfc;
m14 += dfa * dfd;
m22 += dfb * dfb + lambda;
m23 += dfb * dfc;
m24 += dfb * dfd;
m33 += dfc * dfc + lambda;
m34 += dfc * dfd;
m44 += dfd * dfd + lambda;
} // for all datapoints
// Symmetry:
m21 = m12;
m31 = m13;
m32 = m23;
m41 = m14;
m42 = m24;
m43 = m34;
n = RegressionMath.det44(m11, m12, m13, m14, m21, m22, m23, m24,
m31, m32, m33, m34, m41, m42, m43, m44);
if (Math.abs(n) < EPSSING) { // Sinular matrix?
error = true;
Log.debug("Singular matrix...");
da = db = dc = dd = 0; // To stop it all...
} else {
da = RegressionMath.det44(b1, m12, m13, m14, b2, m22, m23, m24,
b3, m32, m33, m34, b4, m42, m43, m44) / n;
db = RegressionMath.det44(m11, b1, m13, m14, m21, b2, m23, m24,
m31, b3, m33, m34, m41, b4, m43, m44) / n;
dc = RegressionMath.det44(m11, m12, b1, m14, m21, m22, b2, m24,
m31, m32, b3, m34, m41, m42, b4, m44) / n;
dd = RegressionMath.det44(m11, m12, m13, b1, m21, m22, m23, b2,
m31, m32, m33, b3, m41, m42, m43, b4) / n;
newa = a + da;
newb = b + db;
newc = c + dc;
newd = d + dd; // Remember this, in case we have to go back...
residual = beta2(xd, yd, newa, newb, newc, newd); // debug("ChiSqError:
// +"+residual);
// diff=residual-old_residual;
// //debug("Residual difference: "+diff+" lambda: "+lambda);
if (residual < old_residual) {
lambda = lambda / LMFACTORDIV; // going well :-) But don't
// overdo it...
old_residual = residual;
multfaktor = LMFACTORMULT; // Reset this!
a = newa;
b = newb;
c = newc;
d = newd;
} else {
lambda = lambda * multfaktor; // Not going well :-(
multfaktor *= 2; // LM drives hard...
} // if going the right way
} // if(error)-else
// System.out.println( ""+da+"\t"+db+"\t"+dc+"\t"+dd+"\n"+
// ""+a+"\t"+b+"\t"+c+"\t"+d+"\n" );//out
} // while(da+db+dc>eps)
// Reduce d to interval <-pi,pi>
// d=Rdft.reduce(d);//put here not in rdft!
double reduction = Math.PI * 2;
while (Math.abs(d) > Math.PI) {
if (d > Math.PI) {
d -= reduction;
}
if (d < -Math.PI) {
d += reduction;
} // debug("justifying: "+d);
} // while not in i <-pi,pi>
// Not wanted in log:
// System.out.println("AlgoFitSin: Sum Errors Squared=
// "+beta2(xd,yd,a,b,c,d));
// //Info
if (Double.isNaN(a) || Double.isNaN(b) || Double.isNaN(c)
|| Double.isNaN(d)) {
error = true;
Log.debug("findParameters(): a,b or c undefined (NaN).");
return;
} // 20.11:if one is undefined, everything is undefined
}// sinus_Reg()
/* sin(Cx+D) */
private final static double sin(double x, double c, double d) {
return Math.sin(c * x + d);
}// sin(x,b,c,d)
/* cos(Cx+D) */
private final static double cos(double x, double c, double d) {
return Math.cos(c * x + d);
}// cos(x,b,c,d)
/* f(x)=A+Bsin(Cx+D) */
private final static double f(double x, double a, double b, double c,
double d) {
return a + b * sin(x, c, d);
}// f(x,a,b,c,d)
/* Partial derivative of f to a */
private final static double df_a() {
return 1.0d;
}// df_a()
/* Partial derivative of f to b: sin(cx+d) */
private final static double df_b(double x, double c, double d) {
return sin(x, c, d);
}// df_b(x,b,c,d)
/* Partial derivative of f to c: cos(cx+d)*B*x */
private final static double df_c(double x, double b, double c, double d) {
return cos(x, c, d) * b * x;
}// df_c(x,b,c,d)
/* Partial derivative of f to d: Bcos(cx+d) */
private final static double df_d(double x, double b, double c, double d) {
return cos(x, c, d) * b;
}// df_d(x,b,c,d)
/* Difference to be reduced */
private final static double beta(double x, double y, double a, double b,
double c, double d) {
return y - f(x, a, b, c, d);
}// beta(x,a,b,c,d)
/* Sum of quadratic errors */
public final static double beta2(double[] x, double[] y, double a, double b,
double c, double d) {
double sum = 0.0d, beta;
int n = x.length;
for (int i = 0; i < n; i++) {
beta = beta(x[i], y[i], a, b, c, d);
sum += beta * beta;
} // for all datapoints
return sum;
}// beta(x,y,a,b,c,d)
// Sum of errors (absolute values)
private final static double beta(double[] x, double[] y, double a, double b,
double c, double d) {
double sum = 0.0d;
int n = x.length;
for (int i = 0; i < n; i++) {
sum += Math.abs(beta(x[i], y[i], a, b, c, d));
} // for all datapoints
return sum;
}// beta(x,y,a,b,c,d)
// 3 yd's on the row: up=1, down=-1, uncertain=0
private final static int direction(double y1, double y2, double y3) {
if ((y3 > y2) && (y2 > y1)) { // Rising!
return 1;
} else if ((y1 > y2) && (y2 > y3)) { // All under a
return -1;
} else { // Some over, som under...
return 0;
} // if
}// direction()
// Get Points and sort them. (Could find abs max and min as well,
// that is done in findParameters() which is better for testing only
// mathematical functionality.)
private final void getPoints() {
// Problem bothering the gui: GeoList
// newlist=k.Sort("tmp_{FitSin}",geolist);
double[] xlist = null, ylist = null;
double xy[] = new double[2];
GeoElement geoelement;
// GeoList newlist;
// This is code duplication of AlgoSort, but for the time being:
TreeSet<GeoPoint> sortedSet;
sortedSet = new TreeSet<GeoPoint>(GeoPoint.getComparatorX());
for (int i = 0; i < size; i++) {
geoelement = geolist.get(i);
if (geoelement instanceof GeoPoint) {
sortedSet.add((GeoPoint) geoelement);
} else {
error = true;
} // if point
} // for all points
Iterator<GeoPoint> iter = sortedSet.iterator();
int i = 0;
xlist = new double[size];
ylist = new double[size];
GeoPoint gp;
while (iter.hasNext()) {
gp = iter.next();
gp.getInhomCoords(xy);
xlist[i] = xy[0];
ylist[i] = xy[1];
i++;
} // while iterating
xd = xlist;
yd = ylist;
if (error) {
Log.debug("getPoints(): Wrong list format, must be points.");
}
}// getPoints()
// Noisekiller
private final static boolean nearmaxmin(double a, double b, int state,
int current, double max, double min) {
if ((state == 1) && (current == -1)) { // A real max-change?
if (max > a + NOISEKILLER * b) {
return true;
}
return false;
} else if ((state == -1) && (current == 1)) { // A real min-change?
if (min < a - NOISEKILLER * b) {
return true;
}
return false;
} else {
return false; // Should not happen...
} // if
}// nearmaxmin(y,a,b)
// Is distance between abs max and abx min 1,3,5,... halfperiodes?
// To be used if DFT is used in finding good initial values
// Finds the number of halfperiods between abs max and abs min that gives
// the least SSE
private final int findNumberOfHalfPeriods(int k, int xmin, int xmax) {
double min_error = Double.MAX_VALUE;
double error1;
double period, c1;
int n = 0, best = 0;
for (int i = 1; i <= k; i++) {
n = 2 * i - 1; // number of halfperiods
period = Math.abs(xd[xmax] - xd[xmin]) * 2.0 / n;
c1 = TWO_PI / period;
error1 = beta2(xd, yd, a, b, c1, PI / 2.0d - c1 * xd[xmax]); // debug("halfperiod:
// n, c,
// error:
// "+n+"
// "+c+"
// "+error);
if (error1 < min_error) {
min_error = error1;
best = n;
} // if better
} // for all actual frequencies
return best;
}// findNumberOfHalfPeriods(int,double)
// / =============== To comment out when final
// =============================================== ///
@Override
public double[] getCoeffs() {
double[] ret = { a, b, c, d };
return ret;
}
}// class AlgoFitSin