/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package liblinear; import static liblinear.Linear.NL; import static liblinear.Linear.info; import static org.netlib.blas.DAXPY.DAXPY; import static org.netlib.blas.DDOT.DDOT; import static org.netlib.blas.DNRM2.DNRM2; import static org.netlib.blas.DSCAL.DSCAL; class Tron { private final Function fun_obj; private final double eps; private final int max_iter; public Tron( final Function fun_obj ) { this(fun_obj, 0.1); } public Tron( final Function fun_obj, double eps ) { this(fun_obj, eps, 1000); } public Tron( final Function fun_obj, double eps, int max_iter ) { this.fun_obj = fun_obj; this.eps = eps; this.max_iter = max_iter; } // void tron(double *w) void tron( double[] w ) { // Parameters for updating the iterates. double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75; // Parameters for updating the trust region size delta. double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4; int n = fun_obj.get_nr_variable(); int i, cg_iter; double delta, snorm, one = 1.0; double alpha, f, fnew, prered, actred, gs; int search = 1, iter = 1, inc = 1; double[] s = new double[n]; double[] r = new double[n]; double[] w_new = new double[n]; double[] g = new double[n]; for ( i = 0; i < n; i++ ) w[i] = 0; f = fun_obj.fun(w); fun_obj.grad(w, g); delta = DNRM2(n, g, inc); // delta = dnrm2_(&n, g, &inc); double gnorm1 = delta; double gnorm = gnorm1; if ( gnorm <= eps * gnorm1 ) search = 0; iter = 1; while ( iter <= max_iter && search != 0 ) { cg_iter = trcg(delta, g, s, r); // memcpy(w_new, w, sizeof(double)*n); System.arraycopy(w, 0, w_new, 0, n); DAXPY(n, one, s, inc, w_new, inc); gs = DDOT(n, g, inc, s, inc); // gs = ddot_(&n, g, &inc, s, &inc); prered = -0.5 * (gs - DDOT(n, s, inc, r, inc)); fnew = fun_obj.fun(w_new); // Compute the actual reduction. actred = f - fnew; // On the first iteration, adjust the initial step bound. snorm = DNRM2(n, s, inc); // snorm = dnrm2_(&n, s, &inc); if ( iter == 1 ) delta = Math.min(delta, snorm); // Compute prediction alpha*snorm of the step. if ( fnew - f - gs <= 0 ) alpha = sigma3; else alpha = Math.max(sigma1, -0.5 * (gs / (fnew - f - gs))); // Update the trust region bound according to the ratio of actual to // predicted reduction. if ( actred < eta0 * prered ) delta = Math.min(Math.max(alpha, sigma1) * snorm, sigma2 * delta); else if ( actred < eta1 * prered ) delta = Math.max(sigma1 * delta, Math.min(alpha * snorm, sigma2 * delta)); else if ( actred < eta2 * prered ) delta = Math.max(sigma1 * delta, Math.min(alpha * snorm, sigma3 * delta)); else delta = Math.max(delta, Math.min(alpha * snorm, sigma3 * delta)); info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d" + NL, iter, actred, prered, delta, f, gnorm, cg_iter); if ( actred > eta0 * prered ) { iter++; // memcpy(w, w_new, sizeof(double)*n); System.arraycopy(w_new, 0, w, 0, n); f = fnew; fun_obj.grad(w, g); gnorm = DNRM2(n, g, inc); // gnorm = dnrm2_(&n, g, &inc); if ( gnorm <= eps * gnorm1 ) break; } if ( f < -1.0e+32 ) { info("warning: f < -1.0e+32" + NL); break; } if ( Math.abs(actred) <= 0 && prered <= 0 ) { info("warning: actred and prered <= 0" + NL); break; } if ( Math.abs(actred) <= 1.0e-12 * Math.abs(f) && Math.abs(prered) <= 1.0e-12 * Math.abs(f) ) { info("warning: actred and prered too small" + NL); break; } } } // int TRON::trcg(double delta, double *g, double *s, double *r) int trcg( double delta, double[] g, double[] s, double[] r ) { int i, inc = 1; int n = fun_obj.get_nr_variable(); double one = 1; double[] d = new double[n]; double[] Hd = new double[n]; double rTr, rnewTrnew, cgtol; for ( i = 0; i < n; i++ ) { s[i] = 0; r[i] = -g[i]; d[i] = r[i]; } cgtol = 0.1 * DNRM2(n, g, inc); int cg_iter = 0; // rTr = ddot_(&n, r, &inc, r, &inc); rTr = DDOT(n, r, inc, r, inc); while ( true ) { if ( DNRM2(n, r, inc) <= cgtol ) break; cg_iter++; fun_obj.Hv(d, Hd); double alpha = rTr / DDOT(n, d, inc, Hd, inc); DAXPY(n, alpha, d, inc, s, inc); // daxpy_(&n, &alpha, d, &inc, s, &inc); // if (dnrm2_(&n, s, &inc) > delta) if ( DNRM2(n, s, inc) > delta ) { info("cg reaches trust region boundary\n"); alpha = -alpha; // daxpy_(&n, &alpha, d, &inc, s, &inc); DAXPY(n, alpha, d, inc, s, inc); double std = DDOT(n, s, inc, d, inc); double sts = DDOT(n, s, inc, s, inc); double dtd = DDOT(n, d, inc, d, inc); double dsq = delta * delta; double rad = Math.sqrt(std * std + dtd * (dsq - sts)); if ( std >= 0 ) alpha = (dsq - sts) / (std + rad); else alpha = (rad - std) / dtd; DAXPY(n, alpha, d, inc, s, inc); alpha = -alpha; DAXPY(n, alpha, Hd, inc, r, inc); break; } alpha = -alpha; DAXPY(n, alpha, Hd, inc, r, inc); rnewTrnew = DDOT(n, r, inc, r, inc); double beta = rnewTrnew / rTr; DSCAL(n, beta, d, inc); DAXPY(n, one, r, inc, d, inc); rTr = rnewTrnew; } return (cg_iter); } double norm_inf( int n, double[] x ) { double dmax = Math.abs(x[0]); for ( int i = 1; i < n; i++ ) if ( Math.abs(x[i]) >= dmax ) dmax = Math.abs(x[i]); return (dmax); } }