/**
* Copyright (c) 2007-2014 The LIBLINEAR Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions
* and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of
* conditions and the following disclaimer in the documentation and/or other materials provided with
* the distribution.
*
* 3. Neither name of copyright holders nor the names of its contributors may be used to endorse or
* promote products derived from this software without specific prior written permission.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package de.bwaldvogel.liblinear;
import static de.bwaldvogel.liblinear.Linear.info;
/**
* Trust Region Newton Method optimization
*/
class Tron {
private final Function fun_obj;
private final double eps;
private final int max_iter;
public Tron(final Function fun_obj) {
this(fun_obj, 0.1);
}
public Tron(final Function fun_obj, double eps) {
this(fun_obj, eps, 1000);
}
public Tron(final Function fun_obj, double eps, int max_iter) {
this.fun_obj = fun_obj;
this.eps = eps;
this.max_iter = max_iter;
}
void tron(double[] w) {
// Parameters for updating the iterates.
double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75;
// Parameters for updating the trust region size delta.
double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4;
int n = fun_obj.get_nr_variable();
int i, cg_iter;
double delta, snorm, one = 1.0;
double alpha, f, fnew, prered, actred, gs;
int search = 1, iter = 1;
double[] s = new double[n];
double[] r = new double[n];
double[] w_new = new double[n];
double[] g = new double[n];
for (i = 0; i < n; i++) {
w[i] = 0;
}
f = fun_obj.fun(w);
fun_obj.grad(w, g);
delta = euclideanNorm(g);
double gnorm1 = delta;
double gnorm = gnorm1;
if (gnorm <= eps * gnorm1) {
search = 0;
}
iter = 1;
while (iter <= max_iter && search != 0) {
cg_iter = trcg(delta, g, s, r);
System.arraycopy(w, 0, w_new, 0, n);
daxpy(one, s, w_new);
gs = dot(g, s);
prered = -0.5 * (gs - dot(s, r));
fnew = fun_obj.fun(w_new);
// Compute the actual reduction.
actred = f - fnew;
// On the first iteration, adjust the initial step bound.
snorm = euclideanNorm(s);
if (iter == 1) {
delta = Math.min(delta, snorm);
}
// Compute prediction alpha*snorm of the step.
if (fnew - f - gs <= 0) {
alpha = sigma3;
} else {
alpha = Math.max(sigma1, -0.5 * (gs / (fnew - f - gs)));
}
// Update the trust region bound according to the ratio of actual to
// predicted reduction.
if (actred < eta0 * prered) {
delta = Math.min(Math.max(alpha, sigma1) * snorm, sigma2 * delta);
} else if (actred < eta1 * prered) {
delta = Math.max(sigma1 * delta, Math.min(alpha * snorm, sigma2 * delta));
} else if (actred < eta2 * prered) {
delta = Math.max(sigma1 * delta, Math.min(alpha * snorm, sigma3 * delta));
} else {
delta = Math.max(delta, Math.min(alpha * snorm, sigma3 * delta));
}
info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d%n", iter, actred, prered, delta, f,
gnorm, cg_iter);
if (actred > eta0 * prered) {
iter++;
System.arraycopy(w_new, 0, w, 0, n);
f = fnew;
fun_obj.grad(w, g);
gnorm = euclideanNorm(g);
if (gnorm <= eps * gnorm1) {
break;
}
}
if (f < -1.0e+32) {
info("WARNING: f < -1.0e+32%n");
break;
}
if (Math.abs(actred) <= 0 && prered <= 0) {
info("WARNING: actred and prered <= 0%n");
break;
}
if (Math.abs(actred) <= 1.0e-12 * Math.abs(f) && Math.abs(prered) <= 1.0e-12 * Math.abs(f)) {
info("WARNING: actred and prered too small%n");
break;
}
}
}
private int trcg(double delta, double[] g, double[] s, double[] r) {
int n = fun_obj.get_nr_variable();
double one = 1;
double[] d = new double[n];
double[] Hd = new double[n];
double rTr, rnewTrnew, cgtol;
for (int i = 0; i < n; i++) {
s[i] = 0;
r[i] = -g[i];
d[i] = r[i];
}
cgtol = 0.1 * euclideanNorm(g);
int cg_iter = 0;
rTr = dot(r, r);
while (true) {
if (euclideanNorm(r) <= cgtol) {
break;
}
cg_iter++;
fun_obj.Hv(d, Hd);
double alpha = rTr / dot(d, Hd);
daxpy(alpha, d, s);
if (euclideanNorm(s) > delta) {
info("cg reaches trust region boundary%n");
alpha = -alpha;
daxpy(alpha, d, s);
double std = dot(s, d);
double sts = dot(s, s);
double dtd = dot(d, d);
double dsq = delta * delta;
double rad = Math.sqrt(std * std + dtd * (dsq - sts));
if (std >= 0) {
alpha = (dsq - sts) / (std + rad);
} else {
alpha = (rad - std) / dtd;
}
daxpy(alpha, d, s);
alpha = -alpha;
daxpy(alpha, Hd, r);
break;
}
alpha = -alpha;
daxpy(alpha, Hd, r);
rnewTrnew = dot(r, r);
double beta = rnewTrnew / rTr;
scale(beta, d);
daxpy(one, r, d);
rTr = rnewTrnew;
}
return cg_iter;
}
/**
* constant times a vector plus a vector
*
* <pre>
* vector2 += constant * vector1
* </pre>
*
* @since 1.8
*/
private static void daxpy(double constant, double vector1[], double vector2[]) {
if (constant == 0) {
return;
}
assert vector1.length == vector2.length;
for (int i = 0; i < vector1.length; i++) {
vector2[i] += constant * vector1[i];
}
}
/**
* returns the dot product of two vectors
*
* @since 1.8
*/
private static double dot(double vector1[], double vector2[]) {
double product = 0;
assert vector1.length == vector2.length;
for (int i = 0; i < vector1.length; i++) {
product += vector1[i] * vector2[i];
}
return product;
}
/**
* returns the euclidean norm of a vector
*
* @since 1.8
*/
private static double euclideanNorm(double vector[]) {
int n = vector.length;
if (n < 1) {
return 0;
}
if (n == 1) {
return Math.abs(vector[0]);
}
// this algorithm is (often) more accurate than just summing up the squares and taking the
// square-root afterwards
double scale = 0; // scaling factor that is factored out
double sum = 1; // basic sum of squares from which scale has been factored out
for (int i = 0; i < n; i++) {
if (vector[i] != 0) {
double abs = Math.abs(vector[i]);
// try to get the best scaling factor
if (scale < abs) {
double t = scale / abs;
sum = 1 + sum * (t * t);
scale = abs;
} else {
double t = abs / scale;
sum += t * t;
}
}
}
return scale * Math.sqrt(sum);
}
/**
* scales a vector by a constant
*
* @since 1.8
*/
private static void scale(double constant, double vector[]) {
if (constant == 1.0) {
return;
}
for (int i = 0; i < vector.length; i++) {
vector[i] *= constant;
}
}
}