/*******************************************************************************
* Copyright 2012 University of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This code was developed by the Information Integration Group as part
* of the Karma project at the Information Sciences Institute of the
* University of Southern California. For more information, publications,
* and related projects, please see: http://www.isi.edu/integration
******************************************************************************/
package edu.isi.karma.modeling.semantictypes.mycrf.optimization ;
import java.util.ArrayList;
import edu.isi.karma.modeling.semantictypes.mycrf.common.Constants;
import edu.isi.karma.modeling.semantictypes.mycrf.math.Matrix;
/**
 * Limited-memory BFGS (L-BFGS) search-direction generator.
 *
 * <p>Each call to {@link #searchDir(double[], double[], double[])} receives the
 * current point and its gradient and writes a search direction into the
 * caller-supplied array. The direction is built from the most recent
 * position/gradient differences (at most {@code Constants.MEMORY_FOR_L_BFGS}
 * of them) using the two-loop recursion. The initial inverse-Hessian
 * approximation is the identity, i.e. no scaling is applied between the two
 * loops.
 *
 * <p>Instances carry mutable state (history and the previous point/gradient)
 * and must therefore be used for one optimization run at a time.
 *
 * @author amangoel
 */
public class LBFGS {

    /** Position differences {@code x_k - x_(k-1)}; index 0 holds the most recent one. */
    ArrayList<double[]> s ;

    /** Gradient differences {@code g_k - g_(k-1)}; same ordering as {@link #s}. */
    ArrayList<double[]> y ;

    /** {@code r.get(i) == 1 / (s.get(i) . y.get(i))}; same ordering as {@link #s}. */
    ArrayList<Double> r ;

    /** Point passed to the previous {@code searchDir} call. */
    double[] xOld ;

    /** Gradient passed to the previous {@code searchDir} call. */
    double[] gOld ;

    /** 1-based count of {@code searchDir} calls; equals 1 until the first call finishes. */
    int iter = 1 ;

    /** Number of components in every point, gradient and direction vector. */
    int dim ;

    /**
     * Creates a generator with an empty history.
     *
     * @param dim number of components in the weight/gradient vectors
     */
    public LBFGS(int dim) {
        this.dim = dim ;
        s = new ArrayList<double[]>() ;
        y = new ArrayList<double[]>() ;
        r = new ArrayList<Double>() ;
        xOld = new double[dim] ;
        gOld = new double[dim] ;
        iter = 1 ;
    }

    /**
     * Computes the next search direction.
     *
     * <p>On the first call there is no history, so the result is the negated
     * gradient. On later calls the difference to the previous point/gradient is
     * recorded (when it is usable, see {@link #rememberStep}) and the two-loop
     * recursion is applied to the gradient.
     *
     * @param x current point; the first {@code dim} entries are read
     * @param g gradient at {@code x}; the first {@code dim} entries are read
     * @param d output array; its first {@code dim} entries are overwritten with
     *          the search direction
     */
    public void searchDir(double[] x, double[] g, double[] d) {
        if (iter > 1) {
            rememberStep(x, g) ;
        }

        // With an empty history this is simply a copy of g, which yields
        // the steepest-descent direction below.
        double[] z = applyInverseHessian(g) ;
        for (int i = 0 ; i < dim ; i++) {
            d[i] = -z[i] ;
        }

        System.arraycopy(x, 0, xOld, 0, dim) ;
        System.arraycopy(g, 0, gOld, 0, dim) ;
        iter++ ;
    }

    /**
     * Records the latest (s, y) correction pair at the front of the history,
     * evicting the oldest pairs so that at most
     * {@code Constants.MEMORY_FOR_L_BFGS} pairs are kept.
     *
     * <p>The pair is discarded when {@code s . y} is not strictly positive or
     * when its reciprocal is not finite. Storing such a pair would put an
     * infinite/NaN or negative factor into the recursion and the resulting
     * direction would be unusable (NaN) or possibly not a descent direction.
     */
    private void rememberStep(double[] x, double[] g) {
        double[] sNew = new double[dim] ;
        double[] yNew = new double[dim] ;
        for (int i = 0 ; i < dim ; i++) {
            sNew[i] = x[i] - xOld[i] ;
            yNew[i] = g[i] - gOld[i] ;
        }

        double sy = Matrix.dotProduct(sNew, yNew) ;
        double rNew = 1 / sy ;
        // !(sy > 0) also rejects NaN.
        if (!(sy > 0) || Double.isInfinite(rNew)) {
            return ;
        }

        int memory = Constants.MEMORY_FOR_L_BFGS ;
        if (memory <= 0) {
            // No room for history at all: behave as plain steepest descent.
            return ;
        }
        while (s.size() >= memory) {
            int oldest = s.size() - 1 ;
            s.remove(oldest) ;
            y.remove(oldest) ;
            r.remove(oldest) ;
        }
        s.add(0, sNew) ;
        y.add(0, yNew) ;
        r.add(0, rNew) ;
    }

    /**
     * Applies the two-loop recursion to {@code g} using the stored history and
     * returns the result in a new array; {@code g} itself is not modified.
     *
     * <p>NOTE(review): relies on {@code Matrix.plusEquals(v, w, c)} updating
     * {@code v} in place as {@code v += c * w} - confirm against Matrix.
     */
    private double[] applyInverseHessian(double[] g) {
        int pairs = s.size() ;
        double[] q = new double[dim] ;
        System.arraycopy(g, 0, q, 0, dim) ;

        // First loop: newest pair to oldest pair.
        double[] a = new double[pairs] ;
        for (int il = 0 ; il < pairs ; il++) {
            a[il] = r.get(il) * Matrix.dotProduct(s.get(il), q) ;
            Matrix.plusEquals(q, y.get(il), -a[il]) ;
        }

        // Identity initial approximation: q is carried over unchanged.

        // Second loop: oldest pair to newest pair.
        for (int il = pairs - 1 ; il >= 0 ; il--) {
            double b = r.get(il) * Matrix.dotProduct(y.get(il), q) ;
            Matrix.plusEquals(q, s.get(il), (a[il] - b)) ;
        }
        return q ;
    }
}