/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Optimization.java
* Copyright (C) 2003 Xin Xu
*
*/
package weka.core;
import java.util.*;
import java.io.*;
/**
* Implementation of Active-sets method with BFGS update
* to solve optimization problem with only bounds constraints in
* multi-dimensions. In this implementation we consider both the lower and higher
* bound constraints. <p>
*
* Here is the sketch of our searching strategy, and the detailed description
* of the algorithm can be found in the Appendix of Xin Xu's MSc thesis:<p>
* Initialize everything, incl. initial value, direction, etc.<p>
* LOOP (main algorithm):<br>
*
* 1. Perform the line search using the directions for free variables<br>
* 1.1 Check all the bounds that are not "active" (i.e. binding variables)
* and compute the feasible step length to the bound for each of them<br>
* 1.2 Pick up the least feasible step length, say \alpha, and set it as
* the upper bound of the current step length, i.e. 0<\lambda<=\alpha<br>
* 1.3 Search for any possible step length<=\alpha that can result the
* "sufficient function decrease" (\alpha condition) AND "positive definite
* inverse Hessian" (\beta condition), if possible, using SAFEGUARDED polynomial
* interpolation. This step length is "safe" and thus
* is used to compute the next value of the free variables .<br>
* 1.4 Fix the variable(s) that are newly bound to its constraint(s).<p>
*
* 2. Check whether there is convergence of all variables or their gradients.
* If there is, check the possibilities to release any current bindings of
* the fixed variables to their bounds based on the "reliable" second-order
* Lagrange multipliers if available. If they are available and negative for one
* variable, then release it. If not available, use the first-order Lagrange
* multiplier to test release. If there are any released variables, STOP the loop.
* Otherwise update the inverse of Hessian matrix and gradient for the newly
* released variables and CONTINUE LOOP.<p>
*
* 3. Use BFGS formula to update the inverse of Hessian matrix. Note the
* already-fixed variables must have zeros in the corresponding entries
* in the inverse Hessian.<p>
*
* 4. Compute the new (newton) search direction d=H^{-1}*g, where H^{-1} is the
* inverse Hessian and g is the Jacobian. Note that again, the already-
* fixed variables will have zero direction.<p>
*
* ENDLOOP<p>
*
* A typical usage of this class is to create your own subclass of this class
* and provide the objective function and gradients as follows:
* <code>
* class MyOpt extends Optimization{
* // Provide the objective function
* protected double objectiveFunction(double[] x){
* // How to calculate your objective function...
* // ...
* }
*
* // Provide the first derivatives
* protected double[] evaluateGradient(double[] x){
* // How to calculate the gradient of the objective function...
* // ...
* }
*
* // If possible, provide the index^{th} row of the Hessian matrix
* protected double[] evaluateHessian(double[] x, int index){
* // How to calculate the index^th variable's second derivative
* // ...
* }
* }
*
* // When it's the time to use it, in some routine(s) of other class...
* MyOpt opt = new MyOpt();
*
* // Set up initial variable values and bound constraints
* double[] x = new double[numVariables];
* // Lower and upper bounds: 1st row is lower bounds, 2nd is upper
* double[] constraints = new double[2][numVariables];
* ...
*
* // Find the minimum, 200 iterations as default
* x = opt.findArgmin(x, constraints);
* while(x == null){ // 200 iterations are not enough
* x = opt.getVarbValues(); // Try another 200 iterations
* x = opt.findArgmin(x, constraints);
* }
*
* // The minimal function value
* double minFunction = opt.getMinFunction();
* ...
* </code>
* It is recommended that Hessian values be provided so that the second-order
* Lagrangian multiplier estimate can be calculated. However, if it is not provided,
* there is no need to override the <code>evaluateHessian()</code> function.<p>
*
* REFERENCES:<br>
* The whole model algorithm is adapted from Chapter 5 and other related chapters in
* Gill, Murray and Wright(1981) "Practical Optimization", Academic Press.
* and Gill and Murray(1976) "Minimization Subject to Bounds on the Variables", NPL
* Report NAC72, while Chong and Zak(1996) "An Introduction to Optimization",
* John Wiley & Sons, Inc. provides us a brief but helpful introduction to the method. <p>
*
* Dennis and Schnabel(1983) "Numerical Methods for Unconstrained Optimization and
* Nonlinear Equations", Prentice-Hall Inc. and Press et al.(1992) "Numerical Recipes in C",
* Second Edition, Cambridge University Press. are consulted for the polynomial
* interpolation used in the line search implementation. <p>
*
* The Hessian modification in BFGS update uses Cholesky factorization and two rank-one
* modifications:<br>
* Bk+1 = Bk + (Gk*Gk')/(Gk'Dk) + (dGk*(dGk)'))/[alpha*(dGk)'*Dk]. <br>
* where Gk is the gradient vector, Dk is the direction vector and alpha is the
* step rate. <br>
* This method is due to Gill, Golub, Murray and Saunders(1974) ``Methods for Modifying
* Matrix Factorizations'', Mathematics of Computation, Vol.28, No.126, pp 505-535.
* <p>
*
* @author Xin Xu (xx5@cs.waikato.ac.nz)
* @version $Revision: 1.1.1.1 $
*/
public abstract class Optimization{

    // Constant for the sufficient-decrease (alpha) condition in the line search
    protected double m_ALF = 1.0e-4;

    // Constant for the curvature (beta) condition in the line search
    protected double m_BETA = 0.9;

    // Convergence tolerance on the variable values
    protected double m_TOLX = 1.0e-6;

    // Scale factor used to compute the maximum step length in findArgmin()
    protected double m_STPMX = 100.0;

    // Maximum number of iterations of the main algorithm (default 200)
    protected int m_MAXITS = 200;

    // Whether to print debugging output to System.err
    protected static boolean m_Debug = false;

    // function value
    protected double m_f;

    // G'*p
    private double m_Slope;

    // Test if zero step in lnsrch
    private boolean m_IsZeroStep = false;

    // Used when iteration overflow occurs
    private double[] m_X;

    // Compute machine precision
    protected static double m_Epsilon, m_Zero;

    // Determine the machine precision at class-load time: halve m_Epsilon until
    // 1+epsilon is no longer distinguishable from 1, then undo the last halving.
    // m_Zero (the effective "zero" threshold used throughout this class) is the
    // square root of the machine precision.
    static {
        m_Epsilon=1.0;
        while(1.0+m_Epsilon > 1.0){
            m_Epsilon /= 2.0;
        }
        m_Epsilon *= 2.0;
        m_Zero = Math.sqrt(m_Epsilon);
        if (m_Debug)
            System.err.print("Machine precision is "+m_Epsilon+
                             " and zero set to "+m_Zero);
    }
    /**
     * Subclass should implement this procedure to evaluate the objective
     * function to be minimized.
     *
     * @param x the variable values
     * @return the objective function value
     */
    protected abstract double objectiveFunction(double[] x);

    /**
     * Subclass should implement this procedure to evaluate the gradient
     * of the objective function.
     *
     * @param x the variable values
     * @return the gradient vector
     */
    protected abstract double[] evaluateGradient(double[] x);
    /**
     * Subclass is recommended to override this procedure to evaluate the
     * second-order gradient of the objective function. If it's not provided,
     * it returns null, in which case only the first-order Lagrangian
     * multiplier estimate is used by findArgmin().
     *
     * @param x the variables
     * @param index the row index in the Hessian matrix
     * @return one row (the row #index) of the Hessian matrix, null as default
     */
    protected double[] evaluateHessian(double[] x, int index){
        return null;
    }
/**
* Get the minimal function value
*
* @return minimal function value found
*/
public double getMinFunction(){ return m_f;}
/**
* Set the maximal number of iterations in searching (Default 200)
*
* @param it the maximal number of iterations
*/
public void setMaxIteration(int it){ m_MAXITS=it; }
/**
* Set whether in debug mode
*
* @param db use debug or not
*/
public void setDebug(boolean db){ m_Debug = db; }
/**
* Get the variable values. Only needed when iterations exceeds
* the max threshold.
*
* @return the current variable values
*/
public double[] getVarbValues(){ return m_X; }
/**
* Find a new point x in the direction p from a point xold at which the
* value of the function has decreased sufficiently, the positive
* definiteness of B matrix (approximation of the inverse of the Hessian)
* is preserved and no bound constraints are violated. Details see "Numerical
* Methods for Unconstrained Optimization and Nonlinear Equations".
* "Numeric Recipes in C" was also consulted.
*
* @param xold old x value
* @param gradient gradient at that point
* @param direct direction vector
* @param stpmax maximum step length
* @param isFixed indicating whether a variable has been fixed
* @param nwsBounds non-working set bounds. Means these variables are free and
* subject to the bound constraints in this step
* @param wsBdsIndx index of variables that has working-set bounds. Means
* these variables are already fixed and no longer subject to
* the constraints
* @return new value along direction p from xold, null if no step was taken
* @exception Exception if an error occurs
*/
public double[] lnsrch(double[] xold, double[] gradient,
double[] direct, double stpmax,
boolean[] isFixed, double[][] nwsBounds,
FastVector wsBdsIndx)
throws Exception {
int i, j, k,len=xold.length,
fixedOne=-1; // idx of variable to be fixed
double alam, alamin; // lambda to be found, and its lower bound
// For convergence and bound test
double temp,test,alpha=Double.POSITIVE_INFINITY,fold=m_f,sum;
// For cubic interpolation
double a,alam2=0,b,disc=0,maxalam=1.0,rhs1,rhs2,tmplam;
double[] x = new double[len]; // New variable values
// Scale the step
for (sum=0.0,i=0;i<len;i++){
if(!isFixed[i]) // For fixed variables, direction = 0
sum += direct[i]*direct[i];
}
sum = Math.sqrt(sum);
if (m_Debug)
System.err.println("fold: "+Utils.doubleToString(fold,10,7)+"\n"+
"sum: "+Utils.doubleToString(sum,10,7)+"\n"+
"stpmax: "+Utils.doubleToString(stpmax,10,7));
if (sum > stpmax){
for (i=0;i<len;i++)
if(!isFixed[i])
direct[i] *= stpmax/sum;
}
else
maxalam = stpmax/sum;
// Compute initial rate of decrease, g'*d
m_Slope=0.0;
for (i=0;i<len;i++){
x[i] = xold[i];
if(!isFixed[i])
m_Slope += gradient[i]*direct[i];
}
if (m_Debug)
System.err.print("slope: " + Utils.doubleToString(m_Slope,10,7)+ "\n");
// Slope too small
if(Math.abs(m_Slope)<=m_Zero){
if (m_Debug)
System.err.println("Gradient and direction orthogonal -- "+
"Min. found with current fixed variables"+
" (or all variables fixed). Try to release"+
" some variables now.");
return x;
}
// Err: slope > 0
if(m_Slope > m_Zero){
if(m_Debug)
for(int h=0; h<x.length; h++)
System.err.println(h+": isFixed="+isFixed[h]+", x="+
x[h]+", grad="+gradient[h]+", direct="+
direct[h]);
throw new Exception("g'*p positive! -- Try to debug from here: line 263.");
}
// Compute LAMBDAmin and upper bound of lambda--alpha
test=0.0;
for(i=0;i<len;i++){
if(!isFixed[i]){// No need for fixed variables
temp=Math.abs(direct[i])/Math.max(Math.abs(x[i]),1.0);
if (temp > test) test=temp;
}
}
if(test>m_Zero) // Not converge
alamin = m_TOLX/test;
else{
if (m_Debug)
System.err.println("Zero directions for all free variables -- "+
"Min. found with current fixed variables"+
" (or all variables fixed). Try to release"+
" some variables now.");
return x;
}
// Check whether any non-working-set bounds are "binding"
for(i=0;i<len;i++){
if(!isFixed[i]){// No need for fixed variables
double alpi;
if((direct[i]<-m_Epsilon) && !Double.isNaN(nwsBounds[0][i])){//Not feasible
alpi = (nwsBounds[0][i]-xold[i])/direct[i];
if(alpi <= m_Zero){ // Zero
if (m_Debug)
System.err.println("Fix variable "+i+
" to lower bound "+ nwsBounds[0][i]+
" from value "+ xold[i]);
x[i] = nwsBounds[0][i];
isFixed[i]=true; // Fix this variable
alpha = 0.0;
nwsBounds[0][i]=Double.NaN; //Add cons. to working set
wsBdsIndx.addElement(new Integer(i));
}
else if(alpha > alpi){ // Fix one variable in one iteration
alpha = alpi;
fixedOne = i;
}
}
else if((direct[i]>m_Epsilon) && !Double.isNaN(nwsBounds[1][i])){//Not feasible
alpi = (nwsBounds[1][i]-xold[i])/direct[i];
if(alpi <= m_Zero){ // Zero
if (m_Debug)
System.err.println("Fix variable "+i+
" to upper bound "+ nwsBounds[1][i]+
" from value "+ xold[i]);
x[i] = nwsBounds[1][i];
isFixed[i]=true; // Fix this variable
alpha = 0.0;
nwsBounds[1][i]=Double.NaN; //Add cons. to working set
wsBdsIndx.addElement(new Integer(i));
}
else if(alpha > alpi){
alpha = alpi;
fixedOne = i;
}
}
}
}
if (m_Debug){
System.err.println("alamin: " + Utils.doubleToString(alamin,10,7));
System.err.println("alpha: " + Utils.doubleToString(alpha,10,7));
}
if(alpha <= m_Zero){ // Zero
m_IsZeroStep = true;
if (m_Debug)
System.err.println("Alpha too small, try again");
return x;
}
alam = alpha; // Always try full feasible newton step
if(alam > 1.0)
alam = 1.0;
// Iteration of one newton step, if necessary, backtracking is done
double initF=fold, // Initial function value
hi=alam, lo=alam, newSlope=0, fhi=m_f, flo=m_f;// Variables used for beta condition
double[] newGrad; // Gradient on the new variable values
kloop:
for (k=0;;k++) {
if(m_Debug)
System.err.println("\nIteration: " + k);
for (i=0;i<len;i++){
if(!isFixed[i]){
x[i] = xold[i]+alam*direct[i]; // Compute xnew
if(!Double.isNaN(nwsBounds[0][i]) && (x[i]<nwsBounds[0][i])){
x[i] = nwsBounds[0][i]; //Rounding error
}
else if(!Double.isNaN(nwsBounds[1][i]) && (x[i]>nwsBounds[1][i])){
x[i] = nwsBounds[1][i]; //Rounding error
}
}
}
m_f = objectiveFunction(x); // Compute fnew
while(Double.isInfinite(m_f)){ // Avoid infinity
if(m_Debug)
System.err.println("Too large m_f. Shrink step by half.");
alam *= 0.5; // Shrink by half
if(alam <= m_Epsilon){
if(m_Debug)
System.err.println("Wrong starting points, change them!");
return x;
}
for (i=0;i<len;i++)
if(!isFixed[i])
x[i] = xold[i]+alam*direct[i];
m_f = objectiveFunction(x);
initF = Double.POSITIVE_INFINITY;
}
if(m_Debug) {
System.err.println("obj. function: " +
Utils.doubleToString(m_f, 10, 7));
System.err.println("threshold: " +
Utils.doubleToString(fold+m_ALF*alam*m_Slope,10,7));
}
if(m_f<=fold+m_ALF*alam*m_Slope){// Alpha condition: sufficient function decrease
if(m_Debug)
System.err.println("Sufficient function decrease (alpha condition): ");
newGrad = evaluateGradient(x);
for(newSlope=0.0,i=0; i<len; i++)
if(!isFixed[i])
newSlope += newGrad[i]*direct[i];
if(newSlope >= m_BETA*m_Slope){ // Beta condition: ensure pos. defnty.
if(m_Debug)
System.err.println("Increasing derivatives (beta condition): ");
if((fixedOne!=-1) && (alam>=alpha)){ // Has bounds and over
if(direct[fixedOne] > 0){
x[fixedOne] = nwsBounds[1][fixedOne]; // Avoid rounding error
nwsBounds[1][fixedOne]=Double.NaN; //Add cons. to working set
}
else{
x[fixedOne] = nwsBounds[0][fixedOne]; // Avoid rounding error
nwsBounds[0][fixedOne]=Double.NaN; //Add cons. to working set
}
if(m_Debug)
System.err.println("Fix variable "
+fixedOne+" to bound "+ x[fixedOne]+
" from value "+ xold[fixedOne]);
isFixed[fixedOne]=true; // Fix the variable
wsBdsIndx.addElement(new Integer(fixedOne));
}
return x;
}
else if(k==0){ // First time: increase alam
// Search for the smallest value not complying with alpha condition
double upper = Math.min(alpha,maxalam);
if(m_Debug)
System.err.println("Alpha condition holds, increase alpha... ");
while((alam>=upper) || (m_f>fold+m_ALF*alam*m_Slope)){
lo = alam;
flo = m_f;
alam *= 2.0;
if(alam>=upper)
alam=upper;
for (i=0;i<len;i++)
if(!isFixed[i])
x[i] = xold[i]+alam*direct[i];
m_f = objectiveFunction(x);
newGrad = evaluateGradient(x);
for(newSlope=0.0,i=0; i<len; i++)
if(!isFixed[i])
newSlope += newGrad[i]*direct[i];
if(newSlope >= m_BETA*m_Slope){
if (m_Debug)
System.err.println("Increasing derivatives (beta condition): ");
if((fixedOne!=-1) && (alam>=alpha)){ // Has bounds and over
if(direct[fixedOne] > 0){
x[fixedOne] = nwsBounds[1][fixedOne]; // Avoid rounding error
nwsBounds[1][fixedOne]=Double.NaN; //Add cons. to working set
}
else{
x[fixedOne] = nwsBounds[0][fixedOne]; // Avoid rounding error
nwsBounds[0][fixedOne]=Double.NaN; //Add cons. to working set
}
if(m_Debug)
System.err.println("Fix variable "
+fixedOne+" to bound "+ x[fixedOne]+
" from value "+ xold[fixedOne]);
isFixed[fixedOne]=true; // Fix the variable
wsBdsIndx.addElement(new Integer(fixedOne));
}
return x;
}
}
hi = alam;
fhi = m_f;
break kloop;
}
else{
if(m_Debug)
System.err.println("Alpha condition holds.");
hi = alam2; lo = alam; flo = m_f;
break kloop;
}
}
else if (alam < alamin) { // No feasible lambda found
if(initF<fold){
alam = Math.min(1.0,alpha);
for (i=0;i<len;i++)
if(!isFixed[i])
x[i] = xold[i]+alam*direct[i]; //Still take Alpha
if (m_Debug)
System.err.println("No feasible lambda: still take"+
" alpha="+alam);
if((fixedOne!=-1) && (alam>=alpha)){ // Has bounds and over
if(direct[fixedOne] > 0){
x[fixedOne] = nwsBounds[1][fixedOne]; // Avoid rounding error
nwsBounds[1][fixedOne]=Double.NaN; //Add cons. to working set
}
else{
x[fixedOne] = nwsBounds[0][fixedOne]; // Avoid rounding error
nwsBounds[0][fixedOne]=Double.NaN; //Add cons. to working set
}
if(m_Debug)
System.err.println("Fix variable "
+fixedOne+" to bound "+ x[fixedOne]+
" from value "+ xold[fixedOne]);
isFixed[fixedOne]=true; // Fix the variable
wsBdsIndx.addElement(new Integer(fixedOne));
}
}
else{ // Convergence on delta(x)
for(i=0;i<len;i++)
x[i]=xold[i];
m_f=fold;
if (m_Debug)
System.err.println("Cannot find feasible lambda");
}
return x;
}
else { // Backtracking by polynomial interpolation
if(k==0){ // First time backtrack: quadratic interpolation
if(!Double.isInfinite(initF))
initF = m_f;
// lambda = -g'(0)/(2*g''(0))
tmplam = -0.5*alam*m_Slope/((m_f-fold)/alam-m_Slope);
}
else { // Subsequent backtrack: cubic interpolation
rhs1 = m_f-fold-alam*m_Slope;
rhs2 = fhi-fold-alam2*m_Slope;
a=(rhs1/(alam*alam)-rhs2/(alam2*alam2))/(alam-alam2);
b=(-alam2*rhs1/(alam*alam)+alam*rhs2/(alam2*alam2))/(alam-alam2);
if (a == 0.0) tmplam = -m_Slope/(2.0*b);
else {
disc=b*b-3.0*a*m_Slope;
if (disc < 0.0) disc = 0.0;
tmplam=(-b+Math.sqrt(disc))/(3.0*a);
}
if (m_Debug)
System.err.print("newstuff: \n" +
"a: " + Utils.doubleToString(a,10,7)+ "\n" +
"b: " + Utils.doubleToString(b,10,7)+ "\n" +
"disc: " + Utils.doubleToString(disc,10,7)+ "\n" +
"tmplam: " + tmplam + "\n" +
"alam: " + Utils.doubleToString(alam,10,7)+ "\n");
if (tmplam>0.5*alam)
tmplam=0.5*alam; // lambda <= 0.5*lambda_old
}
}
alam2=alam;
fhi=m_f;
alam=Math.max(tmplam,0.1*alam); // lambda >= 0.1*lambda_old
if(alam>alpha){
throw new Exception("Sth. wrong in lnsrch:"+
"Lambda infeasible!(lambda="+alam+
", alpha="+alpha+", upper="+tmplam+
"|"+(-alpha*m_Slope/(2.0*((m_f-fold)/alpha-m_Slope)))+
", m_f="+m_f+", fold="+fold+
", slope="+m_Slope);
}
} // Endfor(k=0;;k++)
// Quadratic interpolation between lamda values between lo and hi.
// If cannot find a value satisfying beta condition, use lo.
double ldiff = hi-lo, lincr;
if(m_Debug)
System.err.println("Last stage of searching for beta condition (alam between "
+Utils.doubleToString(lo,10,7)+" and "
+Utils.doubleToString(hi,10,7)+")...");
while((newSlope<m_BETA*m_Slope) && (ldiff>=alamin)){
lincr = -0.5*newSlope*ldiff*ldiff/(fhi-flo-newSlope*ldiff);
if(lincr<0.2*ldiff) lincr=0.2*ldiff;
alam = lo+lincr;
for (i=0;i<len;i++)
if(!isFixed[i])
x[i] = xold[i]+alam*direct[i];
m_f = objectiveFunction(x);
if(m_f>fold+m_ALF*alam*m_Slope){
// Alpha condition fails, shrink lambda_upper
ldiff = lincr;
fhi = m_f;
}
else{ // Alpha condition holds
newGrad = evaluateGradient(x);
for(newSlope=0.0,i=0; i<len; i++)
if(!isFixed[i])
newSlope += newGrad[i]*direct[i];
if(newSlope < m_BETA*m_Slope){
// Beta condition fails, shrink lambda_lower
lo = alam;
ldiff -= lincr;
flo = m_f;
}
}
}
if(newSlope < m_BETA*m_Slope){ // Cannot satisfy beta condition, take lo
if(m_Debug)
System.err.println("Beta condition cannot be satisfied, take alpha condition");
alam=lo;
for (i=0;i<len;i++)
if(!isFixed[i])
x[i] = xold[i]+alam*direct[i];
m_f = flo;
}
else if(m_Debug)
System.err.println("Both alpha and beta conditions are satisfied. alam="
+Utils.doubleToString(alam,10,7));
if((fixedOne!=-1) && (alam>=alpha)){ // Has bounds and over
if(direct[fixedOne] > 0){
x[fixedOne] = nwsBounds[1][fixedOne]; // Avoid rounding error
nwsBounds[1][fixedOne]=Double.NaN; //Add cons. to working set
}
else{
x[fixedOne] = nwsBounds[0][fixedOne]; // Avoid rounding error
nwsBounds[0][fixedOne]=Double.NaN; //Add cons. to working set
}
if(m_Debug)
System.err.println("Fix variable "
+fixedOne+" to bound "+ x[fixedOne]+
" from value "+ xold[fixedOne]);
isFixed[fixedOne]=true; // Fix the variable
wsBdsIndx.addElement(new Integer(fixedOne));
}
return x;
}
    /**
     * Main algorithm. Descriptions see "Practical Optimization"
     *
     * @param initX initial point of x, assuming no value's on the bound!
     * @param constraints the bound constraints of each variable
     *        constraints[0] is the lower bounds and
     *        constraints[1] is the upper bounds
     * @return the solution of x, null if number of iterations not enough
     * @exception Exception if an error occurs
     */
    public double[] findArgmin(double[] initX, double[][] constraints)
        throws Exception{
        int l = initX.length;

        // Initially all variables are free, all bounds are constraints of
        // non-working-set constraints
        boolean[] isFixed = new boolean[l];
        double[][] nwsBounds = new double[2][l];

        // Record indice of fixed variables, simply for efficiency
        FastVector wsBdsIndx = new FastVector();
        // Vectors used to record the variable indices to be freed
        FastVector toFree=null, oldToFree=null;

        // Initial value of obj. function, gradient and inverse of the Hessian
        m_f = objectiveFunction(initX);
        double sum=0;
        double[] grad=evaluateGradient(initX), oldGrad, oldX,
            deltaGrad=new double[l], deltaX=new double[l],
            direct = new double[l], x = new double[l];
        Matrix L = new Matrix(l, l),// Lower triangle of Cholesky factor
            D = new Matrix(l, l);   // Diagonal of Cholesky factor

        // Initialize the Cholesky factors to the identity (so the first
        // direction is steepest descent), copy the starting point, and
        // install every bound as a non-working-set constraint
        for(int i=0; i<l; i++){
            L.setRow(i, new double[l]);
            L.setElement(i,i,1.0);
            D.setRow(i, new double[l]);
            D.setElement(i,i,1.0);
            direct[i] = -grad[i];
            sum += grad[i]*grad[i];
            x[i] = initX[i];
            nwsBounds[0][i] = constraints[0][i];
            nwsBounds[1][i] = constraints[1][i];
            isFixed[i] = false;
        }
        // Maximum step length, scaled by the norm of the initial gradient
        double stpmax = m_STPMX*Math.max(Math.sqrt(sum), l);

    iterates:
        for(int step=0; step < m_MAXITS; step++){
            if (m_Debug)
                System.err.println("\nIteration # " + step + ":");

            // Try at most one feasible newton step, i.e. 0<lamda<=alpha
            oldX = x;
            oldGrad = grad;

            // Also update grad
            if (m_Debug)
                System.err.println("Line search ... ");
            m_IsZeroStep = false;
            x=lnsrch(x, grad, direct, stpmax,
                     isFixed, nwsBounds, wsBdsIndx);
            if (m_Debug)
                System.err.println("Line search finished.");

            if(m_IsZeroStep){ // Zero step, simply delete rows/cols of D and L
                for(int f=0; f<wsBdsIndx.size(); f++){
                    int idx=((Integer)wsBdsIndx.elementAt(f)).intValue();
                    L.setRow(idx, new double[l]);
                    L.setColumn(idx, new double[l]);
                    D.setElement(idx, idx, 0.0);
                }
                grad = evaluateGradient(x);
                step--; // A zero step does not count as an iteration
            }
            else{
                // Check converge on x
                boolean finish = false;
                double test=0.0;
                for(int h=0; h<l; h++){
                    deltaX[h] = x[h]-oldX[h];
                    double tmp=Math.abs(deltaX[h])/
                        Math.max(Math.abs(x[h]), 1.0);
                    if(tmp > test) test = tmp;
                }
                if(test < m_Zero){
                    if (m_Debug)
                        System.err.println("\nDeltaX converge: "+test);
                    finish = true;
                }

                // Check zero gradient
                grad = evaluateGradient(x);
                test=0.0;
                double denom=0.0, dxSq=0.0, dgSq=0.0, newlyBounded=0.0;
                for(int g=0; g<l; g++){
                    if(!isFixed[g]){
                        deltaGrad[g] = grad[g] - oldGrad[g];
                        // Calculate the denominators
                        denom += deltaX[g]*deltaGrad[g];
                        dxSq += deltaX[g]*deltaX[g];
                        dgSq += deltaGrad[g]*deltaGrad[g];
                    }
                    else // Only newly bounded variables will be non-zero
                        newlyBounded += deltaX[g]*(grad[g]-oldGrad[g]);

                    // Note: CANNOT use projected gradient for testing
                    // convergence because of newly bounded variables
                    double tmp = Math.abs(grad[g])*
                        Math.max(Math.abs(direct[g]),1.0)/
                        Math.max(Math.abs(m_f),1.0);
                    if(tmp > test) test = tmp;
                }

                if(test < m_Zero){
                    if (m_Debug)
                        System.err.println("Gradient converge: "+test);
                    finish = true;
                }

                // dg'*dx could be < 0 using inexact lnsrch
                if(m_Debug)
                    System.err.println("dg'*dx="+(denom+newlyBounded));
                // dg'*dx = 0
                if(Math.abs(denom+newlyBounded) < m_Zero)
                    finish = true;

                int size = wsBdsIndx.size();
                boolean isUpdate = true; // Whether to update BFGS formula
                // Converge: check whether release any current constraints
                if(finish){
                    if (m_Debug)
                        System.err.println("Test any release possible ...");
                    if(toFree != null)
                        oldToFree = (FastVector)toFree.copy();
                    toFree = new FastVector();

                    // Walk the working set backwards (so removeElementAt is safe)
                    for(int m=size-1; m>=0; m--){
                        int index=((Integer)wsBdsIndx.elementAt(m)).intValue();
                        double[] hessian = evaluateHessian(x, index);
                        double deltaL=0.0;
                        if(hessian != null){
                            for(int mm=0; mm<hessian.length; mm++)
                                if(!isFixed[mm]) // Free variable
                                    deltaL += hessian[mm]*direct[mm];
                        }

                        // First and second order Lagrangian multiplier estimate
                        // If user didn't provide Hessian, use first-order only
                        double L1, L2;
                        if(x[index] >= constraints[1][index]) // Upper bound
                            L1 = -grad[index];
                        else if(x[index] <= constraints[0][index])// Lower bound
                            L1 = grad[index];
                        else
                            throw new Exception("x["+index+"] not fixed on the"+
                                                " bounds where it should have been!");

                        // L2 = L1 + deltaL
                        L2 = L1 + deltaL;
                        if (m_Debug)
                            System.err.println("Variable "+index+
                                               ": Lagrangian="+L1+"|"+L2);

                        //Check validity of Lagrangian multiplier estimate
                        boolean isConverge =
                            (2.0*Math.abs(deltaL)) < Math.min(Math.abs(L1),
                                                              Math.abs(L2));
                        if((L1*L2>0.0) && isConverge){ //Same sign and converge: valid
                            if(L2 < 0.0){// Negative Lagrangian: feasible
                                toFree.addElement(new Integer(index));
                                wsBdsIndx.removeElementAt(m);
                                finish=false; // Not optimal, cannot finish
                            }
                        }

                        // Although hardly happen, better check it
                        // If the first-order Lagrangian multiplier estimate is wrong,
                        // avoid zigzagging
                        if((hessian==null) && equal(toFree, oldToFree))
                            finish = true;
                    }

                    if(finish){// Min. found
                        if (m_Debug)
                            System.err.println("Minimum found.");
                        m_f = objectiveFunction(x);
                        return x;
                    }

                    // Free some variables
                    for(int mmm=0; mmm<toFree.size(); mmm++){
                        int freeIndx=((Integer)toFree.elementAt(mmm)).intValue();
                        isFixed[freeIndx] = false; // Free this variable
                        if(x[freeIndx] <= constraints[0][freeIndx]){// Lower bound
                            nwsBounds[0][freeIndx] = constraints[0][freeIndx];
                            if (m_Debug)
                                System.err.println("Free variable "+freeIndx+
                                                   " from bound "+
                                                   nwsBounds[0][freeIndx]);
                        }
                        else{ // Upper bound
                            nwsBounds[1][freeIndx] = constraints[1][freeIndx];
                            if (m_Debug)
                                System.err.println("Free variable "+freeIndx+
                                                   " from bound "+
                                                   nwsBounds[1][freeIndx]);
                        }
                        // Restore the corresponding rows/cols of the factors
                        L.setElement(freeIndx, freeIndx, 1.0);
                        D.setElement(freeIndx, freeIndx, 1.0);
                        isUpdate = false;
                    }
                }

                // Skip the BFGS update when dg'*dx is too small (it would
                // destroy positive definiteness of the approximation)
                if(denom<Math.max(m_Zero*Math.sqrt(dxSq)*Math.sqrt(dgSq), m_Zero)){
                    if (m_Debug)
                        System.err.println("dg'*dx negative!");
                    isUpdate = false; // Do not update
                }

                // If Hessian will be positive definite, update it
                if(isUpdate){
                    double[] v= new double[l]; // NOTE(review): unused, kept as-is
                    Matrix[] result;
                    // modify once: dg*dg'/(dg'*dx)
                    double coeff = 1.0/denom; // 1/(dg'*dx)
                    result = updateCholeskyFactor(L,D,deltaGrad,coeff,isFixed);

                    // modify twice: g*g'/(g'*p)
                    coeff = 1.0/m_Slope; // 1/(g'*p)
                    result=updateCholeskyFactor
                        (result[0],result[1],oldGrad,coeff,isFixed);
                    L = result[0];
                    D = result[1];
                }
            }

            // Find new direction
            Matrix LD = new Matrix(l,l); // L*D
            double[] b = new double[l];

            for(int k=0; k<l; k++){
                if(!isFixed[k]) b[k] = -grad[k];
                else b[k] = 0.0;

                for(int j=k; j<l; j++){ // Lower triangle
                    if(!isFixed[j] && !isFixed[k])
                        LD.setElement(j, k, L.getElement(j,k)*D.getElement(k,k));
                }
            }

            // Solve (LD)*y = -g, where y=L'*direct
            double[] LDIR = solveTriangle(LD, b, true, isFixed);
            for(int m=0; m<LDIR.length; m++){
                if(Double.isNaN(LDIR[m]))
                    throw new Exception("L*direct["+m+"] is NaN!"
                                        +"|-g="+b[m]+"|"+isFixed[m]
                                        +"|diag="+D.getElement(m,m));
            }

            // Solve L'*direct = y
            direct = solveTriangle(L, LDIR, false, isFixed);
            for(int m=0; m<direct.length; m++){
                if(Double.isNaN(direct[m]))
                    throw new Exception("direct is NaN!");
            }

            System.gc(); // NOTE(review): explicit gc per iteration; legacy behavior
        }

        // Too many iterations: stash the current point so the caller can
        // resume via getVarbValues() and another findArgmin() call
        if(m_Debug)
            System.err.println("Cannot find minimum"+
                               " -- too many interations!");
        m_X = x;
        return null;
    }
/**
* Solve the linear equation of TX=B where T is a triangle matrix
* It can be solved using back/forward substitution, with O(N^2)
* complexity
* @param t the matrix T
* @param b the vector B
* @param isLower whether T is a lower or higher triangle matrix
* @param isZero which row(s) of T are not used when solving the equation.
* If it's null or all 'false', then every row is used.
* @return the solution of X
*/
public static double[] solveTriangle(Matrix t, double[] b,
boolean isLower, boolean[] isZero){
int n = b.length;
double[] result = new double[n];
if(isZero == null)
isZero = new boolean[n];
if(isLower){ // lower triangle, forward-substitution
int j = 0;
while((j<n)&&isZero[j]){result[j]=0.0; j++;} // go to the first row
if(j<n){
result[j] = b[j]/t.getElement(j,j);
for(; j<n; j++){
if(!isZero[j]){
double numerator=b[j];
for(int k=0; k<j; k++)
numerator -= t.getElement(j,k)*result[k];
result[j] = numerator/t.getElement(j,j);
}
else
result[j] = 0.0;
}
}
}
else{ // Upper triangle, back-substitution
int j=n-1;
while((j>=0)&&isZero[j]){result[j]=0.0; j--;} // go to the last row
if(j>=0){
result[j] = b[j]/t.getElement(j,j);
for(; j>=0; j--){
if(!isZero[j]){
double numerator=b[j];
for(int k=j+1; k<n; k++)
numerator -= t.getElement(k,j)*result[k];
result[j] = numerator/t.getElement(j,j);
}
else
result[j] = 0.0;
}
}
}
return result;
}
    /**
     * One rank update of the Cholesky factorization of B matrix in BFGS updates,
     * i.e. B = LDL', and B_{new} = LDL' + coeff*(vv') where L is a unit lower triangle
     * matrix and D is a diagonal matrix, and v is a vector.<br>
     * When coeff > 0, we use C1 algorithm, and otherwise we use C2 algorithm described
     * in ``Methods for Modifying Matrix Factorizations''
     *
     * @param L the unit triangle matrix L
     * @param D the diagonal matrix D
     * @param v the update vector v
     * @param coeff the coeffcient of update
     * @param isFixed which variables are not to be updated
     * @return the updated L and D (element 0 is L, element 1 is D)
     * @exception Exception if the update produces NaN/infinite entries
     */
    protected Matrix[] updateCholeskyFactor(Matrix L, Matrix D,
                                            double[] v, double coeff,
                                            boolean[] isFixed)
        throws Exception{
        double t, p, b;
        int n = v.length;
        double[] vp = new double[n];

        Matrix dBar = new Matrix(n, n), lBar = new Matrix(n, n);

        // Zero out the entries of v belonging to fixed variables so that
        // they do not participate in the update
        for (int i=0; i<v.length; i++)
            if(!isFixed[i])
                vp[i]=v[i];
            else
                vp[i]=0.0;

        if(coeff>0.0){ // C1 algorithm: positive rank-one update
            t = coeff;
            for(int j=0; j<n; j++){
                if(isFixed[j]) continue; // Fixed rows/cols are left zero

                lBar.setElement(j, j, 1.0); // Unit triangle
                p = vp[j];
                double d=D.getElement(j,j), dbarj=d+t*p*p;
                dBar.setElement(j, j, dbarj);

                b = p*t/dbarj;
                t *= d/dbarj;
                for(int r=j+1; r<n; r++){
                    if(!isFixed[r]){
                        double l=L.getElement(r, j);
                        vp[r] -= p*l;
                        lBar.setElement(r, j, l+b*vp[r]);
                    }
                    else
                        lBar.setElement(r, j, 0.0);
                }
            }
        }
        else{ // C2 algorithm: negative (downdating) rank-one update
            // Solve L*P = v for P, then accumulate t = P'*D^{-1}*P
            double[] P = solveTriangle(L, v, true, isFixed);
            t = 0.0;
            for(int i=0; i<n; i++)
                if(!isFixed[i])
                    t += P[i]*P[i]/D.getElement(i,i);

            double sqrt=1.0+coeff*t;
            sqrt = (sqrt<0.0)? 0.0 : Math.sqrt(sqrt);

            double alpha=coeff, sigma=coeff/(1.0+sqrt), rho, theta;

            for(int j=0; j<n; j++){
                if(isFixed[j]) continue; // Fixed rows/cols are left zero

                lBar.setElement(j, j, 1.0); // Unit triangle
                double d=D.getElement(j,j);
                p = P[j]*P[j]/d;
                theta = 1.0+sigma*p;
                t -= p;
                if(t<0.0) t=0.0; // Rounding error

                double plus = sigma*sigma*p*t;
                if((j<n-1) && (plus <= m_Zero))
                    plus=m_Zero; // Avoid rounding error
                rho = theta*theta + plus;
                dBar.setElement(j, j, rho*d);

                if(Double.isNaN(dBar.getElement(j,j))){
                    throw new Exception("d["+j+"] NaN! P="+P[j]+",d="+d+
                                        ",t="+t+",p="+p+",sigma="+sigma+
                                        ",sclar="+coeff);
                }
                b=alpha*P[j]/(rho*d);
                alpha /= rho;
                rho = Math.sqrt(rho);
                double sigmaOld = sigma;
                sigma *= (1.0+rho)/(rho*(theta+rho));
                if((j<n-1) &&
                   (Double.isNaN(sigma) || Double.isInfinite(sigma)))
                    throw new Exception("sigma NaN/Inf! rho="+rho+
                                        ",theta="+theta+",P["+j+"]="+
                                        P[j]+",p="+p+",d="+d+",t="+t+
                                        ",oldsigma="+sigmaOld);

                for(int r=j+1; r<n; r++){
                    if(!isFixed[r]){
                        double l=L.getElement(r, j);
                        vp[r] -= P[j]*l;
                        lBar.setElement(r, j, l+b*vp[r]);
                    }
                    else
                        lBar.setElement(r, j, 0.0);
                }
            }
        }

        Matrix[] rt = new Matrix[2];
        rt[0] = lBar; rt[1] = dBar;
        return rt;
    }
/**
* Check whether the two integer vectors equal to each other
* Two integer vectors are equal if all the elements are the
* same, regardless of the order of the elements
*
* @param a one integer vector
* @param b another integer vector
* @return whether they are equal
*/
private boolean equal(FastVector a, FastVector b){
if((a==null) || (b==null) || (a.size()!=b.size()))
return false;
int size=a.size();
// Store into int arrays
int[] ia=new int[size], ib=new int[size];
for(int i=0;i<size;i++){
ia[i] = ((Integer)a.elementAt(i)).intValue();
ib[i] = ((Integer)b.elementAt(i)).intValue();
}
// Only values matter, order does not matter
int[] sorta=Utils.sort(ia), sortb=Utils.sort(ib);
for(int j=0; j<size;j++)
if(ia[sorta[j]] != ib[sortb[j]])
return false;
return true;
}
}