/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.MIL.Diverse_Density.Optimization; import java.util.ArrayList; /** * Optimization auxiliary methods */ public abstract class Optimization { // /////////////////////////////////////////////////////////////// // ---------------------------------------------------- Properties // /////////////////////////////////////////////////////////////// protected double ALFA = 1.0e-4; protected double BETA = 0.9; protected double TOLX = 1.0e-6; protected double STPMX = 100.0; protected int maxIterations = 200; protected double minFunction; protected double slope; protected boolean alphaLEQZero = false; protected double[] varValues; protected double epsilon, zero; // /////////////////////////////////////////////////////////////// // --------------------------------------------------- Constructor // /////////////////////////////////////////////////////////////// public Optimization() { epsilon=1.0; while(1.0+epsilon > 1.0){ epsilon /= 2.0; } epsilon *= 2.0; zero = Math.sqrt(epsilon); } ///////////////////////////////////////////////////////////////// // --------------------------------------------- Abstract methods ///////////////////////////////////////////////////////////////// protected abstract double evaluate(double[] x) throws Exception; protected abstract double[] gradient(double[] x) throws Exception; ///////////////////////////////////////////////////////////////// // ----------------------------------------------- Public methods ///////////////////////////////////////////////////////////////// public double getEpsilon() { return epsilon; } public double getZero() { return zero; } public double getMinFunction() { return minFunction; } public double[] getVarValues() { return varValues; } public double[] find(double[] xold, double[] gradient, double[] directionVector, double maxStep, boolean[] isFixed, double[][] nonworkingSetBounds, ArrayList<Integer> workingSetBoundIndex) throws Exception { int length=xold.length, fixedOne=-1; double lambda, minLambda; double temp,test,alpha = Double.POSITIVE_INFINITY; double fold=minFunction; double tmp1,lambda2=0,tmp2,disc=0,maxLambda=1.0,tmpLambda; double[] newvarValues = new double[length]; double sum = 0.0, newSlope = 0.0; for(int i = 0; i < length; i++) if(!isFixed[i]) sum += directionVector[i]*directionVector[i]; sum = Math.sqrt(sum); if(sum > maxStep){ for(int i = 0; i < length; i++) if(!isFixed[i]) directionVector[i] *= maxStep/sum; } else maxLambda = maxStep/sum; slope = 0.0; for(int i = 0; i < length; i++){ newvarValues[i] = xold[i]; if(!isFixed[i]) slope += gradient[i]*directionVector[i]; } if(Math.abs(slope) <= zero) return newvarValues; test = 0.0; for(int i = 0; i < length; i++){ if(!isFixed[i]){ temp = Math.abs(directionVector[i])/Math.max(Math.abs(newvarValues[i]),1.0); if (temp > test) test = temp; } } if(test > zero) minLambda = TOLX/test; else return newvarValues; for(int i = 0; i < length; i++) { if(!isFixed[i]) { double aux; if((directionVector[i] < -epsilon) && !Double.isNaN(nonworkingSetBounds[0][i])) { aux = (nonworkingSetBounds[0][i]-xold[i])/directionVector[i]; if(aux <= zero) { newvarValues[i] = nonworkingSetBounds[0][i]; isFixed[i]=true; alpha = 0.0; nonworkingSetBounds[0][i]=Double.NaN; workingSetBoundIndex.add(i); } else if(alpha > aux){ alpha = aux; fixedOne = i; } } else if((directionVector[i] > epsilon) && !Double.isNaN(nonworkingSetBounds[1][i])) { aux = (nonworkingSetBounds[1][i]-xold[i])/directionVector[i]; if(aux <= zero) { newvarValues[i] = nonworkingSetBounds[1][i]; isFixed[i]=true; alpha = 0.0; nonworkingSetBounds[1][i]=Double.NaN; workingSetBoundIndex.add(i); } else if(alpha > aux){ alpha = aux; fixedOne = i; } } } } if(alpha <= zero){ alphaLEQZero = true; return newvarValues; } lambda = alpha; if(lambda > 1.0) lambda = 1.0; double init=fold, hi=lambda, lamdaOld=lambda, high=minFunction, low=minFunction; double[] newGrad; for (int k = 0; ;k++) { for (int i = 0; i < length;i++) { if(!isFixed[i]) { newvarValues[i] = xold[i]+lambda*directionVector[i]; if(!Double.isNaN(nonworkingSetBounds[0][i]) && (newvarValues[i]<nonworkingSetBounds[0][i])) newvarValues[i] = nonworkingSetBounds[0][i]; else if(!Double.isNaN(nonworkingSetBounds[1][i]) && (newvarValues[i]>nonworkingSetBounds[1][i])) newvarValues[i] = nonworkingSetBounds[1][i]; } } minFunction = evaluate(newvarValues); while(Double.isInfinite(minFunction)) { lambda *= 0.5; if(lambda <= epsilon) return newvarValues; for (int i = 0; i < length; i++) if(!isFixed[i]) newvarValues[i] = xold[i]+lambda*directionVector[i]; minFunction = evaluate(newvarValues); init = Double.POSITIVE_INFINITY; } if(minFunction <= fold + ALFA*lambda*slope) { newGrad = gradient(newvarValues); newSlope = 0.0; for(int i=0; i < length; i++) if(!isFixed[i]) newSlope += newGrad[i]*directionVector[i]; if(newSlope >= BETA*slope) { if((fixedOne!=-1) && (lambda>=alpha)) { if(directionVector[fixedOne] > 0){ newvarValues[fixedOne] = nonworkingSetBounds[1][fixedOne]; nonworkingSetBounds[1][fixedOne]=Double.NaN; } else{ newvarValues[fixedOne] = nonworkingSetBounds[0][fixedOne]; nonworkingSetBounds[0][fixedOne]=Double.NaN; } isFixed[fixedOne]=true; workingSetBoundIndex.add(fixedOne); } return newvarValues; } else if(k==0) { double upper = Math.min(alpha,maxLambda); while(!((lambda>=upper) || (minFunction>fold+ALFA*lambda*slope))) { lamdaOld = lambda; low = minFunction; lambda *= 2.0; if(lambda >= upper) lambda = upper; for (int i = 0; i < length;i++) if(!isFixed[i]) newvarValues[i] = xold[i]+lambda*directionVector[i]; minFunction = evaluate(newvarValues); newGrad = gradient(newvarValues); newSlope = 0.0; for(int i = 0; i < length; i++) if(!isFixed[i]) newSlope += newGrad[i]*directionVector[i]; if(newSlope >= BETA*slope) { if((fixedOne!=-1) && (lambda>=alpha)) { if(directionVector[fixedOne] > 0){ newvarValues[fixedOne] = nonworkingSetBounds[1][fixedOne]; nonworkingSetBounds[1][fixedOne]=Double.NaN; } else{ newvarValues[fixedOne] = nonworkingSetBounds[0][fixedOne]; nonworkingSetBounds[0][fixedOne]=Double.NaN; } isFixed[fixedOne]=true; workingSetBoundIndex.add(fixedOne); } return newvarValues; } } hi = lambda; high = minFunction; break; } else { hi = lambda2; lamdaOld = lambda; low = minFunction; break; } } else if (lambda < minLambda) { if(init<fold) { lambda = Math.min(1.0,alpha); for (int i = 0; i < length; i++) if(!isFixed[i]) newvarValues[i] = xold[i]+lambda*directionVector[i]; if((fixedOne!=-1) && (lambda>=alpha)) { if(directionVector[fixedOne] > 0){ newvarValues[fixedOne] = nonworkingSetBounds[1][fixedOne]; nonworkingSetBounds[1][fixedOne]=Double.NaN; } else{ newvarValues[fixedOne] = nonworkingSetBounds[0][fixedOne]; nonworkingSetBounds[0][fixedOne]=Double.NaN; } isFixed[fixedOne]=true; workingSetBoundIndex.add(fixedOne); } } else{ for(int i = 0; i < length;i++) newvarValues[i]=xold[i]; minFunction=fold; } return newvarValues; } else { if(k==0) { if(!Double.isInfinite(init)) init = minFunction; tmpLambda = -0.5*lambda*slope/((minFunction-fold)/lambda-slope); } else { tmp1 = ((minFunction-fold-lambda*slope)/(lambda*lambda)-(high-fold-lambda2*slope)/(lambda2*lambda2))/(lambda-lambda2); tmp2 = (-lambda2*(minFunction-fold-lambda*slope)/(lambda*lambda)+lambda*(high-fold-lambda2*slope)/(lambda2*lambda2))/(lambda-lambda2); if (tmp1 == 0.0) tmpLambda = -slope/(2.0*tmp2); else { disc=tmp2*tmp2-3.0*tmp1*slope; if (disc < 0.0) disc = 0.0; double numerator = -tmp2+Math.sqrt(disc); if(numerator >= Double.MAX_VALUE) numerator = Double.MAX_VALUE; tmpLambda=numerator/(3.0*tmp1); } if (tmpLambda>0.5*lambda) tmpLambda=0.5*lambda; } } lambda2=lambda; high=minFunction; lambda=Math.max(tmpLambda,0.1*lambda); } double diff = hi-lamdaOld, incr; while((newSlope < BETA*slope) && (diff >= minLambda)) { incr = -0.5*newSlope*diff*diff/(high-low-newSlope*diff); if(incr < 0.2*diff) incr = 0.2*diff; lambda = lamdaOld+incr; if(lambda >= hi){ lambda=hi; incr=diff; } for(int i = 0; i < length;i++) if(!isFixed[i]) newvarValues[i] = xold[i]+lambda*directionVector[i]; minFunction = evaluate(newvarValues); if(minFunction>fold+ALFA*lambda*slope){ diff = incr; high = minFunction; } else { newGrad = gradient(newvarValues); newSlope = 0.0; for(int i = 0; i < length; i++) if(!isFixed[i]) newSlope += newGrad[i]*directionVector[i]; if(newSlope < BETA*slope){ lamdaOld = lambda; diff -= incr; low = minFunction; } } } if(newSlope < BETA*slope) { lambda=lamdaOld; for (int i = 0; i < length; i++) if(!isFixed[i]) newvarValues[i] = xold[i]+lambda*directionVector[i]; minFunction = low; } if((fixedOne!=-1) && (lambda>=alpha)) { if(directionVector[fixedOne] > 0){ newvarValues[fixedOne] = nonworkingSetBounds[1][fixedOne]; nonworkingSetBounds[1][fixedOne]=Double.NaN; } else{ newvarValues[fixedOne] = nonworkingSetBounds[0][fixedOne]; nonworkingSetBounds[0][fixedOne]=Double.NaN; } isFixed[fixedOne]=true; workingSetBoundIndex.add(fixedOne); } return newvarValues; } @SuppressWarnings("unchecked") public double[] minimum(double[] initPoint, double[][] constraints) throws Exception { int length = initPoint.length; boolean[] isFixed = new boolean[length]; double[][] nonworkingSetBounds = new double[2][length]; ArrayList<Integer> workingSetBoundsIndex = new ArrayList<Integer>(constraints.length); ArrayList<Integer> list=null, prevList=null; minFunction = evaluate(initPoint); double sum=0; double[] diagonal = new double[length]; double[] gradient = gradient(initPoint), prevGradient, prevX; double[] deltaGradient=new double[length], deltaX=new double[length], direct = new double[length], vector = new double[length]; double[][] lowerTriangle = new double[length][length]; for(int i=0; i < length; i++) { lowerTriangle[i][i] = 1.0; diagonal[i] = 1.0; direct[i] = -gradient[i]; sum += gradient[i]*gradient[i]; vector[i] = initPoint[i]; nonworkingSetBounds[0][i] = constraints[0][i]; nonworkingSetBounds[1][i] = constraints[1][i]; isFixed[i] = false; } double maxStep = STPMX*Math.max(Math.sqrt(sum), length); for(int i = 0; i < maxIterations; i++) { prevX = vector; prevGradient = gradient; alphaLEQZero = false; vector = find(vector, gradient, direct, maxStep, isFixed, nonworkingSetBounds, workingSetBoundsIndex); if(alphaLEQZero) { for(int f=0; f<workingSetBoundsIndex.size(); f++) diagonal[workingSetBoundsIndex.get(f)] = 0.0; gradient = gradient(vector); i--; } else { boolean finish = false; double test=0.0; for(int j = 0; j < length; j++) { deltaX[j] = vector[j]-prevX[j]; double tmp=Math.abs(deltaX[j])/Math.max(Math.abs(vector[j]), 1.0); if(tmp > test) test = tmp; } if(test < zero) finish = true; gradient = gradient(vector); test=0.0; double res=0.0, deltaXvalue=0.0, deltaGradientvalue=0.0, newlyBounded=0.0; for(int j = 0; j < length; j++) { if(!isFixed[j]) { deltaGradient[j] = gradient[j] - prevGradient[j]; res += deltaX[j]*deltaGradient[j]; deltaXvalue += deltaX[j]*deltaX[j]; deltaGradientvalue += deltaGradient[j]*deltaGradient[j]; } else newlyBounded += deltaX[j]*(gradient[j]-prevGradient[j]); double tmp = Math.abs(gradient[j])*Math.max(Math.abs(direct[j]),1.0)/Math.max(Math.abs(minFunction),1.0); if(tmp > test) test = tmp; } if(test < zero) finish = true; if(Math.abs(res+newlyBounded) < zero) finish = true; int size = workingSetBoundsIndex.size(); boolean isUpdate = true; if(finish) { if(list != null) prevList = (ArrayList<Integer>) list.clone(); list = new ArrayList<Integer>(workingSetBoundsIndex.size()); for(int j = size-1; j >= 0; j--) { int index=workingSetBoundsIndex.get(j); double deltaL=0.0; double L1 = 0.0, L2; if(vector[index] >= constraints[1][index]) L1 = -gradient[index]; else if(vector[index] <= constraints[0][index]) L1 = gradient[index]; L2 = L1 + deltaL; boolean isConverge = (2.0*Math.abs(deltaL)) < Math.min(Math.abs(L1), Math.abs(L2)); if((L1*L2>0.0) && isConverge) { if(L2 < 0.0){ list.add(index); workingSetBoundsIndex.remove(j); finish=false; } } if(list != null && list.equals(prevList)) finish = true; } if(finish) { minFunction = evaluate(vector); return vector; } for(int mmm=0; mmm<list.size(); mmm++) { int freeIndx=list.get(mmm); isFixed[freeIndx] = false; if(vector[freeIndx] <= constraints[0][freeIndx]) nonworkingSetBounds[0][freeIndx] = constraints[0][freeIndx]; else nonworkingSetBounds[1][freeIndx] = constraints[1][freeIndx]; lowerTriangle[freeIndx][freeIndx] = 1.0; diagonal[freeIndx] = 1.0; isUpdate = false; } } if(res<Math.max(zero*Math.sqrt(deltaXvalue)*Math.sqrt(deltaGradientvalue), zero)) isUpdate = false; if(isUpdate) { double coeff = 1.0/res; updateCholeskyFactor(lowerTriangle,diagonal,deltaGradient,coeff,isFixed); coeff = 1.0/slope; updateCholeskyFactor(lowerTriangle,diagonal,prevGradient,coeff,isFixed); } } double[][] LD = new double[length][length]; double[] b = new double[length]; for(int k=0; k<length; k++) { if(!isFixed[k]) b[k] = -gradient[k]; else b[k] = 0.0; for(int j=k; j<length; j++) if(!isFixed[j] && !isFixed[k]) LD[j][k] = lowerTriangle[j][k]*diagonal[k]; } double[] LDIR = solveTriangle(LD, b, true, isFixed); LD = null; direct = solveTriangle(lowerTriangle, LDIR, false, isFixed); } varValues = vector; return null; } ///////////////////////////////////////////////////////////////// // -------------------------------------------- Protected methods ///////////////////////////////////////////////////////////////// protected double[] solveTriangle(double[][] t, double[] b, boolean isLower, boolean[] isZero) throws Exception { int n = b.length; double[] result = new double[n]; if(isZero == null) isZero = new boolean[n]; if(isLower) { int j = 0; while((j<n)&&isZero[j]){result[j]=0.0; j++;} if(j<n) { result[j] = b[j]/t[j][j]; for(; j<n; j++) { if(!isZero[j]) { double numerator=b[j]; for(int k=0; k<j; k++) numerator -= t[j][k]*result[k]; result[j] = numerator/t[j][j]; } else result[j] = 0.0; } } } else { int j=n-1; while((j>=0)&&isZero[j]){result[j]=0.0; j--;} if(j>=0) { result[j] = b[j]/t[j][j]; for(; j>=0; j--) { if(!isZero[j]) { double numerator=b[j]; for(int k=j+1; k<n; k++) numerator -= t[k][j]*result[k]; result[j] = numerator/t[j][j]; } else result[j] = 0.0; } } } return result; } protected void updateCholeskyFactor(double[][] unitTriangleMatrix, double[] diagonal, double[] updateVector, double updateCoeffcient, boolean[] isFixed) throws Exception { double tmp1, tmp2, tmp3; int length = updateVector.length; double[] vector = new double[length]; for (int i = 0; i < updateVector.length; i++) if(!isFixed[i]) vector[i] = updateVector[i]; else vector[i] = 0.0; if(updateCoeffcient>0.0) { tmp1 = updateCoeffcient; for(int i = 0; i < length; i++) { if(isFixed[i]) continue; tmp2 = vector[i]; double diagonalValue = diagonal[i]; diagonal[i] = diagonalValue + tmp1*tmp2*tmp2; tmp3 = tmp2*tmp1/diagonal[i]; tmp1 *= diagonalValue/diagonal[i]; for(int j = i+1; j < length; j++) { if(!isFixed[j]){ double l=unitTriangleMatrix[j][i]; vector[j] -= tmp2*l; unitTriangleMatrix[j][i] = l+tmp3*vector[j]; } else unitTriangleMatrix[j][i] = 0.0; } } } else { double[] tri = solveTriangle(unitTriangleMatrix, updateVector, true, isFixed); tmp1 = 0.0; for(int i=0; i<length; i++) if(!isFixed[i]) tmp1 += tri[i]*tri[i]/diagonal[i]; double sqrt = updateCoeffcient*tmp1 + 1.0; sqrt = (sqrt<0.0)? 0.0 : Math.sqrt(sqrt); double alpha=updateCoeffcient, sigma=updateCoeffcient/(1.0+sqrt), omega, kappa; for(int i = 0; i < length; i++) { if(isFixed[i]) continue; double diagonalValue = diagonal[i]; tmp2 = tri[i]*tri[i]/diagonalValue; kappa = 1.0+sigma*tmp2; tmp1 -= tmp2; if(tmp1 < 0.0) tmp1=0.0; double plus = sigma*sigma*tmp2*tmp1; if((i < length-1) && plus <= zero) plus = zero; omega = kappa*kappa + plus; diagonal[i] = omega*diagonalValue; tmp3=alpha*tri[i]/(omega*diagonalValue); alpha /= omega; omega = Math.sqrt(omega); sigma *= (1.0+omega)/(omega*(kappa+omega)); for(int j = i+1; j < length; j++) { if(!isFixed[j]){ double low = unitTriangleMatrix[j][i]; vector[j] -= tri[i]*low; unitTriangleMatrix[j][i] = low+tmp3*vector[j]; } else unitTriangleMatrix[j][i] = 0.0; } } } } }