/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see http://www.gnu.org/licenses/

**********************************************************************/

/**
 * <p>
 * @author Written by Luciano Sánchez (University of Oviedo) 27/02/2004
 * @author Modified by Enrique A. de la Cal (University of Oviedo) 13/12/2008
 * @version 1.0
 * @since JDK1.4
 * </p>
 */

package keel.Algorithms.Shared.ClassicalOptim;

import org.core.*;

public class ConjGradQUAD {
    /**
     * <p>
     * <pre>
     * Quadratic classifier/model optimized by the conjugate gradient method.
     * This class is also a container for a perceptron neural network and implements the training methods:
     *  - Conjugate gradient: conjugatedGradient.
     *  - Gradient descent: descentGradient.
     *
     *   Input-Layer       Hidden Layer-i x nLayers       Output-Layer
     *
     *            -
     *           |    I                  H
     *           |    I                  H                   -
     *           |    I                  H               O  |
     *   nInputs |    I                  H               O  |  nOutputs
     *           |    I                  H               O  |
     *           |    I                  H               O  |
     *           |    I                  H                   -
     *           |    I                  H
     *            -
     * </pre>
     * </p>
     */

    // Number of layers
    int nLayers;
    // Number of inputs
    int nInputs;
    // Number of outputs
    int nOutputs;
    // Number of elements in each hidden layer
    int nElements[];
    // Input examples
    double inputs[][];
    // Expected outputs
    double outputs[][];
    // Random number generator
    static Randomize r;

    /**
     * <p>
     * Constructor for a perceptron neural network built from its basic elements.
     * </p>
     * @param vInput input examples.
     * @param vOutput expected outputs.
     * @param pr random number generator.
     */
    public ConjGradQUAD(
        double [][] vInput,
        double [][] vOutput,
        Randomize pr                              // Data
    ) {
        // Class initialization
        r = pr;
        inputs = duplicate(vInput);
        outputs = duplicate(vOutput);
        nInputs = vInput[0].length;               // Number of inputs
        nOutputs = vOutput[0].length;             // Number of outputs
        System.out.println("Inputs=" + nInputs + " Outputs=" + nOutputs);
        // First index: number of outputs
        // Second index: number of inputs + 1 = rows of concat(A,B)
        // Third index: number of columns of A and B
        // WEIGHTS = new double[salida[0].length][entrada[0].length+1][entrada[0].length];
    }
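    // A minimal usage sketch (illustrative only, not part of the original KEEL
    // sources). The error function "mse" below is hypothetical: any FUN whose
    // evaluate(double[][][]) returns the model error over the training data
    // would serve, and the Randomize construction is likewise assumed.
    //
    //     double[][] X = {{0.0, 0.0}, {0.0, 1.0}, {1.0, 0.0}, {1.0, 1.0}};
    //     double[][] Y = {{0.0}, {1.0}, {1.0}, {0.0}};
    //     Randomize rnd = new Randomize();                  // assumed constructor
    //     ConjGradQUAD opt = new ConjGradQUAD(X, Y, rnd);
    //     double[][][] w = opt.conjugatedGradient(mse, 1e-6, 1e-6, 1000);
    //     double[] prediction = opt.quadraticModelOutput(X[0], w);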
    /**
     * <p>
     * Calculates the numerical gradient of the error function f at the weights x,
     * using a central-difference (tangent) approximation.
     * </p>
     * @param f the error function for the neural network.
     * @param x the weights to evaluate.
     * @return the gradient of f at the weights x, estimated by central differences.
     */
    double[][][] numericalGradient(FUN f, double x[][][]) {
        // To test the analytical gradient subroutine, the gradient of f(x) is estimated numerically
        double h = 0.1f;
        double[][][] result, fplus, fminus, copyx;
        result = new double[x.length][x[0].length][x[0][0].length];
        fplus  = new double[x.length][x[0].length][x[0][0].length];
        fminus = new double[x.length][x[0].length][x[0][0].length];
        copyx  = new double[x.length][x[0].length][x[0][0].length];
        for (int i = 0; i < x.length; i++)
            for (int j = 0; j < x[i].length; j++)
                for (int k = 0; k < x[i][j].length; k++) {
                    copyx[i][j][k] = x[i][j][k];
                }
        for (int i = 0; i < x.length; i++)
            for (int j = 0; j < x[i].length; j++)
                for (int k = 0; k < x[i][j].length; k++) {
                    copyx[i][j][k] += h;
                    fplus[i][j][k] = f.evaluate(copyx);
                    copyx[i][j][k] -= (2 * h);
                    fminus[i][j][k] = f.evaluate(copyx);
                    copyx[i][j][k] = x[i][j][k];
                    result[i][j][k] = (fplus[i][j][k] - fminus[i][j][k]) / (2 * h);
                }
        return result;
    }

    /**
     * <p>
     * Copies the first size elements of the vector org into dst.
     * </p>
     * @param org the source vector.
     * @param size the number of elements to copy.
     * @param dst the destination vector.
     */
    private void copy(double org[], int size, double dst[]) {
        for (int i = 0; i < size; i++) dst[i] = org[i];
    }

    /**
     * <p>
     * Creates and returns a copy of vector x.
     * </p>
     * @param x the vector to be copied.
     * @return a clone of vector x.
     */
    public static double[] duplicate(double x[]) {
        double r[] = new double[x.length];
        for (int i = 0; i < x.length; i++) {
            r[i] = x[i];
        }
        return r;
    }

    /**
     * <p>
     * Creates and returns a deep copy of matrix x.
     * </p>
     * @param x the matrix to be copied.
     * @return a deep copy of matrix x.
     */
    public static double[][] duplicate(double x[][]) {
        double r[][] = new double[x.length][];
        for (int i = 0; i < x.length; i++) {
            r[i] = new double[x[i].length];
            for (int j = 0; j < x[i].length; j++) {
                r[i][j] = x[i][j];
            }
        }
        return r;
    }
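    // Note (added for clarity, not part of the original class): the
    // duplicate(...) overloads perform element-wise deep copies because Java's
    // built-in clone() on a nested array copies only the outermost level.
    // For example:
    //
    //     double[][] a = {{1.0, 2.0}};
    //     double[][] shallow = a.clone();   // inner rows are shared with a
    //     shallow[0][0] = 9.0;              // also changes a[0][0]
    //     double[][] deep = duplicate(a);   // inner rows are independent
    //     deep[0][0] = 9.0;                 // a[0][0] is untouched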
    /**
     * <p>
     * Creates and returns a deep copy of the three-dimensional array x.
     * </p>
     * @param x the array to be copied.
     * @return a deep copy of x.
     */
    public static double[][][] duplicate(double x[][][]) {
        double r[][][] = new double[x.length][][];
        for (int i = 0; i < x.length; i++) {
            r[i] = new double[x[i].length][];
            for (int j = 0; j < r[i].length; j++) {
                r[i][j] = new double[x[i][j].length];
                for (int k = 0; k < r[i][j].length; k++) r[i][j][k] = x[i][j][k];
            }
        }
        return r;
    }

    /**
     * <p>
     * Calculates the analytical gradient of the quadratic model error for the weights x.
     * </p>
     * @param f the error function used to evaluate the model with weights x (only used in the debug check).
     * @param x the weights to evaluate.
     * @return the gradient of the quadratic model error for each weight in x.
     */
    double[][][] gradient(FUN f, double x[][][]) {
        boolean debug = false;
        // Quadratic model gradient
        double G[][][] = new double[x.length][x[0].length][x[0][0].length];
        double y[][] = new double[outputs.length][outputs[0].length];
        for (int k = 0; k < inputs.length; k++) {
            y[k] = quadraticModelOutput(inputs[k], x);
        }
        for (int s = 0; s < x.length; s++) {
            int tmp = x[s].length - 1;
            for (int alfa = 0; alfa < tmp; alfa++) {
                for (int beta = 0; beta < tmp; beta++) {
                    G[s][alfa][beta] = 0;
                    for (int k = 0; k < inputs.length; k++) {
                        G[s][alfa][beta] += 2 * (outputs[k][s] - y[k][s])
                                              * (inputs[k][alfa])
                                              * (inputs[k][beta]);
                    }
                }
            }
            for (int beta = 0; beta < tmp; beta++) {
                G[s][tmp][beta] = 0;
                for (int k = 0; k < inputs.length; k++) {
                    G[s][tmp][beta] += 2 * (outputs[k][s] - y[k][s])
                                         * (inputs[k][beta]);
                }
            }
        }
        for (int i = 0; i < G.length; i++)
            for (int j = 0; j < G[i].length; j++)
                for (int k = 0; k < G[i][j].length; k++)
                    G[i][j][k] *= -1.0 / inputs.length;
        if (debug) {
            System.out.println("Gradient=" + AString(G));
            double H[][][] = numericalGradient(f, x);
            System.out.println("Numerical gradient=" + AString(H));
        }
        return G;
    }

    /**
     * <p>
     * Returns the output of the quadratic model with weights W for the input example x.
     * </p>
     * @param x an input example.
     * @param W the weights of the quadratic model.
     * @return the output of the quadratic model with weights W for input example x.
     */
    public double[] quadraticModelOutput(double x[], double W[][][]) {
        // Quadratic model output
        double[] sal = new double[W.length];
        for (int s = 0; s < W.length; s++) {
            int tmp = W[s].length - 1;   // Number of inputs
            sal[s] = 0;
            for (int i = 0; i < tmp; i++) {
                double v = 0;
                for (int j = 0; j < tmp; j++) v += x[j] * W[s][i][j];
                sal[s] += v * x[i];
            }
            for (int i = 0; i < W[s][tmp].length; i++) {
                sal[s] += W[s][tmp][i] * x[i];
            }
        }
        return sal;
    }

    /**
     * <p>
     * Returns a random value in the range [low, high].
     * </p>
     * @param low the lower bound of the range.
     * @param high the upper bound of the range.
     * @return a uniform random value in [low, high].
     */
    private double rnd(double low, double high) {
        // Uniform random value between low and high
        return r.Rand() * (high - low) + low;
    }
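    // For reference (derived from quadraticModelOutput above): for output s,
    // with n = W[s].length - 1 inputs, the model computes a full quadratic
    // form plus a linear term,
    //
    //     y[s] = sum_{i,j < n} W[s][i][j] * x[i] * x[j]  +  sum_{i < n} W[s][n][i] * x[i]
    //
    // so row n of W[s] holds the linear coefficients and the first n rows
    // hold the quadratic ones.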
    /**
     * <p>
     * Trains the quadratic model with the conjugate gradient algorithm and
     * returns the resulting weights.
     * </p>
     * @param ferr the error function.
     * @param TOL_ERR the stopping error tolerance.
     * @param MIN_DELTAGC not used.
     * @param MAX_ITER the maximum number of iterations.
     * @return the weights of the quadratic model after training.
     */
    double[][][] conjugatedGradient(FUN ferr, double TOL_ERR, double MIN_DELTAGC, int MAX_ITER) {
        System.out.println("Inside " + MIN_DELTAGC);
        int NVAR = 0;
        double last_err = 0, err = 0;
        double x[][][] = new double[outputs[0].length][inputs[0].length + 1][inputs[0].length];
        randomWeights(x, 1);
        System.out.println("Initial X=" + AString(x));
        for (int i = 0; i < x.length; i++) NVAR += x[i].length * x[i][0].length;
        int iter = 0, subiter = 0;
        double alpha = 0;
        double d[][][], gr[][][], g_old[][][];
        double xSearch[][][], dSearch[][][];
        // The conjugate gradient algorithm is run
        boolean restart = true;
        boolean debug = false;
        g_old = gradient(ferr, x);
        d = duplicate(g_old);
        do {
            if (debug) {
                System.out.println("X=" + AString(x));
                System.out.println("g_old=" + AString(g_old));
            }
            gr = gradient(ferr, x);
            if (restart) {
                d = OPV.signChange(gr);
                restart = false;
                subiter = 0;
            } else {
                // Polak-Ribiere update of the search direction
                double beta = (OPV.multiply(OPV.subtract(gr, g_old), gr)) /
                               OPV.multiply(g_old, g_old);
                d = OPV.subtract(OPV.multiply(beta, d), gr);
            }
            double dgr = Math.sqrt(OPV.multiply(d, d));
            xSearch = duplicate(x);
            dSearch = OPV.multiply(1.0 / dgr, duplicate(d));
            LinearSearchBrent BL = new LinearSearchBrent(ferr, dSearch, xSearch);
            alpha = BL.minimumSearch(r);
            if (alpha == 0) {
                restart = true;   // Fall back to the gradient direction if there are problems
                System.out.println("Restart");
            } else {
                x = OPV.sum(x, OPV.multiply(alpha, dSearch));
                g_old = duplicate(gr);
                iter++;
                subiter++;
                if (subiter >= NVAR) restart = true;
            }
            err = ferr.evaluate(x);
            System.out.println("Iteration=" + (iter / NVAR) +
                               " alpha=" + alpha + " MSE=" + err +
                               " Gradient norm " + dgr);
            last_err = err;
            if (alpha < 1e-4 * dgr) {
                // restart=true;   // Gradient direction if there are problems
                // System.out.println("Restart");
                System.out.println("return: alpha<" + (1e-4 * dgr) + "=" + alpha);
                break;
            }
        } while (Math.sqrt(OPV.multiply(gr, gr)) > TOL_ERR * gr.length && iter < MAX_ITER);
        return x;
    }

    /**
     * <p>
     * Initializes the matrix of weights with random values in the range [-x, x].
     * </p>
     * @param weights the weight matrix to initialize.
     * @param x the magnitude of the lower/upper limit for the random values.
     */
    void randomWeights(double[][][] weights, double x) {
        for (int i = 0; i < weights.length; i++)
            for (int j = 0; j < weights[i].length; j++)
                for (int k = 0; k < weights[i][j].length; k++)
                    weights[i][j][k] = rnd(-x, x);
    }

    /**
     * <p>
     * Returns a printable version of x.
     * </p>
     * @param x the vector to print.
     * @return a String with a printable version of x.
     */
    String AString(double x[]) {
        String result = "[";
        for (int i = 0; i < x.length - 1; i++) result += (x[i] + " ");
        result += x[x.length - 1];
        result += "]";
        return result;
    }

    /**
     * <p>
     * Returns a printable version of x.
     * </p>
     * @param x the matrix to print.
     * @return a String with a printable version of x.
     */
    String AString(double x[][]) {
        String result = "";
        for (int i = 0; i < x.length; i++) result += AString(x[i]);
        return result;
    }

    /**
     * <p>
     * Returns a printable version of x.
     * </p>
     * @param x the array to print.
     * @return a String with a printable version of x.
     */
    String AString(double x[][][]) {
        String result = "";
        for (int i = 0; i < x.length; i++) result += AString(x[i]);
        return result;
    }
}
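// For reference, the direction update used in conjugatedGradient is the
// Polak-Ribiere rule. Schematically (a sketch only; OPV.multiply on two
// arrays is the dot product used in the code above):
//
//     beta = dot(g - g_old, g) / dot(g_old, g_old);
//     d    = beta * d - g;                 // new conjugate search direction
//     x    = x + alpha * d / |d|;          // alpha from the Brent line search
//
// with a restart to the steepest-descent direction -g whenever the line
// search fails (alpha == 0) or after NVAR consecutive steps.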