/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Luciano Sánchez (University of Oviedo) 27/02/2004
* @author Modified by Enrique A. de la Cal (University of Oviedo) 13/12/2008
* @version 1.0
* @since JDK1.4
* </p>
*/
package keel.Algorithms.Shared.ClassicalOptim;
import org.core.*;
public class ConjGradNN {
/**
* <p>
* Classifier/model optimized by conjugate gradient.
* This class is also a container for a perceptron neural network and implements the training methods:
* * Conjugate gradient: conjugatedGradient.
* * Gradient descent: descentGradient.
* </p>
* <pre>
*           Input-Layer    Hidden-Layer-i x nLayers    Output-Layer
*          -
*          |     I                   H
*          |     I                   H                   -
*          |     I                   H               O   |
* nInputs  |     I                   H               O   |  nOutputs
*          |     I                   H               O   |
*          |     I                   H               O   |
*          |     I                   H                   -
*          |     I                   H
*          -
* </pre>
*/
//Random number generator
static Randomize r;
//Number of hidden layers
int nLayers;
//Number of Inputs
int nInputs;
//Number of Outputs
int nOutputs;
//Number of elements in each hidden layer
int nElements[];
//Weights. Dimension 1: layer; dimension 2: neuron; dimension 3: incoming weight (the last entry is the bias)
double [][][] weights;
//Calculated input for each hidden layer
double [][] input;
//Calculated output for each hidden layer
double [] output;
//Per-layer error terms (deltas) used in backpropagation
double [][] delta;
//Error gradient with respect to each weight
double [][][] gradf;
//A vector with the difference between max_y and min_y, used for scaling the output
double [] factor;
//Maximum of each input
double[] max_x;
//Minimum of each input
double[] min_x;
//Maximum of each output
double[] max_y;
//Minimum of each output
double[] min_y;
//Input examples
double [][] Input;
//Expected output
double [][] Output;
/**
* <p>
* Constructor for a perceptron neural network from its basic elements.
*
* </p>
* @param vNelement topology: number of neurons in each hidden layer.
* @param vInput input examples.
* @param vOutput expected outputs.
* @param pr Random generator.
*/
public ConjGradNN(
int vNelement[], // Topology
double [][] vInput, double [][] vOutput, // Data
Randomize pr
) {
r=pr;
Input=duplicate(vInput);
Output=duplicate(vOutput);
nInputs=vInput[0].length; // Number of inputs
nOutputs=vOutput[0].length; // Number of outputs
nElements=vNelement; // Number of elements in each hidden layer
nLayers=nElements.length; // Number of hidden layers
weights=new double[nLayers+1][][]; // Weight matrix
gradf=new double[nLayers+1][][]; // Error gradient
input=new double[nLayers+1][];
output=new double[nOutputs];
delta=new double[nLayers+1][];
int i,nInputsAux,j;
for (i=0;i<nLayers;i++) {
weights[i]=new double[nElements[i]][];
gradf[i]=new double[nElements[i]][];
if (i==0) nInputsAux=nInputs; else nInputsAux=weights[i-1].length;
input[i]=new double[nInputsAux+1];
delta[i]=new double[nElements[i]];
for (j=0;j<weights[i].length;j++) {
weights[i][j]=new double[nInputsAux+1];
gradf[i][j]=new double[nInputsAux+1];
}
}
weights[i]=new double[nOutputs][];
gradf[i]=new double[nOutputs][];
if (nLayers==0) {
for (j=0;j<weights[i].length;j++) {
weights[i][j]=new double[nInputs+1];
gradf[i][j]=new double[nInputs+1];
}
input[i]=new double[nInputs+1];
} else {
for (j=0;j<weights[i].length;j++) {
weights[i][j]=new double[weights[i-1].length+1];
gradf[i][j]=new double[weights[i-1].length+1];
}
input[i]=new double[weights[i-1].length+1];
}
delta[i]=new double[nOutputs];
factor=new double[nOutputs];
max_x=new double[nInputs];
min_x=new double[nInputs];
max_y=new double[nOutputs];
min_y=new double[nOutputs];
for (i=0;i<factor.length;i++) { factor[i]=1; }
for (i=0;i<nInputs;i++) { max_x[i]=1; min_x[i]=0; }
for (i=0;i<nOutputs;i++) { max_y[i]=1; min_y[i]=0; }
scale();
}
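/*
 * Construction sketch (illustrative; the data and topology below are made-up
 * values, and Randomize comes from org.core with its seeding not shown):
 *
 *   double[][] X = {{0,0},{0,1},{1,0},{1,1}};   // training inputs
 *   double[][] Y = {{0},{1},{1},{0}};           // expected outputs (XOR)
 *   ConjGradNN net = new ConjGradNN(new int[]{5}, X, Y, rnd); // one hidden layer of 5 neurons
 *   net.randomWeights(0.5);                     // initial weights in [-0.5,0.5]
 */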
/**
* <p>
* Calculates the numerical gradient of the error function f at the weights x, using a central-difference approximation.
*
* </p>
* @param f the error function of the neural network.
* @param x the weights at which the gradient is evaluated.
* @return the numerical gradient of f at x.
*/
private double[][][] numericalGradient(FUN f, double x[][][]) {
// Estimates the gradient of f at x numerically; used to test the analytical gradient routine
double h=0.001;
double[][][] result,fplus,fminus,copyx;
result=new double[x.length][x[0].length][x[0][0].length];
fplus=new double[x.length][x[0].length][x[0][0].length];
fminus=new double[x.length][x[0].length][x[0][0].length];
copyx=new double[x.length][x[0].length][x[0][0].length];
for (int i=0;i<x.length;i++)
for (int j=0;j<x[i].length;j++)
for (int k=0;k<x[i][j].length;k++) copyx[i][j][k]=x[i][j][k];
for (int i=0;i<x.length;i++)
for (int j=0;j<x[i].length;j++)
for (int k=0;k<x[i][j].length;k++) {
copyx[i][j][k]+=h; fplus[i][j][k]=f.evaluate(copyx);
copyx[i][j][k]-=(2*h); fminus[i][j][k]=f.evaluate(copyx);
copyx[i][j][k]=x[i][j][k];
result[i][j][k]=(fplus[i][j][k]-fminus[i][j][k])/(2*h);
}
return result;
}
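/*
 * Formula implemented above (standard central differences, applied
 * independently to each weight x[i][j][k] with step h=0.001):
 *   df/dx ~ ( f(x+h) - f(x-h) ) / (2h)
 * an O(h^2) estimate, useful as a check on the analytical gradient().
 */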
/**
* <p>
* Copies the first size elements of vector org to dst.
*
* </p>
* @param org the source vector.
* @param size the number of elements to copy.
* @param dst the destination vector.
*/
private void copy(double org[], int size, double dst[]) {
for (int i=0;i<size;i++) dst[i]=org[i];
}
/**
* <p>
* Creates and returns a copy of vector x.
* </p>
* @param x the vector to be copied.
* @return a clone of vector x.
*/
double[] duplicate(double x[]) {
double r[]=new double[x.length];
for (int i=0;i<x.length;i++) { r[i]=x[i]; }
return r;
}
/**
* <p>
* Creates and returns a copy of matrix x.
* </p>
* @param x the matrix to be copied.
* @return a copy of matrix x.
*/
double[][] duplicate(double x[][]) {
double r[][]=new double[x.length][];
for (int i=0;i<x.length;i++) {
r[i]=new double[x[i].length];
for (int j=0;j<x[i].length;j++) { r[i][j]=x[i][j]; }
}
return r;
}
/**
* <p>
* Creates and returns a copy of the three-dimensional array x.
* </p>
* @param x the array to be copied.
* @return a copy of x.
*/
double[][][] duplicate(double x[][][]) {
double r[][][]=new double[x.length][][];
for (int i=0;i<x.length;i++) {
r[i]=new double[x[i].length][];
for (int j=0;j<r[i].length;j++) {
r[i][j]=new double[x[i][j].length];
for (int k=0;k<r[i][j].length;k++) r[i][j][k]=x[i][j][k];
}
}
return r;
}
/**
* <p>
* Returns the hyperbolic tangent of x.
* </p>
* @param x any real value.
* @return the hyperbolic tangent of x, in the range [-1,+1].
*/
private double hTan(double x) {
if (x<-100) return -1;
if (x>100) return 1;
return (double)(Math.exp(x)-Math.exp(-x))/(double)(Math.exp(x)+Math.exp(-x));
}
/**
* <p>
* Returns the derivative of the hyperbolic tangent at x.
* </p>
* @param x any real value.
* @return the derivative of the hyperbolic tangent at x.
*/
private double hTanp(double x) {
if (x<-100) return 0;
if (x>100) return 0;
return 4/Math.pow((Math.exp(x)+Math.exp(-x)),2);
}
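/*
 * Identity behind hTan/hTanp (standard calculus, not KEEL-specific):
 *   tanh(x)  = (e^x - e^-x)/(e^x + e^-x)
 *   tanh'(x) = 1 - tanh(x)^2 = 4/(e^x + e^-x)^2
 * so hTanp is exactly the derivative of hTan, which is what gradient()
 * stores in delta before the backward pass.
 */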
/**
* <p>
* Calculates the gradient of the error function with respect to the weights x, by backpropagation over all the training examples
*
* </p>
* @param f error function used to evaluate the neural network with weights x
* @param x weights at which the gradient is evaluated
* @return the gradient of the error with respect to each weight in x
*/
private double[][][] gradient(FUN f, double x[][][]) {
int i,j,k,l,m;
double [][][] GRADE=duplicate(gradf);
for (k=0;k<Input.length;k++) {
// Forward pass
//example k is copied to input[0]
copy(Input[k],Input[k].length,input[0]);
input[0][input[0].length-1]=1;
//for each hidden layer (x.length-1)
for (i=0;i<x.length-1;i++) {
//for each neuron in current layer i
for (j=0;j<x[i].length;j++) {
//input for neuron i+1,j is calculated
input[i+1][j]=OPV.multiply(x[i][j],input[i]);
//the derivative of the activation at unit i,j is stored for the backward pass
delta[i][j]=hTanp(input[i+1][j]);
}
//Hyperbolic tangent activation is applied to each neuron in the next layer
for (j=0;j<input[i+1].length-1;j++)
input[i+1][j]=hTan(input[i+1][j]);
input[i+1][input[i+1].length-1]=1;
}
//Now the network output (linear output layer) is calculated
for (j=0;j<output.length;j++)
output[j]=OPV.multiply(x[i][j],input[i]);
//And the error for the last layer is measured
for (j=0;j<delta[i].length;j++) delta[i][j]=output[j]-Output[k][j];
// Backward pass
for (i=x.length-2;i>=0;i--) {
for (j=0;j<delta[i].length;j++) {
double suma=0;
for (l=0;l<delta[i+1].length;l++) {
suma+=delta[i+1][l]*x[i+1][l][j];
}
delta[i][j]*=suma;
}
}
// Gradient for example k
for (i=0;i<gradf.length;i++) {
for (j=0;j<gradf[i].length;j++) {
for (m=0;m<gradf[i][j].length; m++) {
gradf[i][j][m]=2*delta[i][j]*input[i][m]/Input.length;
}
}
}
if (k==0) {
GRADE=duplicate(gradf);
} else {
GRADE=OPV.sum(GRADE,gradf);
}
}
return GRADE;
}
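/*
 * Summary of the backpropagation rules implemented above (N = Input.length):
 *   output layer: delta_j = o_j - y_j                      (linear units)
 *   hidden layer: delta_j = tanh'(net_j) * sum_l delta_(l) * w_(l,j)
 *   gradient:     dE/dw_(j,m) = 2 * delta_j * input_m / N
 * which matches the derivative of the mean squared error computed in f().
 */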
/**
* <p>
* Prints the weights of the N-layer neural network x to standard output
*
* </p>
* @param x the weight matrix to print
*/
public void sample(double x[][][]) {
for (int i=0;i<x.length;i++)
for (int j=0;j<x[i].length;j++)
for (int k=0;k<x[i][j].length;k++) System.out.print(x[i][j][k]+" ");
}
/**
* <p>
* Calculates the output of a perceptron with weights W for input x
*
* </p>
* @param x the example to give the perceptron
* @param W the weights of the perceptron
* @return the output of the perceptron with weights W for input x
*/
public double[] nn(double x[], double W[][][]) {
// Last layer has linear activation
copy(x,x.length,input[0]);
input[0][input[0].length-1]=1;
int i,j;
for (i=0;i<W.length-1;i++) {
for (j=0;j<W[i].length;j++)
input[i+1][j]=OPV.multiply(W[i][j],input[i]);
for (j=0;j<input[i+1].length-1;j++)
input[i+1][j]=hTan(input[i+1][j]);
input[i+1][input[i+1].length-1]=1;
}
for (j=0;j<output.length;j++)
output[j]=OPV.multiply(W[i][j],input[i]);
return output;
}
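/*
 * Usage sketch (illustrative; inputs must be scaled to the same range as the
 * training data, since the constructor calls scale() on Input/Output):
 *
 *   double[] yScaled = net.nn(xScaled, netWeights);
 *   // to recover original units, invert the scaling as debugOutput() does:
 *   // double[] y = OPV.invScale(yScaled, max_y, min_y);
 */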
/**
* <p>
* Returns the mean squared error of a perceptron with weights x over all the examples in Input
*
* </p>
* @param x the weights of the perceptron
* @return the mean squared error of the perceptron output with respect to the expected output Output
*/
public double f(double x[][][]) {
// Mean square error
double RMS=0;
for (int i=0;i<Input.length;i++) {
double error[]=OPV.subtract(nn(Input[i],x),Output[i]);
RMS+=OPV.multiply(error,error);
}
// Mean square error
return RMS/Input.length;
}
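/*
 * In formula form (N = Input.length):
 *   f(x) = (1/N) * sum_k || nn(Input[k],x) - Output[k] ||^2
 * Despite the local name RMS, no square root is taken: the returned value is
 * the mean of the per-example summed squared errors.
 */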
/**
* <p>
* Returns the denormalized mean squared error of a perceptron with weights x over all the examples in Input
*
* </p>
*
* @param x the weights of the perceptron.
* @param FACTOR a vector with the difference between the max and min value of each output
* @return the denormalized mean squared error of the perceptron output with respect to the expected output Output
*/
public double f_denormalized(double x[][][], double FACTOR[]) {
// Mean square error
double RMS=0;
for (int i=0;i<Input.length;i++) {
double error[]=OPV.subtract(nn(Input[i],x),Output[i]);
for (int j=0;j<error.length;j++) error[j]*=FACTOR[j];
RMS+=OPV.multiply(error,error);
}
// Mean square error
return RMS/Input.length;
}
/**
* <p>
* Returns a random value in the range [low, high].
*
* </p>
* @param low the lower bound.
* @param high the upper bound.
* @return a uniformly distributed random value in [low, high].
*/
private double rnd(double low, double high) {
// r.Rand() is uniform in [0,1]; rescale it to [low, high]
return r.Rand()*(high-low)+low;
}
/**
* <p>
* Scales the input example values and the expected output values, storing the per-output scaling factors
*
* </p>
*/
public void scale() {
// Data are scaled
for (int i=0;i<Input.length;i++) {
if (i==0) {
max_x=duplicate(Input[i]); max_y=duplicate(Output[i]);
min_x=duplicate(Input[i]); min_y=duplicate(Output[i]);
} else {
max_x=OPV.maximum(max_x,Input[i]);
max_y=OPV.maximum(max_y,Output[i]);
min_x=OPV.minimum(min_x,Input[i]);
min_y=OPV.minimum(min_y,Output[i]);
}
}
// Proportional factor for examples
for (int i=0;i<Input.length;i++)
Input[i]=OPV.scale(Input[i],max_x,min_x);
for (int i=0;i<Input.length;i++)
Output[i]=OPV.scale(Output[i],max_y,min_y);
// Proportional factor between scaled and not scaled error
for (int i=0;i<factor.length;i++) factor[i]=max_y[i]-min_y[i];
}
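/*
 * Assuming OPV.scale performs min-max normalization (consistent with the use
 * of OPV.invScale in debugOutput()), each component is mapped as
 *   x' = (x - min) / (max - min)
 * and factor[i] = max_y[i] - min_y[i] lets f_denormalized() report the error
 * in the original output units.
 */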
/**
* <p>
* Returns the mean squared error of the output perceptron trained with the conjugate gradient algorithm.
*
* </p>
* @param f the error function.
* @param TOL_ERR the stopping error tolerance.
* @param MIN_DELTAGC not used.
* @param MAX_ITER the maximum number of iterations.
* @return the mean squared error of the output perceptron trained with the conjugate gradient algorithm.
*/
public double conjugatedGradient(FUN f, double TOL_ERR, double MIN_DELTAGC, int MAX_ITER) {
int NVAR=0;
double last_err=0,err=0;
for (int i=0;i<weights.length;i++) NVAR+=weights[i].length*weights[i][0].length;
int iter=0,subiter=0;
double alfa=0;
double x[][][]=weights;
double d[][][], gr[][][], g_old[][][];
double xbus[][][], dbus[][][];
// Conjugate gradient algorithm
boolean restart=true;
boolean debug=false;
g_old = gradient(f,x);
d=duplicate(g_old);
do {
if (debug) {
System.out.println("Debug: X="+AString(x));
System.out.println("Debug: g_old="+AString(g_old));
}
gr=gradient(f,x);
if (restart) {
d=OPV.signChange(gr);
restart=false;
subiter=0;
} else {
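// Polak-Ribiere formula for the conjugate direction coefficient beta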
double beta=(OPV.multiply(OPV.subtract(gr,g_old),gr))/
OPV.multiply(g_old,g_old);
d=OPV.subtract(OPV.multiply(beta,d),gr);
}
double ngr=Math.sqrt(OPV.multiply(gr,gr));
double dgr=Math.sqrt(OPV.multiply(d,d));
if (debug) System.out.println("...1");
xbus=duplicate(x);
dbus=OPV.multiply(1.0/dgr,duplicate(d));
if (debug) System.out.println("...2");
LinearSearchBrent BL = new LinearSearchBrent(f,dbus,xbus);
if (debug) System.out.println("...3");
alfa=BL.minimumSearch(r);
if (debug) System.out.println("...4");
x=OPV.sum(x,OPV.multiply(alfa,dbus));
weights=x;
g_old=duplicate(gr);
iter++;
subiter++;
if (subiter>=NVAR) restart=true;
if (debug) System.out.println("...5");
err=f_denormalized(x,factor);
if (alfa<1e-4*dgr) {
// restart=true; // Gradient direction if there're problems
// System.out.println("Restart");
System.out.println("return: alpha<"+(1e-4*dgr)+"="+alfa);
break;
}
last_err=err;
} while (Math.sqrt(OPV.multiply(gr,gr))>TOL_ERR*gr.length
&& iter<MAX_ITER);
return err;
}
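/*
 * Loop sketch: each iteration recomputes the gradient, builds the
 * Polak-Ribiere conjugate direction (restarting to steepest descent every
 * NVAR iterations), normalizes it, and runs a Brent line search along it via
 * LinearSearchBrent; it stops on a small gradient norm, on MAX_ITER, or when
 * the line-search step alfa becomes negligible.
 */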
/**
* <p>
* Returns the mean squared error of the output perceptron trained with the gradient descent algorithm.
*
* </p>
* @param f the error function.
* @param TOL_ERR the stopping error tolerance.
* @param MIN_DELTAGC not used.
* @param MAX_ITER the maximum number of iterations.
* @return the mean squared error of the output perceptron trained with the gradient descent algorithm.
*/
public double descentGradient(FUN f, double TOL_ERR, double MIN_DELTAGC, int MAX_ITER) {
int NVAR=0;
double last_err=0,err=0;
for (int i=0;i<weights.length;i++) NVAR+=weights[i].length*weights[i][0].length;
int iter=0,subiter=0;
double alpha=0;
double x[][][]=weights;
double d[][][], gr[][][], g_old[][][];
double xbus[][][], dbus[][][];
// Gradient Descent
boolean restart=true;
boolean debug=false;
g_old = gradient(f,x);
d=duplicate(g_old);
do {
gr=gradient(f,x);
d=OPV.signChange(gr);
xbus=duplicate(x);
dbus=duplicate(d);
LinearSearchBrent BL = new LinearSearchBrent(f,dbus,xbus);
alpha=BL.minimumSearch(r);
x=OPV.sum(x,OPV.multiply(alpha,dbus));
weights=x;
g_old=duplicate(gr);
iter++;
err=f_denormalized(x,factor);
if (alpha<0.0001) {
System.out.println("return: alpha < 0.0001");
break;
}
} while (Math.sqrt(OPV.multiply(gr,gr))>TOL_ERR*gr.length
&& iter<MAX_ITER);
return err;
}
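/*
 * Unlike conjugatedGradient(), each iteration here simply follows the
 * steepest-descent direction d = -gradient with the same Brent line search
 * for the step length; there is no conjugate-direction memory.
 */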
/**
* <p>
* Initializes the matrix of weights with random values in the range [-x,x].
*
* </p>
*
* @param x the lower/upper limit for random values.
*/
public void randomWeights(double x) {
for (int i=0;i<weights.length;i++)
for (int j=0;j<weights[i].length;j++)
for (int k=0;k<weights[i][j].length;k++)
weights[i][j][k]=rnd(-x,x);
}
/**
* <p>
* Updates the matrix of weights by adding random values in the range [-x,x].
*
* </p>
* @param x the lower/upper limit for random values.
*/
public void changeWeights(double x) {
for (int i=0;i<weights.length;i++)
for (int j=0;j<weights[i].length;j++)
for (int k=0;k<weights[i][j].length;k++)
weights[i][j][k]+=rnd(-x,x);
}
/**
* <p>
* Returns a printable version of x.
*
* </p>
*
* @param x the vector to print.
* @return a String with a printable version of x.
*/
private String AString(double x[]) {
String result="[";
for (int i=0;i<x.length-1;i++) result+=(x[i]+" ");
result+=x[x.length-1];
result+="]";
return result;
}
/**
* <p>
* Returns a printable version of x.
*
* </p>
*
* @param x the matrix to print.
* @return a String with a printable version of x.
*/
private String AString(double x[][]) {
String result="";
for (int i=0;i<x.length;i++) result+=AString(x[i]);
return result;
}
/**
* <p>
* Returns a printable version of x.
*
* </p>
*
* @param x the three-dimensional array to print.
* @return a String with a printable version of x.
*/
private String AString(double x[][][]) {
String result="";
for (int i=0;i<x.length;i++) result+=AString(x[i]);
return result;
}
/**
* <p>
* Prints to standard output the main information about the training algorithm run:
* -the matrix of weights
* -the original input examples (not scaled)
* -the obtained output (unscaled back to the original range)
* -the expected output (unscaled back to the original range)
*
* </p>
*/
public void debugOutput() {
double x[],y[],d[];
System.out.println("Weight="+AString(weights));
for (int i=0;i<Input.length;i++) {
x=OPV.invScale(Input[i],max_x,min_x);
y=OPV.invScale(nn(Input[i],weights),max_y,min_y);
d=OPV.invScale(Output[i],max_y,min_y);
System.out.println(AString(x)+" "+AString(y)+" "+AString(d));
}
}
/**
* <p>
* Returns a copy of weights in vector p.
*
* </p>
*
* @param p an output parameter that receives a copy of the weights; it must be large enough to hold all of them.
*/
public void getWeights(double []p) {
int total=0;
for (int i=0;i<weights.length;i++)
for (int j=0;j<weights[i].length;j++)
for (int k=0;k<weights[i][j].length;k++) p[total++]=weights[i][j][k];
}
/**
* <p>
* Copies the weights contained in p to the weights matrix.
*
* </p>
*
* @param p a vector with the flattened weights to copy into the weights matrix.
*/
public void setWeights(double p[]) {
int total=0;
for (int i=0;i<weights.length;i++)
for (int j=0;j<weights[i].length;j++)
for (int k=0;k<weights[i][j].length;k++) weights[i][j][k]=p[total++];
}
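/*
 * Usage sketch (illustrative): getWeights/setWeights flatten the 3-D weight
 * array in layer-major order, so p must be sized to the total weight count,
 * the same NVAR computed in the training methods:
 *
 *   int nvar = 0;
 *   for (int i = 0; i < weights.length; i++)
 *       nvar += weights[i].length * weights[i][0].length;
 *   double[] p = new double[nvar];
 *   getWeights(p);   // flatten the weights into p
 *   setWeights(p);   // restore them
 */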
/**
* Return the weights as a String to print them
* @return the weights in String format
*/
public String getWeightsAsString(){
return AString(weights);
}
}