/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Neural_Networks.IRPropPlus_Clas;
import keel.Algorithms.Neural_Networks.NNEP_Clas.neuralnet.NeuralNetClassifier;
import keel.Algorithms.Neural_Networks.NNEP_Common.neuralnet.Link;
import keel.Algorithms.Neural_Networks.NNEP_Common.neuralnet.LinkedLayer;
import keel.Algorithms.Neural_Networks.NNEP_Common.neuralnet.LinkedNeuron;
/**
* <p>
* @author Written by Alfonso Carlos Martinez Estudillo (University of Cordoba) 5/11/2007
* @author Written by Pedro Antonio Gutiérrez Peña (University of Cordoba) 5/11/2007
* @version 0.1
* @since JDK1.5
* </p>
*/
public class MSEOptimizableSigmNeuralNetClassifier extends NeuralNetClassifier implements IOptimizableFunc{
/**
* <p>
* Sigmoid neural net classifier with a single hidden layer and multiple
* outputs, prepared for optimizing the MSE.
* </p>
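*
* <p>
* A minimal sketch of how an optimizer might drive the IOptimizableFunc
* interface exposed by this class (the variable names and the plain
* fixed-step update are illustrative only; in this package the caller is
* expected to be the iRprop+ optimizer):
* </p>
* <pre>{@code
* double[] a = net.getCoefficients(); // current weight vector
* double[] grad = net.gradient(x, y); // dE/da accumulated over all patterns
* for (int k = 0; k < a.length; k++)
*     a[k] -= 0.1 * grad[k];          // illustrative gradient step
* net.setCoefficients(a);             // write the new weights back
* double mse = net.getLastError();    // MSE computed during gradient(x, y)
* }</pre>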
*/
/////////////////////////////////////////////////////////////////
// --------------------------------------------------- Properties
/////////////////////////////////////////////////////////////////
/** Error committed in the last call to gradient() */
protected double lastError = 0;
/////////////////////////////////////////////////////////////////
// ----------------------------------------------- Public methods
/////////////////////////////////////////////////////////////////
/**
* <p>
* Returns the initial value of a[], that is, the coefficients of
* the model
* B01 B02 ... B0(J-1) [W11 W12 ... B11 B12 B1(J-1)]* ...
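*
* For illustration (assumed sizes), with 2 inputs, 1 hidden neuron and
* 2 output neurons, a[] holds 2 + 1*((2+1)+2) = 7 values:
* <pre>
* a[0..1] = B01 B02   (output bias weights)
* a[2..3] = W11 W12   (weights from the 2 inputs to the hidden neuron)
* a[4]    = bias weight of the hidden neuron
* a[5..6] = B11 B12   (hidden-to-output weights)
* </pre>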
*
* @return double array of initial coefficients values
* </p>
*/
public double[] getCoefficients() {
int inputs = inputLayer.getMaxnofneurons();
int outputs = outputLayer.getMaxnofneurons();
int hiddenNeurons = this.getNofhneurons();
int params_node = (inputs+1) + outputs;
int params = (hiddenNeurons * params_node) + outputs;
double [] a = new double[params];
LinkedLayer hl = this.hiddenLayers.get(0);
LinkedLayer ol = this.outputLayer;
// For each neuron in hidden layer
for(int i=0; i<hl.getNofneurons(); i++){
LinkedNeuron hn = hl.getNeuron(i);
Link links[] = hn.getLinks();
int baseIndex = outputs+i*(params_node);
for(int j=0; j<links.length; j++)
if(!links[j].isBroken())
a[baseIndex+j] = links[j].getWeight();
else
a[baseIndex+j] = 0;
// For each neuron in output layer
baseIndex += inputs+1;
for(int j=0; j<ol.getNofneurons(); j++){
LinkedNeuron on = ol.getNeuron(j);
Link outputLinks[] = on.getLinks();
if(!outputLinks[i].isBroken())
a[baseIndex+j] = outputLinks[i].getWeight();
else
a[baseIndex+j] = 0;
}
}
// Bias weights
if(ol.isBiased())
for(int j=0; j<ol.getNofneurons(); j++){
LinkedNeuron on = ol.getNeuron(j);
Link outputLinks[] = on.getLinks();
if(!outputLinks[hl.getMaxnofneurons()].isBroken())
a[j] = outputLinks[hl.getMaxnofneurons()].getWeight();
else
a[j] = 0;
}
return a;
}
/**
* <p>
* Sets the final value of a[], that is, the coefficients of the
* model (same layout as in getCoefficients())
* B01 B02 ... B0(J-1) [W11 W12 ... B11 B12 B1(J-1)]* ...
*
* @param a array of final coefficients values
* </p>
*/
public void setCoefficients(double[] a) {
int inputs = inputLayer.getMaxnofneurons();
int outputs = outputLayer.getMaxnofneurons();
LinkedLayer hl = this.hiddenLayers.get(0);
LinkedLayer ol = this.outputLayer;
// For each neuron in hidden layer
for(int i=0; i<hl.getNofneurons(); i++){
LinkedNeuron hn = hl.getNeuron(i);
Link links[] = hn.getLinks();
int baseIndex = outputs+i*((inputs+1)+outputs);
for(int j=0; j<links.length-1; j++)
if(a[baseIndex+j]!=0){
links[j].setBroken(false);
links[j].setOrigin(inputLayer.getNeuron(j));
links[j].setTarget(hn);
links[j].setWeight(a[baseIndex+j]);
}
// w bias
int r = links.length-1;
if(a[baseIndex+r]!=0){
links[r].setBroken(false);
links[r].setWeight(a[baseIndex+r]);
}
// For each neuron in output layer
baseIndex += inputs+1;
for(int j=0; j<ol.getNofneurons(); j++){
LinkedNeuron on = ol.getNeuron(j);
Link outputLinks[] = on.getLinks();
if(a[baseIndex+j]!=0){
outputLinks[i].setBroken(false);
outputLinks[i].setOrigin(hn);
outputLinks[i].setTarget(on);
outputLinks[i].setWeight(a[baseIndex+j]);
}
}
}
// Bias weights
if(ol.isBiased())
for(int j=0; j<ol.getNofneurons(); j++){
LinkedNeuron on = ol.getNeuron(j);
Link outputLinks[] = on.getLinks();
if(a[j]!=0){
outputLinks[hl.getMaxnofneurons()].setBroken(false);
outputLinks[hl.getMaxnofneurons()].setWeight(a[j]);
}
}
}
/**
* <p>
* Returns the gradient of the MSE error function with respect to
* each coefficient of the model, using an input observation
* matrix (x[]) and an expected output matrix (y[])
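*
* As computed below, the minimized error is E(a) = (1/2) * sum_i sum_l
* (g_il - y_il)^2, where g_il is softmax output l for pattern i, so each
* gradient component is dE/da_j = sum_i sum_l (g_il - y_il) * dg_il/da_j.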
*
* @param x Array with all inputs of all observations
* @param y Array with all expected outputs of all observations
*
* @return double[] Gradient vector dE/da for all coefficients
* </p>
*/
public double[] gradient(double [][] x, double [][] y){
// Initialize variables
int inputs = inputLayer.getMaxnofneurons() + 1; //Bias
int outputs = outputLayer.getMaxnofneurons();
int params = (this.getNofhneurons()*(inputs+outputs))+outputs;
int n_pattern = x.length;
double sum;
// Obtain the error
lastError = 0;
double g[][] = new double[n_pattern][];
double vdgda [][][] = new double[n_pattern][][];
double coef[][] = new double[n_pattern][outputs+1];
for (int i=0; i < n_pattern; i++) {
g[i] = this.softmaxProbabilities(x[i]);
vdgda[i] = this.dgda(x[i], g[i]);
for(int l = 0; l <= outputs; l++){
coef[i][l] = g[i][l]- y[i][l];
lastError += coef[i][l]*coef[i][l];
}
}
//lastError /= (n_pattern*outputs);
lastError /= 2.;
// Resulting gradient
double[] gradient = new double [params];
for (int j=0; j < params; j++) {
gradient[j] = 0;
// For each pattern
for (int i = 0; i < n_pattern; i++ ){
sum = 0;
// For each output
for(int l = 0; l <= outputs; l++)
sum += coef[i][l] * vdgda[i][j][l];
gradient[j] = gradient[j] + sum;
}
// gradient[j] = 2*gradient[j];
// gradient[j] = (2*gradient[j])/(n_pattern*outputs);
}
return gradient;
}
/**
* <p>
* Returns the last error committed by the model
*
* @return double Error of the model with respect to the data y[]
* </p>
*/
public double getLastError(){
return lastError;
}
/////////////////////////////////////////////////////////////////
// ---------------------------------------------- Private methods
/////////////////////////////////////////////////////////////////
/**
* <p>
* Obtains the derivative of each softmax-transformed output (g)
* with respect to each coefficient, for an input observation
* pattern (x)
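*
* Since g_j = exp(f_j) / sum_l exp(f_l), the expression computed below is
* the usual softmax Jacobian dg_j/da = g_j * (df_j/da - sum_l g_l * df_l/da),
* because g_j * exp(-f_j) equals 1 / sum_l exp(f_l).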
*
* @param x Input observation array
* @param g Softmax-transformed outputs (computed from the raw outputs when null)
*
* @return double[][] Partial derivatives matrix
* </p>
*/
private double[][] dgda (double[]x, double [] g){
// Initialize variables
int inputs = inputLayer.getMaxnofneurons() + 1; // +1 for the bias
int outputs = outputLayer.getMaxnofneurons();
int params = (this.getNofhneurons()*(inputs+outputs))+outputs;
double [][] vdgda = new double[params][outputs+1];
// Calculate outputs of network (f)
double [] f = this.rawOutputs(x);
// Calculate partial derivative of f respect all weights
double [][] vdfda = this.dfda(x);
// Calculate softmax probabilities if they are not previously defined
if(g==null)
g = this.applySoftmax(f);
// Calculate partial derivatives of g respect all weights
for(int h = 0; h<params; h++) {
for(int j = 0; j<=outputs; j++){
double sum = 0;
for (int l = 0; l<outputs; l++){
if (vdfda[h][l] != 0)
sum+=Math.exp(f[l]) * vdfda[h][l];
}
vdgda[h][j]=g[j] * (vdfda[h][j] - (g[j] * Math.exp(-f[j]) * sum));
}
}
return vdgda;
}
/**
* <p>
* Obtains the derivative of each raw (non softmax-transformed) output (f)
* with respect to each coefficient, for an input observation
* pattern (x)
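*
* As implemented below, the raw model is f_l(x) = B_0l + sum_j B_jl * s_j(x),
* where s_j(x) = 1/(1+exp(-(sum_i W_ij*x_i + W_0j))) is the output of
* hidden sigmoid unit j.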
*
* @param x Input observation array
*
* @return double[][] Partial derivatives matrix
* </p>
*/
private double[][] dfda (double[] x)
{
// Initialize variables
int inputs = inputLayer.getMaxnofneurons();
int outputs = outputLayer.getMaxnofneurons();
int params_node = (inputs+1) + outputs;
int hiddenNeurons = this.getNofhneurons();
int params = hiddenNeurons * params_node + outputs;
double [] a = this.getCoefficients();
double [][] vdfda = new double[params][outputs+1];
// Initialize vdfda[][]
for (int i = 0; i < params; i++)
for (int j = 0; j <= outputs; j++)
vdfda[i][j] = 0 ;
// Calculate partial derivatives of Beta Bias weights
for (int j = 0; j < outputs; j++)
vdfda[j][j] = 1 ;
// Calculate the derivatives of the remaining Beta weights
double sal;
int baseIndex;
for (int j = 0; j < hiddenNeurons; j++)
{
// Calculate the output of hidden unit j for input x
sal = 0;
baseIndex = outputs + (j * params_node);
for (int i = 0; i < inputs; i++)
sal += x[i] * a[baseIndex + i];
sal = 1/(1+(Math.exp(-(sal+a[baseIndex+inputs]))));
baseIndex = baseIndex + inputs+1;
for (int i = 0; i<outputs; i++)
vdfda[baseIndex + i][i] = sal;
}
// Calculate the partial derivatives of the w weights
for (int j = 0; j < inputs; j++)
{
for (int h = 0; h < hiddenNeurons; h++ )
{
baseIndex=(outputs) + (h * params_node);
for (int l = 0; l < outputs; l++)
{
vdfda[baseIndex + j][l] = a[baseIndex + inputs+1 + l] * vdfda[baseIndex + inputs+1][0] * (1 - vdfda[baseIndex + inputs+1][0]) * x[j];
}
}
}
// Calculate the partial derivatives of the w bias weights
int j = inputs;
for (int h = 0; h < hiddenNeurons; h++ )
{
baseIndex=(outputs) + (h * params_node);
for (int l = 0; l < outputs; l++)
{
vdfda[baseIndex + j][l] = a[baseIndex + inputs+1 + l] * vdfda[baseIndex + inputs+1][0] * (1 - vdfda[baseIndex + inputs+1][0]) ;
}
}
return vdfda;
}
/**
* <p>
* Applies the softmax transformation to a set of raw outputs, controlling
* overflow problems with very high values
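*
* The last raw output acts as the reference class: its exponential is
* fixed to 1 (i.e. its raw output is taken as 0).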
*
* @param rawOutputs Array with non-transformed raw outputs
*
* @return double[] Softmax transformed values
* </p>
*/
private double[] applySoftmax(double [] rawOutputs) {
double[] exp = new double[rawOutputs.length];
// Sum of exp(rawOutputs) values
double expSum = 0;
for(int i=0; i<rawOutputs.length; i++){
if(i!=rawOutputs.length-1)
exp[i] = Math.exp(rawOutputs[i]);
else
exp[i] = 1;
expSum += exp[i];
}
// Overflow control: if the sum overflowed, recompute the
// exponentials with the raw outputs scaled down
if(Double.isInfinite(expSum) || Double.isNaN(expSum)){
// Sum of exp(rawOutputs/50000) values
expSum = 0;
for(int i=0; i<rawOutputs.length; i++){
if(i!=rawOutputs.length-1)
exp[i] = Math.exp(rawOutputs[i]/50000.);
else
exp[i] = 1;
expSum += exp[i];
}
}
// Normalize outputs
for(int i=0; i<exp.length; i++)
exp[i] /= expSum;
return exp;
}
}