/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.ImbalancedClassification.CSMethods.MLPerceptronBackpropCS;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Vector;
import keel.Dataset.Attributes;
import org.core.Randomize;
/**
 * <p>
 * Command-line entry point of the cost-sensitive multilayer perceptron trained
 * with backpropagation. It loads the run parameters and the datasets, encodes
 * the patterns as numeric matrices, trains a {@code Network} and writes the
 * output files, passing along the positive (least frequent) class and the
 * misclassification costs derived from the training set.
 * </p>
 * @author Written by Nicolas Garcia Pedrajas (University of Cordoba) 27/02/2007
 * @author Modified by Victoria Lopez Morales (University of Granada) 23/05/2010
 * @version 0.1
 * @since JDK1.5
 */
public class MLPerceptronBackpropCS {

    /**
     * <p>
     * Empty constructor
     * </p>
     */
    public MLPerceptronBackpropCS() {
    }

    /**
     * <p>
     * Main function. Runs the whole experiment:
     * </p>
     * <ol>
     * <li>loads the parameters from the file named by {@code args[0]};</li>
     * <li>reads the training set and, when enabled in the parameters, the test
     * and validation sets;</li>
     * <li>derives the number of network inputs and outputs from the training
     * set header (a nominal attribute contributes one unit per category);</li>
     * <li>converts the datasets to matrices and applies the optional input
     * typification, the Htan target remapping and the regression output
     * scaling;</li>
     * <li>trains the network (optionally with cross validation), optionally
     * saves it, and writes one output file per dataset.</li>
     * </ol>
     * <p>
     * The process terminates with exit status 1 when no parameters file is given.
     * </p>
     * @param args Arguments to main method; {@code args[0]} is the parameters file
     */
    public static void main(String[] args) {
        if (args.length <= 0) {
            System.err.println("No parameters file");
            System.exit(1);
        }

        Parameters global = new Parameters();
        global.LoadParameters(args[0]);

        OpenDataset train = new OpenDataset();
        OpenDataset test = null;
        OpenDataset validation = null;

        train.processClassifierDataset(global.train_file, true);
        global.n_test_patterns = 0;
        global.n_train_patterns = train.getndatos();

        if (global.test_data) {
            test = new OpenDataset();
            test.processClassifierDataset(global.test_file, false);
            global.n_test_patterns = test.getndatos();
        }

        global.n_val_patterns = 0;
        if (global.val_data) {
            validation = new OpenDataset();
            validation.processClassifierDataset(global.val_file, false);
            global.n_val_patterns = validation.getndatos();
        }

        // Number of inputs: attributes of type 0 are expanded to one input per
        // category, every other attribute uses a single input.
        global.Ninputs = 0;
        for (int i = 0; i < train.getnentradas(); i++) {
            if (train.getTiposAt(i) == 0) {
                Vector<?> inValues = train.getRangosVar(i);
                global.Ninputs += inValues.size();
            } else {
                global.Ninputs++;
            }
        }

        // Number of outputs: one per category for an output of type 0,
        // otherwise the number of output attributes of the dataset.
        if (train.getTiposAt(train.getnentradas()) != 0) {
            global.Noutputs = train.getnsalidas();
        } else {
            Vector<?> outValues = train.getRangosVar(train.getnentradas());
            global.Noutputs = outValues.size();
        }

        Data data = new Data(global.Ninputs + global.Noutputs,
                             global.n_train_patterns,
                             global.n_test_patterns, global.n_val_patterns);

        // The slot after the last hidden layer holds the size of the output layer.
        global.Nhidden[global.Nhidden_layers] = global.Noutputs;

        DatasetToArray(data.train, train);
        if (global.test_data) {
            DatasetToArray(data.test, test);
        }
        if (global.val_data) {
            DatasetToArray(data.validation, validation);
        }

        if (global.tipify_inputs) {
            data.TipifyInputData(global);
        }

        // With an Htan output layer in classification, targets equal to 0 are
        // replaced by -1 in every dataset in use.
        if (global.transfer[global.Nhidden_layers].equalsIgnoreCase("Htan")
            && global.problem.equalsIgnoreCase("Classification")) {
            replaceZeroTargets(data.train, global.n_train_patterns,
                               global.Ninputs, global.Noutputs);
            if (global.test_data) {
                replaceZeroTargets(data.test, global.n_test_patterns,
                                   global.Ninputs, global.Noutputs);
            }
            if (global.val_data) {
                replaceZeroTargets(data.validation, global.n_val_patterns,
                                   global.Ninputs, global.Noutputs);
            }
        }

        if (global.problem.equalsIgnoreCase("Regression")) {
            // Scale outputs to [0, 1] for a Log output layer, to [-1, 1] otherwise.
            double ubound = 1.0;
            double lbound =
                global.transfer[global.Nhidden_layers].equalsIgnoreCase("Log")
                ? 0.0 : -1.0;
            data.ScaleOutputData(global, lbound, ubound);
        }

        Network neural = new Network(global);
        if (global.verbose) {
            neural.PrintWeights();
        }

        if (global.cross_validation) {
            neural.TrainNetworkWithCrossvalidation(global, data);
        } else {
            neural.TrainNetwork(global, data.train, global.n_train_patterns);
        }

        if (global.save) {
            neural.SaveNetwork("network", false);
        }

        // Cost-sensitive information, always derived from the training set.
        int positiveClass = positive_class(train);
        double positiveCost = positive_cost(train);
        double negativeCost = negative_cost(train);

        if (global.verbose) {
            neural.PrintWeights();
            double res = neural.TestNetworkInClassification(global, data.train,
                    global.n_train_patterns);
            System.out.println("Final network training accuracy: " +
                               100.0 * res);
            if (global.val_data) {
                res = neural.TestNetworkInClassification(global,
                        data.validation,
                        global.n_val_patterns);
                System.out.println("Final network validation accuracy: " +
                                   100.0 * res);
            }
            if (global.test_data) {
                res = neural.TestNetworkInClassification(global, data.test,
                        global.n_test_patterns);
                System.out.println("Final network test accuracy: " +
                                   100.0 * res);
            }
        }

        neural.SaveOutputFile(global.train_output, data.train,
                              global.n_train_patterns, global.problem,
                              positiveClass, positiveCost, negativeCost);
        if (global.test_data) {
            neural.SaveOutputFile(global.test_output, data.test,
                                  global.n_test_patterns, global.problem,
                                  positiveClass, positiveCost, negativeCost);
        }
        if (global.val_data) {
            neural.SaveOutputFile(global.val_output, data.validation,
                                  global.n_val_patterns, global.problem,
                                  positiveClass, positiveCost, negativeCost);
        }
    }

    /**
     * <p>
     * Replaces every target value equal to 0 by -1 in the output columns of a
     * pattern matrix
     * </p>
     * @param patterns Pattern matrix, modified in place
     * @param nPatterns Number of rows to process
     * @param nInputs Number of input columns preceding the output columns
     * @param nOutputs Number of output columns
     */
    private static void replaceZeroTargets(double[][] patterns, int nPatterns,
                                           int nInputs, int nOutputs) {
        for (int i = 0; i < nPatterns; i++) {
            for (int j = 0; j < nOutputs; j++) {
                if (patterns[i][nInputs + j] == 0) {
                    patterns[i][nInputs + j] = -1.0;
                }
            }
        }
    }

    /**
     * <p>
     * Transforms the dataset into a double matrix
     * </p>
     * <p>
     * Each pattern is read as a comma separated record. An attribute of type 0
     * is expanded to one column per category (1.0 for the matching category,
     * compared ignoring case, 0.0 for the others); any other input attribute is
     * parsed as a double and stored as 0.0 when it cannot be parsed. The output
     * columns follow the input columns: one column per category for an output
     * of type 0, otherwise one parsed double per output attribute.
     * </p>
     * <p>
     * An out-of-range column while storing a numeric input, or an unparsable
     * last numeric output, prints the stack trace and terminates the process
     * with status -1.
     * </p>
     * @param array Output matrix
     * @param dataset Input dataset
     */
    public static void DatasetToArray(double array[][], OpenDataset dataset) {
        final int nPatterns = dataset.getndatos();
        if (nPatterns <= 0) {
            return;
        }

        // The attribute layout does not depend on the pattern, so category names
        // are collected once instead of once per row.
        // NOTE(review): assumes the OpenDataset getters are side-effect free - confirm.
        final int nInputs = dataset.getnentradas();
        final String[][] inputCategories = new String[nInputs][];
        for (int j = 0; j < nInputs; j++) {
            if (dataset.getTiposAt(j) == 0) {
                inputCategories[j] = categoryNames(dataset.getRangosVar(j));
            }
        }
        final boolean nominalOutput = dataset.getTiposAt(nInputs) == 0;
        final String[] outputCategories =
            nominalOutput ? categoryNames(dataset.getRangosVar(nInputs)) : null;
        final int nOutputs = nominalOutput ? 0 : dataset.getnsalidas();

        // For all the patterns
        for (int i = 0; i < nPatterns; i++) {
            final String line = dataset.getDatosAt(i);
            // Scanning starts at index 1, so the first character of the record is
            // skipped (presumably a leading delimiter - confirm against OpenDataset).
            int pos1 = 1;
            // Reset per row so that no position leaks in from the previous pattern.
            int pos2 = 0;
            int offset = 0;

            for (int j = 0; j < nInputs; j++) {
                pos2 = line.indexOf(",", pos1);
                final String[] cats = inputCategories[j];
                if (cats != null) {
                    final String token = line.substring(pos1, pos2);
                    for (int k = 0; k < cats.length; k++) {
                        array[i][offset + k] =
                            token.equalsIgnoreCase(cats[k]) ? 1.0 : 0.0;
                    }
                    offset += cats.length;
                } else {
                    try {
                        array[i][offset] = Double.parseDouble(line.substring(
                                pos1, pos2));
                    } catch (NumberFormatException ignored) {
                        // Best effort kept from the original code: an unparsable
                        // input value is stored as 0.0.
                        array[i][offset] = 0.0;
                    } catch (ArrayIndexOutOfBoundsException e) {
                        e.printStackTrace();
                        System.exit( -1);
                    }
                    offset++;
                }
                pos1 = pos2 + 1;
            }

            // Everything after the comma that closes the last input is the output.
            final int lastComma = line.indexOf(",", pos2);
            final String category = line.substring(lastComma + 1);

            if (!nominalOutput) {
                int start = 0;
                for (int k = 0; k < nOutputs - 1; k++) {
                    int end = category.indexOf(",", start);
                    array[i][offset + k] =
                        Double.parseDouble(category.substring(start, end));
                    start = end + 1;
                }
                try {
                    array[i][offset + nOutputs - 1] =
                        Double.parseDouble(category.substring(start));
                } catch (NumberFormatException e) {
                    e.printStackTrace();
                    System.exit( -1);
                }
            } else {
                for (int j = 0; j < outputCategories.length; j++) {
                    array[i][offset + j] =
                        category.equalsIgnoreCase(outputCategories[j]) ? 1.0 : 0.0;
                }
            }
        }
    }

    /**
     * <p>
     * Returns the string form of every element of a category list, in order
     * </p>
     * @param values Category values of a nominal attribute
     * @return array with {@code toString()} of each element
     */
    private static String[] categoryNames(Vector<?> values) {
        String[] names = new String[values.size()];
        for (int k = 0; k < names.length; k++) {
            names[k] = values.elementAt(k).toString();
        }
        return names;
    }

    /**
     * <p>
     * Generates a random number between min and max
     * </p>
     * @param min Min value
     * @param max Max value
     * @return random float number
     */
    public static double frandom(double min, double max) {
        return Randomize.Randdouble(min, max);
    }

    /**
     * <p>
     * Generates a random integer number between min and max. The value is a
     * random double cast to int, so its fractional part is discarded.
     * </p>
     * @param min Min value
     * @param max Max value
     * @return random integer number
     */
    public static int irandom(double min, double max) {
        return (int) Randomize.Randdouble(min, max);
    }

    /**
     * <p>
     * Counts how many patterns of the dataset belong to each class
     * </p>
     * @param data Dataset whose class distribution is computed
     * @return array indexed by class with the number of patterns of that class
     */
    private static int[] classFrequencies(OpenDataset data) {
        int nClasses = data.getRangosVar(data.getnentradas()).size();
        int[] frequencies = new int[nClasses];
        for (int i = 0; i < data.getndatos(); i++) {
            frequencies[data.getClassAt(i)]++;
        }
        return frequencies;
    }

    /**
     * <p>
     * Returns the index of the smallest value; the first one wins on ties
     * </p>
     * @param values Non-empty array of values
     * @return index of the first minimum
     */
    private static int indexOfMinimum(int[] values) {
        int minIndex = 0;
        for (int i = 1; i < values.length; i++) {
            if (values[i] < values[minIndex]) {
                minIndex = i;
            }
        }
        return minIndex;
    }

    /**
     * <p>
     * Computes the positive_class of the dataset (in an imbalanced classification problem) according to the frequency of patterns.
     * The positive class is the least frequent one; on ties the lowest class index is chosen.
     * </p>
     *
     * @param data Dataset used to compute the positive class
     * @return integer that represents the positive class
     */
    private static int positive_class (OpenDataset data) {
        return indexOfMinimum(classFrequencies(data));
    }

    /**
     * <p>
     * Computes the cost of misclassifying a positive instance in an imbalanced classification problem according to the frequency of patterns.
     * The cost is N- / N+, the number of patterns outside the positive class
     * divided by the number of patterns of the positive class. Since this is a
     * floating point division, the result is infinite when the positive class
     * has no patterns (NaN when the dataset has no patterns at all).
     * </p>
     *
     * @param data Dataset used to compute the cost of misclassifying a positive instance
     * @return cost of misclassifying a positive instance
     */
    private static double positive_cost(OpenDataset data) {
        int[] frequencies = classFrequencies(data);
        int positive = indexOfMinimum(frequencies);

        // Compute the N+ and N-
        int freqPos = frequencies[positive];
        int freqNeg = 0;
        for (int i = 0; i < frequencies.length; i++) {
            if (i != positive) {
                freqNeg += frequencies[i];
            }
        }
        return (double) freqNeg / (double) freqPos;
    }

    /**
     * <p>
     * Computes the cost of misclassifying a negative instance in an imbalanced classification problem.
     * The cost is the constant 1.0; the dataset is not consulted.
     * </p>
     *
     * @param data Dataset used to compute the cost of misclassifying a negative instance
     * @return cost of misclassifying a negative instance
     */
    private static double negative_cost(OpenDataset data) {
        return 1.0;
    }
}