/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Julián Luengo Martín 10/11/2005
* @version 0.2
* @since JDK 1.5
* </p>
*/
package keel.Algorithms.Preprocess.Transformations.min_max;
import java.io.*;
import java.util.*;
import keel.Dataset.*;
import keel.Algorithms.Preprocess.Basic.*;
/**
* <p>
* This class performs the min-max transformation. Every non-nominal attribute has
* its values rescaled to the new "min" and "max" bounds.
* </p>
*/
public class min_max {
    double min = 0; //min found in score set (not used by this class; kept for compatibility)
    double max = 0; //max found in score set (not used by this class; kept for compatibility)
    double new_min = 0; //new min after normalization
    double new_max = 1; //new max after normalization
    double tempData = 0; //last scaled value that was computed
    String[][] X = null; //transformed data: one row per instance, one column per attribute
    int ndatos = 0; //number of instances of the data set being processed
    int nentradas = 0; //number of input attributes
    int tipo = 0; //type of the attribute currently being processed
    int direccion = 0; //direction of the attribute currently being processed
    int nvariables = 0; //total number of attributes
    int nsalidas = 0; //number of output attributes
    InstanceSet IS;
    String input_train_name = "";
    String input_test_name = "";
    String output_train_name = "";
    String output_test_name = "";
    String temp = "";
    String data_out = "";

    /** Creates a new instance of min_max
     * @param fileParam The path to the configuration file with all the parameters in KEEL format
     */
    public min_max(String fileParam) {
        config_read(fileParam);
        IS = new InstanceSet();
    }

    /**
     * <p>
     * Process the training and test files provided in the parameters file to the constructor.
     * The training file is always transformed; the test file is transformed only when its
     * name differs from the training file name.
     * </p>
     */
    public void normalize() {
        transformFile(input_train_name, output_train_name, false);
        //does a test file associated exist?
        if (input_train_name.compareTo(input_test_name) != 0) {
            //the attribute definitions were overwritten with the new bounds while
            //processing the training file, so they are cleared before reading the test file
            transformFile(input_test_name, output_test_name, true);
        }
    }

    /**
     * Reads one data set, fills {@link #X} with its transformed values, sets the bounds of
     * every non-nominal attribute to [new_min, new_max] and writes the result.
     * Any exception raised while reading or transforming is reported on standard output
     * and terminates the program with exit code -1.
     *
     * @param inputName path of the data set to read
     * @param outputName path of the file to write
     * @param clearAttributes whether the current attribute definitions must be discarded before reading
     */
    private void transformFile(String inputName, String outputName, boolean clearAttributes) {
        try {
            if (clearAttributes) {
                Attributes.clearAll();
            }
            IS.readSet(inputName, true);
            ndatos = IS.getNumInstances();
            nvariables = Attributes.getNumAttributes();
            nentradas = Attributes.getInputNumAttributes();
            nsalidas = Attributes.getOutputNumAttributes();
            X = new String[ndatos][nvariables];//matrix with transformed data
            for (int i = 0; i < ndatos; i++) {
                Instance inst = IS.getInstance(i);
                int in = 0;  //position among the input attributes
                int out = 0; //position among the output attributes
                for (int j = 0; j < nvariables; j++) {
                    Attribute a = Attributes.getAttribute(j);
                    direccion = a.getDirectionAttribute();
                    tipo = a.getType();
                    if (direccion == Attribute.INPUT) {
                        if (inst.getInputMissingValues(in)) {
                            X[i][j] = "?";
                        } else if (tipo != Attribute.NOMINAL) {
                            //we want to normalize everything but nominal attributes
                            tempData = scale(inst.getInputRealValues(in), a);
                            X[i][j] = String.valueOf(tempData);
                        } else {
                            X[i][j] = inst.getInputNominalValues(in);
                        }
                        in++;
                    } else if (direccion == Attribute.OUTPUT) {
                        if (inst.getOutputMissingValues(out)) {
                            X[i][j] = "?";
                        } else if (tipo != Attribute.NOMINAL) {
                            tempData = scale(inst.getOutputRealValues(out), a);
                            X[i][j] = String.valueOf(tempData);
                        } else {
                            X[i][j] = inst.getOutputNominalValues(out);
                        }
                        out++;
                    }
                    //NOTE(review): attributes with any other direction are left as null
                    //cells in X; what should be written for them is still undecided.
                }
            }
            //the data now lies in [new_min, new_max], so the declared bounds must follow
            for (int j = 0; j < nvariables; j++) {
                Attribute a = Attributes.getAttribute(j);
                tipo = a.getType();
                if (tipo != Attribute.NOMINAL) {
                    a.setBounds(new_min, new_max);
                }
            }
        } catch (Exception e) {
            System.out.println("Dataset exception = " + e );
            System.exit(-1);
        }
        write_results(outputName);
    }

    /**
     * Maps a value from the bounds reported by the attribute to [new_min, new_max].
     *
     * @param value the original value
     * @param a the attribute that provides the original minimum and maximum
     * @return the rescaled value, or new_min when the attribute's minimum equals its maximum
     */
    private double scale(double value, Attribute a) {
        if (a.getMinAttribute() != a.getMaxAttribute()) {
            return (value - a.getMinAttribute()) / (a.getMaxAttribute() - a.getMinAttribute()) * (new_max - new_min) + new_min;
        }
        //degenerate range: avoid the division by zero
        return new_min;
    }

    /**
     * Reads the parameter file: algorithm name (ignored), input file names, output file
     * names, newMin and newMax, in that order, skipping empty lines.
     * A missing file, a truncated file or a malformed line is reported on standard output
     * and terminates the program with exit code -1.
     *
     * @param fileParam path to the parameter file
     */
    private void config_read(String fileParam) {
        File inputFile = new File(fileParam);
        if (!inputFile.exists()) {
            System.out.println("parameter "+fileParam+" file doesn't exists!");
            System.exit(-1);
        }
        //begin the configuration read from file
        try {
            BufferedReader buf_reader = new BufferedReader(new FileReader(inputFile));
            try {
                //first line holds the algorithm name, which is not needed
                nextNonEmptyLine(buf_reader);

                //input & output filenames
                String[] names = parseFilePair(valueAfter(nextNonEmptyLine(buf_reader), "inputData = "));
                input_train_name = names[0];
                input_test_name = names[1];

                names = parseFilePair(valueAfter(nextNonEmptyLine(buf_reader), "outputData = "));
                output_train_name = names[0];
                output_test_name = names[1];

                //parameters
                new_min = parseBound(valueAfter(nextNonEmptyLine(buf_reader), "newMin = "));
                new_max = parseBound(valueAfter(nextNonEmptyLine(buf_reader), "newMax = "));
            } finally {
                //release the file handle even when parsing fails
                buf_reader.close();
            }
        } catch (IOException e) {
            System.out.println("IO exception = " + e );
            System.exit(-1);
        }
    }

    /**
     * Returns the next line that is not empty.
     *
     * @param reader the reader to consume
     * @return the first non-empty line found
     * @throws IOException if the end of the file is reached first or the read fails
     */
    private static String nextNonEmptyLine(BufferedReader reader) throws IOException {
        String line;
        do {
            line = reader.readLine();
            if (line == null) {
                //readLine() returns null at end of stream
                throw new IOException("unexpected end of the parameter file");
            }
        } while (line.length() == 0); //avoid empty lines for processing -> produce exec failure
        return line;
    }

    /**
     * Extracts the text that follows a key in a parameter line.
     *
     * @param line the parameter line
     * @param key the key, including its trailing " = "
     * @return the text found after the key
     * @throws IOException if the line does not contain the key followed by a value
     */
    private static String valueAfter(String line, String key) throws IOException {
        String[] parts = line.split(key);
        if (parts.length < 2) {
            throw new IOException("malformed parameter line, expected '" + key + "<value>' but found: " + line);
        }
        return parts[1];
    }

    /**
     * Splits a value of the form {@code "first" "second"} into its two unquoted names.
     *
     * @param value the text holding the quoted names
     * @return a two-element array with the first and the second name
     * @throws IOException if two quoted names cannot be found
     */
    private static String[] parseFilePair(String value) throws IOException {
        String[] parts = value.split("\\s\"");
        if (parts.length < 2 || parts[0].length() < 2 || parts[1].length() < 2) {
            throw new IOException("malformed file list in the parameter file: " + value);
        }
        //the first piece still carries both quotes, the second only the closing one
        String first = parts[0].substring(1, parts[0].length() - 1);
        String second = parts[1].substring(0, parts[1].length() - 1);
        if (second.charAt(second.length() - 1) == '"') {
            second = second.substring(0, second.length() - 1);
        }
        return new String[] { first, second };
    }

    /**
     * Parses a bound given as text.
     *
     * @param text the textual number
     * @return the parsed value
     * @throws IOException if the text is not a valid number
     */
    private static double parseBound(String text) throws IOException {
        try {
            return Double.parseDouble(text); //parse the string into a double
        } catch (NumberFormatException e) {
            throw new IOException("invalid numeric parameter: " + text);
        }
    }

    /**
     * Writes the current attribute definitions followed by the transformed data held in
     * {@link #X}. An I/O failure is reported on standard output and terminates the program
     * with exit code -1.
     *
     * @param output path of the file to write
     */
    private void write_results(String output) {
        try {
            FileWriter file_write = new FileWriter(output);
            try {
                StringBuilder header = new StringBuilder();
                header.append("@relation ").append(Attributes.getRelationName()).append("\n");
                /* Printing the attributes; inputs and outputs share the same format */
                int total = Attributes.getNumAttributes();
                for (int i = 0; i < total; i++) {
                    Attribute a = Attributes.getAttribute(i);
                    header.append("@attribute ").append(a.getName()).append(" ");
                    if (a.getType() == Attribute.NOMINAL) {
                        header.append("{");
                        int count = a.getNominalValuesList().size();
                        for (int j = 0; j < count; j++) {
                            header.append((String) a.getNominalValuesList().elementAt(j));
                            if (j < count - 1) {
                                header.append(", ");
                            }
                        }
                        header.append("}\n");
                    } else if (a.getType() == Attribute.INTEGER) {
                        //NOTE(review): the data of these attributes is written as scaled
                        //doubles while the header keeps the integer type with truncated
                        //bounds; confirm that readers of the output accept this.
                        header.append("integer [")
                              .append((int) a.getMinAttribute()).append(", ")
                              .append((int) a.getMaxAttribute()).append("]\n");
                    } else {
                        //non-integer numeric attributes keep their type and exact bounds,
                        //whatever their direction is
                        header.append("real [")
                              .append(a.getMinAttribute()).append(", ")
                              .append(a.getMaxAttribute()).append("]\n");
                    }
                }
                file_write.write(header.toString());
                file_write.write(Attributes.getInputHeader()+"\n");
                file_write.write(Attributes.getOutputHeader()+"\n");
                //now, print the normalized data
                file_write.write("@data\n");
                for (int i = 0; i < ndatos; i++) {
                    file_write.write(X[i][0]);
                    for (int j = 1; j < nvariables; j++) {
                        file_write.write(","+X[i][j]);
                    }
                    file_write.write("\n");
                }
            } finally {
                //closing also flushes; a failure here is reported like any other I/O error
                file_write.close();
            }
        } catch (IOException e) {
            System.out.println("IO exception = " + e );
            System.exit(-1);
        }
    }
}