/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
S. Garc�a (sglopez@ujaen.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Juli�n Luengo Mart�n 18/02/2009
* @version 0.2
* @since JDK 1.5
* </p>
*/
package keel.Algorithms.Preprocess.Transformations.Nominal2Binary;
import java.io.*;
import java.util.*;
import keel.Dataset.*;
import keel.Algorithms.Preprocess.Basic.*;
/**
* <p>
* This class performs the nominal to binary transformation.
* A nominal attribute is broken down into several binary ones, one per
* nominal value of the original attribute. These new attributes will
* present the '1' value if the nominal value was present, '0' otherwise.
* </p>
*/
public class Nominal2Binary {
double tempData = 0;
String[][] X = null;
int ndatos = 0;
int nentradas = 0;
int tipo = 0;
int direccion = 0;
int nvariables = 0;
int nsalidas = 0;
InstanceSet IS;
String input_train_name = new String();
String input_test_name = new String();
String output_train_name = new String();
String output_test_name = new String();
String temp = new String();
String data_out = new String("");
/** Creates a new instance of min_max
* @param fileParam The path to the configuration file with all the parameters in KEEL format
*/
public Nominal2Binary(String fileParam) {
config_read(fileParam);
IS = new InstanceSet();
}
/**
* <p>
* Process the training and test files provided in the parameters file to the constructor.
* </p>
*/
public void transform(){
InstanceSet transformed = null;
File file;
PrintWriter pw;
try {
// Load in memory a dataset that contains a classification problem
IS.readSet(input_train_name,true);
transformed = convertNominal2Binary(IS);
}catch (Exception e){
System.out.println("Nominal2Binary exception = " + e );
e.printStackTrace();
System.exit(1);
}
write_results(output_train_name,transformed);
/***************************************************************************************/
//does a test file associated exist?
if(input_train_name.compareTo(input_test_name)!=0){
try {
// Load in memory a dataset that contains a classification problem
IS.readSet(input_test_name,false);
transformed = convertNominal2Binary(IS);
}catch (Exception e){
System.out.println("Nominal2Binary exception = " + e );
e.printStackTrace();
System.exit(1);
}
write_results(output_test_name,transformed);
}
}
private void config_read(String fileParam){
File inputFile = new File(fileParam);
if (inputFile == null || !inputFile.exists()) {
System.out.println("parameter "+fileParam+" file doesn't exists!");
System.exit(-1);
}
//begin the configuration read from file
try {
FileReader file_reader = new FileReader(inputFile);
BufferedReader buf_reader = new BufferedReader(file_reader);
//FileWriter file_write = new FileWriter(outputFile);
String line;
do{
line = buf_reader.readLine();
}while(line.length()==0); //avoid empty lines for processing -> produce exec failure
String out[]= line.split("algorithm = ");
//alg_name = new String(out[1]); //catch the algorithm name
//input & output filenames
do{
line = buf_reader.readLine();
}while(line.length()==0);
out= line.split("inputData = ");
out = out[1].split("\\s\"");
input_train_name = new String(out[0].substring(1, out[0].length()-1));
input_test_name = new String(out[1].substring(0, out[1].length()-1));
if(input_test_name.charAt(input_test_name.length()-1)=='"')
input_test_name = input_test_name.substring(0,input_test_name.length()-1);
do{
line = buf_reader.readLine();
}while(line.length()==0);
out = line.split("outputData = ");
out = out[1].split("\\s\"");
output_train_name = new String(out[0].substring(1, out[0].length()-1));
output_test_name = new String(out[1].substring(0, out[1].length()-1));
if(output_test_name.charAt(output_test_name.length()-1)=='"')
output_test_name = output_test_name.substring(0,output_test_name.length()-1);
file_reader.close();
} catch (IOException e) {
System.out.println("IO exception = " + e );
System.exit(-1);
}
}
private void write_results(String output,InstanceSet transformed){
Instance inst;
//File OutputFile = new File(output_train_name.substring(1, output_train_name.length()-1));
try {
FileWriter file_write = new FileWriter(output);
file_write.write(transformed.getNewHeader());
//now, print the normalized data
file_write.write("@data\n");
for(int i=0;i<transformed.getNumInstances();i++){
inst = transformed.getInstance(i);
file_write.write(inst.toString(transformed.getAttributeDefinitions()));
file_write.write("\n");
}
file_write.close();
} catch (IOException e) {
System.out.println("IO exception = " + e );
System.exit(-1);
}
}
/**
* Creates a new allocated KEEL's set of Instances (i.e. Instances) from a KEEL's set of instances
* (i.e. InstanceSet). The new InstanceSet will not contain nominal values, as they
* have been transformed into binary attributes.
* @param is The original KEEL Instance set
* @return A new allocated KEEL formatted Instance set
*/
public InstanceSet convertNominal2Binary(InstanceSet is){
Attribute a,newAt;
Instance instW,instK;
int out,in,newNumAttributes,enlargedValueVectorPos;
double values[];
InstanceSet data;
Vector atts;
// Create header of instances object
out = Attributes.getInputNumAttributes(); //the class attribute is usually the last one
//convert the nominal values to binary strings
newNumAttributes = 0;
atts = new Vector(Attributes.getNumAttributes());
for(int i=0;i<Attributes.getNumAttributes();i++){
a = Attributes.getAttribute(i);
if(a.getType()==Attribute.NOMINAL && a.getDirectionAttribute()!=Attribute.OUTPUT){
if(a.getNumNominalValues()>2){ //more than 2 nominal values implies 1 new attribute per value
newNumAttributes+=a.getNumNominalValues();
for(int j=0;j<a.getNumNominalValues();j++){
newAt = new Attribute();
newAt.setType(Attribute.INTEGER);
newAt.setDirectionAttribute(a.getDirectionAttribute());
newAt.setName(a.getName()+"="+a.getNominalValue(j));
newAt.enlargeBounds(0);
newAt.enlargeBounds(1);
atts.addElement(newAt);
}
}else{ //if it has only 2 nominal values, the binary conversion is trivial
newNumAttributes++;
//the old attribute is not useful since it is nominal, create an integer equivalent
newAt = new Attribute();
newAt.setType(Attribute.INTEGER);
newAt.setDirectionAttribute(a.getDirectionAttribute());
newAt.setName(a.getName());
newAt.enlargeBounds(0);
newAt.enlargeBounds(1);
atts.addElement(newAt);
}
}
if(a.getType()!=Attribute.NOMINAL){
newNumAttributes++;
atts.addElement(a);
}
if(a.getDirectionAttribute()==Attribute.OUTPUT){
atts.addElement(a);
out = newNumAttributes;
newNumAttributes++;
}
}
data = new InstanceSet(true);
for(int i=0;i<atts.size();i++){
data.addAttribute((Attribute)atts.get(i));
}
//now fill the data in the data instance set
for(int i=0;i<is.getNumInstances();i++){
instK = is.getInstance(i);
in = out = 0;
enlargedValueVectorPos = 0;
values = new double[newNumAttributes];
for(int j=0;j<Attributes.getNumAttributes();j++){
a = Attributes.getAttribute(j);
if(a.getDirectionAttribute()==Attribute.INPUT){
if(a.getType()==Attribute.NOMINAL){
if(a.getNumNominalValues()>2){
for(int k=0;k<a.getNumNominalValues();k++){
if(!instK.getInputMissingValues(in) && a.getNominalValue(k).compareTo(instK.getInputNominalValues(in))==0)
values[enlargedValueVectorPos+k] = 1;
}
enlargedValueVectorPos+=a.getNumNominalValues();
}else{
values[enlargedValueVectorPos] = instK.getAllInputValues()[in];
enlargedValueVectorPos++;
}
}else{
values[enlargedValueVectorPos] = instK.getAllInputValues()[in];
enlargedValueVectorPos++;
}
in++;
}else{
values[enlargedValueVectorPos] = instK.getAllOutputValues()[out];
out++;
enlargedValueVectorPos++;
}
}
instW = new Instance(values,data.getAttributeDefinitions());
data.addInstance(instW);
}
return data;
}
}