/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

//====================================================
// Adapted to Java for KEEL by Julian Luengo
// julianlm@decsai.ugr.es
//====================================================

package keel.Algorithms.Preprocess.Missing_Values.BPCA;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

import no.uib.cipr.matrix.DenseMatrix;

import org.core.Fichero;

import jp.ac.naist.dynamix.mpca.BPCAFill;
import keel.Dataset.*;

/**
 * <p>
 * Missing-value imputation driver that delegates the actual filling to
 * {@link BPCAFill}. For each of the train and test sets it writes the input
 * attributes to a temporary tab-separated text file (missing entries flagged
 * as 999.0), runs {@code BPCAFill.main} on that file, reads the filled matrix
 * back, and writes the result in KEEL format together with the original
 * output attribute values.
 * </p>
 */
public class BPCA {

	/** Value written to the temporary matrix to mark an entry as missing for jBPCAfill. */
	private static final double MISSING_FLAG = 999.0;

	/** Temporary file holding the matrix handed to BPCAFill. */
	private static final String TMP_INPUT = "dataMatrix.tmp";

	/** Temporary file BPCAFill writes the filled matrix to. */
	private static final String TMP_OUTPUT = "filledMatrix.tmp";

	InstanceSet IStrain;
	InstanceSet IStest;

	double eps = MachineAccuracy.EPSILON; //Floating-point relative accuracy

	String input_train_name = new String();
	String input_test_name = new String();
	String output_train_name = new String();
	String output_test_name = new String();
	String temp = new String();
	String data_out = new String("");

	/**
	 * <p>
	 * Creates a new object of BPCA using the parameter file indicated.
	 * Reads the file names from the parameter file and loads the train and
	 * test data sets. If either data set cannot be loaded, an error is
	 * printed and the JVM exits with status -1.
	 * </p>
	 * @param fileParam The path to the parameter file
	 */
	public BPCA(String fileParam){
		config_read(fileParam);
		IStrain = new InstanceSet();
		IStest = new InstanceSet();
		try {
			IStrain.readSet(input_train_name, true);
			IStest.readSet(input_test_name, false);
		} catch (DatasetException e) {
			System.err.println("Data set loading error, now exiting BPCA");
			e.printStackTrace();
			System.exit(-1);
		} catch (HeaderFormatException e) {
			System.err.println("Data set loading error, now exiting BPCA");
			e.printStackTrace();
			System.exit(-1);
		}
	}

	/**
	 * <p>
	 * Runs the BPCA algorithm: imputes the train set and writes it to the
	 * train output file, then does the same, independently, for the test set.
	 * </p>
	 */
	public void run(){
		System.out.println("\n\t---Processing train file---\n");
		imputeAndWrite(IStrain, output_train_name);

		System.out.println("\n\t---Processing test file---\n");
		imputeAndWrite(IStest, output_test_name);
	}

	/**
	 * Imputes the input attributes of one instance set through BPCAFill and
	 * writes the completed data set to disk.
	 *
	 * @param IS         the instance set to impute
	 * @param outputName path of the KEEL-format file to write
	 */
	private void imputeAndWrite(InstanceSet IS, String outputName){
		int numInstances = IS.getNumInstances();
		int numInputs = Attributes.getInputNumAttributes();

		// The matrix text is rebuilt from scratch for every instance set, so
		// nothing from a previous pass can leak into this one.
		Fichero.escribeFichero(TMP_INPUT, buildDataMatrix(IS));
		BPCAFill.main(new String[] { TMP_INPUT, TMP_OUTPUT });

		// Tokenise on runs of whitespace so blank separators (e.g. "\r\n"
		// line endings) never yield empty tokens.
		String[] tokens = Fichero.leeFichero(TMP_OUTPUT).trim().split("\\s+");
		DenseMatrix filled = new DenseMatrix(numInstances, numInputs);
		int pos = 0;
		for(int i=0;i<numInstances;i++){
			for(int j=0;j<numInputs;j++){
				filled.set(i, j, Double.parseDouble(tokens[pos]));
				pos++;
			}
		}

		String[][] X = new String[numInstances][Attributes.getNumAttributes()]; // matrix with transformed data
		data2string(filled, X, IS);
		write_results(outputName, X, IS);
	}

	/**
	 * Serialises the input attribute values of an instance set as text: one
	 * instance per line, values separated by tabs, missing values replaced by
	 * the 999.0 flag.
	 *
	 * @param IS the instance set to serialise
	 * @return the matrix text (no trailing newline)
	 */
	private String buildDataMatrix(InstanceSet IS){
		StringBuilder matrix = new StringBuilder();
		int numInstances = IS.getNumInstances();

		for(int i=0;i<numInstances;i++){
			Instance inst = IS.getInstance(i);
			double[] inputs = inst.getAllInputValues();
			for(int j=0;j<inputs.length;j++){
				double value;
				if(inst.getInputMissingValues(j)){
					value = MISSING_FLAG; //jBPCAfill flags the missing values as '999.0'
				}
				else{
					value = inputs[j];
					if(value == MISSING_FLAG){
						// A real value equal to the flag must be changed slightly.
						// Adding a machine-epsilon-sized constant would be absorbed
						// by rounding (doubles near 999 are ~1.1e-13 apart), so step
						// to the next representable double instead.
						// NOTE(review): assumes MachineAccuracy.EPSILON is ~2.2e-16;
						// its definition is not visible in this file.
						value = Math.nextUp(value);
					}
				}
				matrix.append(value);
				if(j<(inputs.length-1))
					matrix.append('\t');
			}
			if(i < numInstances-1)
				matrix.append('\n');
		}
		return matrix.toString();
	}

	/**
	 * Reads the parameter file and stores the train/test input and output
	 * file names in the corresponding fields. The first non-empty line (the
	 * algorithm line) is skipped; the next two non-empty lines must be the
	 * <code>inputData = </code> and <code>outputData = </code> lines, each
	 * holding two double-quoted file names.
	 * If the file does not exist, ends prematurely, is malformed, or cannot
	 * be read, a message is printed and the JVM exits with status -1.
	 *
	 * @param fileParam The path to the parameter file
	 */
	protected void config_read(String fileParam) {
		File inputFile = new File(fileParam);

		if (!inputFile.exists()) {
			System.out.println("parameter " + fileParam
					+ " file doesn't exists!");
			System.exit(-1);
		}

		BufferedReader buf_reader = null;
		try {
			buf_reader = new BufferedReader(new FileReader(inputFile));

			// algorithm name line: not used by this method
			nextNonEmptyLine(buf_reader, fileParam);

			// input & output filenames
			String[] inputs = parseFilePair(
					nextNonEmptyLine(buf_reader, fileParam), "inputData = ");
			input_train_name = inputs[0];
			input_test_name = inputs[1];

			String[] outputs = parseFilePair(
					nextNonEmptyLine(buf_reader, fileParam), "outputData = ");
			output_train_name = outputs[0];
			output_test_name = outputs[1];
		} catch (IOException e) {
			System.out.println("IO exception = " + e);
			e.printStackTrace();
			System.exit(-1);
		} finally {
			if (buf_reader != null) {
				try {
					buf_reader.close();
				} catch (IOException ignored) {
					// read-only stream: nothing useful can be done if close fails
				}
			}
		}
	}

	/**
	 * Returns the next line of the reader that is not empty.
	 *
	 * @param reader    the reader to consume lines from
	 * @param fileParam file name, used only in the error message
	 * @return the first non-empty line found
	 * @throws IOException if the end of the file is reached first, or on a read error
	 */
	private static String nextNonEmptyLine(BufferedReader reader, String fileParam)
			throws IOException {
		String line;
		do {
			line = reader.readLine();
			if (line == null) {
				throw new IOException("Unexpected end of parameter file " + fileParam);
			}
		} while (line.length() == 0);
		return line;
	}

	/**
	 * Extracts the two quoted file names that follow <code>key</code> in a
	 * parameter line such as <code>inputData = "a.dat" "b.dat"</code>.
	 *
	 * @param line the parameter line
	 * @param key  the text preceding the file names, e.g. <code>"inputData = "</code>
	 * @return a two-element array: first and second file name, without quotes
	 * @throws IOException if the line does not have the expected shape
	 */
	private static String[] parseFilePair(String line, String key) throws IOException {
		String[] parts = line.split(key);
		if (parts.length < 2) {
			throw new IOException("Malformed parameter line, expected '" + key + "': " + line);
		}
		// Split at "whitespace followed by a quote": the first piece keeps both
		// of its quotes, the second keeps only its closing quote.
		String[] names = parts[1].split("\\s\"");
		if (names.length < 2 || names[0].length() < 2) {
			throw new IOException("Malformed parameter line, expected two quoted file names: " + line);
		}
		String first = names[0].substring(1, names[0].length() - 1);
		String second = names[1].substring(0, names[1].length() - 1);
		if (second.endsWith("\"")) {
			second = second.substring(0, second.length() - 1);
		}
		return new String[] { first, second };
	}

	/**
	 * Parse the DenseMatrix of INPUT real values to a String 2D array, ready for printing
	 * to a file. It also fits the values to the original bounds if needed.
	 * Values of INPUT attributes are taken from the matrix; values of the
	 * remaining attributes are taken from the instance's output values.
	 * Non-nominal values are clamped to the attribute's [min, max] range;
	 * integer values are rounded; nominal values are rounded, clamped to a
	 * valid index and replaced by the corresponding nominal label.
	 *
	 * @param mat The DenseMatrix with the input values in double format
	 * @param X The output String matrix, ready to be printed
	 * @param IS The InstanceSet with the original values, used to obtain the OUTPUT values
	 */
	protected void data2string(DenseMatrix mat, String [][] X,InstanceSet IS){
		for(int i=0;i<X.length;i++){
			int in = 0;   // next column of mat to consume
			int out = 0;  // next output value of the instance to consume
			double[] outputValues = IS.getInstance(i).getAllOutputValues();

			for(int j=0;j<X[i].length;j++){
				Attribute a = Attributes.getAttribute(j);
				double value;

				if(a.getDirectionAttribute() == Attribute.INPUT){
					value = mat.get(i, in);
					in++;
				}
				else{
					value = outputValues[out];
					out++;
				}

				if(a.getType() != Attribute.NOMINAL){
					if(value < a.getMinAttribute())
						value = a.getMinAttribute();
					else if(value > a.getMaxAttribute())
						value = a.getMaxAttribute();
				}

				if(a.getType() == Attribute.REAL)
					X[i][j] = String.valueOf(value);
				else if(a.getType() == Attribute.INTEGER)
					X[i][j] = String.valueOf(Math.round(value));
				else{
					// nominal: the numeric value is an index into the label list
					value = Math.round(value);
					if(value >= a.getNumNominalValues())
						value = a.getNumNominalValues()-1;
					if(value < 0)
						value = 0;

					X[i][j] = a.getNominalValue((int)value);
				}
			}
		}
	}

	/**
	 * Write data matrix X to disk, in KEEL format: the header of the
	 * instance set, an <code>@data</code> line, and one comma-separated line
	 * per row of X. On an I/O error a message is printed and the JVM exits
	 * with status -1.
	 *
	 * @param output path of the file to write
	 * @param X      the data, one row per instance
	 * @param IS     the instance set that provides the header
	 */
	protected void write_results(String output,String[][] X,InstanceSet IS){
		FileWriter file_write = null;
		try {
			file_write = new FileWriter(output);

			file_write.write(IS.getHeader());

			//now, print the normalized data
			file_write.write("@data\n");
			for(int i=0;i<X.length;i++){
				for(int j=0;j<X[i].length;j++){
					if(j > 0)
						file_write.write(",");
					file_write.write(X[i][j]);
				}
				file_write.write("\n");
			}
			// flush inside the try so a failed write is reported, not hidden by close
			file_write.flush();
		} catch (IOException e) {
			System.out.println("IO exception = " + e );
			System.exit(-1);
		} finally {
			if (file_write != null) {
				try {
					file_write.close();
				} catch (IOException ignored) {
					// data was already flushed above; nothing more to do
				}
			}
		}
	}

}