/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
S. Garc�a (sglopez@ujaen.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
//====================================================
// Adapted to Java for KEEL by Julian Luengo
// julianlm@decsai.ugr.es
//====================================================
package keel.Algorithms.Preprocess.Missing_Values.SVDimpute;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import keel.Algorithms.Preprocess.Missing_Values.EM.*;
import keel.Algorithms.Preprocess.Missing_Values.EM.util.MachineAccuracy;
import keel.Dataset.Attribute;
import keel.Dataset.Attributes;
import keel.Dataset.DatasetException;
import keel.Dataset.HeaderFormatException;
import keel.Dataset.Instance;
import keel.Dataset.InstanceSet;
import no.uib.cipr.matrix.DenseMatrix;
import no.uib.cipr.matrix.NotConvergedException;
import no.uib.cipr.matrix.SVD;
import flanagan.analysis.Regression;
/**
* This class implements the Single Value Decomposition Imputation
* @author Julian Luengo Martin
*/
public class SVDimpute {
final static int TTLS = 1;
final static int MRIDGE = 2;
final static int IRIDGE = 3;
DenseMatrix m;
double eps = MachineAccuracy.EPSILON; //Floating-point relative accuracy
InstanceSet IStrain;
InstanceSet IStest;
//parameters
int maxit;
double stagtol = 5e-10;
int optRegression;
int neigs;
double regpar = Double.NaN;
double minvarfrac = 0;
double inflation = 1;
int trunc = 4;
boolean useRegPar = false;
int nSingularValuestaken = 10;
String input_train_name = new String();
String input_test_name = new String();
String output_train_name = new String();
String output_test_name = new String();
String temp = new String();
String data_out = new String("");
/**
* <p>
* Creates a new object of SVDI based on the parameter file provided
* </p>
* @param fileParam the path to the parameter file
*/
public SVDimpute(String fileParam) {
config_read(fileParam);
IStrain = new InstanceSet();
IStest = new InstanceSet();
try {
IStrain.readSet(input_train_name, true);
IStest.readSet(input_test_name, false);
} catch (DatasetException e) {
System.err.println("Data set loading error, now exiting SVDimpute");
e.printStackTrace();
System.exit(-1);
} catch (HeaderFormatException e) {
System.err.println("Data set loading error, now exiting SVDimpute");
e.printStackTrace();
System.exit(-1);
}
}
/**
* <p>
* It runs the SVDI algorithm once the configuration has been readed
* </p>
*/
public void run(){
DenseMatrix train,test;
Regression reg;
Attribute at;
Instance inst;
String[][] X;
EM initialEstimation;
SVD sings = null;
EV greaterEV;
DenseMatrix V_t;
int kmisr[][],kvalr[][],pos,in,out,minSize;
double y[],x[][],coefs[],result[];
initialEstimation = new EM(maxit,stagtol,optRegression,neigs,regpar,
minvarfrac, inflation, trunc,useRegPar);
//put the train data into a DenseMatrix Class, looking for easier matrix operations
train = new DenseMatrix(IStrain.getNumInstances(),Attributes.getNumAttributes());
for(int i=0;i<IStrain.getNumInstances();i++){
inst = IStrain.getInstance(i);
in = out = 0;
for(int j=0;j<Attributes.getNumAttributes();j++){
at = Attributes.getAttribute(j);
if(at.getDirectionAttribute() == Attribute.INPUT){
train.set(i, j, inst.getAllInputValues()[in]);
in++;
}
else{
train.set(i, j, inst.getAllOutputValues()[out]);
out++;
}
}
}
if(nSingularValuestaken > Attributes.getNumAttributes()-3){
System.out.print("\nWarning: There are less attributes than Singular Values desired. ");
System.out.println("Reducing the amount of Singular Values from "+nSingularValuestaken+" to "+(Attributes.getInputNumAttributes()-3));;
nSingularValuestaken = Attributes.getNumAttributes()-3;
}
minSize = Math.min(IStrain.getNumInstances(),IStest.getNumInstances());
if(nSingularValuestaken > minSize){
System.out.print("\nWarning: There are less instances than Singular Values desired. ");
System.out.println("Reducing the amount of Singular Values from "+nSingularValuestaken+" to "+minSize);;
nSingularValuestaken = minSize;
}
//Impute by means of EM regression
//the results are stored in the matrix passed by argument
initialEstimation.regem(train,IStrain);
System.out.print("Computing the SVD fot the EM refined data... ");
//compute the eigenvectors from the data set
greaterEV = psings(train, nSingularValuestaken);
// try {
// sings = SVD.factorize(train);
// } catch (NotConvergedException e) {
// System.err.println("Error: Matrix Singular Value Descomposition didn't converge");
// e.printStackTrace();
// System.exit(1);
// }
V_t = greaterEV.V;
// V_t = sings.getVt();
System.out.println("Done");
//obtain the positions of the missing values
//previously computed by the EM algorithm
//and impute them with the SVD regression
System.out.print("Applying SVD regression... ");
kmisr = initialEstimation.getKmisr();
x = new double[nSingularValuestaken][Attributes.getNumAttributes()-1];
y = new double[Attributes.getNumAttributes()-1];
for(int i=0;i<kmisr.length;i++){
result = new double[kmisr[i].length];
for(int j=0;j<kmisr[i].length;j++){
pos = kmisr[i][j];
for(int k=0,a=0;k<Attributes.getNumAttributes();k++){
if(k!=pos){
y[a] = train.get(i, k);
a++;
}
}
for(int b=0;b<nSingularValuestaken;b++){
for(int k=0,a=0;k<Attributes.getNumAttributes();k++){
if(k!=pos){
x[b][a] = V_t.get(b, k);
a++;
}
}
}
//apply the regression
reg = new Regression(x,y);
//general linear regression
reg.linear();
coefs = reg.getCoeff();
result[j] = coefs[0];
for(int k=1;k<coefs.length;k++){
result[j] += coefs[k] * V_t.get(k-1, pos);
}
}
for(int j=0;j<kmisr[i].length;j++){
pos = kmisr[i][j];
train.set(i, pos, result[j]);
}
}
System.out.println("Done");
X = new String[IStrain.getNumInstances()][Attributes.getNumAttributes()];//matrix with transformed data
data2string(train,X,IStrain);
write_results(output_train_name,X,IStrain);
/** Apply on test data **/
System.out.println("\n\n Test partition");
test = new DenseMatrix(IStest.getNumInstances(),Attributes.getNumAttributes());
for(int i=0;i<IStest.getNumInstances();i++){
inst = IStest.getInstance(i);
in = out = 0;
for(int j=0;j<Attributes.getNumAttributes();j++){
at = Attributes.getAttribute(j);
if(at.getDirectionAttribute() == Attribute.INPUT){
test.set(i, j, inst.getAllInputValues()[in]);
in++;
}
else{
test.set(i, j, inst.getAllOutputValues()[out]);
out++;
}
}
}
// Impute by means of EM regression
//the results are stored in the matrix passed by argument
initialEstimation.regem(test,IStest);
System.out.print("Computing the SVD fot the EM refined data... ");
//compute the eigenvectors from the data set
greaterEV = psings(test, nSingularValuestaken);
V_t = greaterEV.V;
System.out.println("Done");
//obtain the positions of the missing values
//previously computed by the EM algorithm
//and impute them with the SVD regression
System.out.print("Applying SVD regression... ");
kmisr = initialEstimation.getKmisr();
x = new double[nSingularValuestaken][Attributes.getNumAttributes()-1];
y = new double[Attributes.getNumAttributes()-1];
for(int i=0;i<kmisr.length;i++){
result = new double[kmisr[i].length];
for(int j=0;j<kmisr[i].length;j++){
pos = kmisr[i][j];
for(int k=0,a=0;k<Attributes.getNumAttributes();k++){
if(k!=pos){
y[a] = train.get(i, k);
a++;
}
}
for(int b=0;b<nSingularValuestaken;b++){
for(int k=0,a=0;k<Attributes.getNumAttributes();k++){
if(k!=pos){
x[b][a] = V_t.get(b, k);
a++;
}
}
}
//apply the regression
reg = new Regression(x,y);
//general linear regression
reg.linear();
coefs = reg.getCoeff();
result[j] = coefs[0];
for(int k=1;k<coefs.length;k++){
result[j] += coefs[k] * V_t.get(k-1, pos);
}
}
for(int j=0;j<kmisr[i].length;j++){
pos = kmisr[i][j];
train.set(i, pos, result[j]);
}
}
System.out.println("Done");
X = new String[IStest.getNumInstances()][Attributes.getNumAttributes()];//matrix with transformed data
data2string(test,X,IStest);
write_results(output_test_name,X,IStest);
}
/**
* <p>
* Computes the rmax eigenvalues of a given matrix (with greater absolute value)
* </p>
* @param A The matrix from which we want to compute the eigenvalues
* @param rmax the maximum number of greatest eigenvalues obtained
* @return the rmax eigenvalues with greater absolute values
*/
public EV psings(DenseMatrix A, int rmax){
EV values = null;
int m,n,r;
DenseMatrix V,V_t;
double d[] = null;
double posEigen[] = null;
double d_min;
EVpair p[];
SVD sings;
m = A.numRows();
n = A.numColumns();
if(rmax > Math.min(m, n))
rmax = Math.min(m, n);
//get first rmax eigenvectors of A
sings = new SVD(m, n);
try {
sings = SVD.factorize(A);
d = sings.getS();
} catch (NotConvergedException e) {
System.err.println("Error: Matrix Singular Value Descomposition didn't converge");
// e.printStackTrace();
System.exit(1);
}
p = new EVpair[d.length];
for(int i=0;i<p.length;i++)
p[i] = new EVpair(d[i],i);
//sort in ascending order
// Arrays.sort(p);
//ensure that eigenvalues are monotonically decreasing
// int len = d.length;
// int hlen = len / 2;
// EVpair temp;
// for(int i = 0; i < hlen; i++)
// {
// temp = p[i];
// p[i] = p[len - 1 - i];
// p[len - 1 - i] = temp;
// }
Arrays.sort(p,Collections.reverseOrder());
// d_min = p[0].eigenValue * Math.max(m,n) * eps;
// r = 0;
// for(int i=0;i<d.length;i++)
// if(p[i].eigenValue>d_min)
// r++;
r = d.length;
posEigen = new double[r];
V_t = sings.getVt();
V = new DenseMatrix(rmax,n);
for(int i=0;i<Math.min(r,rmax);i++){
posEigen[i] = p[i].eigenValue;
for(int j=0;j<n;j++){
V.set(i,j,V_t.get(p[i].evIndex,j));
}
}
values = new EV(V,posEigen);
return values;
}
// Read the pattern file, and parse data into strings
protected void config_read(String fileParam) {
File inputFile = new File(fileParam);
if (inputFile == null || !inputFile.exists()) {
System.out.println("parameter " + fileParam
+ " file doesn't exists!");
System.exit(-1);
}
// begin the configuration read from file
try {
FileReader file_reader = new FileReader(inputFile);
BufferedReader buf_reader = new BufferedReader(file_reader);
// FileWriter file_write = new FileWriter(outputFile);
String line;
do {
line = buf_reader.readLine();
} while (line.length() == 0); // avoid empty lines for processing
// ->
// produce exec failure
String out[] = line.split("algorithm = ");
// alg_name = new String(out[1]); //catch the algorithm name
// input & output filenames
do {
line = buf_reader.readLine();
} while (line.length() == 0);
out = line.split("inputData = ");
out = out[1].split("\\s\"");
input_train_name = new String(out[0].substring(1,out[0].length() - 1));
input_test_name = new String(out[1].substring(0,out[1].length() - 1));
if (input_test_name.charAt(input_test_name.length() - 1) == '"')
input_test_name = input_test_name.substring(0, input_test_name
.length() - 1);
do {
line = buf_reader.readLine();
} while (line.length() == 0);
out = line.split("outputData = ");
out = out[1].split("\\s\"");
output_train_name = new String(out[0].substring(1,
out[0].length() - 1));
output_test_name = new String(out[1].substring(0,
out[1].length() - 1));
if (output_test_name.charAt(output_test_name.length() - 1) == '"')
output_test_name = output_test_name.substring(0,
output_test_name.length() - 1);
// parameters
// do {
// line = buf_reader.readLine();
// } while (line.length() == 0);
// out = line.split("seed = ");
// seed = (new Integer(out[1])).intValue();
//
/*do {
line = buf_reader.readLine();
} while (line.length() == 0);
out = line.split("SVMtype = ");
svmType = (new String(out[1])); */
do {
line = buf_reader.readLine();
} while (line.length() == 0);
out = line.split("RegrParameter = ");
regpar = (new Double(out[1])).doubleValue(); // parse the string into
do {
line = buf_reader.readLine();
} while (line.length() == 0);
out = line.split("MaxIter = ");
maxit = (new Integer(out[1])).intValue(); // parse the string into
do {
line = buf_reader.readLine();
} while (line.length() == 0);
out = line.split("RegressionType = ");
if(out[1].compareTo("mridge")==0)
optRegression = EM.MRIDGE;
else if(out[1].compareTo("iridge")==0)
optRegression = EM.IRIDGE;
else if(out[1].compareTo("ttls")==0)
optRegression = EM.TTLS;
do {
line = buf_reader.readLine();
} while (line.length() == 0);
out = line.split("StagnationTolerance = ");
stagtol = (new Double(out[1])).doubleValue(); // parse the string into
do {
line = buf_reader.readLine();
} while (line.length() == 0);
out = line.split("NumberOfEigens = ");
neigs = (new Integer(out[1])).intValue(); // parse the string into
do {
line = buf_reader.readLine();
} while (line.length() == 0);
out = line.split("MinimumFractionOfTotalVariation = ");
minvarfrac = (new Double(out[1])).doubleValue(); // parse the string into
do {
line = buf_reader.readLine();
} while (line.length() == 0);
out = line.split("CovMatrixInflationFactor = ");
inflation = (new Double(out[1])).doubleValue(); // parse the string into
do {
line = buf_reader.readLine();
} while (line.length() == 0);
out = line.split("UseRegPar = ");
useRegPar = (out[1].compareTo("Yes")==0); // parse the string into
do {
line = buf_reader.readLine();
} while (line.length() == 0);
out = line.split("NumOfSingularVectors = ");
nSingularValuestaken = (new Integer(out[1])).intValue(); // parse the string into
} catch (IOException e) {
System.out.println("IO exception = " + e);
e.printStackTrace();
System.exit(-1);
}
}
/**
* Parse the DenseMatrix of INPUT real values to a String 2D array, ready for printing
* to a file. It also fits the values to the original bounds if needed.
* @param mat The DenseMatrix with the input values in double format
* @param X The output String matrix, ready to be printed
* @param IS The InstanceSet with the original values, used to obtain the OUTPUT values
*/
protected void data2string(DenseMatrix mat, String [][] X,InstanceSet IS){
Attribute a;
Instance inst;
double value;
int in,out;
for(int i=0;i<X.length;i++){
in = 0;
out = 0;
inst = IS.getInstance(i);
for(int j=0;j<X[i].length;j++){
a = Attributes.getAttribute(j);
//older version - SVDi only used inputs attributes
// if(a.getDirectionAttribute() == Attribute.INPUT){
// value = mat.get(i, in);
// in++;
// }
// else{
// value = inst.getAllOutputValues()[out];
// out++;
// }
value = mat.get(i, j);
if(a.getType() != Attribute.NOMINAL){
if(value < a.getMinAttribute())
value = a.getMinAttribute();
else if(value > a.getMaxAttribute())
value = a.getMaxAttribute();
}
if(a.getType() == Attribute.REAL)
X[i][j] = String.valueOf(value);
else if(a.getType() == Attribute.INTEGER)
X[i][j] = String.valueOf(Math.round(value));
else{
value = Math.round(value);
if(value >= a.getNumNominalValues())
value = a.getNumNominalValues()-1;
if(value < 0)
value = 0;
X[i][j] = a.getNominalValue((int)value);
}
}
}
}
// Write data matrix X to disk, in KEEL format
protected void write_results(String output,String[][] X,InstanceSet IS){
//File OutputFile = new File(output_train_name.substring(1, output_train_name.length()-1));
try {
FileWriter file_write = new FileWriter(output);
file_write.write(IS.getHeader());
//now, print the normalized data
file_write.write("@data\n");
for(int i=0;i<X.length;i++){
//System.out.println(i);
file_write.write(X[i][0]);
for(int j=1;j<X[i].length;j++){
file_write.write(","+X[i][j]);
}
file_write.write("\n");
}
file_write.close();
} catch (IOException e) {
System.out.println("IO exception = " + e );
System.exit(-1);
}
}
}