/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
//==================================================
//
// LLSimpute - Local Least Squares Imputation
//
// (Missing Value Estimation Package)
//
// Author: Hyunsoo Kim
// Date: Fall/2003 - Spring/2004
// E-mail: hskim@cs.umn.edu
// Personal homepage:
// http://www.cs.umn.edu/~hskim
// Reference: Missing value estimation for DNA microarray gene
// expression data: Local Least Squares Imputation, H. Kim,
// G. H. Golub, and H. Park, Bioinformatics, to appear, 2004.
// This software may be free downloaded from site:
// http://www.cs.umn.edu/~hskim/tools.html
// License:
// It is free for academic or nonprofit institutions.
// All right is reserved regarding commercial usage.
// Please consult if you try to use this package for
// commercial purpose.
// Comments:
// Please let me know if you have done any improvement.
//
// Sample Usage:
// // please use miss0.mat distributed in the same package
// load miss0.mat
// // read miss0.mat and impute
// E=impute_llsq_l2_blind(0,1,210);
// idx=find(miss_matrix==1e99);
// answer=matrix(idx);
// guess=E(idx);
// nrmse=sqrt(mean((guess-answer).^2))/std(answer)
// you will see ---> nrmse=0.5145
//
// Description:
//
// function E=impute_llsq_l2_blind(set,fig,mink)
// impute the missing values without k-value estimator
//
// Input parameter:
// set - the number of set (if set=0, it reads miss0.mat)
// fig - draw helpful figure and echo some comments
// mink - the number of nearest neighbor genes
// Output parameter:
// E - the estimated matrix (if set=0, it writes e0.csv and e0.mat)
// Data structure:
// miss0.mat should contain miss_matrix variable.
// missing values of miss_matrix should be 1e99.
// Needed other products:
// impute_rowavg.m
//
//====================================================
// Adapted to Java for KEEL by Julian Luengo
// julianlm@decsai.ugr.es
//====================================================
package keel.Algorithms.Preprocess.Missing_Values.LLSImpute;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import keel.Algorithms.Preprocess.Missing_Values.EM.util.MachineAccuracy;
import keel.Dataset.Attribute;
import keel.Dataset.Attributes;
import keel.Dataset.DatasetException;
import keel.Dataset.HeaderFormatException;
import keel.Dataset.Instance;
import keel.Dataset.InstanceSet;
import no.uib.cipr.matrix.*;
/**
* This class implements the Local Least Squares Imputation
* @author Julian Luengo Martin
*/
public class LLSImpute {
// Whether the row-average pre-imputation mode is active. It is reassigned at the
// start of every impute_llsq_l2_blind call and read by similargene.
boolean f_rowaverage;
// Number of nearest neighbours used in the local regression. Defaults to 210, is
// overwritten by the "Mink = " entry of the parameter file, and may be lowered
// by run() and impute_llsq_l2_blind().
int mink = 210;
// Copy of mink taken right after the parameter file has been parsed.
int initialMink;
// A, B: neighbour rows restricted to the observed / missing columns (set by similargene).
// Apart, Bpart: the first mink rows of A (transposed) and of B (set by impute_llsq_l2_blind).
DenseMatrix A,B,Apart,Bpart;
// Observed values of the row currently being imputed (set by similargene).
DenseVector w;
static double eps = MachineAccuracy.EPSILON; //Floating-point relative accuracy
// Training and test partitions, loaded by the LLSImpute(String) constructor.
InstanceSet IStrain;
InstanceSet IStest;
// Input and output file names, filled in by config_read.
String input_train_name = new String();
String input_test_name = new String();
String output_train_name = new String();
String output_test_name = new String();
// NOTE(review): temp and data_out are not referenced by any method visible in this class.
String temp = new String();
String data_out = new String("");
public LLSImpute(){
super();
}
/**
 * Builds the imputer from a KEEL pattern (parameter) file.
 * <p>
 * The file is parsed with {@link #config_read(String)}, the configured number of
 * neighbours is remembered in {@code initialMink}, and the training and test
 * partitions named in the file are loaded into {@code IStrain} and {@code IStest}.
 * If either partition cannot be read, the problem is reported on standard error
 * and the JVM is terminated with exit status -1.
 *
 * @param fileParam path of the parameter file in KEEL format
 */
public LLSImpute(String fileParam) {
    config_read(fileParam);
    // Keep the configured value: mink itself may be lowered later on.
    initialMink = mink;
    IStrain = new InstanceSet();
    IStest = new InstanceSet();
    try {
        // The flag is true for the training partition and false for the test one.
        IStrain.readSet(input_train_name, true);
        IStest.readSet(input_test_name, false);
    } catch (DatasetException e) {
        System.err.println("Data set loading error, now exiting LLSImpute");
        e.printStackTrace();
        System.exit(-1);
    } catch (HeaderFormatException e) {
        System.err.println("Data set loading error, now exiting LLSImpute");
        e.printStackTrace();
        System.exit(-1);
    }
}
/**
 * Parses the parameter file in KEEL format to obtain the working files and the
 * algorithm parameter.
 * <p>
 * The first four non-empty lines are consumed in this order: the
 * {@code algorithm = } line (ignored), the {@code inputData = } line (training and
 * test input files), the {@code outputData = } line (training and test output
 * files) and the {@code Mink = } line (number of neighbours, stored in
 * {@code mink}).
 * <p>
 * If the file does not exist, ends prematurely or contains a malformed entry,
 * the problem is printed and the JVM is terminated with exit status -1.
 *
 * @param fileParam Pattern file in KEEL format
 */
protected void config_read(String fileParam) {
    File inputFile = new File(fileParam);
    if (!inputFile.exists()) {
        System.out.println("parameter " + fileParam
                + " file doesn't exists!");
        System.exit(-1);
    }
    try {
        BufferedReader buf_reader = new BufferedReader(new FileReader(inputFile));
        try {
            // The algorithm name is not needed; the line is only skipped.
            nextNonEmptyLine(buf_reader);

            String[] inputs = splitQuotedPair(
                    valueAfter(nextNonEmptyLine(buf_reader), "inputData = "));
            input_train_name = inputs[0];
            input_test_name = inputs[1];

            String[] outputs = splitQuotedPair(
                    valueAfter(nextNonEmptyLine(buf_reader), "outputData = "));
            output_train_name = outputs[0];
            output_test_name = outputs[1];

            String minkText = valueAfter(nextNonEmptyLine(buf_reader), "Mink = ").trim();
            try {
                mink = Integer.parseInt(minkText);
            } catch (NumberFormatException e) {
                throw new IOException("Mink is not an integer: '" + minkText + "'", e);
            }
        } finally {
            // Release the file handle on every path, including parse failures.
            buf_reader.close();
        }
    } catch (IOException e) {
        System.out.println("IO exception = " + e);
        e.printStackTrace();
        System.exit(-1);
    }
}

/** Returns the next line with at least one character, failing if the file ends first. */
private static String nextNonEmptyLine(BufferedReader reader) throws IOException {
    String line;
    do {
        line = reader.readLine();
        if (line == null) {
            throw new IOException("unexpected end of parameter file");
        }
    } while (line.length() == 0);
    return line;
}

/** Returns the text that follows the given key in the line, failing if the key or its value is absent. */
private static String valueAfter(String line, String key) throws IOException {
    String[] parts = line.split(key);
    if (parts.length < 2) {
        throw new IOException("expected '" + key + "' followed by a value in line: " + line);
    }
    return parts[1];
}

/** Extracts the first two double-quoted names of a value such as {@code "a.dat" "b.dat"}. */
private static String[] splitQuotedPair(String value) throws IOException {
    // Each separator is a whitespace character followed by an opening quote, so the
    // first token keeps both of its quotes and later tokens keep only the closing one.
    String[] tokens = value.split("\\s\"");
    if (tokens.length < 2 || tokens[0].length() < 2 || tokens[1].length() < 1) {
        throw new IOException("expected two quoted file names but found: " + value);
    }
    String first = tokens[0].substring(1, tokens[0].length() - 1);
    String second = tokens[1].substring(0, tokens[1].length() - 1);
    if (second.endsWith("\"")) {
        second = second.substring(0, second.length() - 1);
    }
    return new String[] { first, second };
}
/**
 * Runs LLSImpute over the data sets given in the KEEL pattern file: the training
 * partition is imputed and written first, then the test partition.
 */
public void run(){
    imputePartition(IStrain, output_train_name, "train");
    imputePartition(IStest, output_test_name, "test");
}

/**
 * Imputes one partition and writes the result to disk.
 * <p>
 * mink is a field, so a reduction applied while processing one partition is
 * still in effect when the next partition is processed.
 *
 * @param partition  the instances to impute
 * @param outputName file that receives the imputed data
 * @param label      partition name used in the progress message
 */
private void imputePartition(InstanceSet partition, String outputName, String label){
    final int rowCount = partition.getNumInstances();
    final int colCount = Attributes.getNumAttributes();

    // Copy the partition into a DenseMatrix for easier matrix operations and
    // count the instances without missing values along the way.
    DenseMatrix values = new DenseMatrix(rowCount, colCount);
    int completeRows = 0;
    for (int row = 0; row < rowCount; row++) {
        Instance current = partition.getInstance(row);
        if (!current.existsAnyMissingValue()) {
            completeRows++;
        }
        int inputPos = 0;
        int outputPos = 0;
        for (int col = 0; col < colCount; col++) {
            if (Attributes.getAttribute(col).getDirectionAttribute() == Attribute.INPUT) {
                values.set(row, col, current.getAllInputValues()[inputPos]);
                inputPos++;
            } else {
                values.set(row, col, current.getAllOutputValues()[outputPos]);
                outputPos++;
            }
        }
    }

    System.out.println("\nProcessing the " + label + " partition");
    // Never ask for more neighbours than there are complete instances.
    mink = Math.min(mink, completeRows);
    DenseMatrix estimated = impute_llsq_l2_blind(values, partition);

    // Convert the estimated matrix to a String matrix ready to be printed.
    String[][] printable = new String[rowCount][colCount];
    data2string(estimated, printable, partition);
    write_results(outputName, printable, partition);
    System.out.println("Done");
}
/**
 * Applies the Local Least Squares Imputation to a given matrix.
 * <p>
 * When {@code mink} is 0 the matrix is first filled by row average and every row
 * is a neighbour candidate; otherwise only rows without missing values are
 * candidates. For each row with missing values, the nearest candidates are
 * obtained with {@link #similargene}, the observed part of the row is regressed
 * on the first {@code mink} of them through the pseudoinverse, and the same
 * combination of the neighbours' values estimates the missing columns.
 * <p>
 * Estimates are written into the result only after every row has been processed,
 * so no estimate influences the imputation of another row. If {@code mink} is
 * larger than the number of neighbours returned by similargene, all returned
 * neighbours are used.
 *
 * @param train The matrix with the data
 * @param IS The original instance set, used to know which values are missing
 * @return A newly allocated DenseMatrix of the same size as 'train' with the missing values imputed
 */
public DenseMatrix impute_llsq_l2_blind(DenseMatrix train,InstanceSet IS){
    final int m = train.numRows();
    final int n = train.numColumns();
    final int total = m*n;
    // the number of minimal experiments for estimating missing values by llsq
    int minexp = 2;
    ArrayList<Integer> gene_include = new ArrayList<Integer>();
    ArrayList<Integer> missidxj = new ArrayList<Integer>();
    ArrayList<Integer> nomissidxj = new ArrayList<Integer>();
    // Estimates waiting to be stored, as parallel (row, column, value) lists.
    ArrayList<Integer> pendingRows = new ArrayList<Integer>();
    ArrayList<Integer> pendingCols = new ArrayList<Integer>();
    ArrayList<Double> pendingValues = new ArrayList<Double>();
    DenseMatrix E;

    // Row average is off by default; it is forced only when no neighbour can be
    // requested (mink == 0).
    f_rowaverage = false;
    if (mink == 0) {
        System.out.println("consider all instances/genes after imputing missing values by row-average.");
        E = impute_rowavg(train,minexp,IS);
        for (int i = 0; i < m; i++) {
            gene_include.add(i);
        }
        f_rowaverage = true;
        mink = Math.min(initialMink, E.numRows()-1);
    } else {
        System.out.println("exclude instances/genes that have missing values for accurate imputation.");
        E = train.copy();
        for (int i = 0; i < m; i++) {
            if (!IS.getInstance(i).existsAnyMissingValue()) {
                gene_include.add(i);
            }
        }
        // set minexp to 0 since we do not perform rowaverage
        minexp = 0;
    }
    final int m_gene_include = gene_include.size();

    System.out.println("\n");
    System.out.println("----------------------------------------------");
    System.out.println("LLSimpute/L2/ITER");
    System.out.println("----------------------------------------------");
    System.out.println("\n");
    System.out.println("miss_matrix("+m+","+n+") total: "+total+" minexp: "+minexp+" f_rowaverage: "+f_rowaverage+"\n");
    System.out.print("Estimating missing values...");

    for (int i = 0; i < m; i++) {
        splitColumnsByMissingness(IS.getInstance(i), missidxj, nomissidxj);
        int len_miss = missidxj.size();
        int len_nomiss = nomissidxj.size();

        if (f_rowaverage && ((len_nomiss < minexp) || (len_nomiss < 2))) {
            System.out.println(i+"th gene: skip due to nomiss_exp("+len_nomiss+")<"+minexp+" or < 2");
        } else if (len_miss > 0) {
            // Fills the fields A, B and w for row i.
            similargene(i,missidxj,nomissidxj,m,n,train,E,gene_include,m_gene_include);

            // Use at most the neighbours that are actually available.
            int k = Math.min(mink, A.numRows());

            // Apart = A(1:k,:)'
            Apart = new DenseMatrix(A.numColumns(), k);
            for (int r = 0; r < k; r++) {
                for (int c = 0; c < A.numColumns(); c++) {
                    Apart.set(c, r, A.get(r, c));
                }
            }
            // Bpart = B(1:k,:), built together with its transpose
            Bpart = new DenseMatrix(k, B.numColumns());
            DenseMatrix BpartT = new DenseMatrix(B.numColumns(), k);
            for (int r = 0; r < k; r++) {
                for (int c = 0; c < B.numColumns(); c++) {
                    double value = B.get(r, c);
                    Bpart.set(r, c, value);
                    BpartT.set(c, r, value);
                }
            }

            // X = pinv(Apart) * w : weights of the linear combination of neighbours
            DenseMatrix pseudoInverse = pinv(Apart);
            DenseVector X = new DenseVector(pseudoInverse.numRows());
            pseudoInverse.mult(w, X);

            // estimate = Bpart' * X : one value per missing column of row i
            DenseVector estimate = new DenseVector(BpartT.numRows());
            BpartT.mult(X, estimate);

            for (int j = 0; j < estimate.size(); j++) {
                pendingRows.add(i);
                pendingCols.add(missidxj.get(j));
                pendingValues.add(estimate.get(j));
            }
        }
    }

    // store estimated values in the final matrix which will be printed
    for (int p = 0; p < pendingValues.size(); p++) {
        E.set(pendingRows.get(p), pendingCols.get(p), pendingValues.get(p));
    }
    return E;
}

/** Empties both lists and refills them with the attribute indices whose value is missing / present in the instance. */
private static void splitColumnsByMissingness(Instance inst, ArrayList<Integer> missing,
        ArrayList<Integer> observed){
    missing.clear();
    observed.clear();
    int in = 0;
    int out = 0;
    for (int j = 0; j < Attributes.getNumAttributes(); j++) {
        boolean isMissing;
        if (Attributes.getAttribute(j).getDirectionAttribute() == Attribute.INPUT) {
            isMissing = inst.getInputMissingValues()[in];
            in++;
        } else {
            isMissing = inst.getOutputMissingValues()[out];
            out++;
        }
        if (isMissing) {
            missing.add(j);
        } else {
            observed.add(j);
        }
    }
}
/**
 * Ranks the candidate rows by their distance to row i, measured on the columns
 * that row i has observed, and stores the outcome in the fields A, B and w.
 * <p>
 * The value used for ranking is |a|^2 + |b|^2 - 2*a.b, where a is row i and b a
 * candidate row, both restricted to the observed columns. After the call, A holds
 * the ranked neighbours restricted to the observed columns, B the same rows
 * restricted to the missing columns, and w the observed values of row i taken
 * from miss_matrix. Row i itself is never returned as a neighbour.
 *
 * @param i The number of the given instance in the E matrix (i.e. the row number)
 * @param missidxj Indices of the missing attributes
 * @param nomissidxj Indices of the non-missing (complete) attributes
 * @param m Rows of the E matrix (not used by this method)
 * @param n Columns of the E matrix (not used by this method)
 * @param miss_matrix Original matrix without imputed values, same size as E
 * @param E Working matrix from which the neighbour values are read
 * @param gene_include The indices of the rows considered as neighbour candidates of row i
 * @param m_gene_include Number of candidates (size of gene_include)
 */
public void similargene(int i,ArrayList<Integer> missidxj,ArrayList<Integer> nomissidxj,int m,
        int n, DenseMatrix miss_matrix,DenseMatrix E,ArrayList<Integer> gene_include,int m_gene_include){
    final int observedCount = nomissidxj.size();
    final int candidateCount = gene_include.size();

    // Row i on its observed columns (as a 1 x observedCount matrix) and its squared norm.
    DenseMatrix targetRow = new DenseMatrix(1, observedCount);
    double targetSquared = 0;
    for (int c = 0; c < observedCount; c++) {
        double value = E.get(i, nomissidxj.get(c));
        targetRow.set(0, c, value);
        targetSquared += value * value;
    }

    // Candidate rows on the same columns, and the squared norm of each of them.
    DenseMatrix candidates = new DenseMatrix(candidateCount, observedCount);
    DenseMatrix candidateSquared = new DenseMatrix(1, candidateCount);
    for (int r = 0; r < candidateCount; r++) {
        double rowSquared = 0;
        for (int c = 0; c < observedCount; c++) {
            double value = E.get(gene_include.get(r), nomissidxj.get(c));
            candidates.set(r, c, value);
            rowSquared += Math.pow(value, 2);
        }
        candidateSquared.set(0, r, rowSquared);
    }

    // distance = |a|^2 + |b|^2 ...
    DenseMatrix distance = new DenseMatrix(1, m_gene_include);
    for (int c = 0; c < m_gene_include; c++) {
        distance.set(0, c, targetSquared);
    }
    distance.add(candidateSquared);
    // ... - 2*a.b
    DenseMatrix crossTerm = new DenseMatrix(1, candidates.numRows());
    targetRow.transBmult(-2.0, candidates, crossTerm);
    distance.add(crossTerm);

    // Order the candidates by ascending distance.
    ArrayList<IndexValuePair> ranking = new ArrayList<IndexValuePair>();
    for (int c = 0; c < distance.numColumns(); c++) {
        ranking.add(new IndexValuePair(distance.get(0, c), c));
    }
    Collections.sort(ranking);

    // With row average on, row i is itself a candidate and is dropped here.
    int[] neighbours = new int[f_rowaverage ? ranking.size() - 1 : ranking.size()];
    int filled = 0;
    for (IndexValuePair entry : ranking) {
        int row = gene_include.get(entry.index);
        if (row != i) {
            neighbours[filled] = row;
            filled++;
        }
    }

    // A = E(neighbours, nomissidxj), B = E(neighbours, missidxj)
    final int missingCount = missidxj.size();
    A = new DenseMatrix(neighbours.length, observedCount);
    B = new DenseMatrix(neighbours.length, missingCount);
    for (int r = 0; r < neighbours.length; r++) {
        for (int c = 0; c < observedCount; c++) {
            A.set(r, c, E.get(neighbours[r], nomissidxj.get(c)));
        }
        for (int c = 0; c < missingCount; c++) {
            B.set(r, c, E.get(neighbours[r], missidxj.get(c)));
        }
    }

    // w = miss_matrix(i, nomissidxj)
    w = new DenseVector(observedCount);
    for (int c = 0; c < observedCount; c++) {
        w.set(c, miss_matrix.get(i, nomissidxj.get(c)));
    }
}
/**
 * Fills the missing entries of every row with the average of that row's
 * observed entries.
 * <p>
 * Only attributes whose direction is INPUT or OUTPUT are examined. A row with no
 * observed entry gets NaN (0.0 divided by 0) in its missing positions.
 *
 * @param miss_matrix The original matrix with all missing values
 * @param minexp Threshold used only for the statistics printed at the end
 * @param IS The reference InstanceSet, which tells what values are missing
 * @return A copy of miss_matrix with the missing entries replaced by the row average
 */
public DenseMatrix impute_rowavg(DenseMatrix miss_matrix,int minexp,InstanceSet IS){
    final int rows = miss_matrix.numRows();
    final int cols = miss_matrix.numColumns();
    DenseMatrix filled = miss_matrix.copy();
    // NOTE: fullRows counts rows in which every column is observed, although the
    // message printed at the end words it differently.
    int fullRows = 0;
    int sparseRows = 0;

    System.out.println("Generating row-averaged E...");
    for (int row = 0; row < rows; row++) {
        Instance inst = IS.getInstance(row);
        boolean[] absent = new boolean[cols];
        double rowSum = 0;
        int present = 0;
        int inputPos = 0;
        int outputPos = 0;

        // Single scan: remember which cells are missing and accumulate the rest.
        for (int col = 0; col < cols; col++) {
            Attribute attr = Attributes.getAttribute(col);
            boolean cellMissing;
            if (attr.getDirectionAttribute() == Attribute.INPUT) {
                cellMissing = inst.getInputMissingValues(inputPos);
                inputPos++;
            } else if (attr.getDirectionAttribute() == Attribute.OUTPUT) {
                cellMissing = inst.getOutputMissingValues(outputPos);
                outputPos++;
            } else {
                continue;
            }
            if (cellMissing) {
                absent[col] = true;
            } else {
                rowSum += miss_matrix.get(row, col);
                present++;
            }
        }

        if (present == cols) {
            fullRows++;
        } else if (present < minexp) {
            sparseRows++;
        }

        double average = rowSum / present;
        for (int col = 0; col < cols; col++) {
            if (absent[col]) {
                filled.set(row, col, average);
            }
        }
    }
    System.out.println("the number of genes that have no non-missing entries: "+fullRows);
    System.out.println("the number of genes that have less than "+ minexp +" non-missing entries: "+sparseRows);
    return filled;
}
/**
 * Computes the Moore-Penrose pseudoinverse of matrix A from its singular value
 * decomposition: pinv(A) = V * pinv(S) * U'.
 * <p>
 * pinv(S) holds the reciprocals of the singular values larger than
 * max(rows, columns) * largest singular value * eps; smaller singular values are
 * treated as zero, because their reciprocals would be enormous.
 * If A is square and not singular, pinv(A) is an expensive way to compute inv(A).
 * A matrix with no rows or no columns yields an empty matrix of transposed size.
 * If the SVD does not converge, the error is reported and the JVM is terminated
 * with exit status 1.
 *
 * @param A The matrix from which we compute the pseudoinverse
 * @return The pseudoinverse of matrix A, of size columns(A) x rows(A)
 */
public static DenseMatrix pinv(DenseMatrix A) {
    // An empty matrix has no singular values; its pseudoinverse is empty as well.
    if (A.numRows() == 0 || A.numColumns() == 0) {
        return new DenseMatrix(A.numColumns(), A.numRows());
    }
    try {
        SVD svd = SVD.factorize(A);
        DenseMatrix u = svd.getU();
        DenseMatrix vt = svd.getVt();
        double[] sing = svd.getS();

        // we also state the minimum threshold for the singular values
        double tol = Math.max(A.numRows(), A.numColumns());
        tol *= sing[0] * eps;

        // pinv(S): diagonal matrix with transposed dimensions (new matrices start at zero)
        DenseMatrix pinvSingVal = new DenseMatrix(vt.numRows(), u.numColumns());
        for (int i = 0; i < sing.length; i++) {
            if (sing[i] > tol) {
                pinvSingVal.set(i, i, 1.0/sing[i]);
            }
        }

        // vTimesPinvS = V * pinv(S)
        DenseMatrix vTimesPinvS = new DenseMatrix(vt.numColumns(), pinvSingVal.numColumns());
        vt.transAmult(pinvSingVal, vTimesPinvS);

        // inv = V * pinv(S) * U'
        DenseMatrix inv = new DenseMatrix(A.numColumns(), A.numRows());
        vTimesPinvS.transBmult(u, inv);
        return inv;
    } catch (NotConvergedException e) {
        System.err.println("Error: The SVD did not converge :(");
        e.printStackTrace();
        System.exit(1);
    }
    return null;
}
/**
 * Computes the inverse of a square non-singular matrix by solving A * X = I
 * with the LU decomposition of A.
 *
 * @param A The matrix from which we will compute the inverse
 * @return A newly allocated matrix which contains the inverse of A
 */
public static DenseMatrix inv(DenseMatrix A){
    final int order = A.numRows();
    DenseMatrix result = new DenseMatrix(order, order);
    // LU decomposition of A
    DenseLU decomposition = DenseLU.factorize(A);
    // Start from the identity, i.e. the product of A and its inverse.
    result.zero();
    int diag = 0;
    while (diag < result.numRows()) {
        result.set(diag, diag, 1);
        diag++;
    }
    // Solving overwrites the identity with the solution, which is the inverse.
    decomposition.solve(result);
    return result;
}
/**
 * Adds up all the elements of vector v, from the first to the last.
 *
 * @param v The reference vector
 * @return The sum of all elements of v (0 for an empty vector)
 */
public double sum(DenseVector v){
    final int length = v.size();
    double accumulated = 0;
    int position = 0;
    while (position < length) {
        accumulated += v.get(position);
        position++;
    }
    return accumulated;
}
/**
 * Adds up the elements of each row of the given matrix.
 *
 * @param mat The reference matrix
 * @return A newly allocated vector whose i-th element is the sum of row i of mat
 */
public DenseVector sumbyRows(DenseMatrix mat){
    final int rowCount = mat.numRows();
    final int colCount = mat.numColumns();
    DenseVector rowTotals = new DenseVector(rowCount);
    for (int row = 0; row < rowCount; row++) {
        double rowTotal = 0;
        for (int col = 0; col < colCount; col++) {
            rowTotal += mat.get(row, col);
        }
        rowTotals.set(row, rowTotal);
    }
    return rowTotals;
}
/**
 * Replicates and tiles a matrix.
 * B = repmat(A,m,n) creates a larger matrix B consisting of an m-by-n tiling of
 * copies of A, so the size of B is [rows(A)*m, columns(A)*n].
 *
 * @param mat The original matrix
 * @param m Number of rowwise replications
 * @param n Number of columnwise replications
 * @return A newly allocated matrix with the tiling of matrix mat
 */
public DenseMatrix repmat(DenseMatrix mat,int m, int n){
    final int srcRows = mat.numRows();
    final int srcCols = mat.numColumns();
    DenseMatrix tiled = new DenseMatrix(m * srcRows, n * srcCols);
    // Copy the source once per tile, shifted to that tile's offset.
    for (int tileRow = 0; tileRow < m; tileRow++) {
        for (int tileCol = 0; tileCol < n; tileCol++) {
            for (int r = 0; r < srcRows; r++) {
                for (int c = 0; c < srcCols; c++) {
                    tiled.set(tileRow * srcRows + r, tileCol * srcCols + c, mat.get(r, c));
                }
            }
        }
    }
    return tiled;
}
/**
 * Converts the DenseMatrix of numeric values to a String 2D array, ready for
 * printing to a file. Every attribute (column) is converted, whatever its direction.
 * <p>
 * Non-nominal values are first clipped to the attribute's [min, max] bounds.
 * REAL values are printed as they are, INTEGER values are rounded, and any other
 * type is treated as nominal: the value is rounded, clipped to a valid index and
 * replaced by the nominal label at that index.
 *
 * @param mat The DenseMatrix with the values in double format
 * @param X The output String matrix, ready to be printed; its dimensions drive the conversion
 * @param IS The InstanceSet the data comes from (not needed for the conversion)
 */
protected void data2string(DenseMatrix mat, String [][] X,InstanceSet IS){
    for (int i = 0; i < X.length; i++) {
        for (int j = 0; j < X[i].length; j++) {
            Attribute a = Attributes.getAttribute(j);
            double value = mat.get(i, j);

            // Keep numeric estimates inside the bounds of the attribute.
            if (a.getType() != Attribute.NOMINAL) {
                if (value < a.getMinAttribute()) {
                    value = a.getMinAttribute();
                } else if (value > a.getMaxAttribute()) {
                    value = a.getMaxAttribute();
                }
            }

            if (a.getType() == Attribute.REAL) {
                X[i][j] = String.valueOf(value);
            } else if (a.getType() == Attribute.INTEGER) {
                X[i][j] = String.valueOf(Math.round(value));
            } else {
                // Nominal: the estimate is an index into the list of labels.
                value = Math.round(value);
                if (value >= a.getNumNominalValues()) {
                    value = a.getNumNominalValues()-1;
                }
                if (value < 0) {
                    value = 0;
                }
                X[i][j] = a.getNominalValue((int)value);
            }
        }
    }
}
/**
 * Writes data matrix X to disk, in KEEL format: the header of the instance set,
 * an "@data" line, and then one comma-separated line per row of X.
 * <p>
 * The file is closed on every path. On an I/O failure the error is printed and
 * the JVM is terminated with exit status -1.
 *
 * @param output The file to which we print
 * @param X The 2D array with the values of the attributes parsed to a string
 * @param IS The reference InstanceSet, which provides the header
 */
protected void write_results(String output,String[][] X,InstanceSet IS){
    try {
        FileWriter file_write = new FileWriter(output);
        try {
            file_write.write(IS.getHeader());
            file_write.write("@data\n");
            for (int i = 0; i < X.length; i++) {
                // Assemble the whole line first so each row is a single write;
                // a row without columns yields an empty line.
                StringBuilder row = new StringBuilder();
                for (int j = 0; j < X[i].length; j++) {
                    if (j > 0) {
                        row.append(',');
                    }
                    row.append(X[i][j]);
                }
                row.append('\n');
                file_write.write(row.toString());
            }
        } finally {
            file_write.close();
        }
    } catch (IOException e) {
        System.out.println("IO exception = " + e );
        System.exit(-1);
    }
}
}