/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* MatlabICA.java
* Copyright (C) 2002 Sugato Basu and Mikhail Bilenko
*
*/
package weka.attributeSelection;
import java.io.*;
import java.util.*;
import weka.core.*;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.unsupervised.attribute.Normalize;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.Remove;
import weka.filters.Filter;
/**
* Class for performing independent components analysis/transformation. <p>
*
* Valid options are:<p>
* -D <br>
* Don't normalize the input data. <p>
*
* -O <br>
* Transform through the IC space and back to the original space. <p>
*
* -N num <br>
* Number of independent components. <p>
*
* -A approach <br>
* ICA approach. <p>
*
* -F function <br>
* ICA function. <p>
*
* @author Sugato Basu
* @author Mikhail Bilenko
* @version $Revision: 1.2 $
*/
public class MatlabICA extends AttributeEvaluator
implements AttributeTransformer, OptionHandler {
/**
 * The training data after filtering: missing values replaced, optionally
 * normalized, and with single-valued and class columns removed. This is the
 * matrix that gets dumped to disk for Matlab.
 */
private Instances m_trainInstances;
/** Unfiltered copy of the training data; source of the class attribute and of the header check in convertInstance */
private Instances m_trainCopy;
/** The header for the transformed (IC space) data format */
private Instances m_transformedFormat;
/** The header for data transformed back to the original space */
private Instances m_originalSpaceFormat;
/** True if the training data had a class index set */
private boolean m_hasClass;
/** Class index of the training data (only meaningful when m_hasClass is true) */
private int m_classIndex;
/** Number of attributes in the filtered training data */
private int m_numAttribs;
/** Number of instances in the filtered training data */
private int m_numInstances;
/** Name of the Matlab program file that computes ICA (generated by prepareMatlab) */
protected String m_ICAMFile = new String("/var/local/MatlabICA.m");
/** Will hold the mixing matrix; indexed [original attribute][component] when mapping back to the original space */
protected double [][] m_mixingMatrix;
/** Will hold the inverse of the mixing matrix; indexed [component][original attribute] when transforming an instance */
protected double [][] m_inverseMixingMatrix;
/** Will hold the independent components read back from Matlab; null means "not built" */
protected double [][] m_independentComponents;
/** A timestamp suffix appended to the per-run file names below; assigned once, on the first build */
String m_timestamp = null;
/** Name of the file where attribute names will be stored (base name + timestamp + ".txt") */
String m_icaAttributeFilename = null;
/** Base name (without timestamp and extension) of the attribute-names file */
String m_icaAttributeFilenameBase = new String("/var/local/ICAattributes");
/** Name of the file where dataMatrix will be stored */
public String m_dataFilename = new String("/var/local/ICAdataMatrix.txt");
/** Name of the file where mixingMatrix will be stored (base name + timestamp + ".txt") */
public String m_mixingMatrixFilename = null;
/** Base name (without timestamp and extension) of the mixing-matrix file */
public String m_mixingMatrixFilenameBase = new String("/var/local/ICAmixingMatrix");
/** Name of the file where inverseMixingMatrix will be stored (note: not timestamped) */
public String m_inverseMixingMatrixFilename = new String("/var/local/ICAinverseMixingMatrix.txt");
/** Name of the file where independentComponents will be stored (base name + timestamp + ".txt") */
public String m_independentComponentsFilename = null;
/** Base name (without timestamp and extension) of the independent-components file */
public String m_independentComponentsFilenameBase = new String("/var/local/ICAindependentComponents");
/** Filters for original data; the same fitted filters are re-applied to every instance in convertInstance */
private ReplaceMissingValues m_replaceMissingFilter;
/** Normalization filter; only created when m_normalize is true */
private Normalize m_normalizeFilter;
/** Removes single-valued columns and the class column; null when nothing had to be removed */
private Remove m_attributeFilter;
/** The number of attributes in the ic transformed data (class attribute included, if any); -1 until known */
private int m_outputNumAtts = -1;
/** normalize the input data? */
private boolean m_normalize = true;
/** transform the data through the ic space and back to the original
space ? */
private boolean m_transBackToOriginal = false;
/** The attribute evaluator to use on the transformed data */
private ASEvaluation m_eval = new weka.attributeSelection.ChiSquaredAttributeEval();
/**
 * load eigenvalues of covariance matrix from file? When true, the generated
 * Matlab script loads /var/local/PCAeigenValues.txt and hands it to fastica
 * as 'pcaD'.
 */
protected boolean m_loadEigenValuesFromFile = false;
/**
 * Chooses whether the generated Matlab script loads eigenvalues from file.
 * @param choice true to load the eigenvalues from file
 */
public void setLoadEigenValuesFromFile(boolean choice) {
  this.m_loadEigenValuesFromFile = choice;
}
/**
 * Tells whether the generated Matlab script loads eigenvalues from file.
 * @return true if eigenvalues are loaded from file
 */
public boolean getLoadEigenValuesFromFile() {
  return this.m_loadEigenValuesFromFile;
}
/**
 * load eigenvectors of covariance matrix from file? When true, the generated
 * Matlab script loads /var/local/PCAeigenVectors.txt and hands it to fastica
 * as 'pcaE'.
 */
protected boolean m_loadEigenVectorsFromFile = false;
/**
 * Chooses whether the generated Matlab script loads eigenvectors from file.
 * @param choice true to load the eigenvectors from file
 */
public void setLoadEigenVectorsFromFile(boolean choice) {
  this.m_loadEigenVectorsFromFile = choice;
}
/**
 * Tells whether the generated Matlab script loads eigenvectors from file.
 * @return true if eigenvectors are loaded from file
 */
public boolean getLoadEigenVectorsFromFile() {
  return this.m_loadEigenVectorsFromFile;
}
/** number of Independent Components requested from fastica (written into the Matlab script as 'numOfIC'); defaults to 2 */
protected int m_NumIndependentComponents = 2;
/**
 * Sets how many independent components are requested, and reports the new
 * value on standard output.
 * @param n the number of independent components
 */
public void setNumIndependentComponents(int n) {
  this.m_NumIndependentComponents = n;
  String report = "Number of ICA components: " + n;
  System.out.println(report);
}
/**
 * Gets how many independent components are requested.
 * @return the number of independent components
 */
public int getNumIndependentComponents() {
  return this.m_NumIndependentComponents;
}
/*
 * Define possible ICA approaches. The tag IDs double as indices into
 * TAGS_APPROACH, and each tag's readable name is written verbatim into the
 * generated Matlab script as the value of fastica's 'approach' argument.
 */
public static final int APPROACH_SYMM = 0;
public static final int APPROACH_DEFL = 1;
public static final Tag[] TAGS_APPROACH = {
new Tag(APPROACH_SYMM, "symm"),
new Tag(APPROACH_DEFL, "defl")
};
/** ID of the currently selected ICA approach; defaults to APPROACH_SYMM */
protected int m_ICAapproach = APPROACH_SYMM;
/**
 * Gets the currently selected ICA approach.
 * @return the selected approach, wrapped as a tag over TAGS_APPROACH
 */
public SelectedTag getICAapproach() {
  SelectedTag current = new SelectedTag(m_ICAapproach, TAGS_APPROACH);
  return current;
}
/**
 * Selects the ICA approach. Tags that were not built over TAGS_APPROACH
 * are silently ignored.
 * @param approach the approach to use
 */
public void setICAapproach(SelectedTag approach) {
  if (approach.getTags() != TAGS_APPROACH) {
    return;
  }
  System.out.println("Approach: " + approach.getSelectedTag().getReadable());
  m_ICAapproach = approach.getSelectedTag().getID();
}
/*
 * Define possible ICA functions. The tag IDs double as indices into
 * TAGS_FUNCTION, and each tag's readable name is written verbatim into the
 * generated Matlab script as the value of fastica's 'g' argument.
 */
public static final int FUNCTION_TANH = 0;
public static final int FUNCTION_GAUSS = 1;
public static final int FUNCTION_POW3 = 2;
public static final int FUNCTION_SKEW = 3;
public static final Tag[] TAGS_FUNCTION = {
new Tag(FUNCTION_TANH, "tanh"),
new Tag(FUNCTION_GAUSS, "gauss"),
new Tag(FUNCTION_POW3, "pow3"),
new Tag(FUNCTION_SKEW, "skew")
};
/** ID of the currently selected ICA function; defaults to FUNCTION_TANH */
protected int m_ICAfunction = FUNCTION_TANH;
/**
 * Gets the currently selected ICA function.
 * @return the selected function, wrapped as a tag over TAGS_FUNCTION
 */
public SelectedTag getICAfunction() {
  SelectedTag current = new SelectedTag(m_ICAfunction, TAGS_FUNCTION);
  return current;
}
/**
 * Selects the ICA function. Tags that were not built over TAGS_FUNCTION
 * are silently ignored.
 * @param function the function to use
 */
public void setICAfunction(SelectedTag function) {
  if (function.getTags() != TAGS_FUNCTION) {
    return;
  }
  System.out.println("Function: " + function.getSelectedTag().getReadable());
  m_ICAfunction = function.getSelectedTag().getID();
}
/**
 * Returns a string describing this attribute transformer.
 * The description only mentions what this class actually offers: the number
 * of components is set directly, there is no variance threshold.
 * @return a description of the evaluator suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  return "Performs an independent components analysis and transformation of "
    + "the data by running the fastica function in Matlab. Use in "
    + "conjunction with a Ranker search. Dimensionality reduction is "
    + "accomplished by requesting a fixed number of independent components "
    + "(default 2). Attribute noise can be filtered by transforming "
    + "to the IC space and then transforming back to the original space.";
}
/**
 * Returns an enumeration describing the available options. <p>
 *
 * Valid options are:<p>
 * -D <br>
 * Don't normalize the input data. <p>
 *
 * -O <br>
 * Transform through the IC space and back to the original space. <p>
 *
 * -N num <br>
 * Number of independent components. <p>
 *
 * -A id <br>
 * ICA approach, given as the integer ID of an entry of TAGS_APPROACH. <p>
 *
 * -F id <br>
 * ICA function, given as the integer ID of an entry of TAGS_FUNCTION. <p>
 *
 * @return an enumeration of all the available options.
 **/
public Enumeration listOptions () {
  Vector newVector = new Vector(5);
  newVector.addElement(new Option("\tDon't normalize input data."
                                  , "D", 0, "-D"));
  newVector.addElement(new Option("\tTransform through the IC space and "
                                  +"\n\tback to the original space."
                                  , "O", 0, "-O"));
  newVector.addElement(new Option("\tNumber of independent components."
                                  +"\n\t(default 2)"
                                  , "N", 1, "-N <num>"));
  newVector.addElement(new Option("\tICA approach: " + describeTags(TAGS_APPROACH) + "."
                                  +"\n\t(default " + APPROACH_SYMM + ")"
                                  , "A", 1, "-A <id>"));
  newVector.addElement(new Option("\tICA function: " + describeTags(TAGS_FUNCTION) + "."
                                  +"\n\t(default " + FUNCTION_TANH + ")"
                                  , "F", 1, "-F <id>"));
  return newVector.elements();
}

/** Renders a tag list as "0 = name, 1 = name, ..." for the option help text. */
private static String describeTags(Tag[] tags) {
  StringBuffer text = new StringBuffer();
  for (int i = 0; i < tags.length; i++) {
    if (i > 0) {
      text.append(", ");
    }
    text.append(tags[i].getID()).append(" = ").append(tags[i].getReadable());
  }
  return text.toString();
}
/**
 * Parses a given list of options. All settings are first reset, so options
 * that are absent fall back to their reset values.
 *
 * Valid options are:<p>
 * -D <br>
 * Don't normalize the input data. <p>
 *
 * -O <br>
 * Transform through the IC space and back to the original space. <p>
 *
 * -N num <br>
 * Number of independent components. <p>
 *
 * -A approach <br>
 * ICA approach, either as the integer tag ID or as the readable tag name
 * (e.g. "symm"). <p>
 *
 * -F function <br>
 * ICA function, either as the integer tag ID or as the readable tag name
 * (e.g. "tanh"). <p>
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
public void setOptions (String[] options)
  throws Exception
{
  resetOptions();
  setNormalize(!Utils.getFlag('D', options));
  setTransformBackToOriginal(Utils.getFlag('O', options));

  String optionString = Utils.getOption('F', options);
  if (optionString.length() != 0) {
    setICAfunction(new SelectedTag(resolveTagID(optionString, TAGS_FUNCTION, "-F"),
                                   TAGS_FUNCTION));
  }
  optionString = Utils.getOption('A', options);
  if (optionString.length() != 0) {
    setICAapproach(new SelectedTag(resolveTagID(optionString, TAGS_APPROACH, "-A"),
                                   TAGS_APPROACH));
  }
  optionString = Utils.getOption('N', options);
  if (optionString.length() != 0) {
    setNumIndependentComponents(Integer.parseInt(optionString));
  }
}

/**
 * Maps an option value onto a tag ID. Readable tag names are tried first so
 * that both name-style and ID-style option strings are understood; anything
 * else must be the integer ID of one of the given tags.
 */
private static int resolveTagID(String value, Tag[] tags, String option)
  throws Exception
{
  String wanted = value.trim();
  for (int i = 0; i < tags.length; i++) {
    if (tags[i].getReadable().equalsIgnoreCase(wanted)) {
      return tags[i].getID();
    }
  }
  int id;
  try {
    id = Integer.parseInt(wanted);
  } catch (NumberFormatException e) {
    throw new Exception("Invalid value '" + value + "' for option " + option);
  }
  for (int i = 0; i < tags.length; i++) {
    if (tags[i].getID() == id) {
      return id;
    }
  }
  throw new Exception("Invalid value '" + value + "' for option " + option);
}
/**
 * Puts every option back to its reset value before a new option list is
 * parsed. Note that normalization is reset to off here, whereas a freshly
 * constructed object has it on.
 */
private void resetOptions() {
  m_NumIndependentComponents = 2;
  m_eval = new weka.attributeSelection.ChiSquaredAttributeEval();
  m_ICAapproach = APPROACH_SYMM;
  m_ICAfunction = FUNCTION_TANH;
  m_transBackToOriginal = false;
  m_normalize = false;
}
/**
 * Replaces the attribute evaluator that is run on the transformed data.
 *
 * @param evaluator the evaluator with all options set.
 */
public void setEvaluator(ASEvaluation evaluator) {
  this.m_eval = evaluator;
}
/**
 * Gets the attribute evaluator that is run on the transformed data.
 *
 * @return the attribute evaluator
 */
public ASEvaluation getEvaluator() {
  return this.m_eval;
}
/**
 * Returns the tip text for the normalize property.
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String normalizeTipText() {
  final String tip = "Normalize input data.";
  return tip;
}
/**
 * Switches normalization of the input data on or off.
 * @param n true if input data is to be normalized
 */
public void setNormalize(boolean n) {
  this.m_normalize = n;
}
/**
 * Tells whether the input data is normalized before the analysis.
 * @return true if input data is to be normalized
 */
public boolean getNormalize() {
  return this.m_normalize;
}
/**
 * Returns the tip text for the transformBackToOriginal property.
 * The text refers to the number of independent components, which is the
 * only dimensionality control this class has.
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String transformBackToOriginalTipText() {
  return "Transform through the IC space and back to the original space. "
    + "If fewer independent components than original attributes are "
    + "retained (see numIndependentComponents) "
    + "then this option will give a dataset in the original space but with "
    + "less attribute noise.";
}
/**
 * Chooses whether transformed data is mapped back to the original
 * attribute space.
 * @param b true if the data should be transformed back to the
 * original space
 */
public void setTransformBackToOriginal(boolean b) {
  this.m_transBackToOriginal = b;
}
/**
 * Tells whether transformed data is mapped back to the original
 * attribute space.
 * @return true if the data is to be transformed back to the original space
 */
public boolean getTransformBackToOriginal() {
  return this.m_transBackToOriginal;
}
/**
 * Gets the current settings of MatlabICA.
 *
 * Flags are emitted exactly as "-X" (no surrounding whitespace), because the
 * option parser only recognises entries whose first character is '-'. The
 * approach and function are emitted as integer tag IDs, which is the form
 * setOptions() parses.
 *
 * @return an array of strings suitable for passing to setOptions()
 */
public String[] getOptions () {
  String[] options = new String[10];
  int current = 0;
  if (!getNormalize()) {
    options[current++] = "-D";
  }
  if (getTransformBackToOriginal()) {
    options[current++] = "-O";
  }
  options[current++] = "-A";
  options[current++] = "" + getICAapproach().getSelectedTag().getID();
  options[current++] = "-F";
  options[current++] = "" + getICAfunction().getSelectedTag().getID();
  options[current++] = "-N";
  options[current++] = "" + getNumIndependentComponents();
  // Pad the unused tail: consumers expect no null entries.
  while (current < options.length) {
    options[current++] = "";
  }
  return options;
}
/**
 * Entry point required by the evaluator contract: runs the complete
 * independent components analysis on the given data.
 * @param data the instances to analyse/transform
 * @exception Exception if analysis fails
 */
public void buildEvaluator(Instances data) throws Exception {
  this.buildAttributeConstructor(data);
}
/**
 * Runs the whole analysis: filters the data, dumps it to disk, generates
 * and runs the Matlab script, reads the resulting matrices back and sets up
 * the output formats.
 *
 * If the Matlab output cannot be used, the object falls back to "not
 * built" (m_independentComponents == null) and the original-space header is
 * used as the output format.
 *
 * @param data the instances to analyse/transform
 * @exception Exception if the data contains string attributes, a filter
 * fails, or a Matlab output file cannot be read
 */
private void buildAttributeConstructor (Instances data) throws Exception {
  System.out.println("data.numInstances: " + data.numInstances());

  // Forget everything learned by a previous call, so that reusing this
  // object on different data cannot pick up stale state (in particular a
  // stale m_hasClass when the new data has no class).
  m_independentComponents = null;
  m_mixingMatrix = null;
  m_inverseMixingMatrix = null;
  m_outputNumAtts = -1;
  m_attributeFilter = null;
  m_hasClass = false;
  m_classIndex = -1;

  if (data.checkForStringAttributes()) {
    throw new UnsupportedAttributeTypeException("Can't handle string attributes!");
  }
  m_trainInstances = data;
  System.out.println("ClassIndex is " + m_trainInstances.classIndex());

  // make a copy of the training data so that we can get the class
  // column to append to the transformed data (if necessary)
  m_trainCopy = new Instances(m_trainInstances);
  System.out.println("Copied instances");

  m_replaceMissingFilter = new ReplaceMissingValues();
  m_replaceMissingFilter.setInputFormat(m_trainInstances);
  m_trainInstances = Filter.useFilter(m_trainInstances,
                                      m_replaceMissingFilter);
  System.out.println("Replaced missing values");

  if (m_normalize) {
    m_normalizeFilter = new Normalize();
    m_normalizeFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_normalizeFilter);
    System.out.println("Normalized");
  }

  // Collect attributes with only one distinct value (or all missing).
  // The class column is handled separately below, so skip it here to
  // avoid queueing the same index twice.
  int classIndex = m_trainInstances.classIndex();
  Vector deleteCols = new Vector();
  for (int i = 0; i < m_trainInstances.numAttributes(); i++) {
    if (i != classIndex && m_trainInstances.numDistinctValues(i) <= 1) {
      deleteCols.addElement(new Integer(i));
    }
  }
  System.out.println("Deleted single-value attributes");

  if (classIndex >= 0) {
    // get rid of the class column
    m_hasClass = true;
    m_classIndex = classIndex;
    deleteCols.addElement(new Integer(m_classIndex));
    System.out.println("Deleted class attributes");
  }

  // remove columns from the data if necessary
  if (deleteCols.size() > 0) {
    m_attributeFilter = new Remove();
    int [] todelete = new int [deleteCols.size()];
    for (int i = 0; i < deleteCols.size(); i++) {
      todelete[i] = ((Integer)(deleteCols.elementAt(i))).intValue();
    }
    m_attributeFilter.setAttributeIndicesArray(todelete);
    m_attributeFilter.setInvertSelection(false);
    m_attributeFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter);
  }
  System.out.println("Removed attributes filtered above");

  m_numInstances = m_trainInstances.numInstances();
  m_numAttribs = m_trainInstances.numAttributes();

  // The timestamp (and the file names derived from it) is chosen once per
  // object and reused by later builds.
  if (m_timestamp == null) {
    m_timestamp = getLogTimestamp();
    m_icaAttributeFilename = new String(m_icaAttributeFilenameBase + m_timestamp + ".txt");
    m_mixingMatrixFilename = new String(m_mixingMatrixFilenameBase + m_timestamp + ".txt");
    m_independentComponentsFilename = new String(m_independentComponentsFilenameBase + m_timestamp + ".txt");
  }
  MatlabPCA.dumpAttributeNames(m_trainInstances, m_icaAttributeFilename);

  System.out.println("About to run ICA on " + m_numInstances + " instances, each with " + m_numAttribs + " attributes");
  dumpInstances(m_dataFilename);
  prepareMatlab(m_ICAMFile);
  runMatlab(m_ICAMFile, "/var/local/ICAMatlab.output");
  System.out.println("Done training ... now parsing matlab output files");

  m_mixingMatrix = readColumnVectors(m_mixingMatrixFilename);
  m_inverseMixingMatrix = readColumnVectors(m_inverseMixingMatrixFilename);
  m_independentComponents = readColumnVectors(m_independentComponentsFilename);

  if (m_mixingMatrix == null || m_independentComponents == null || m_inverseMixingMatrix == null) {
    System.out.println("WARNING!! Could not parse matlab output files");
    // A partial result is unusable. Mark the transformer as "not built" so
    // that every consumer takes its fallback path instead of dereferencing
    // a missing matrix.
    m_independentComponents = null;
    m_originalSpaceFormat = setOutputFormatOriginal();
    m_transformedFormat = m_originalSpaceFormat;
    m_outputNumAtts = m_originalSpaceFormat.numAttributes();
  }
  else {
    System.out.println("Successfully parsed matlab output files");
    System.out.println("MixingMatrix: " + m_mixingMatrix.length + "x" + m_mixingMatrix[0].length);
    System.out.println("InverseMixingMatrix: " + m_inverseMixingMatrix.length + "x" + m_inverseMixingMatrix[0].length);
    m_transformedFormat = setOutputFormat();
    if (m_transBackToOriginal) {
      m_originalSpaceFormat = setOutputFormatOriginal();
    }
  }

  // Build the attribute evaluator.
  // NOTE(review): this tests the class index of the *filtered* data, from
  // which the class column has been removed above; whether the Remove filter
  // leaves a class index set is not visible here - confirm this branch is
  // reachable as intended.
  if (m_trainInstances.classIndex() >= 0) {
    m_eval.buildEvaluator(transformedData());
  }
}
/**
 * Read a matrix from a text file.
 *
 * Expected layout: the first line holds the number of rows, the second the
 * number of columns, and each following non-blank line holds one row of
 * whitespace-separated values. Values that cannot be parsed, and cells the
 * file does not provide, are left at 0; surplus rows or columns are ignored.
 * All such irregularities are reported on standard error.
 *
 * @param name file name
 * @return the matrix indexed [row][column], or null if the dimensions in
 * the header are missing, unparsable or not positive
 * @exception Exception if the file cannot be opened or read
 */
public double[][] readColumnVectors(String name) throws Exception {
  BufferedReader r = new BufferedReader(new FileReader(name));
  try {
    int numAttributes = parseDimension(r.readLine()); // number of rows
    int numVectors = parseDimension(r.readLine());    // number of columns
    if (numAttributes <= 0 || numVectors <= 0) {
      return null;
    }

    double[][] vectors = new double[numAttributes][numVectors];
    int i = 0;
    String s;
    while ((s = r.readLine()) != null) {
      StringTokenizer tokenizer = new StringTokenizer(s);
      if (!tokenizer.hasMoreTokens()) {
        continue; // blank line, e.g. a trailing newline
      }
      if (i >= numAttributes) {
        // Count surplus rows for the report below, but never store them.
        i++;
        continue;
      }
      int j = 0;
      while (tokenizer.hasMoreTokens()) {
        String value = tokenizer.nextToken();
        if (j < numVectors) {
          try {
            vectors[i][j] = Double.parseDouble(value);
          } catch (NumberFormatException e) {
            System.err.println("Couldn't parse " + value + " as double");
          }
        }
        j++;
      }
      if (j > numVectors) {
        System.err.println("Too many vectors(" + j + " instead of " + numVectors + ") in line: " + s);
      } else if (j < numVectors) {
        System.err.println("Too few vectors(" + j + " instead of " + numVectors + ") in line: " + s);
      }
      i++;
    }
    if (i > numAttributes) {
      System.err.println("Too many attributes: " + i + " expecting " + numAttributes + " attributes");
    } else if (i < numAttributes) {
      System.err.println("Too few attributes: " + i + " expecting " + numAttributes + " attributes");
    }
    return vectors;
  } finally {
    r.close();
  }
}

/**
 * Parses one header line as a matrix dimension. The header is written as a
 * floating point number, hence the double parse; returns -1 if the line is
 * missing or malformed.
 */
private static int parseDimension(String s) {
  try {
    return (int) Double.parseDouble(s.trim());
  } catch (Exception e) {
    System.err.println("Couldn't parse " + s + " as Double");
    return -1;
  }
}
/**
 * Gives the structure (header only, no instances) of the transformed data,
 * so that callers can learn the output format without transforming
 * anything. Which header is returned depends on whether data is mapped back
 * to the original space. A warning is printed if the components have not
 * been built.
 * @return the header of the transformed data.
 * @exception Exception if the header of the transformed data can't
 * be determined.
 */
public Instances transformedHeader() throws Exception {
  boolean built = (m_independentComponents != null);
  if (!built) {
    System.out.println("WARNING!! Independent components could not be built, returning original data");
  }
  return m_transBackToOriginal ? m_originalSpaceFormat : m_transformedFormat;
}
/**
 * Transforms every training instance and collects the results. If the
 * components have not been built, a warning is printed and the untouched
 * copy of the training data is returned instead. Each converted instance is
 * echoed to standard output.
 * @return the transformed training data
 * @exception Exception if transformed data can't be returned
 */
public Instances transformedData() throws Exception {
  if (m_independentComponents == null) {
    System.out.println("WARNING!! Independent components could not be built, returning original data");
    return m_trainCopy;
  }

  Instances header = m_transBackToOriginal ? m_originalSpaceFormat : m_transformedFormat;
  Instances result = new Instances(header);

  int idx = 0;
  while (idx < m_trainCopy.numInstances()) {
    Instance transformed = convertInstance(m_trainCopy.instance(idx));
    System.out.println("Converted instance: " + transformed);
    result.add(transformed);
    idx++;
  }
  return result;
}
/**
 * Evaluates the merit of a transformed attribute. When the filtered
 * training data has no class index the merit is the constant 1; otherwise
 * the question is passed on to the configured attribute evaluator. A
 * warning is printed if the components have not been built.
 * @param att the attribute to be evaluated
 * @return the merit of a transformed attribute
 * @exception Exception if the configured evaluator is not an
 * AttributeEvaluator, or if it fails
 */
public double evaluateAttribute(int att) throws Exception {
  if (m_independentComponents == null) {
    System.out.println("WARNING!! Independent components could not be built, returning original data");
  }
  boolean usableEvaluator = m_eval instanceof AttributeEvaluator;
  if (!usableEvaluator) {
    throw new Exception("Invalid attribute evaluator!");
  }
  if (m_trainInstances.classIndex() >= 0) {
    AttributeEvaluator delegate = (AttributeEvaluator) m_eval;
    return delegate.evaluateAttribute(att);
  }
  return 1;
}
/**
 * Dump the filtered data matrix into a file, one instance per line with
 * space-separated attribute values. Failures are reported on standard error
 * and otherwise ignored (best effort).
 * @param tempFile the file to write the matrix to
 */
private void dumpInstances(String tempFile) {
  PrintWriter writer = null;
  try {
    writer = new PrintWriter(new BufferedOutputStream(new FileOutputStream(tempFile)));
    for (int k = 0; k < m_numInstances; k++) {
      Instance instance = m_trainInstances.instance(k);
      for (int j = 0; j < m_numAttribs; j++) {
        writer.print(instance.value(j) + " ");
      }
      writer.println();
    }
    // PrintWriter never throws on write errors; it has to be asked.
    if (writer.checkError()) {
      System.err.println("Could not create a temporary file for dumping the data matrix: "
                         + "write error on " + tempFile);
    }
  } catch (Exception e) {
    System.err.println("Could not create a temporary file for dumping the data matrix: " + e);
  } finally {
    // Release the file handle even if writing failed half-way.
    if (writer != null) {
      writer.close();
    }
  }
}
/**
 * Create matlab m-file for ICA. The script loads the data matrix from
 * m_dataFilename, optionally loads precomputed PCA eigenvalues/eigenvectors,
 * calls fastica with the configured approach, function and number of
 * components, and saves the three result matrices (each preceded by its row
 * and column count) to their output files. Failures are reported on
 * standard error and otherwise ignored (best effort).
 * @param filename file where matlab script is created
 */
public void prepareMatlab(String filename) {
  PrintWriter writer = null;
  try {
    writer = new PrintWriter(new BufferedOutputStream(new FileOutputStream(filename)));
    writer.println("addpath /var/local;");
    // Load from the file the data matrix is written to, rather than a
    // hard-coded path, so that changing m_dataFilename keeps working.
    writer.println("DATA = load('" + m_dataFilename + "');");
    if (m_loadEigenValuesFromFile) {
      writer.println("E = load('/var/local/PCAeigenValues.txt');");
    }
    if (m_loadEigenVectorsFromFile) {
      writer.println("V = load('/var/local/PCAeigenVectors.txt');");
    }

    writer.print("[IC, A, invA] = fastica(DATA'"); // take transpose of data matrix, to make each instance a column
    if (m_loadEigenValuesFromFile) {
      writer.print(",'pcaD',E");
    }
    if (m_loadEigenVectorsFromFile) {
      writer.print(",'pcaE',V");
    }
    writer.print(",'approach','" + TAGS_APPROACH[m_ICAapproach].getReadable() + "'");
    writer.print(",'g','" + TAGS_FUNCTION[m_ICAfunction].getReadable() + "'");
    writer.print(",'numOfIC'," + m_NumIndependentComponents);
    writer.println(",'displayMode','off','stabilization','on');");

    // The dimensions are saved in front of each matrix; the reader relies
    // on them to size its arrays.
    writer.println("[ICnumRows, ICnumCols] = size(IC);");
    writer.println("[AnumRows, AnumCols] = size(A);");
    writer.println("[invAnumRows, invAnumCols] = size(invA);\n");
    writer.println("save " + m_mixingMatrixFilename + " AnumRows AnumCols A -ASCII -DOUBLE");
    writer.println("save " + m_inverseMixingMatrixFilename + " invAnumRows invAnumCols invA -ASCII -DOUBLE");
    writer.println("save " + m_independentComponentsFilename + " ICnumRows ICnumCols IC -ASCII -DOUBLE");

    // PrintWriter never throws on write errors; it has to be asked.
    if (writer.checkError()) {
      System.err.println("Could not create matlab file: write error on " + filename);
    }
  }
  catch (Exception e) {
    System.err.println("Could not create matlab file: " + e);
  }
  finally {
    // Release the file handle even if writing failed half-way.
    if (writer != null) {
      writer.close();
    }
  }
}
/** Run matlab in command line with a given argument.
 *
 * The command relies on input/output redirection, which only a shell
 * understands, so it is handed to /bin/sh instead of being executed
 * directly. Standard error is merged into the output file so the child
 * process can never block on an unread pipe.
 *
 * While matlab exits with a non-zero status the run is retried every five
 * minutes. Any other problem is reported on standard error.
 *
 * @param inFile file to be input to Matlab
 * @param outFile file where results are stored
 */
public static void runMatlab(String inFile, String outFile) {
  // call matlab to do the dirty work
  try {
    String[] shellCommand = new String[] {
      "/bin/sh", "-c",
      "matlab -tty < " + shellQuote(inFile) + " > " + shellQuote(outFile) + " 2>&1"
    };
    int exitValue;
    do {
      System.out.println("Starting to run matlab");
      Process proc = Runtime.getRuntime().exec(shellCommand);
      exitValue = proc.waitFor();
      if (exitValue != 0) {
        System.err.println("WARNING!!!!! Matlab returned exit value " + exitValue
                           + ", trying again in 5 mins!");
        Thread.sleep(300000);
      }
    } while (exitValue != 0);
    System.out.println("End of running matlab, exitValue = " + exitValue);
  }
  catch (InterruptedException e) {
    // Preserve the interrupt for whoever owns this thread.
    Thread.currentThread().interrupt();
    System.err.println("Problems running matlab: " + e);
  }
  catch (Exception e) {
    System.err.println("Problems running matlab: " + e);
  }
}

/** Wraps a file name in single quotes so the shell treats it as one literal word. */
private static String shellQuote(String arg) {
  StringBuffer quoted = new StringBuffer("'");
  for (int i = 0; i < arg.length(); i++) {
    char c = arg.charAt(i);
    if (c == '\'') {
      quoted.append("'\\''"); // close quote, escaped quote, reopen quote
    } else {
      quoted.append(c);
    }
  }
  return quoted.append('\'').toString();
}
/**
 * Return a summary of the analysis. At present the summary consists only of
 * the description of the attribute evaluator; building it has no side
 * effects.
 * @return a summary of the analysis.
 */
private String independentComponentsSummary() {
  StringBuffer result = new StringBuffer();
  // Todo: Add IC summary to result string
  result.append("\nAttribute ranking filter:\n");
  result.append(m_eval.toString());
  return result.toString();
}
/**
 * Describes this attribute transformer: a fixed notice while the
 * components have not been built, a title plus summary afterwards.
 * @return a String describing this attribute transformer
 */
public String toString() {
  if (m_independentComponents != null) {
    String title = "\tIndependent Components Attribute Transformer\n\n";
    return title + independentComponentsSummary();
  }
  return "Independent components hasn't been built yet!";
}
/**
 * Return a matrix as a String, one line per row, each value formatted with
 * width 6 and 2 decimals and followed by a space. Rows may have any length;
 * the matrix does not have to be square.
 * @param matrix the matrix to describe as a string
 * @return a String describing a matrix
 */
private String matrixToString(double [][] matrix) {
  StringBuffer result = new StringBuffer();
  for (int i = 0; i < matrix.length; i++) {
    double[] row = matrix[i];
    // Bound the column loop by this row's own length, not by the row count.
    for (int j = 0; j < row.length; j++) {
      result.append(Utils.doubleToString(row[j],6,2)+" ");
    }
    result.append('\n');
  }
  return result.toString();
}
/**
 * Convert a ic transformed instance back to the original space by
 * multiplying its component values with the mixing matrix. If the data has
 * a class, the class value (the last value of the input) is carried over as
 * the last value of the result.
 * @param inst an instance in IC space, with the class value last if the
 * data has a class
 * @return the instance in the original attribute space
 * @exception Exception if the instance can't be converted
 */
private Instance convertInstanceToOriginal(Instance inst)
  throws Exception {
  double[] newVals = new double[m_hasClass ? m_numAttribs + 1 : m_numAttribs];

  // The class attribute, when present, occupies the last output slot and is
  // not a component. Without a class every output attribute is a component.
  int numComponents = m_hasClass ? m_outputNumAtts - 1 : m_outputNumAtts;

  if (m_hasClass) {
    // class is always appended as the last attribute
    newVals[m_numAttribs] = inst.value(inst.numAttributes() - 1);
  }

  for (int i = 0; i < m_numAttribs; i++) {
    for (int j = 0; j < numComponents; j++) {
      newVals[i] += m_mixingMatrix[i][j] * inst.value(j);
    }
  }

  if (inst instanceof SparseInstance) {
    return new SparseInstance(inst.weight(), newVals);
  } else {
    return new Instance(inst.weight(), newVals);
  }
}
/**
 * Transform an instance in original (unnormalized) format. The instance is
 * passed through the same filters as the training data, projected with the
 * inverse mixing matrix, and converted back to the original space if
 * requested. If the components have not been built, a warning is printed
 * and an unmodified copy of the instance is returned.
 * @param instance an instance in the original (unnormalized) format
 * @return a transformed instance
 * @exception Exception if instance can't be transformed
 */
public Instance convertInstance(Instance instance) throws Exception {
  if (m_independentComponents == null || m_inverseMixingMatrix == null) {
    // Do what the warning says instead of failing further down on the
    // missing matrix.
    System.out.println("WARNING!! Independent components could not be built, returning original data");
    return (Instance)instance.copy();
  }

  if (!instance.equalHeaders(m_trainCopy.instance(0))) {
    throw new Exception("Can't convert instance: header's don't match: MatlabICA");
  }

  // Run a private copy through the filters fitted on the training data.
  Instance tempInst = (Instance)instance.copy();
  m_replaceMissingFilter.input(tempInst);
  m_replaceMissingFilter.batchFinished();
  tempInst = m_replaceMissingFilter.output();
  if (m_normalize) {
    m_normalizeFilter.input(tempInst);
    m_normalizeFilter.batchFinished();
    tempInst = m_normalizeFilter.output();
  }
  if (m_attributeFilter != null) {
    m_attributeFilter.input(tempInst);
    m_attributeFilter.batchFinished();
    tempInst = m_attributeFilter.output();
  }

  double[] newVals = new double[m_outputNumAtts];
  // The class attribute, when present, occupies the last output slot and is
  // not a component. Without a class every output attribute is a component.
  int numComponents = m_hasClass ? m_outputNumAtts - 1 : m_outputNumAtts;
  if (m_hasClass) {
    newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());
  }
  for (int i = 0; i < numComponents; i++) {
    for (int j = 0; j < m_numAttribs; j++) {
      newVals[i] += (m_inverseMixingMatrix[i][j] * tempInst.value(j));
    }
  }

  Instance transformed;
  if (instance instanceof SparseInstance) {
    transformed = new SparseInstance(instance.weight(), newVals);
  } else {
    transformed = new Instance(instance.weight(), newVals);
  }
  if (m_transBackToOriginal) {
    return convertInstanceToOriginal(transformed);
  }
  return transformed;
}
/**
 * Builds the header for the IC->original space dataset: one numeric
 * attribute per filtered training attribute (same names), followed by a
 * copy of the class attribute if the data has a class.
 * @return a set of empty Instances (header only) in the original-space format
 * @exception Exception if the header can't be built
 */
private Instances setOutputFormatOriginal() throws Exception {
  FastVector atts = new FastVector();
  int idx = 0;
  while (idx < m_numAttribs) {
    atts.addElement(new Attribute(m_trainInstances.attribute(idx).name()));
    idx++;
  }
  if (m_hasClass) {
    atts.addElement(m_trainCopy.classAttribute().copy());
  }

  String relation = m_trainCopy.relationName()+"->IC->original space";
  Instances header = new Instances(relation, atts, 0);

  // the class, if any, is always the last attribute
  if (m_hasClass) {
    header.setClassIndex(header.numAttributes()-1);
  }
  return header;
}
/**
 * Builds the header for the transformed data: one numeric attribute per
 * row of the inverse mixing matrix, followed by a copy of the class
 * attribute if the data has a class. Also records (and prints) the
 * resulting number of output attributes.
 * @return a set of empty Instances (header only) in the new format, or
 * null if the components have not been built
 * @exception Exception if the output format can't be set
 */
private Instances setOutputFormat() throws Exception {
  if (m_independentComponents == null) {
    return null;
  }

  FastVector atts = new FastVector();
  int numComponents = m_inverseMixingMatrix.length;
  for (int c = 0; c < numComponents; c++) {
    atts.addElement(new Attribute("ICAattribute" + c));
  }
  if (m_hasClass) {
    atts.addElement(m_trainCopy.classAttribute().copy());
  }

  String relation = m_trainInstances.relationName()+"_independent components";
  Instances header = new Instances(relation, atts, 0);

  // the class, if any, is always the last attribute
  if (m_hasClass) {
    header.setClassIndex(header.numAttributes()-1);
  }

  m_outputNumAtts = header.numAttributes();
  System.out.println("m_outputNumAtts: " + m_outputNumAtts);
  return header;
}
/** Get a timestamp string as a weak uniqueid, taken from the current time
 * in the default time zone.
 * @return a timestamp string in the form "MMddHHmmssS" (month, day, hour,
 * minute, second, millisecond)
 */
public static String getLogTimestamp() {
  TimeZone zone = TimeZone.getDefault();
  java.text.SimpleDateFormat formatter = new java.text.SimpleDateFormat("MMddHHmmssS");
  formatter.setTimeZone(zone);
  Date now = Calendar.getInstance(zone).getTime();
  return formatter.format(now);
}
/**
 * Main method for testing this class: loads a dataset, uses its last
 * attribute as the class, and runs a two-component analysis on it.
 * @param argv optionally one argument, the name of the ARFF file to load;
 * with any other number of arguments a default file is used
 */
public static void main(String [] argv) {
  try {
    String name = "/u/ml/software/weka-latest/data/iris.arff";
    if (argv.length != 1) {
      System.err.println("No data filename given as argument, running on default file " + name);
    } else {
      name = argv[0];
    }

    Reader r = new BufferedReader(new FileReader(name));
    Instances data = new Instances(r);
    int lastAttribute = data.numAttributes() - 1;
    data.setClassIndex(lastAttribute);

    MatlabICA mica = new MatlabICA();
    mica.setNumIndependentComponents(2);
    mica.buildEvaluator(data);
    mica.transformedData();
  }
  catch (Exception e) {
    e.printStackTrace();
    System.out.println(e.getMessage());
  }
}
}