/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* ConverterLibSVM.java
* Copyright (C) 2009-2010 Aristotle University of Thessaloniki, Thessaloniki, Greece
*/
package mulan.data;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.StringTokenizer;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.SparseInstance;
import weka.core.Utils;
/**
* Class that converts LibSVM multi-label data sets to Mulan compatible format <p>
*
* @author Grigorios Tsoumakas
* @version $Revision: 0.01 $
*/
public class ConverterLibSVM {
/**
* Converts a multi-label dataset from LibSVM format to the format
* that is compatible with Mulan. It constructs one ARFF and one XML file.
*
* @param path the directory that contains the source file and will contain
* the target files
* @param sourceFilename the name of the source file
* @param relationName the relation name of the arff file that will be
* constructed
* @param targetFilestem the filestem for the target files (.arff and .xml)
*/
public static void convertFromLibSVM(String path, String sourceFilename, String targetFilestem, String relationName) {
BufferedReader aReader = null;
BufferedWriter aWriter = null;
int numLabels = 0;
int numAttributes = 0;
int numInstances = 0;
double meanParsedAttributes = 0;
// Calculate number of labels and attributes
String Line = null;
try {
aReader = new BufferedReader(new FileReader(path + sourceFilename));
while ((Line = aReader.readLine()) != null) {
numInstances++;
StringTokenizer strTok = new StringTokenizer(Line, " ");
while (strTok.hasMoreTokens()) {
String token = strTok.nextToken();
if (token.indexOf(":") == -1) {
// parse label info
StringTokenizer labelTok = new StringTokenizer(token, ",");
while (labelTok.hasMoreTokens()) {
String strLabel = labelTok.nextToken();
int intLabel = Integer.parseInt(strLabel);
if (intLabel > numLabels) {
numLabels = intLabel;
}
}
} else {
// parse attribute info
meanParsedAttributes++;
StringTokenizer attrTok = new StringTokenizer(token, ":");
String strAttrIndex = attrTok.nextToken();
int intAttrIndex = Integer.parseInt(strAttrIndex);
if (intAttrIndex > numAttributes) {
numAttributes = intAttrIndex;
}
}
}
}
numLabels++;
System.out.println("Number of attributes: " + numAttributes);
System.out.println("Number of instances: " + numInstances);
System.out.println("Number of classes: " + numLabels);
System.out.println("Constructing XML file... ");
LabelsMetaDataImpl meta = new LabelsMetaDataImpl();
for (int label = 0; label < numLabels; label++) {
meta.addRootNode(new LabelNodeImpl("Label" + (label + 1)));
}
String labelsFilePath = path + targetFilestem + ".xml";
try {
LabelsBuilder.dumpLabels(meta, labelsFilePath);
System.out.println("Done!");
} catch (LabelsBuilderException e) {
File labelsFile = new File(labelsFilePath);
if (labelsFile.exists()) {
labelsFile.delete();
}
System.out.println("Construction of labels XML failed!");
}
meanParsedAttributes /= numInstances;
boolean Sparse = false;
if (meanParsedAttributes < numAttributes) {
Sparse = true;
System.out.println("Dataset is sparse.");
}
// Define Instances class to hold data
ArrayList<Attribute> attInfo = new ArrayList<Attribute>(numAttributes + numLabels);
Attribute[] att = new Attribute[numAttributes + numLabels];
for (int i = 0; i < numAttributes; i++) {
att[i] = new Attribute("Att" + (i + 1));
attInfo.add(att[i]);
}
ArrayList<String> ClassValues = new ArrayList<String>(2);
ClassValues.add("0");
ClassValues.add("1");
for (int i = 0; i < numLabels; i++) {
att[numAttributes + i] = new Attribute("Label" + (i + 1), ClassValues);
attInfo.add(att[numAttributes + i]);
}
// Re-read file and convert into multi-label arff
int countInstances = 0;
aWriter = new BufferedWriter(new FileWriter(path + targetFilestem + ".arff"));
Instances data = new Instances(relationName, attInfo, 0);
aWriter.write(data.toString());
aReader = new BufferedReader(new FileReader(path + sourceFilename));
while ((Line = aReader.readLine()) != null) {
countInstances++;
// set all values to 0
double[] attValues = new double[numAttributes + numLabels];
Arrays.fill(attValues, 0);
Instance tempInstance = new DenseInstance(1, attValues);
tempInstance.setDataset(data);
// separate class info from attribute info
// ensure class info exists
StringTokenizer strTok = new StringTokenizer(Line, " ");
while (strTok.hasMoreTokens()) {
String token = strTok.nextToken();
if (token.indexOf(":") == -1) {
// parse label info
StringTokenizer labelTok = new StringTokenizer(token, ",");
while (labelTok.hasMoreTokens()) {
String strLabel = labelTok.nextToken();
int intLabel = Integer.parseInt(strLabel);
tempInstance.setValue(numAttributes + intLabel, 1);
}
} else {
// parse attribute info
StringTokenizer AttrTok = new StringTokenizer(token, ":");
String strAttrIndex = AttrTok.nextToken();
String strAttrValue = AttrTok.nextToken();
tempInstance.setValue(Integer.parseInt(strAttrIndex) - 1, Double.parseDouble(strAttrValue));
}
}
if (Sparse) {
SparseInstance tempSparseInstance = new SparseInstance(tempInstance);
aWriter.write(tempSparseInstance.toString() + "\n");
} else {
aWriter.write(tempInstance.toString() + "\n");
}
}
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (aReader != null) {
aReader.close();
}
if (aWriter != null) {
aWriter.close();
}
} catch (IOException ex) {
ex.printStackTrace();
}
}
}
private static void createLabelsMetadataFile(String filePath, int numLabels) throws LabelsBuilderException {
}
/**
* Command line interface for the converter
*
* @param args command line arguments
*/
public static void main(String[] args) {
String path = null;
String source = null;
String target = null;
String relationName = "LibSVM";
try {
path = Utils.getOption("path", args);
source = Utils.getOption("source", args);
target = Utils.getOption("target", args);
relationName = Utils.getOption("name", args);
ConverterLibSVM.convertFromLibSVM(path, source, target, relationName);
} catch (Exception e) {
e.printStackTrace();
}
}
}