/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
S. Garc�a (sglopez@ujaen.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Juli�n Luengo Mart�n 09/10/2007
* @version 0.3
* @since JDK 1.5
* </p>
*/
package keel.Algorithms.SVM.C_SVM;
import java.io.*;
import java.util.*;
import keel.Dataset.*;
import keel.Algorithms.Preprocess.Basic.*;
import org.libsvm.*;
/**
* <p>
* This class is a wrapper to the LibSVM C-SVM classifier, in order to operate with KEEL data sets and parameters.
* </p>
*/
public class svmClassifier {
/*TODO - use the libSVM from org package*/
double[] mean = null;
double[] std_dev = null;
double tempData = 0;
public double [][] probabilities = null;
String[][] X = null; // matrix of transformed data
// values
String[] mostCommon;
int ndatos = 0;
int nentradas = 0;
int tipo = 0;
int direccion = 0;
int nvariables = 0;
int nsalidas = 0;
int nneigh = 1; // number of neighbours
InstanceSet IS;
InstanceSet ISval;
String input_train_name = new String();
String input_validation_name;
String input_test_name = new String();
String output_train_name = new String();
String output_test_name = new String();
String temp = new String();
String data_out = new String("");
String svmType;
String kernelType;
double C;
double eps;
int degree;
double gamma;
double coef0;
double nu;
double p;
int shrinking;
int probability = 0;
long seed;
int nr_weight = 0;
/** Creates a new instance of svmClassifier
* @param fileParam The path to the configuration file with all the parameters in KEEL format
*/
public svmClassifier(String fileParam) {
config_read(fileParam);
IS = new InstanceSet();
ISval = new InstanceSet();
}
// Write data matrix X to disk, in KEEL format
private void write_results(String output) {
// File OutputFile = new File(output_train_name.substring(1,
// output_train_name.length()-1));
try {
FileWriter file_write = new FileWriter(output);
file_write.write(IS.getHeader());
// now, print the normalized data
file_write.write("@data\n");
for (int i = 0; i < ndatos; i++) {
file_write.write(X[i][0]);
for (int j = 1; j < 2; j++) {
file_write.write(" " + X[i][j]);
}
file_write.write("\n");
}
file_write.close();
} catch (IOException e) {
System.out.println("IO exception = " + e);
System.exit( -1);
}
}
private void config_read(String fileParam) {
parseParameters parameters;
parameters = new parseParameters();
parameters.parseConfigurationFile(fileParam);
input_train_name = parameters.getTrainingInputFile();
input_validation_name = parameters.getValidationInputFile();
input_test_name = parameters.getTestInputFile();
output_train_name = parameters.getTrainingOutputFile();
output_test_name = parameters.getTestOutputFile();
seed = Long.parseLong(parameters.getParameter(0));
kernelType = parameters.getParameter(1);
C = Double.parseDouble(parameters.getParameter(2));
eps = Double.parseDouble(parameters.getParameter(3));
degree = Integer.parseInt(parameters.getParameter(4));
gamma = Double.parseDouble(parameters.getParameter(5));
coef0 = Double.parseDouble(parameters.getParameter(6));
nu = Double.parseDouble(parameters.getParameter(7));
p = Double.parseDouble(parameters.getParameter(8));
shrinking = Integer.parseInt(parameters.getParameter(9));
}
/**
* <p>
* Process the training and test files provided in the parameters file to the constructor.
* </p>
*/
public void process() {
double[] outputs;
double[] outputs2;
Instance neighbor;
double dist, mean;
int actual;
int[] N = new int[nneigh];
double[] Ndist = new double[nneigh];
boolean allNull;
svm_problem SVMp = null;
svm_parameter SVMparam = new svm_parameter();
svm_model svr = null;
svm_node SVMn[];
double[] outputsCandidate = null;
boolean same = true;
Vector instancesSelected = new Vector();
Vector instancesSelected2 = new Vector();
//SVM PARAMETERS
SVMparam.C = C;
SVMparam.cache_size = 10; //10MB of cache
SVMparam.degree = degree;
SVMparam.eps = eps;
SVMparam.gamma = gamma;
SVMparam.nr_weight = 0;
SVMparam.nu = nu;
SVMparam.p = p;
SVMparam.shrinking = shrinking;
SVMparam.probability = 0;
if (kernelType.compareTo("LINEAR") == 0) {
SVMparam.kernel_type = svm_parameter.LINEAR;
} else if (kernelType.compareTo("POLY") == 0) {
SVMparam.kernel_type = svm_parameter.POLY;
} else if (kernelType.compareTo("RBF") == 0) {
SVMparam.kernel_type = svm_parameter.RBF;
} else if (kernelType.compareTo("SIGMOID") == 0) {
SVMparam.kernel_type = svm_parameter.SIGMOID;
}
//if(svmType.compareTo("C_SVC")==0){
SVMparam.svm_type = svm_parameter.C_SVC;
/*}else if(svmType.compareTo("NU_SVC")==0){
SVMparam.svm_type = svm_parameter.NU_SVC;
}*/
try {
// Load in memory a dataset that contains a classification problem
IS.readSet(input_train_name, true);
int in = 0;
int out = 0;
ndatos = IS.getNumInstances();
nvariables = Attributes.getNumAttributes();
nentradas = Attributes.getInputNumAttributes();
nsalidas = Attributes.getOutputNumAttributes();
X = new String[ndatos][2]; // matrix with transformed data
mostCommon = new String[nvariables];
SVMp = new svm_problem();
SVMp.l = ndatos;
SVMp.y = new double[SVMp.l];
SVMp.x = new svm_node[SVMp.l][nentradas + 1];
for (int l = 0; l < SVMp.l; l++) {
for (int n = 0; n < Attributes.getInputNumAttributes() + 1; n++) {
SVMp.x[l][n] = new svm_node();
}
}
for (int i = 0; i < ndatos; i++) {
Instance inst = IS.getInstance(i);
SVMp.y[i] = inst.getAllOutputValues()[0];
for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
SVMp.x[i][n].index = n;
SVMp.x[i][n].value = inst.getAllInputValues()[n];
SVMp.y[i] = inst.getAllOutputValues()[0];
}
//end of instance
SVMp.x[i][nentradas].index = -1;
}
if (svm.svm_check_parameter(SVMp, SVMparam) != null) {
System.err.print("SVM parameter error in training: ");
System.err.println(svm.svm_check_parameter(SVMp, SVMparam));
System.exit( -1);
}
//train the SVM
if (ndatos > 0) {
svr = svm.svm_train(SVMp, SVMparam);
}
ISval.readSet(input_validation_name, false);
ndatos = ISval.getNumInstances();
nvariables = Attributes.getNumAttributes();
nentradas = Attributes.getInputNumAttributes();
nsalidas = Attributes.getOutputNumAttributes();
for (int i = 0; i < ISval.getNumInstances(); i++) {
Instance inst = ISval.getInstance(i);
Attribute a = Attributes.getOutputAttribute(0);
direccion = a.getDirectionAttribute();
tipo = a.getType();
if (tipo != Attribute.NOMINAL) {
X[i][0] = new String(""+(int) ISval.getOutputNumericValue(i, 0));
//new String(String.valueOf((int) inst.getAllOutputValues()[0]));
} else {
X[i][0] = ISval.getOutputNominalValue(i, 0); //new String(inst.getOutputNominalValues(0));
}
// the values used for regression
SVMn = new svm_node[Attributes.getInputNumAttributes() + 1];
for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
SVMn[n] = new svm_node();
SVMn[n].index = n;
SVMn[n].value = inst.getAllInputValues()[n];
}
SVMn[nentradas] = new svm_node();
SVMn[nentradas].index = -1;
//pedict the class
if (tipo != Attribute.NOMINAL) {
X[i][1] = new String(String.valueOf((int) Math.round(svm.
svm_predict(svr, SVMn))));
} else {
X[i][1] = new String(a.getNominalValue((int) Math.round(svm.
svm_predict(svr, SVMn))));
}
}
} catch (Exception e) {
System.out.println("Dataset exception = " + e);
e.printStackTrace();
System.exit( -1);
}
write_results(output_train_name);
/** ************************************************************************************ */
try {
// Load in memory a dataset that contains a classification
// problem
IS.readSet(input_test_name, false);
int in = 0;
int out = 0;
ndatos = IS.getNumInstances();
nvariables = Attributes.getNumAttributes();
nentradas = Attributes.getInputNumAttributes();
nsalidas = Attributes.getOutputNumAttributes();
X = new String[ndatos][2]; // matrix with transformed data
// data
this.probabilities = new double[ndatos][nsalidas];
mostCommon = new String[nvariables];
for (int i = 0; i < ndatos; i++) {
Instance inst = IS.getInstance(i);
Attribute a = Attributes.getOutputAttribute(0);
direccion = a.getDirectionAttribute();
tipo = a.getType();
if (tipo != Attribute.NOMINAL) {
X[i][0] = new String(""+(int) IS.getOutputNumericValue(i, 0));
//new String(String.valueOf((int) inst.getAllOutputValues()[0]));
} else {
X[i][0] = IS.getOutputNominalValue(i, 0); //new String(inst.getOutputNominalValues(0));
}
SVMn = new svm_node[Attributes.getInputNumAttributes() + 1];
for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
SVMn[n] = new svm_node();
SVMn[n].index = n;
SVMn[n].value = inst.getAllInputValues()[n];
}
SVMn[nentradas] = new svm_node();
SVMn[nentradas].index = -1;
//pedict the class
if (tipo != Attribute.NOMINAL) {
X[i][1] = new String(String.valueOf((int) Math.round(svm.
svm_predict(svr, SVMn))));
} else {
X[i][1] = new String(a.getNominalValue((int) Math.round(svm.
svm_predict(svr, SVMn))));
}
// Added by Isaac and José Antonio para SSL.
System.out.println("Calla cojones: "+probabilities[i][0]);
probabilities[i]=svm.probabilitiesPerClass.clone();
}
} catch (Exception e) {
System.out.println("Dataset exception = " + e);
e.printStackTrace();
System.exit( -1);
}
write_results(output_test_name);
}
/**
* <p>
* Process the training and test files provided in the parameters file to the constructor.
* </p>
*/
public void process(InstanceSet train, InstanceSet test) {
double[] outputs;
double[] outputs2;
Instance neighbor;
double dist, mean;
int actual;
int[] N = new int[nneigh];
double[] Ndist = new double[nneigh];
boolean allNull;
svm_problem SVMp = null;
svm_parameter SVMparam = new svm_parameter();
svm_model svr = null;
svm_node SVMn[];
double[] outputsCandidate = null;
boolean same = true;
Vector instancesSelected = new Vector();
Vector instancesSelected2 = new Vector();
//SVM PARAMETERS
SVMparam.C = C;
SVMparam.cache_size = 10; //10MB of cache
SVMparam.degree = degree;
SVMparam.eps = eps;
SVMparam.gamma = gamma;
SVMparam.nr_weight = 0;
SVMparam.nu = nu;
SVMparam.p = p;
SVMparam.shrinking = shrinking;
SVMparam.probability = 0;
if (kernelType.compareTo("LINEAR") == 0) {
SVMparam.kernel_type = svm_parameter.LINEAR;
} else if (kernelType.compareTo("POLY") == 0) {
SVMparam.kernel_type = svm_parameter.POLY;
} else if (kernelType.compareTo("RBF") == 0) {
SVMparam.kernel_type = svm_parameter.RBF;
} else if (kernelType.compareTo("SIGMOID") == 0) {
SVMparam.kernel_type = svm_parameter.SIGMOID;
}
//if(svmType.compareTo("C_SVC")==0){
SVMparam.svm_type = svm_parameter.C_SVC;
/*}else if(svmType.compareTo("NU_SVC")==0){
SVMparam.svm_type = svm_parameter.NU_SVC;
}*/
try {
// Load in memory a dataset that contains a classification problem
//IS.readSet(input_train_name, true);
IS=train;
int in = 0;
int out = 0;
ndatos = IS.getNumInstances();
nvariables = Attributes.getNumAttributes();
nentradas = Attributes.getInputNumAttributes();
nsalidas = IS.getAttributeDefinitions().getOutputAttribute(0).getNumNominalValues(); // modification by Isaac.
System.out.println("nsalidas = " + nsalidas);
X = new String[ndatos][2]; // matrix with transformed data
mostCommon = new String[nvariables];
SVMp = new svm_problem();
SVMp.l = ndatos;
SVMp.y = new double[SVMp.l];
SVMp.x = new svm_node[SVMp.l][nentradas + 1];
for (int l = 0; l < SVMp.l; l++) {
for (int n = 0; n < Attributes.getInputNumAttributes() + 1; n++) {
SVMp.x[l][n] = new svm_node();
}
}
for (int i = 0; i < ndatos; i++) {
Instance inst = IS.getInstance(i);
SVMp.y[i] = inst.getAllOutputValues()[0];
for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
SVMp.x[i][n].index = n;
SVMp.x[i][n].value = inst.getAllInputValues()[n];
SVMp.y[i] = inst.getAllOutputValues()[0];
}
//end of instance
SVMp.x[i][nentradas].index = -1;
}
if (svm.svm_check_parameter(SVMp, SVMparam) != null) {
System.err.print("SVM parameter error in training: ");
System.err.println(svm.svm_check_parameter(SVMp, SVMparam));
System.exit( -1);
}
//train the SVM
if (ndatos > 0) {
svr = svm.svm_train(SVMp, SVMparam);
}
//ISval.readSet(input_validation_name, false);
ISval= new InstanceSet(train);
ndatos = ISval.getNumInstances();
nvariables = Attributes.getNumAttributes();
nentradas = Attributes.getInputNumAttributes();
nsalidas = ISval.getAttributeDefinitions().getOutputAttribute(0).getNumNominalValues();
for (int i = 0; i < ISval.getNumInstances(); i++) {
Instance inst = ISval.getInstance(i);
Attribute a = Attributes.getOutputAttribute(0);
direccion = a.getDirectionAttribute();
tipo = a.getType();
if (tipo != Attribute.NOMINAL) {
X[i][0] = new String(""+(int) ISval.getOutputNumericValue(i, 0));
//new String(String.valueOf((int) inst.getAllOutputValues()[0]));
} else {
X[i][0] = ISval.getOutputNominalValue(i, 0); //new String(inst.getOutputNominalValues(0));
}
// the values used for regression
SVMn = new svm_node[Attributes.getInputNumAttributes() + 1];
for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
SVMn[n] = new svm_node();
SVMn[n].index = n;
SVMn[n].value = inst.getAllInputValues()[n];
}
SVMn[nentradas] = new svm_node();
SVMn[nentradas].index = -1;
//pedict the class
if (tipo != Attribute.NOMINAL) {
X[i][1] = new String(String.valueOf((int) Math.round(svm.
svm_predict(svr, SVMn))));
} else {
X[i][1] = new String(a.getNominalValue((int) Math.round(svm.
svm_predict(svr, SVMn))));
}
}
} catch (Exception e) {
System.out.println("Dataset exception = " + e);
e.printStackTrace();
System.exit( -1);
}
write_results(output_train_name);
/** ************************************************************************************ */
try {
// Load in memory a dataset that contains a classification
// problem
//IS.readSet(input_test_name, false);
IS= new InstanceSet(test);
int in = 0;
int out = 0;
ndatos = IS.getNumInstances();
nvariables = Attributes.getNumAttributes();
nentradas = Attributes.getInputNumAttributes();
nsalidas = Attributes.getOutputNumAttributes();
X = new String[ndatos][2]; // matrix with transformed data
// data
this.probabilities = new double[ndatos][nsalidas];
mostCommon = new String[nvariables];
for (int i = 0; i < ndatos; i++) {
Instance inst = IS.getInstance(i);
Attribute a = Attributes.getOutputAttribute(0);
direccion = a.getDirectionAttribute();
tipo = a.getType();
if (tipo != Attribute.NOMINAL) {
X[i][0] = new String(""+(int) IS.getOutputNumericValue(i, 0));
//new String(String.valueOf((int) inst.getAllOutputValues()[0]));
} else {
X[i][0] = IS.getOutputNominalValue(i, 0); //new String(inst.getOutputNominalValues(0));
}
SVMn = new svm_node[Attributes.getInputNumAttributes() + 1];
for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
SVMn[n] = new svm_node();
SVMn[n].index = n;
SVMn[n].value = inst.getAllInputValues()[n];
}
SVMn[nentradas] = new svm_node();
SVMn[nentradas].index = -1;
//pedict the class
if (tipo != Attribute.NOMINAL) {
X[i][1] = new String(String.valueOf((int) Math.round(svm.
svm_predict(svr, SVMn))));
} else {
X[i][1] = new String(a.getNominalValue((int) Math.round(svm.
svm_predict(svr, SVMn))));
}
// Added by Isaac and José Antonio para SSL.
System.out.println("Calla cojones: "+probabilities[i][0]);
probabilities[i]=svm.probabilitiesPerClass.clone();
}
} catch (Exception e) {
System.out.println("Dataset exception = " + e);
e.printStackTrace();
System.exit( -1);
}
write_results(output_test_name);
}
}