/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Pedro González (University of Jaen) 15/08/2004
* @author Modified by Cristóbal J. Carmona (University of Jaen) 15/04/2010
* @version 1.0
* @since JDK1.5
* </p>
*/
package keel.Algorithms.Subgroup_Discovery.SDIGA.Calculate;
import keel.Dataset.*;
import org.core.*;
import java.io.IOException;
import java.util.Vector;
import java.util.StringTokenizer;
public class Calculate {
/**
* <p>
* Calculates the values of the subgroup discovery quality measures
* for the rules extracted by the algorithm.
* </p>
*/
/**
* <p>
* Marks the value of one variable of one example as lost.
* A lost value is encoded as the maximum value of the variable plus one.
* </p>
* @param example Position of the example in the dataset
* @param pos Position of the variable
**/
public static void setLost (int example, int pos) {
    // Resolve the example's value array first, then store the "lost" code in it
    float[] values = StCalculate.tabla[example].ejemplo;
    values[pos] = StCalculate.var[pos].max + 1;
}
/**
* <p>
* Tells whether the value of one variable of one example is a lost value.
* A lost value is encoded as the maximum value of the variable plus one.
* </p>
* @param example Position of the example in the dataset
* @param pos Position of the variable
* @return true if the stored value equals the lost-value code, false otherwise
**/
public static boolean getLost (int example, int pos) {
    float stored = StCalculate.tabla[example].ejemplo[pos];
    return stored == StCalculate.var[pos].max + 1;
}
/**
* <p>
* Dataset interpretation - reads the dataset and stores its values in the
* shared StCalculate structures.
* The first output attribute reported by the dataset classes is used as the
* target variable and must be nominal; otherwise the program exits with code -1.
* All the input variables are stored in "tabla.ejemplo"
* Target variable is stored in "tabla.clase"
* Missing input values are stored as lost values (see setLost).
* </p>
* <p>
* Any exception raised while reading or interpreting the dataset is caught
* here, reported on the standard output and NOT propagated to the caller.
* </p>
* @param input_file File to capture
* @throws IOException declared in the signature; the body catches every
* exception itself, so none is propagated from here
**/
public static void CaptureDataset (String input_file)
throws IOException {
    try {
        // Declare the dataset and load it in memory
        StCalculate.Data = new InstanceSet();
        StCalculate.Data.readSet(input_file,false);

        // Number of instances (examples) and of input attributes (variables)
        StCalculate.n_eje = StCalculate.Data.getNumInstances();
        StCalculate.num_vars = Attributes.getInputNumAttributes();

        // Check that there is only one output variable
        if (Attributes.getOutputNumAttributes()>1) {
            System.out.println("This algorithm can not process MIMO datasets");
            System.out.println("All outputs but the first one will be removed");
        }
        if (Attributes.getOutputNumAttributes()<1) {
            System.out.println("This algorithm can not process datasets without outputs");
            System.out.println("Zero-valued output generated");
        }

        // The output variable must be nominal; otherwise the algorithm can not be run
        Attribute target = Attributes.getOutputAttribute(0);
        if (target.getType()!=Attribute.NOMINAL) {
            System.err.println("Target variable is not a discrete one.");
            System.err.println("Algorithm can not be run.");
            System.out.println("Program aborted.");
            System.exit(-1);
        }

        // Number and names of the classes of the (nominal) output attribute
        StCalculate.n_clases = target.getNumNominalValues();
        StCalculate.name_class = new String[StCalculate.n_clases];
        for (int pos=0; pos<StCalculate.n_clases; pos++) {
            StCalculate.name_class[pos] = target.getNominalValue(pos);
        }

        // Screen output of the output variable
        System.out.println ( "Output variable: " + target.getName());

        // Set the variables characteristics
        StCalculate.var = new TypeVar[StCalculate.num_vars];
        for (int i=0; i<StCalculate.num_vars; i++) {
            Attribute input = Attributes.getInputAttribute(i);
            TypeVar variable = new TypeVar();
            StCalculate.var[i] = variable;
            variable.nombre = input.getName();

            if (input.getType()==Attribute.NOMINAL) {
                // Nominal (enumerated) variable - Discrete type
                variable.tipoDato = 'e';
                variable.continua = false;
                variable.valores = new Vector(input.getNominalValuesList());
                // Enumerated values are translated into values from 0 to number of elements - 1
                variable.min = 0;
                variable.max = input.getNumNominalValues()-1;
                variable.n_etiq = input.getNumNominalValues();
                // Update max number of values for discrete vars
                if (variable.n_etiq > StCalculate.MaxValores)
                    StCalculate.MaxValores = variable.n_etiq;
            }
            else {
                // Real ('r') or any other type, treated as integer ('i'): Continuous type
                variable.tipoDato = (input.getType()==Attribute.REAL) ? 'r' : 'i';
                variable.continua = true;
                variable.min = (float)input.getMinAttribute();
                variable.max = (float)input.getMaxAttribute();
                variable.n_etiq = StCalculate.Param.n_etiq;
                // Update the max number of labels for cont variables and number of values
                if (variable.n_etiq > StCalculate.MaxEtiquetas)
                    StCalculate.MaxEtiquetas = variable.n_etiq;
                if (variable.n_etiq > StCalculate.MaxValores)
                    StCalculate.MaxValores = variable.n_etiq;
            }
        }

        // Fill the "tabla" structure with the data from dataset
        StCalculate.tabla = new TTable[StCalculate.n_eje];
        for (int i=0; i<StCalculate.n_eje; i++) {
            TTable row = new TTable();
            // The row must be registered before setLost is called, since setLost writes through StCalculate.tabla
            StCalculate.tabla[i] = row;
            row.fcubierto = false; // Set example to not covered - fuzzy
            row.ccubierto = false; // Set example to not covered - crisp
            row.ejemplo = new float[StCalculate.num_vars];

            // Input values of the instance; nominal values arrive already translated to numbers
            Instance inst = StCalculate.Data.getInstance(i);
            double[] instValues = inst.getAllInputValues();
            for (int j=0; j<StCalculate.num_vars; j++) {
                if (inst.getInputMissingValues(j))
                    // If the value is a lost one, sets as max value + 1
                    setLost (i, j);
                else
                    row.ejemplo[j] = (float) instValues[j];
            }

            // Set the value for the target variable of the example (first output value)
            row.clase = (int) inst.getAllOutputValues()[0];
        }
    } catch (Exception e) {
        // Best-effort behaviour kept from the original: report and continue
        System.out.println("DBG: Exception in readSet");
        e.printStackTrace();
    }
}
/**
* <p>
* Looks up an input attribute by name and returns its position.
* The comparison is case-sensitive and only input attributes are searched.
* </p>
* @param _name Name of the attribute.
* @return Position of the first input attribute with that name, or -1 if none matches.
*/
public static int getNumAttribute(String _name) {
    int position = 0;
    while (position < Attributes.getInputNumAttributes()) {
        String candidate = Attributes.getInputAttribute(position).getName();
        if (candidate.equals(_name)) {
            return position;
        }
        position++;
    }
    // No input attribute carries the requested name
    return -1;
}
/**
* <p>
* Generates the population with the rules obtained by the algorithm.
* </p>
* <p>
* The rule file is read completely, converted to lower case and processed line
* by line. Tokens are separated by blanks, commas and tabs, and only the first
* token of each line decides how the line is handled:
* "generated" starts a new rule, "variable" adds one antecedent variable to the
* current rule and "consecuent:" sets the class of the current rule and closes it.
* Lines starting with any other token are ignored.
* Rules are written into StCalculate.poblac at index StCalculate.NumReglasGeneradas-1;
* a rule for which Population.Compare reports a match with an earlier one is
* emptied again and the counter is decremented.
* </p>
* <p>
* Any exception raised while reading or parsing is caught here, reported on the
* standard output and NOT propagated; parsing stops at that point.
* </p>
* @param nFile File of the rules obtained by the algorithm
* @throws IOException declared in the signature; the body catches every
* exception itself, so none is propagated from here
**/
public static void CaptureRules (String nFile)
throws IOException {
try {
String file, linea, tok;
StringTokenizer lineasFichero, tokens;
file = Files.readFile(nFile);
// The whole file is lower-cased; original-case names are recovered below with equalsIgnoreCase
file = file.toLowerCase() + "\n ";
lineasFichero = new StringTokenizer(file,"\n\r");
// Counts the "variable" lines seen for the rule currently being read
int numero;
numero = 0;
do {
if (!lineasFichero.hasMoreTokens()) break;
linea=lineasFichero.nextToken();
tokens = new StringTokenizer(linea," ,\t");
if (tokens.hasMoreTokens()) {
tok = tokens.nextToken();
if (tok.equalsIgnoreCase("generated")) {
// Start of a new rule: one further token is consumed and discarded
tokens.nextToken();
StCalculate.NumReglasGeneradas++;
}
else if (tok.equalsIgnoreCase("variable")) {
numero++;
String NombreVar = tokens.nextToken();
// Recover the attribute name with its original case, since the file was lower-cased
for (int k=0; k<Attributes.getInputNumAttributes();k++) {
if (NombreVar.equalsIgnoreCase(Attributes.getInputAttribute(k).getName()))
NombreVar = Attributes.getInputAttribute(k).getName();
}
// NumVar is -1 when the name matches no input attribute (see getNumAttribute)
int NumVar = getNumAttribute(NombreVar);
// Skip one token (presumably the separator between name and value - TODO confirm against the rule file format)
tokens.nextToken();
String ValorVar = tokens.nextToken();
int ValorNum;
if (ValorVar.equalsIgnoreCase("label")) {
// Continuous variable: values are given as "label <number>" entries
do {
ValorVar = tokens.nextToken();
ValorNum = Integer.parseInt(ValorVar);
StCalculate.poblac.setCromElem(StCalculate.NumReglasGeneradas-1, NumVar, ValorNum, 1);
// Three tokens are discarded after the label number (presumably the fuzzy set definition - TODO confirm)
tokens.nextToken(); tokens.nextToken(); tokens.nextToken();
// If the line continues, one more token is consumed before the next label number is read
if (tokens.hasMoreTokens())
ValorVar = tokens.nextToken();
else
break;
} while (true);
}
else {
// Discrete variable: every remaining token of the line is handled as a nominal value
do {
// Recover the nominal value with its original case before converting it
for (int k=0; k<Attributes.getInputAttribute(NumVar).getNumNominalValues(); k++) {
if (ValorVar.equalsIgnoreCase(Attributes.getInputAttribute(NumVar).getNominalValue(k)))
ValorVar = Attributes.getInputAttribute(NumVar).getNominalValue(k);
}
ValorNum = Attributes.getInputAttribute(NumVar).convertNominalValue(ValorVar);
StCalculate.poblac.setCromElem(StCalculate.NumReglasGeneradas-1, NumVar, ValorNum, 1);
if (tokens.hasMoreTokens())
ValorVar = tokens.nextToken();
else
break;
} while (true);
}
// Also set the element at position n_etiq of this variable to 1
// (presumably the flag telling that the variable takes part in the rule - TODO confirm in Population)
StCalculate.poblac.setCromElem(StCalculate.NumReglasGeneradas-1, NumVar, StCalculate.var[NumVar].n_etiq, 1);
}
else if (tok.equalsIgnoreCase("consecuent:")) {
// The stored value is the number of "variable" lines of the rule plus one
numero++;
StCalculate.poblac.setIndivNvar(StCalculate.NumReglasGeneradas-1, numero);
numero=0;
String ValorCons = tokens.nextToken();
// Recover the class name with its original case before converting it
for (int k=0; k<Attributes.getOutputAttribute(0).getNumNominalValues(); k++) {
if (ValorCons.equalsIgnoreCase(Attributes.getOutputAttribute(0).getNominalValue(k)))
ValorCons = Attributes.getOutputAttribute(0).getNominalValue(k);
}
int ValorClase = Attributes.getOutputAttribute(0).convertNominalValue(ValorCons);
StCalculate.poblac.setIndivNameClass (StCalculate.NumReglasGeneradas-1, ValorCons);
StCalculate.poblac.setIndivNumClass (StCalculate.NumReglasGeneradas-1, ValorClase);
StCalculate.poblac.setIndivTotalClass(StCalculate.NumReglasGeneradas-1, Utils.ExamplesClass(ValorClase));
// Compare the rule just completed with the earlier ones; on a match its slot is
// emptied and the rule counter decremented.
// NOTE(review): the loop bound and the compared index both use NumReglasGeneradas,
// which changes inside the loop; the loop keeps running after a match.
for (int k=0; k<StCalculate.NumReglasGeneradas-1;k++) {
if (StCalculate.poblac.Compare(k,StCalculate.NumReglasGeneradas-1)) {
StCalculate.poblac.initIndEmp (StCalculate.NumReglasGeneradas-1);
StCalculate.NumReglasGeneradas--;
}
}
}
}
} while (true);
} catch (Exception e) {
// Any failure (missing token, unknown variable, number format...) ends the parsing here
System.out.println("DBG: Exception in readRules");
e.printStackTrace();
}
}
/**
* <p>
* Counts the rules stored in a rule file.
* A rule is counted for every line whose first token is "generated"
* (case-insensitive) and which has at least one more token after it.
* If an exception occurs while reading or parsing, it is reported on the
* standard output and the count reached so far is returned.
* </p>
* @param nFile File of rules obtained by the algorithm
* @return The number of rules obtained by the algorithm
* @throws IOException declared in the signature; the body catches every
* exception itself, so none is propagated from here
**/
public static int CaptureNumRules (String nFile)
throws IOException {
    int ruleCount = 0;
    try {
        String contents = Files.readFile(nFile);
        contents = contents.toLowerCase() + "\n ";
        StringTokenizer fileLines = new StringTokenizer(contents,"\n\r");
        while (fileLines.hasMoreTokens()) {
            StringTokenizer words = new StringTokenizer(fileLines.nextToken()," ,\t");
            if (!words.hasMoreTokens()) {
                // Blank line: nothing to inspect
                continue;
            }
            String first = words.nextToken();
            if (first.equalsIgnoreCase("generated")) {
                // A token must follow the keyword; it is consumed before the rule is counted
                words.nextToken();
                ruleCount++;
            }
        }
    } catch (Exception e) {
        System.out.println("DBG: Exception in readRules");
        e.printStackTrace();
    }
    return ruleCount;
}
/**
* <p>
* Calculate is the main method of the Calculate class.
* It runs the same preparation twice, first on the training file and then on
* the test file: the dataset is captured, the fuzzy structures are rebuilt and
* the rule file is read into a fresh population.
* After the training pass the population output is written to the training
* output file; after the test pass the quality measures file and the test
* output file are written.
* </p>
* @param output_file_tra Name of the training output file
* @param output_file_tst Name of the test output file
* @param input_file_tra Name of the training file
* @param input_file_tst Name of the test file
* @param rule_file Name of the rule file
* @param quality_file Name of the output quality file
* @param nlabels Number of labels for the continuous variables
* @throws Exception if any of the invoked steps propagates one
**/
public static void Calculate (String output_file_tra, String output_file_tst, String input_file_tra, String input_file_tst, String rule_file, String quality_file, int nlabels) throws Exception {
    StCalculate.Param = new Param (output_file_tra,output_file_tst,input_file_tra,input_file_tst, rule_file, quality_file, nlabels);

    // Training data: load everything and write the training output
    prepareDatasetAndRules (StCalculate.Param.input_file_tra);
    StCalculate.poblac.CalcPobOutput(StCalculate.Param.output_file_tra); // Covered examples are marked

    // Test data: load everything again, then write quality measures and test output
    prepareDatasetAndRules (StCalculate.Param.input_file_tst);
    StCalculate.poblac.CalcPob(StCalculate.Param.measure_file); // Covered examples are marked
    StCalculate.poblac.CalcPobOutput(StCalculate.Param.output_file_tst); // Covered examples are marked

    System.out.println ("Program finished\n");
}

/**
* <p>
* Loads one dataset file and rebuilds every shared structure that depends on it:
* fuzzy sets, intervals, information gain array, coverage flags and the
* population read from the rule file configured in StCalculate.Param.
* </p>
* @param dataset_file Dataset file (training or test) to load
* @throws Exception if any of the invoked steps propagates one
**/
private static void prepareDatasetAndRules (String dataset_file) throws Exception {
    // Read the dataset and store its values
    CaptureDataset (dataset_file);

    // Creates "Fuzzy" characteristics and intervals
    StCalculate.BaseDatos = new Fuzzy[StCalculate.num_vars][StCalculate.MaxEtiquetas];
    for (int x=0; x<StCalculate.num_vars; x++)
        for (int y=0; y<StCalculate.MaxEtiquetas; y++)
            StCalculate.BaseDatos[x][y] = new Fuzzy();
    StCalculate.intervalos = new float[StCalculate.num_vars][StCalculate.MaxEtiquetas];

    // Setting of fuzzy sets characteristics for continuous variables
    Semantics.Initialise();

    // Create and initialize gain information array
    StCalculate.GI = new float[StCalculate.num_vars];
    Gain.Init();

    // Set all the examples as not covered
    for (int ej=0; ej<StCalculate.n_eje; ej++) {
        StCalculate.tabla[ej].fcubierto = false; // fuzzy
        StCalculate.tabla[ej].ccubierto = false; // crisp
    }

    // Read the number of rules of the rule file
    StCalculate.Param.long_poblacion = CaptureNumRules (StCalculate.Param.rul_file);
    System.out.print ("The file contains " + StCalculate.Param.long_poblacion + " rules");

    // Creates and initializes the population
    StCalculate.poblac = new Population(StCalculate.Param.long_poblacion, StCalculate.num_vars, StCalculate.var);
    StCalculate.poblac.initPopEmp ();

    // Variables initialization
    StCalculate.total_ej_cubiertos = 0;
    StCalculate.NumReglasGeneradas=0;

    // Read the rules of the file; NumReglasGeneradas ends up as the number of distinct rules
    CaptureRules (StCalculate.Param.rul_file);
    System.out.println (" (" + StCalculate.NumReglasGeneradas + " are different)");
}
}