/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. Sánchez (luciano@uniovi.es) J. Alcalá-Fdez (jalcala@decsai.ugr.es) S. García (sglopez@ujaen.es) A. Fernández (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * @author Writed by Cristóbal J. Carmona (University of Jaen) 24/06/2010 * @version 2.0 * @since JDK1.5 * </p> */ package keel.Algorithms.Subgroup_Discovery.SDAlgorithm; import java.io.IOException; import java.util.Arrays; import java.text.DecimalFormat; import java.util.Collections; import org.core.Files; public class SD { /** * <p> * It is the main class of the SD algorithm * </p> */ private String input_file_tra; private String input_file_eval; private String input_file_tst; private String output_file_tra; private String output_file_tst; private String rule_file; private String measure_file; private String algorithm; private int g; private int beamWidth; private int numRules; private float minSupp; private int muestPorClaseTrain[]; private int muestPorClaseEval[]; private int muestPorClaseTest[]; private int[] valorClases; // Class values private int nClases; // Number of classes private Complex storeSelectors; // The possible selectors private SetRules setFinalRules; // Final rules obtained private EvaluateRules evRules; private SetData dataTra; // Train data private SetData dataEva; // Evaluat data private SetData dataTst; // Test data private int classTra[]; private int classTst[]; private double time; private String theExit; private int nAttributes; private String[] nameAttributes; private String[] nameClasses; private boolean Continuous = false; public boolean isOk(){ return (!Continuous); } /** * <p> * Constructs the object of SD_algorithm * </p> * @param input_ftrain Name of the training file * @param input_feval Name of the evaluating file * @param input_ftest Name of the test file * @param output_ftrain Name of the output training file * @param output_ftest Name of the output test file * @param arule_file Name of the rule file * @param ameasure_file Name of the measure file * @param alg Name of the algorithm * @param abeamWidth Value of the width for the beam * @param ag Value of g parameter for the algorithm * @param aminSupp Value of minSupp parameter for the algorithm * @param anumRules Value of anumRules parameter for the algorithm */ public SD(String input_ftrain, String input_feval, String input_ftest, String output_ftrain, String output_ftest, String arule_file, String ameasure_file, String alg, int abeamWidth, int ag, float aminSupp, int anumRules) { algorithm = alg; System.out.println("Executing "+ algorithm); input_file_tra = input_ftrain; input_file_eval = input_feval; input_file_tst = input_ftest; output_file_tra = output_ftrain; output_file_tst = output_ftest; rule_file = arule_file; measure_file = ameasure_file; beamWidth = abeamWidth; g = ag; minSupp = aminSupp; numRules = anumRules; Dataset train = new Dataset(); Dataset eval = new Dataset(); Dataset test = new Dataset(); // Check if the data sets have continuous variables try { train.readSet(input_file_tra, true); if (train.hayAtributosContinuos()){ System.err.println("SD_algorithm may not work properly with continuous attributes.\nPlease discretize the data base"); Continuous = true; } eval.readSet(input_file_eval, false); if (eval.hayAtributosContinuos()){ System.err.println("SD_algorithm may not work properly with continuous attributes.\nPlease discretize the data base"); Continuous = true; } test.readSet(input_file_tst, false); } catch (IOException e) { System.err.println("There was a problem while reading the data-set files:"); System.err.println("-> " + e); System.exit(0); } theExit = new String(""); theExit = test.copiaCabeceraTest(); // We obtain the training and test datasets System.out.println("\nGenerating datasets"); dataTra = new SetData(); dataEva = new SetData(); dataTst = new SetData(); train.calculaMasComunes(); eval.calculaMasComunes(); test.calculaMasComunes(); dataTra = createDataset(train); dataEva = createDataset(eval); dataTst = createDataset(test); classTra = train.getC(); nClases = train.getnClasses(); nAttributes = train.getnentradas(); int[] auxiliar = train.getC(); Arrays.sort(auxiliar); valorClases = new int[nClases]; valorClases[0] = auxiliar[0]; int valor = 0; for (int i = 1; i < nClases; i++) { int j; for (j = valor; auxiliar[j] == valorClases[i - 1]; j++) { ; } valorClases[i] = auxiliar[j]; valor = j; } muestPorClaseTrain = new int[nClases]; for (int j = 0; j < nClases; j++) { muestPorClaseTrain[j] = 0; for (int i = 0; i < dataTra.size(); i++) { if (valorClases[j] == classTra[i]) { muestPorClaseTrain[j]++; } } } classTst = test.getC(); muestPorClaseTest = new int[nClases]; for (int j = 0; j < nClases; j++) { muestPorClaseTest[j] = 0; for (int i = 0; i < dataTst.size(); i++) { if (valorClases[j] == classTst[i]) { muestPorClaseTest[j]++; } } } int [] clasesEval; clasesEval = eval.getC(); muestPorClaseEval = new int[nClases]; for (int j = 0; j < nClases; j++) { muestPorClaseEval[j] = 0; for (int i = 0; i < dataEva.size(); i++) { if (valorClases[j] == clasesEval[i]) { muestPorClaseEval[j]++; } } } time = System.currentTimeMillis(); //Time nameAttributes = train.dameNombres(); nameClasses = train.dameClases(); if (nameClasses == null){ nameClasses = new String[nClases]; for (int i = 0; i < nClases; i++){ nameClasses[i] = ""+valorClases[i]; } } } /** * <p> * Creates a dataset through the files of the parameteres * </p> * @param Dataset A dataset of a file * @return The dataset created. A linked list with samples */ private SetData createDataset(Dataset mis_datos) { SetData datos = new SetData(); //Create a new dataset int tam = mis_datos.getnentradas(); //The number of input attributes double[] vars = new double[tam]; //The vector with the values of the attributes double[][] X; int[] C; int clase = 0; //Variable with the value of the class X = mis_datos.getX(); C = mis_datos.getC(); for (int i = 0; i < mis_datos.getndatos(); i++) { //System.out.print("\n"+i+":"); for (int j = 0; (j < tam); j++) { //System.out.print(" "+X[i][j]); if (mis_datos.isMissing(i, j)) { vars[j] = mis_datos.masComun(j); } else { vars[j] = X[i][j]; } } clase = C[i]; Instance m = new Instance(vars, clase, tam); m.setPosFile(i); datos.addDato(m); } return datos; } /** * <p> * Execute the algorithm SD * </p> */ public void execute() { makeSelectors(); SetData datosTrainAux = new SetData(); datosTrainAux = dataTra.copiaConjDatos(); SDClasses(datosTrainAux, valorClases); time = System.currentTimeMillis() - time; if(setFinalRules.size() == 0){ //If there are not rules with good support System.out.println("\nThere are not rules with good level of support"); } else { evRules = new EvaluateRules(setFinalRules, dataEva, dataTst, muestPorClaseEval, muestPorClaseTest, nameClasses, measure_file); generateExit(); } System.out.println("\n\nExecuting finished\n"); } /** * <p> * Create the total set of selectrs for obtaining the possible rules * </p> */ private void makeSelectors() { int totalAtributos = dataTra.getDato(0).getNattributes(); int ejemplos = dataTra.size(); double[][] lista = new double[ejemplos + 1][totalAtributos]; //To see != for (int i = 0; i < totalAtributos; i++) { // For each attribute lista[0][i] = dataTra.getDato(0).getMuest()[i]; //Init lista[1][i] = Double.POSITIVE_INFINITY; //Mark } for (int i = 0; i < totalAtributos; i++) { // For each attribute for (int j = 1; j < ejemplos; j++) { //For each example double valor = dataTra.getDato(j).getMuest()[i]; int k = 0; while (!(Double.isInfinite(lista[k][i]))) { //While not mark if (lista[k][i] == valor) { // It is the same break; } k++; } if (Double.isInfinite(lista[k][i])) { //Final position list lista[k][i] = valor; lista[k + 1][i] = Double.POSITIVE_INFINITY; } } } storeSelectors = new Complex(nClases); //Selectors for (int i = 0; i < totalAtributos; i++) { // For each attribute for (int h = 0; h < ejemplos; h++) { // For each example if (Double.isInfinite(lista[h][i])) { break; // Next attribute } for (int j = 0; j < 4; j++) { // <>,<=,> Selector s = new Selector(i, j, lista[h][i]); // We take the value for each attribute [attr,op,value] storeSelectors.addSelector(s); // Introduce if not the same } } } } /** * <p> * It obtains the rules for the values of the class * </p> * @param datosTrainAux It is the examples of the training set * @param valorClases It is the values of the classes */ private void SDClasses(SetData datosTrainAux, int[] valorClases) { setFinalRules = new SetRules(); setFinalRules.addNameClasses(nameClasses); setFinalRules.addNameClass(nameAttributes[nAttributes]); System.out.println("\n Extracting rules for the different classes:"); for (int i = 0; i < nClases; i++) { //For each class SD(datosTrainAux, valorClases[i]); } } /** * <p> * It obtains the rules for a value of the class * </p> * @param train It is the examples of the training set * @param clase It is the class to study */ private void SD(SetData train, int clase) { boolean continuar = false; System.out.println("\n We search the best rules for class " + nameClasses[clase]); SetRules beam = new SetRules(); SetRules newbeam = new SetRules(); beam.addNameClasses(nameClasses); beam.addNameClass(nameClasses[clase]); //Create the initial beam for (int i = 0; i < storeSelectors.size(); i++) { Complex aux = new Complex(nClases); aux.setClas(clase); aux.adjuntaNombreAtributos(nameAttributes); aux.addSelector(storeSelectors.getSelector(i)); evaluateRuleInit(aux, train); beam.addRegla(aux); } //Sort out the Beam Collections.sort(beam.getConjReglas()); beam.eliminaSubsumidos(beam.size()); beam.deleteRulesLowSupport(beamWidth, minSupp); beam.deleteEqualAttributes(beamWidth); for (int j = beam.size() - 1; beam.size() > beamWidth; j--) { beam.deleteRegla(j); } //Copy the beam in newbeam newbeam.addNameClasses(nameClasses); newbeam.addNameClass(nameClasses[clase]); newbeam.addReglas(beam); do { // while improvement continuar = false; for (int i = 0; i < storeSelectors.size(); i++) { Selector s = storeSelectors.getSelector(i); for (int j = 0; j < beam.size(); j++) { Complex aux2 = beam.getRule(j); Complex aux = new Complex(nClases); boolean sigue = true; for (int h = 0; (h < aux2.size()) && (sigue); h++) { Selector s2 = aux2.getSelector(h); aux.addSelector(s2); if (s2.compareTo(s) < 2) { // It is the same attribute sigue = false; // Don´t add this attribute } } if (sigue) { //This is a new selector to add to the rule aux.addSelector(s); aux.setClas(clase); aux.adjuntaNombreAtributos(nameAttributes); //Evaluate TP/|E| and q_g boolean improvement = evaluateRule(aux, train, newbeam); if ((improvement)&&(isRelevant(aux, newbeam)&&(aux!=null))){ //The new rule is added to newbeam newbeam.addRegla(aux); //There is an improvement in NewBeam continuar = true; } } } } // Sort out newbeam Collections.sort(newbeam.getConjReglas()); // Eliminate rules not valid newbeam.eliminaSubsumidos(newbeam.size()); newbeam.deleteNull(); newbeam.deleteEqual(newbeam.size()); for (int j = newbeam.size() - 1; newbeam.size() > beamWidth; j--) { newbeam.deleteRegla(j); } beam.deleteAll(); // Copy newbeam in beam beam.addReglas(newbeam); } while (continuar); // if number of rules is lower than beamWidth if ((numRules != 0)&&(numRules < beamWidth)){ int conta = 0; for(int i=0; i<beamWidth && conta<numRules; i++){ if(beam.getRule(i).getSup() > minSupp){ setFinalRules.addRegla(beam.getRule(i)); conta++; } } } else { for(int i=0; i<beamWidth; i++){ if(beam.getRule(i).getSup() > minSupp){ setFinalRules.addRegla(beam.getRule(i)); } } } } /** * <p> * Check if the new rule is significant * </p> * @param Complex The rule to analyse * @param SetRules A set of rules to compare * @return True if it is significant */ private boolean isRelevant(Complex c, SetRules newBeam) { Complex rule; boolean relevant = true; for(int i=0; i<newBeam.size(); i++){ rule = newBeam.getRule(i); if((c.getTP() < rule.getTP())&&(c.getFP() > rule.getFP())) relevant = false; } return(relevant); } /** * <p> * Evaluate the new rule with respect the rules of newbeam. * </p> * @param c The rule to evaluate * @param e The set of data to check * @param newbeam The set of rules to check * @return If there is an improvement in this rule */ private boolean evaluateRule(Complex c, SetData e, SetRules newbeam){ boolean improvement = true; int i; int cl; float tp = 0; float fp = 0; c.deleteDistrib(); for (i = 0; i < e.size(); i++) { cl = e.getDato(i).getClas(); if (c.cover(e.getDato(i))) { c.incrementDistrib(cl); if (cl == c.getClas()) { tp++; } else fp++; } } c.setTP(tp); c.setFP(fp); c.setQg(tp/(fp+g)); c.setSup(tp/e.size()); c.adjustDistrib(); for (i=0; i<newbeam.size(); i++){ float aux_q_g = (float) newbeam.getRule(i).getQg(); if (c.getQg() <= aux_q_g){ improvement = false; } } if(improvement){ if (c.getSup() <= minSupp){ improvement = false; } } return improvement; } /** * <p> * Evaluate a new rule at the initialisation * </p> * @param c The rule to evaluate * @param e The set of data to check * @return If there is an improvement in this rule */ private void evaluateRuleInit(Complex c, SetData e){ int i; int cl; float tp = 0; float fp = 0; c.deleteDistrib(); for (i = 0; i < e.size(); i++) { cl = e.getDato(i).getClas(); if (c.cover(e.getDato(i))) { c.incrementDistrib(cl); if (cl == c.getClas()) { tp++; } else fp++; } } c.setQg(tp/(fp+g)); c.setSup(tp/e.size()); c.adjustDistrib(); } /** * <p> * Generate the exit files * </p> */ private void generateExit() { String cad = ""; DecimalFormat d = new DecimalFormat("0.000"); // Print screen cad = setFinalRules.printString(); time = (double) time / 1000; //cad += "\n\n" + evRules.printString() + "\nTime: " + d.format(time); cad += "\n\nTime: " + d.format(time); // Print the result of rules and quality measures in "rule file" Files.writeFile(rule_file, cad); // Print results of train and test Files.writeFile(output_file_tra, theExit + evRules.exitResult(dataTra)); Files.writeFile(output_file_tst, theExit + evRules.exitResult(dataTst)); } }