/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. Sánchez (luciano@uniovi.es) J. Alcalá-Fdez (jalcala@decsai.ugr.es) S. García (sglopez@ujaen.es) A. Fernández (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * @author Writed by Alberto Fernández (University of Granada) 15/01/2006 * @author Modified by Cristóbal J. Carmona (University of Jaen) 24/06/2010 * @version 2.0 * @since JDK1.5 * </p> */ package keel.Algorithms.Subgroup_Discovery.SDAlgorithm; import java.text.DecimalFormat; import org.core.Files; public class EvaluateRules { /** * <p> * Evaluate the rules obtained by the algorithm * </p> */ private int nClases; private int nDatos; private int contClases[]; private int tam; private double ant; private double cob; private double conf; private double compl; private double rel; private double ati; private double sens; private double porcAciertoTr; private double porcAciertoTst; private double muestCubiertas; private int muestrasCubiertasTotales[]; private String measure_file; private SetData train; private SetData test; private SetRules reglas; private String[] valorNombreClases; /** * <p> * Calculate the quality measures of the rules obtained by the algorithm * </p> * @param setRul Set of final rules * @param setTra Set of train data * @param setTst Set of test data * @param examClassTra Number of examples for each class in the train data * @param examClassTst Number of examples for each class in the test data * @param valueNameClass Name for each class * @param mea_file Name of the measure file */ public EvaluateRules(SetRules setRul, SetData setTra, SetData setTst, int[] examClassTra, int[] examClassTst, String[] valueNameClass, String mea_file) { reglas = setRul; train = setTra.copiaConjDatos(); test = setTst.copiaConjDatos(); nClases = setRul.getUltimaRegla().getNClass(); nDatos = setTra.size(); measure_file = mea_file; this.valorNombreClases = valueNameClass; // Calculate the train calculaIndicesTra(train, examClassTra); System.out.print("\n\nTrain index: "); DecimalFormat d = new DecimalFormat("0.0000"); System.out.print("\n\n#Rules: " + d.format(tam) + "\n#Vars: " + d.format(ant) + "\nCoverage: " + d.format(cob) + "\nSignificance: " + d.format(rel) + "\nUnusualness: " + d.format(ati) + "\nAccuracy: " + d.format(porcAciertoTr) + "\nSupport: " + d.format(compl) + "\nConfidence: " + d.format(conf)); // Calculate test for (int j = 0; j < test.size(); j++) { test.getDato(j).setCovered(0); } calculaIndicesTst(test, examClassTst); System.out.print("\n\n#Rules: " + d.format(tam) + "\n#Vars: " + d.format(ant) + "\nCoverage: " + d.format(cob) + "\nSignificance: " + d.format(rel) + "\nUnusualness: " + d.format(ati) + "\nAccuracy: " + d.format(porcAciertoTr) + "\nSupport: " + d.format(compl) + "\nConfidence: " + d.format(conf)); } /** * <p> * Print the results in a String * </p> * @return Results in a string */ public String printString() { String cad = "####Average results for test data####\n"; DecimalFormat sixDecimals = new DecimalFormat("0.0000"); cad += "Avg. Rule length: " + sixDecimals.format(tam) + "\n"; cad += "Avg. Number of attributes by rule: " + sixDecimals.format(ant) + "\n"; cad += "Avg. Coverage: " + sixDecimals.format(cob) + "\n"; cad += "Avg. Significance: " + sixDecimals.format(rel) + "\n"; cad += "Avg. Unusualness: " + sixDecimals.format(ati) + "\n"; cad += "Avg. Support: " + sixDecimals.format(compl) + "\n"; cad += "Avg. Confidence: " + sixDecimals.format(conf) + "\n"; cad += "Accuracy Training: " + sixDecimals.format(porcAciertoTr) + "\n"; cad += "Accuracy Test: " + sixDecimals.format(porcAciertoTst); return cad; } /** * <p> * Print the quality measures in the measure file * </p> * @param cad String with the values to introduce */ public void printMeasure(String cad) { } /** * <p> * Calculate the quality of the set of rules for the training file * </p> * @param SetData Set of data to study * @param muestPorClase Number of examples for each class */ private void calculaIndicesTra(SetData datos, int[] muestPorClase) { int i, j; int aciertos; nDatos = datos.size(); contClases = new int[nClases]; for (i = 0; i < nClases; i++) { contClases[i] = muestPorClase[i]; } tam = reglas.size(); // Calculate Tam // Number of attributes for (i = 0, ant = 0; i < reglas.size(); i++) { ant += reglas.getRule(i).size(); } //Add the variables of the consequent ant += reglas.size(); ant = (double) ant / tam; //Nº attributes per rule // Calculate the distrib muestCubiertas = 0; //Number of covered examples int muestBienCubiertas = 0; int[][] instCubiertas = new int[tam][nClases]; for (j = 0; j < nDatos; j++) { datos.getDato(j).setCovered(0); } for (i = 0; i < reglas.size(); i++) { for (j = 0; j < nClases; j++) { instCubiertas[i][j] = 0; } } muestCubiertas = 0; for (i = 0; i < reglas.size(); i++) { for (j = 0; j < nDatos; j++) { Instance m = datos.getDato(j); if (reglas.getRule(i).cover(m)) { muestCubiertas++; instCubiertas[i][m.getClas()]++; if (reglas.getRule(i).getClas() == m.getClas()) { if (m.getCovered() == 0) { muestBienCubiertas++; m.addCovered(); } } } } } //Calculate coverage cob = (double) muestCubiertas / (tam*nDatos); //Calculate support compl = (double) muestBienCubiertas / nDatos; //Calculate confidence conf = (double) muestBienCubiertas / muestCubiertas; //Calculate unusualness ati = 0; double val; for(i = 0; i < reglas.size(); i++){ val = evaluateUnus(reglas.getRule(i),datos); ati += val; } ati /= (double) reglas.size(); //Calculate significance double sigParcial = 0; double[] pCondi = new double[reglas.size()]; //Factor normalizador -> coverage for (i = 0; i < reglas.size(); i++) { pCondi[i] = 0; for (j = 0; j < nClases; j++) { pCondi[i] += instCubiertas[i][j]; } pCondi[i] *= (double) 1.0 / nDatos; } rel = 0; for (i = 0; i < reglas.size(); i++) { sigParcial = 0; for (j = 0; j < nClases; j++) { double logaritmo = (double) instCubiertas[i][j] / (contClases[j] * pCondi[i]); if ((logaritmo != 0)&&(!Double.isNaN(logaritmo))&&(!Double.isInfinite(logaritmo))){ logaritmo = Math.log(logaritmo); logaritmo *= (double) instCubiertas[i][j]; sigParcial += logaritmo; } } rel += sigParcial * 2; } rel /= (double) reglas.size(); //Correct classified examples double voto[] = new double[nClases]; aciertos = 0; int clases[] = contClases; int clase, cl; double distribucion[], max; int clasePorDefecto = 0; for (i = 0, clase = -1; i < nClases; i++) { if (clases[i] > clase) { clasePorDefecto = i; clase = clases[i]; } } for (i = 0; i < datos.size(); i++) { for (j = 0; j < nClases; j++) { voto[j] = 0; } for (j = 0; j < reglas.size(); j++) { if (reglas.getRule(j).cover(datos.getDato(i))) { distribucion = reglas.getRule(j).getDistrib(); for (int k = 0; k < nClases; k++) { voto[k] += distribucion[k]; } } } //System.out.println(""); for (j = 0, max = 0, cl = 0; j < nClases; j++) { if (voto[j] > max) { max = voto[j]; cl = j; } } if (max == 0) { cl = clasePorDefecto; } if (cl == datos.getDato(i).getClas()) { aciertos++; } } porcAciertoTr = (double) aciertos / datos.size(); } /** * <p> * Calculate the quality measures * </p> * @param SetData Set of data to study * @param muestPorClase Number of examples for each class */ private void calculaIndicesTst(SetData datos, int[] muestPorClase) { float medVar = 0; float medCob = 0; float medSig = 0; float medUnu = 0; float medSen = 0; float medCon = 0; float medSup = 0; int j; int aciertos; nDatos = datos.size(); tam = reglas.size(); // Calculate Tam contClases = new int[nClases]; for(int i = 0; i < nClases; i++) { contClases[i] = muestPorClase[i]; } muestrasCubiertasTotales = new int[nDatos]; for(int i=0; i < nDatos; i++) muestrasCubiertasTotales[i] = 0; DecimalFormat sixDecimals = new DecimalFormat("0.0000"); String cad = "#Rule \t #Vars \t Cov \t Sign \t Unus \t Acc \t Sens \t Supp \t Cnf\n"; Files.writeFile(measure_file, cad); //For each rule we calculate the quality measures for(int i=0; i<reglas.size(); i++){ calculaIndicesRule(i, datos); //Add the values to the average results medVar += ant+1; medCob += cob; medSig += rel; medUnu += ati; medSen += sens; medCon += conf; //Print the rule cad = i +"\t"+ sixDecimals.format(ant+1) +"\t"+ sixDecimals.format(cob) +"\t"+ sixDecimals.format(rel) +"\t"+ sixDecimals.format(ati) +"\t ---- \t"+ sixDecimals.format(sens) +"\t" +sixDecimals.format(compl) +"\t"+ sixDecimals.format(conf); Files.addToFile(measure_file, cad+"\n"); } //Correct classified examples double voto[] = new double[nClases]; aciertos = 0; int clases[] = contClases; int cl; double distribucion[], max; int clasePorDefecto = 0; for (int i=0, clase = -1; i < nClases; i++) { if (clases[i] > clase) { clasePorDefecto = i; clase = clases[i]; } } for (int i = 0; i < datos.size(); i++) { for (j = 0; j < nClases; j++) { voto[j] = 0; } for (j = 0; j < reglas.size(); j++) { if (reglas.getRule(j).cover(datos.getDato(i))) { distribucion = reglas.getRule(j).getDistrib(); for (int k = 0; k < nClases; k++) { voto[k] += distribucion[k]; } } } //System.out.println(""); for (j = 0, max = 0, cl = 0; j < nClases; j++) { if (voto[j] > max) { max = voto[j]; cl = j; } } if (max == 0) { cl = clasePorDefecto; } if (cl == datos.getDato(i).getClas()) { aciertos++; } } porcAciertoTst = (double) aciertos / datos.size(); medVar /= tam; medCob /= tam; medSig /= tam; medUnu /= tam; medSen /= tam; medCon /= tam; for(int i=0; i<nDatos; i++){ if(muestrasCubiertasTotales[i]==1) medSup++; } medSup /= nDatos; //Print the average results cad = "---\t"+ sixDecimals.format(medVar) +"\t"+ sixDecimals.format(medCob) +"\t"+ sixDecimals.format(medSig) +"\t"+ sixDecimals.format(medUnu) +"\t"+ sixDecimals.format(porcAciertoTst) +"\t"+ sixDecimals.format(medSen) +"\t"+ sixDecimals.format(medSup) +"\t"+ sixDecimals.format(medCon); Files.addToFile(measure_file, cad); } private void calculaIndicesRule(int pos, SetData datos){ ant = reglas.getRule(pos).size(); // Calculate the distrib muestCubiertas = 0; //Number of covered examples int muestBienCubiertas = 0; int[][] instCubiertas = new int[tam][nClases]; for (int j = 0; j < nDatos; j++) { datos.getDato(j).setCovered(0); } for (int i = 0; i < reglas.size(); i++) { for (int j = 0; j < nClases; j++) { instCubiertas[i][j] = 0; } } muestCubiertas = 0; for (int j = 0; j < nDatos; j++) { Instance m = datos.getDato(j); if (reglas.getRule(pos).cover(m)) { muestCubiertas++; muestrasCubiertasTotales[j]=1; instCubiertas[pos][m.getClas()]++; if (reglas.getRule(pos).getClas() == m.getClas()) { if (m.getCovered() == 0) { muestBienCubiertas++; m.addCovered(); } } } } //Calculate coverage cob = (double) muestCubiertas / nDatos; //Calculate support compl = (double) muestBienCubiertas / nDatos; //Calculate confidence if(muestCubiertas!=0) conf = (double) muestBienCubiertas / muestCubiertas; else conf = 0; //Calculate sensitivity sens = (double) muestBienCubiertas / datos.getExamplesClass(reglas.getRule(pos).getClas()); //Calculate unusualness ati = 0; double val; val = evaluateUnus(reglas.getRule(pos),datos); ati += val; //Calculate significance double sigParcial = 0; double[] pCondi = new double[reglas.size()]; //Factor normalizador -> coverage pCondi[pos] = 0; for (int j = 0; j < nClases; j++) { pCondi[pos] += instCubiertas[pos][j]; } pCondi[pos] *= (double) 1.0 / nDatos; rel = 0; sigParcial = 0; for (int j = 0; j < nClases; j++) { double logaritmo = (double) instCubiertas[pos][j] / (contClases[j] * pCondi[pos]); if ((logaritmo != 0)&&(!Double.isNaN(logaritmo))&&(!Double.isInfinite(logaritmo))){ logaritmo = Math.log10(logaritmo); logaritmo *= (double) instCubiertas[pos][j]; sigParcial += logaritmo; } } rel += sigParcial * 2; } /** * <p> * Generate a string with the classification of the total examples for a data set * </p> * </p> * @param Data The data set to study * @return String with the result */ public String exitResult(SetData Data) { String cadena = new String(""); double voto[] = new double[nClases]; int clases[] = new int[nClases]; double distribucion[], max; int j, cl, clasePorDefecto = 0; for (int i = 0; i < Data.size(); i++) { clases[Data.getDato(i).getClas()]++; } for (int i = 0, clase = -1; i < nClases; i++) { if (clases[i] > clase) { clasePorDefecto = i; clase = clases[i]; } } for (int i = 0; i < Data.size(); i++) { for (j = 0; j < nClases; j++) { voto[j] = 0; } for (j = 0; j < reglas.size(); j++) { if (reglas.getRule(j).cover(Data.getDato(i))) { distribucion = reglas.getRule(j).getDistrib(); for (int k = 0; k < nClases; k++) { voto[k] += distribucion[k]; } } } for (j = 0, max = 0, cl = 0; j < nClases; j++) { if (voto[j] > max) { max = voto[j]; cl = j; } } if (max == 0) { cl = clasePorDefecto; } cadena += new String(valorNombreClases[Data.getDato(i).getClas()] + " " + valorNombreClases[cl] + "\n"); } return cadena; } /** * <p> * Evaluation of the unusualness measures * </p> * @param c Complex to evaluate * @param e Data set */ private double evaluateUnus(Complex c, SetData e) { double n, ncond, nclascond, nclas; int cl; double val = 0; n = 0; ncond = 0; nclascond = 0; nclas = 0; for (int i = 0; i < e.size(); i++) { cl = e.getDato(i).getClas(); n++; if (c.cover(e.getDato(i))) { c.incrementDistrib(cl); ncond++; if (cl == c.getClas()) { nclascond++; } } if (cl == c.getClas()) { nclas++; } } if (n != 0 && ncond != 0) { val = (ncond / n) * ((nclascond / ncond) - (nclas / n)); } else { val = Double.MIN_VALUE; } return (val); } }