/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * @author Written by Rosa Venzala (University of Granada) 02/06/2008 * @author Modified by Xavi Sol� (La Salle, Ram�n Llull University - Barcelona) 16/12/2008 * @version 1.1 * @since JDK1.2 * </p> */ package keel.Algorithms.Hyperrectangles.EACH; public class RuleQualityEvaluation { /** * <p> * To evaluate the rules * </p> */ private int numClasses; private int numTrainClasses; private int numTestClasses; private int numData; private int contClasses[]; private int size; private double before; private double cob; private double compl; private double rel; private double ati; private double trainAcc; private double testAcc; private double samplesCovered; private EachDataSet train; private EachDataSet test; private RuleSet rules; private String[] valueClassNames; private String[] valueClassNamesTrain; /** * <p> * Calculates the final statistical for a set of rules and a set of data * </p> * @param conjreg Set of rules(complex) final * @param conjTrn Set of data Train * @param conjTst Set of data Test * @param muestPorClaseTrain int[] Number of examples of each class in the Train set * @param muestPorClaseTest int[] Number of examples of each class in the Test set * @param valorNombreClases String[] Labels for each class */ public RuleQualityEvaluation(RuleSet conjreg, EachDataSet conjTrn, EachDataSet conjTst, int[] muestPorClaseTrain, int[] muestPorClaseTest, String[] valorNombreClases,String []valorNombreClasesTest) { rules = conjreg; //referencia this.valueClassNames = valorNombreClases; this.valueClassNamesTrain = valorNombreClasesTest; train = conjTrn.copyDataSet(); test = conjTst.copyDataSet(); numClasses = conjreg.getLastRule().getNClases(); numTrainClasses=numClasses; numTestClasses=muestPorClaseTest.length; numData = conjTrn.size(); // Calculos en Entrenamiento computeIndexes(train, muestPorClaseTrain, 0); System.out.print("\n\nTrain Statistics: "); System.out.print("\n\n Size of the rule set: " + size + "\nAverage number of attributes per rule: " + before + "\nCoverage: " + cob); // System.out.print("\n\t Confidence: " + conf + " ComplMed: " + complmed + // " Compl: " + compl); System.out.print("\nSupport: " + compl); System.out.print("\nRelevance: " + rel + "\nUnusualness: " + ati); System.out.print("\nAccuracy: " + trainAcc); // Calculos en test computeIndexes(test, muestPorClaseTest, 1); System.out.print("\n\nTest Statistics:"); System.out.print("\n\n Size of the rule set: " + size + "\nAverage number of attributes per rule: " + before + "\nCoverage: " + cob); // System.out.print("\n\t Confidence: " + conf + " ComplMed: " + complmed + // " Compl: " + compl); System.out.print("\nSupport: " + compl); System.out.print("\nRelevance: " + rel + "\nUnusualness: " + ati); System.out.println("\nAccuracy: " + testAcc+"\n-----------------------------"); } /** * <p> * Get Accuracyy Train * </p> * @return percent */ public double getAccuracyTrain(){ return trainAcc; } /** * <p> * Get Accuracyy Test * </p> * @return percent */ public double getAccuracyTest(){ return testAcc; } /** * <p> * Prints on a string the statistical results * </p> * @return a string with the statistical results */ public String printString() { String cad=""; // String cad = "####Average results for test data####\n"; //cad += "Avg. Confidence; " + conf + " ; \n "; //cad += "Avg. Suppport; " + complmed + " ; \n "; /* cad += "Avg. Rule length: " + tam + "\n"; cad += "Avg. Number of attributes by rule: " + ant + "\n"; cad += "Avg. Coverage: " + cob + "\n"; cad += "Avg. Support: " + compl + "\n"; cad += "Avg. Significance: " + rel + "\n"; cad += "Avg. Unusualness: " + ati + "\n\n";*/ cad += "Accuracy Training: " + trainAcc + "\n"; ; cad += "Accuracy Test: " + testAcc; return cad; } /** * <p> * Calculates all the statistical, especially percent accuracy * </p> * @param datos Set of data (Train or Test) * @param muestPorClase int[] Number of examples for each class in the set of data * @param code Codigo Train or Test */ private void computeIndexes(EachDataSet datos, int[] muestPorClase, int code) { int i, j; int aciertos; numData = datos.size(); if (code == 0) numClasses=numTrainClasses; else numClasses=numTestClasses; int contClasesTra[]=new int[numTrainClasses]; // contamos el numero de mustras por clase contClasses = new int[numClasses]; for (i = 0; i < numClasses; i++) { contClasses[i] = muestPorClase[i]; } size = rules.size(); // calculamos Tam // calculamos n atributos por regla medio for (i = 0, before = 0; i < rules.size(); i++) { before += rules.getRule(i).size(); } before = (double) before / size; //N de atributos por regla medio // calculamos la distrib samplesCovered = 0; //n ejemplos cubiertos por las reglas int muestBienCubiertas = 0; int[][] instCubiertas = new int[size][numClasses]; for (j = 0; j < numData; j++) { datos.getData(j).setCovered(0); } for (i = 0; i < rules.size(); i++) { for (j = 0; j < numClasses; j++) { instCubiertas[i][j] = 0; } } samplesCovered = 0; for (i = 0; i < rules.size(); i++) { for (j = 0; j < numData; j++) { Sample m = datos.getData(j); if (rules.getRule(i).isCovered(m)) { samplesCovered++; instCubiertas[i][m.getClassSelector()]++; if (rules.getRule(i).getClassAttribute() == m.getClassSelector()) { if (m.getCovered() == 0) { muestBienCubiertas++; m.incrementCovered(); } } } } } //System.err.println("Muestras cubiertas -> "+muestCubiertas); //System.err.println("Total datos -> "+nDatos); //cob = (double) muestCubiertas / (nDatos * tam * tam); //COV = 1/nRSUM[Cov(Ri)] -- Cov(Ri) = n(Condi)/N // cob = samplesCovered / (size * numData); //Cobertura -> porcentaje de ejemplos cubiertos por cada regla / n de reglas // Calculamos completitud y completitud media [support] compl = (double) muestBienCubiertas / numData; // Calculamos la relevancia (significance) double sigParcial = 0; double[] pCondi = new double[rules.size()]; //Factor normalizador -> coverage for (i = 0; i < rules.size(); i++) { pCondi[i] = 0; for (j = 0; j < numClasses; j++) { pCondi[i] += instCubiertas[i][j]; } pCondi[i] *= (double) 1.0 / numData; } rel = 0; for (i = 0; i < rules.size(); i++) { sigParcial = 0; for (j = 0; j < numClasses; j++) { double logaritmo = (double) instCubiertas[i][j] / (contClasses[j] * pCondi[i]); if ((logaritmo != 0) && (!Double.isNaN(logaritmo)) && (!Double.isInfinite(logaritmo))) { logaritmo = Math.log(logaritmo); logaritmo *= (double) instCubiertas[i][j]; sigParcial += logaritmo; } } rel += sigParcial * 2; } rel /= (double) rules.size(); // Calculamos la atipicidad de las reglas (unusualness) [ati] double aux; for (i = 0, aux = 0; i < rules.size(); i++) { // para cada regla double ncondi, pcond, pclase, pcondclase; int cl = rules.getRule(i).getClassAttribute(); for (j = 0, ncondi = 0; j < numTrainClasses; j++) { ncondi += rules.getRule(i).getDistributionClass(j); //ncondi } pcond = ncondi / numData; // pclase = (double) contClases[cl] / nDatos; pclase = (double) contClasesTra[cl] / numData; pcondclase = rules.getRule(i).getDistributionClass(cl) / numData; aux += pcond * (pcondclase - pclase); } ati = aux / rules.size(); //Ahora el porcentaje de aciertos int voto[] = new int[numClasses]; aciertos = 0; int clases[] = contClasses; //new int[nClases]; //int verificados[] = new int[nClases]; int clase, cl; int distribucion[], max; int clasePorDefecto = 0; double volumen; int ndimensiones; /*for (i = 0; i < datos.size(); i++) { clases[datos.getDato(i).getClase()]++; }*/ for (i = 0, clase = -1; i < numClasses; i++) { if (clases[i] > clase) { clasePorDefecto = i; clase = clases[i]; } } int clRegActivada=clasePorDefecto; for (i = 0; i < datos.size(); i++) { // Para el conjunto completo de datos for (j = 0; j < numClasses; j++) { //Inicializo voto a 0 voto[j] = 0; //verificados[j] = 1; } volumen=1000000; ndimensiones=100; for (j = 0; j < rules.size(); j++) { // vemos que reglas verifican a la muestra if (rules.getRule(j).isCovered(datos.getData(i))) { if( (rules.getRule(j).getDimensions()< ndimensiones) ||((rules.getRule(j).getDimensions()==ndimensiones)&&(rules.getRule(j).getVolume()<volumen))){ //si tienen la misma dimension es ahora cuando comparamos las cantidades clRegActivada=rules.getRule(j).getClassAttribute(); distribucion = rules.getRule(j).getDistribution(); /*for (int k = 0; k < nClases; k++) { voto[k] += distribucion[k]; //verificados[k]++; }*/ //ACTUALIZAR EL VOLUMEN volumen=rules.getRule(j).getVolume(); ndimensiones=rules.getRule(j).getDimensions(); } } } /*for (int k = 0; k < nClases; k++) { voto[k] /= verificados[k]; }*/ //System.out.println(""); for (j = 0, max = 0, cl = 0; j < numClasses; j++) { //Obtengo la clase que me da mis reglas //System.out.print(" Voto["+j+"]="+voto[j]); if (voto[j] > max) { max = voto[j]; cl = j; } } if (max == 0) { //No se ha activado ninguna regla cl = clasePorDefecto; //System.out.println("X defecto -> "+code); } cl=clRegActivada; if(code==0) {if (cl == datos.getData(i).getClassSelector()) {aciertos++;}} else{ if (valueClassNames[cl] == valueClassNamesTrain[datos.getData(i).getClassSelector()]) { aciertos++; } } } System.out.print("\n\n Accuracy: " + (double)aciertos/datos.size() + " ... total data: " + datos.size()); if (code == 0) { trainAcc = (double) aciertos / datos.size(); } else { testAcc = (double) aciertos / datos.size(); System.out.println("aciertos y total "+aciertos+" "+datos.size()); } } /** * <p> * Generates a string with out-put lists * </p> * @param datos Set of data to compare with the set of rules * @return A string with pairs (original class; calculated class;) */ public String out(EachDataSet datos,boolean train) { int clRegActivada; if (train) numClasses=numTrainClasses; else numClasses=numTestClasses; String cadena = new String(""); int voto[] = new int[numClasses]; int clases[] = new int[numClasses]; int distribucion[], max; int j, cl, clasePorDefecto = 0; double volumen; int ndimensiones; for (int i = 0; i < datos.size(); i++) { clases[datos.getData(i).getClassSelector()]++; } for (int i = 0, clase = -1; i < numClasses; i++) { if (clases[i] > clase) { clasePorDefecto = i; clase = clases[i]; } } clRegActivada=clasePorDefecto; if(!train){ for (j = 0; j < rules.size(); j++) { complexEvaluation(rules.getRule(j),datos); } } for (int i = 0; i < datos.size(); i++) { // Para el conjunto completo de datos for (j = 0; j < numClasses; j++) { //Inicializo voto a 0 voto[j] = 0; } volumen=1000000; ndimensiones=100000; for (j = 0; j < rules.size(); j++) { // vemos que reglas verifican a la muestra if ((rules.getRule(j).isCovered(datos.getData(i)))) { if( (rules.getRule(j).getDimensions()< ndimensiones) ||((rules.getRule(j).getDimensions()==ndimensiones)&&(rules.getRule(j).getVolume()<volumen))){ //o tiene menos dimensiones o estas son iguales pero es mas pequeño clRegActivada=rules.getRule(j).getClassAttribute(); distribucion = rules.getRule(j).getDistribution(); /* for (int k = 0; k < nClases; k++) { voto[k] += distribucion[k]; }*/ //ACTUALIZAR EL VOLUMEN y las dimensiones volumen=rules.getRule(j).getVolume(); ndimensiones=rules.getRule(j).getDimensions(); } } } for (j = 0, max = 0, cl = 0; j < numClasses; j++) { //Obtengo la clase que me da mis reglas if (voto[j] > max) { max = voto[j]; cl = j; } } if (max == 0) { //No se ha activado ninguna regla cl = clasePorDefecto; } if(train){ cl=clRegActivada; cadena += new String(valueClassNames[datos.getData(i).getClassSelector()] + " " + valueClassNames[cl] + "\n"); } else{ cl=clRegActivada; cadena += new String(valueClassNamesTrain[datos.getData(i).getClassSelector()] + " " + valueClassNames[cl] + "\n"); //es valorNombreClases de train no de test pq la indexacion de las reglas es la referente a train } } return cadena; } /** * <p> * Evaluation of the complex over the example set for see the matching class * </p> * @param c Complex to evaluate * @param e Set of data */ private void complexEvaluation(Complex c, EachDataSet e) { c.removeDistribution(); for (int i = 0; i < e.size(); i++) { int cl = e.getData(i).getClassSelector(); if (c.isCovered(e.getData(i))) { //System.out.println("dato "+i+" es cubierto por "); c.incrementDistribution(cl); } } } }