/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.ImbalancedClassification.Ensembles.Preprocess.Instance_Selection.EUSCHCQstat; import keel.Algorithms.ImbalancedClassification.Ensembles.Preprocess.Basic.KNN; import org.core.*; import keel.Dataset.*; /** * Class to implement a chromosome for the EUS-CHC metho * @author Created by Salvador Garc�a L�pez (UJA) [19-07-2004] * @author Modified by Mikel Galar Idoate (UPNA) [03-05-13] * @version 1.1 (12-05-14) * @since JDK 1.5 */ public class Chromosome implements Comparable { /*Cromosome data structure*/ boolean cuerpo[]; /*Useful data for cromosomes*/ double calidad; boolean cruzado; boolean valido; boolean prediction[]; /** * Construct a random chromosome of specified size * @param size */ public Chromosome (int size) { double u; int i; cuerpo = new boolean[size]; for (i=0; i<size; i++) { u = Randomize.Rand(); if (u < 0.5) { cuerpo[i] = false; } else { cuerpo[i] = true; } } cruzado = true; valido = true; } /** * It creates a copied chromosome * @param size * @param a Chromosome to copy */ public Chromosome (int size, Chromosome a) { int i; cuerpo = new boolean[size]; for (i=0; i<cuerpo.length; i++) cuerpo[i] = a.getGen(i); calidad = a.getCalidad(); cruzado = false; valido = true; prediction = a.prediction.clone(); } /** * It returns a given gen of the chromsome * @param indice * @return */ public boolean getGen (int indice) { return cuerpo[indice]; } /** * Ite returns the fitness of the chrom. * @return */ public double getCalidad () { return calidad; } /** * It sets a value for a given chrom. * @param indice * @param valor */ public void setGen (int indice, boolean valor) { cuerpo[indice] = valor; } /** * Function that evaluates a cromosome */ public void evalua (double datos[][], double real[][], int nominal[][], boolean nulos[][], int clases[], double train[][], double trainR[][], int trainN[][], boolean trainM[][], int clasesT[], String wrapper, int K, String evMeas, boolean MS, boolean pFactor, double P, int posID, int nPos, boolean distanceEu, keel.Dataset.Attribute entradas[], boolean[][] anteriores, boolean[][] salidasAnteriores) { int i, j, l=0, m, h; int aciertosP = 0, aciertosN = 0; int totalP = 0, totalN = 0; double beta; double precision, recall; int vecinos[]; double conjS[][]; double conjR[][]; int conjN[][]; boolean conjM[][]; int clasesS[]; int s, claseObt; prediction = new boolean[train.length]; int negID = -1; for (i = 0; i < datos.length; i++) if(clases[i] != posID) { negID = clases[i]; break; } if (MS) { s = genesActivos() + nPos; vecinos = new int[K]; conjS = new double[s][train[0].length]; conjR = new double[s][train[0].length]; conjN = new int[s][train[0].length]; conjM = new boolean[s][train[0].length]; clasesS = new int[s]; h=0; for (m=0, l=0; m<cuerpo.length; m++, h++) { for (;h<clasesT.length && clasesT[h]==posID;h++); if (getGen(m)) { //the instance must be copied to the solution for (j=0; j<train[h].length; j++) { conjS[l][j] = train[h][j]; conjR[l][j] = trainR[h][j]; conjN[l][j] = trainN[h][j]; conjM[l][j] = trainM[h][j]; } clasesS[l] = clasesT[h]; l++; } } for (m=0; m<train.length; m++) { if (clasesT[m] == posID) { for (j=0; j<train[m].length; j++) { conjS[l][j] = train[m][j]; conjR[l][j] = trainR[m][j]; conjN[l][j] = trainN[m][j]; conjM[l][j] = trainM[m][j]; } clasesS[l] = clasesT[m]; l++; } } if (wrapper.equalsIgnoreCase("k-NN")) { for (i=0; i<datos.length; i++) { claseObt = KNN.evaluacionKNN2(K, conjS, conjR, conjN, conjM, clasesS, datos[i], real[i], nominal[i], nulos[i], Math.max(posID, negID) + 1, distanceEu, vecinos); if (claseObt >= 0) if (clases[i] == claseObt && clases[i] == posID) { aciertosP++; totalP++; prediction[i] = true; } else if (clases[i] != claseObt && clases[i] == posID) { totalP++; prediction[i] = false; } else if (clases[i] == claseObt && clases[i] != posID) { aciertosN++; totalN++; prediction[i] = true; } else if (clases[i] != claseObt && clases[i] != posID) { totalN++; prediction[i] = false; } } } } else { s = genesActivos(); vecinos = new int[K]; conjS = new double[s][train[0].length]; conjR = new double[s][train[0].length]; conjN = new int[s][train[0].length]; conjM = new boolean[s][train[0].length]; clasesS = new int[s]; for (j=0, l=0; j<train.length; j++) { if (cuerpo[j]) { //the instance must be copied to the solution for (m=0; m<train[j].length; m++) { conjS[l][m] = train[j][m]; conjR[l][m] = trainR[j][m]; conjN[l][m] = trainN[j][m]; conjM[l][m] = trainM[j][m]; } clasesS[l] = clasesT[j]; l++; } } for (i=0; i<datos.length; i++) { claseObt = KNN.evaluacionKNN2(K, conjS, conjR, conjN, conjM, clasesS, datos[i], real[i], nominal[i], nulos[i], Math.max(posID, negID) + 1, distanceEu, vecinos); if (claseObt >= 0) if (clases[i] == claseObt && clases[i] == posID) { aciertosP++; totalP++; prediction[i] = true; } else if (clases[i] != claseObt && clases[i] == posID) { totalP++; prediction[i] = false; } else if (clases[i] == claseObt && clases[i] != posID) { aciertosN++; totalN++; prediction[i] = true; } else if (clases[i] != claseObt && clases[i] != posID) { totalN++; prediction[i] = false; } } } if (evMeas.equalsIgnoreCase("geometric mean")) { calidad = Math.sqrt(((double)aciertosP/(double)totalP)*((double)aciertosN/(double)totalN)); } else if (evMeas.equalsIgnoreCase("auc")) { if (totalP < totalN) calidad = (((double)aciertosP / ((double)totalP)) * ((double)aciertosN / ((double)totalN))) + ((1.0 - ((double)aciertosN / ((double)totalN)))*((double)aciertosP / ((double)totalP)))/2.0 + ((1.0 - ((double)aciertosP / ((double)totalP)))*((double)aciertosN / ((double)totalN)))/2.0; else calidad = (((double)aciertosN / ((double)totalN)) * ((double)aciertosP / ((double)totalP))) + ((1.0 - ((double)aciertosP / ((double)totalP)))*((double)aciertosN / ((double)totalN)))/2.0 + ((1.0 - ((double)aciertosN / ((double)totalN)))*((double)aciertosP / ((double)totalP)))/2.0; } else if (evMeas.equalsIgnoreCase(("cost-sensitive"))) { calidad = ((double)totalN - aciertosN) + ((double)totalP - aciertosP) * (double)totalN/(double)totalP; calidad /= (2*(double)totalN); calidad = 1 - calidad; } else if (evMeas.equalsIgnoreCase(("kappa"))) { double sumDiagonales = 0.0, sumTrTc = 0.0; sumDiagonales = aciertosP + aciertosN; sumTrTc = totalP * (totalN - aciertosN) + totalN * (totalP - aciertosP); calidad = (((double)datos.length * sumDiagonales - sumTrTc) / ((double)datos.length * (double)datos.length - sumTrTc)); } else { precision = (((double)aciertosP / ((double)totalP))) / (((double)aciertosP / ((double)totalP)) + (1.0 - ((double)aciertosN / ((double)totalN)))); recall = (((double)aciertosP / ((double)totalP))) / (((double)aciertosP / ((double)totalP)) + (1.0 - ((double)aciertosP / ((double)totalP)))); calidad = (2 * precision * recall)/(recall + precision); } if (pFactor) { if (MS) { beta = (double)genesActivos()/(double)nPos; } else { beta = (double)genes0Activos(clasesT)/(double)genes1Activos(clasesT); } calidad -= Math.abs(1.0-beta)*P; } if (anteriores[0] != null) { /* Calcular la distancia de Hamming mínima entre el cromosoma y anteriores[][] */ double q = -Double.MAX_VALUE; for (i = 0; i < anteriores.length && anteriores[i] != null; i++) { double qaux = Qstatistic(anteriores[i], cuerpo, clases.length); if (q < qaux) q = qaux; } double peso = (double)(anteriores.length - i) / (double) (anteriores.length); double IR = (double)totalN / (double)totalP * 0.1; calidad = calidad * (1.0 / peso) * (1.0 / IR) - q * peso; } cruzado = false; } private double Qstatistic(boolean[] v1, boolean[] v2, int n) { double[][] t = new double[2][2]; double ceros = 0; if (v1.length < n) n = v1.length; for (int i = 0; i < n; i++) { if (v1[i] == v2[i] && v1[i] == true) t[0][0]++; else if (v1[i] == v2[i] && v1[i] == false) t[1][1]++; else if (v1[i] != v2[i] && v1[i] == true) t[1][0]++; else t[0][1]++; if (!v2[i]) ceros++; } if (ceros == n) return 2.0; return (t[1][1] * t[0][0] - t[0][1] * t[1][0]) / (t[1][1] * t[0][0] + t[0][1] * t[1][0]); } /** * Function that does the CHC diverge */ public void divergeCHC (double r, Chromosome mejor, double prob) { int i; for (i=0; i<cuerpo.length; i++) { if (Randomize.Rand() < r) { if (Randomize.Rand() < prob) { cuerpo[i] = true; } else { cuerpo[i] = false; } } else { cuerpo[i] = mejor.getGen(i); } } cruzado = true; } public boolean estaEvaluado () { return !cruzado; } public int genesActivos () { int i, suma = 0; for (i=0; i<cuerpo.length; i++) { if (cuerpo[i]) suma++; } return suma; } public int genes0Activos (int clases[]) { int i, suma = 0; for (i=0; i<cuerpo.length; i++) { if (cuerpo[i] && clases[i] == 0) suma++; } return suma; } public int genes1Activos (int clases[]) { int i, suma = 0; for (i=0; i<cuerpo.length; i++) { if (cuerpo[i] && clases[i] == 1) suma++; } return suma; } public boolean esValido () { return valido; } public void borrar () { valido = false; } /** * Function that lets compare cromosomes for an easilier sort */ public int compareTo (Object o1) { if (this.calidad > ((Chromosome)o1).calidad) return -1; else if (this.calidad < ((Chromosome)o1).calidad) return 1; else return 0; } /** * Prints the chrosome into a string value */ public String toString() { int i; String temp = "["; for (i=0; i<cuerpo.length; i++) if (cuerpo[i]) temp += "1"; else temp += "0"; temp += ", " + String.valueOf(calidad) + ", " + String.valueOf(genesActivos()) + "]"; return temp; } }