/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
*
* File: Cromosoma.java
*
* Auxiliary class to represent chromosomes for Instance selection methods
*
* @author Written by Salvador García (University of Granada) 20/07/2004
* @version 0.1
* @since JDK1.5
*
*/
package keel.Algorithms.Preprocess.Instance_Selection.SSMA;
import java.util.Arrays;
import keel.Algorithms.Preprocess.Basic.*;
import org.core.*;
/**
 * Chromosome used by the SSMA instance selection method.
 *
 * A chromosome is a boolean mask over the training instances plus, for every
 * training instance, a table with the indices of its K nearest neighbours
 * among the instances whose gene is set. Fitness values are cached in the
 * chromosome and updated incrementally by the local search.
 */
public class Cromosoma implements Comparable {

    /* Chromosome body: cuerpo[i] is the gene of training instance i */
    boolean cuerpo[];

    /* vecinos[i][j] is the index of the j-th nearest neighbour of instance i */
    int vecinos[][];

    /* Cached fitness (accuracy term + reduction term, each weighted by 50) */
    double fitness;
    /* Cached number of instances whose class matches the majority vote */
    double fitnessAc;
    /* True once the fitness has been computed */
    boolean evaluado;
    /* False once the chromosome has been marked for deletion */
    boolean valido;

    /**
     * Builder. Construct a random chromosome of specified size
     *
     * Each gene is set with probability 0.5; afterwards the neighbour table
     * is computed for the resulting mask.
     *
     * @param K Number of neighbors of the KNN algorithm
     * @param size Size of the chromosome
     * @param dMatrix Distance matrix (may be null; distances are then computed on demand)
     * @param datos Reference to the training set
     * @param real  Reference to the training set (real valued)
     * @param nominal  Reference to the training set (nominal valued)
     * @param nulo  Reference to the training set (null values)
     * @param distanceEu True= Euclidean distance; False= HVDM
     */
    public Cromosoma (int K, int size, double dMatrix[][], double datos[][], double real[][], int nominal[][], boolean nulo[][], boolean distanceEu) {

        cuerpo = new boolean[size];
        vecinos = new int[size][K];

        for (int i = 0; i < size; i++) {
            cuerpo[i] = !(Randomize.Rand() < 0.5);
        }

        evaluado = false;
        valido = true;

        recalcularVecinos(K, dMatrix, datos, real, nominal, nulo, distanceEu);

    }//end-method

    /**
     * Builder. Copies a chromosome of specified size
     *
     * Genes, neighbour table and both fitness values are copied; the new
     * chromosome is always flagged as evaluated and valid.
     *
     * @param K Number of neighbors of the KNN algorithm
     * @param size Size of the chromosome
     * @param a Chromosome to copy
     */
    public Cromosoma (int K, int size, Cromosoma a) {

        cuerpo = new boolean[size];
        vecinos = new int[size][K];

        for (int i = 0; i < cuerpo.length; i++) {
            cuerpo[i] = a.getGen(i);
            for (int j = 0; j < K; j++) {
                vecinos[i][j] = a.getVecino(i, j);
            }
        }

        fitness = a.getFitness();
        fitnessAc = a.getFitnessAc();
        evaluado = true;
        valido = true;

    }//end-method

    /**
     * Builder. Creates a chromosome from two parents
     *
     * Each gene is taken from b with probability pCross and from a otherwise.
     * The neighbour table is only allocated here (zero-filled), not computed;
     * mutation() recomputes it.
     *
     * @param K Number of neighbors of the KNN algorithm
     * @param a First chromosome
     * @param b Second chromosome
     * @param pCross Probability of crossing
     * @param size Size of the chromosome
     */
    public Cromosoma (int K, Cromosoma a, Cromosoma b, double pCross, int size) {

        cuerpo = new boolean[size];
        vecinos = new int[size][K];

        for (int i = 0; i < cuerpo.length; i++) {
            if (Randomize.Rand() < pCross) {
                cuerpo[i] = b.getGen(i);
            } else {
                cuerpo[i] = a.getGen(i);
            }
        }

        evaluado = false;
        valido = true;

    }//end-method

    /**
     * Mutation operator
     *
     * Flips every gene independently with probability pMut and then
     * recomputes the whole neighbour table for the new mask.
     *
     * @param K Number of neighbors of the KNN algorithm
     * @param pMut Mutation probability
     * @param dMatrix Distance matrix
     * @param datos Reference to the training set
     * @param real  Reference to the training set (real valued)
     * @param nominal  Reference to the training set (nominal valued)
     * @param nulo  Reference to the training set (null values)
     * @param distanceEu True= Euclidean distance; False= HVDM
     */
    public void mutation (int K, double pMut, double dMatrix[][], double datos[][], double real[][], int nominal[][], boolean nulo[][], boolean distanceEu) {

        for (int i = 0; i < cuerpo.length; i++) {
            if (Randomize.Rand() < pMut) {
                cuerpo[i] = !cuerpo[i];
            }
        }

        recalcularVecinos(K, dMatrix, datos, real, nominal, nulo, distanceEu);

    }//end-method

    /**
     * Recomputes the K neighbours of every instance for the current mask.
     */
    private void recalcularVecinos (int K, double dMatrix[][], double datos[][], double real[][], int nominal[][], boolean nulo[][], boolean distanceEu) {

        for (int i = 0; i < cuerpo.length; i++) {
            for (int j = 0; j < K; j++) {
                vecinos[i][j] = obtenerCercano(vecinos[i], j, dMatrix, i, datos, real, nominal, nulo, distanceEu);
            }
        }

    }//end-method

    /**
     * Obtain the nearest neighbour given a mask (cromosome)
     *
     * Scans the instances whose gene is set, skipping those already stored in
     * the first J positions of vecinos, and returns the one at minimum
     * distance from instance index. On ties the lowest index wins.
     *
     * @param vecinos Array of neighbors already chosen for the instance
     * @param J Number of leading entries of vecinos to exclude
     * @param dMatrix Distance matrix; if null, distances are computed with KNN.distancia
     * @param index Index of the instance whose neighbour is searched
     * @param datos Reference to the training set
     * @param real  Reference to the training set (real valued)
     * @param nominal  Reference to the training set (nominal valued)
     * @param nulo  Reference to the training set (null values)
     * @param distanceEu True= Euclidean distance; False= HVDM
     *
     * @return Index of the nearest eligible instance, or 0 if there is none
     */
    public int obtenerCercano (int vecinos[], int J, double dMatrix[][], int index, double datos[][], double real[][], int nominal[][], boolean nulo[][], boolean distanceEu) {

        int minPos = -1;
        double minDist = 0.0;

        for (int i = 0; i < cuerpo.length; i++) {

            if (!cuerpo[i] || yaElegido(vecinos, J, i)) {
                continue;
            }

            double dist;
            if (dMatrix != null) {
                dist = dMatrix[index][i];
            } else {
                dist = KNN.distancia(datos[index], real[index], nominal[index], nulo[index], datos[i], real[i], nominal[i], nulo[i], distanceEu);
            }

            // The first eligible instance is always taken; later ones must be strictly closer
            if (minPos < 0 || minDist > dist) {
                minPos = i;
                minDist = dist;
            }
        }

        // No eligible instance: callers expect index 0 in that case
        return (minPos < 0) ? 0 : minPos;

    }//end-method

    /**
     * Tests whether a candidate is among the first J entries of a neighbour list.
     */
    private static boolean yaElegido (int vecinos[], int J, int candidato) {

        for (int j = 0; j < J; j++) {
            if (vecinos[j] == candidato) {
                return true;
            }
        }
        return false;

    }//end-method

    /**
     * Get the value of a gene
     *
     * @param indice Index of the gene
     *
     * @return Value of the specified gene
     */
    public boolean getGen (int indice) {
        return cuerpo[indice];
    }//end-method

    /**
     * Get the j-neighbour of a given instance
     *
     * @param indicei Instance to search
     * @param indicej Order of the neighbor
     *
     * @return Index to the neighbor found
     */
    public int getVecino (int indicei, int indicej) {
        return vecinos[indicei][indicej];
    }//end-method

    /**
     * Get the fitness of a chromosome
     *
     * @return Fitness of the chromosome
     */
    public double getFitness () {
        return fitness;
    }//end-method

    /**
     * Get the accuracy fitness of a chromosome
     *
     * @return Accuracy fitness of the chromosome
     */
    public double getFitnessAc () {
        return fitnessAc;
    }//end-method

    /**
     * Performs a full evaluation of a chromosome
     *
     * Every instance is classified by majority vote of its K stored
     * neighbours. The fitness is 50 times the fraction of hits plus 50 times
     * the fraction of genes that are not set; the raw hit count is stored as
     * the accuracy fitness.
     *
     * @param nClases Number of clases
     * @param K Number of neighbors of the KNN algorithm
     * @param clases Output attribute of the instances
     */
    public void evaluacionCompleta (int nClases, int K, int clases[]) {

        double contador = 0;
        int votos[] = new int[nClases];

        for (int i = 0; i < vecinos.length; i++) {
            int predicha = claseMasVotada(vecinos[i], K, nClases, clases, votos);
            if (clases[i] == predicha) {
                contador++;
            }
        }

        fitness = contador*50.0/(double)cuerpo.length + (((double)cuerpo.length - (double)this.genesActivos())/(double)cuerpo.length)*50.0;
        fitnessAc = contador;
        evaluado = true;

    }//end-method

    /**
     * Majority vote among the first K neighbours of a row.
     *
     * votos is a scratch buffer of length nClases that is cleared here; ties
     * are resolved in favour of the lowest class index.
     */
    private static int claseMasVotada (int fila[], int K, int nClases, int clases[], int votos[]) {

        Arrays.fill(votos, 0);
        for (int j = 0; j < K; j++) {
            votos[clases[fila[j]]]++;
        }

        int maxPos = 0;
        int maxValue = votos[0];
        for (int j = 1; j < nClases; j++) {
            if (votos[j] > maxValue) {
                maxValue = votos[j];
                maxPos = j;
            }
        }
        return maxPos;

    }//end-method

    /**
     * Tests if the chromosome is valid
     *
     * @return True if the chromosome is valid. False, if not.
     */
    public boolean esValido () {
        return valido;
    }//end-method

    /**
     * Marks a chromosome for deletion
     */
    public void borrar () {
        valido = false;
    }//end-method

    /**
     * Set the value of a gene
     *
     * The neighbour table and the cached fitness are not updated.
     *
     * @param pos Index of the gene
     * @param valor Value to set
     */
    public void setGen (int pos, boolean valor) {
        cuerpo[pos] = valor;
    }//end-method

    /**
     * Tests if the chromosome is already evaluated
     *
     * @return True if the chromosome is already evaluated. False, if not.
     */
    public boolean estaEvaluado () {
        return evaluado;
    }//end-method

    /**
     * Count the number of genes set to 1
     *
     * @return Number of genes set to 1 in the chromosome
     */
    public int genesActivos () {

        int suma = 0;
        for (int i = 0; i < cuerpo.length; i++) {
            if (cuerpo[i]) {
                suma++;
            }
        }
        return suma;

    }//end-method

    /**
     * Performs the local search procedure of SSMA
     *
     * The set genes are visited in random order and each one is tentatively
     * dropped through evaluacionParcial. When a drop is accepted the visiting
     * order is rebuilt from the remaining set genes and the scan restarts;
     * when it is rejected the next gene is tried. The search ends when a full
     * pass yields no accepted drop.
     *
     * @param nClases Number of clases
     * @param K Number of neighbors of the KNN algorithm
     * @param clases Output attribute of the instances
     * @param dMatrix Distance matrix
     * @param umbral Current threshold
     * @param datos Reference to the training set
     * @param real  Reference to the training set (real valued)
     * @param nominal  Reference to the training set (nominal valued)
     * @param nulo  Reference to the training set (null values)
     * @param distanceEu True= Euclidean distance; False= HVDM
     *
     * @return Amount of evaluations spent (sum of the absolute values
     *         returned by evaluacionParcial)
     */
    public double optimizacionLocal (int nClases, int K, int clases[], double dMatrix[][], double umbral, double datos[][], double real[][], int nominal[][], boolean nulo[][], boolean distanceEu) {

        double evaluaciones = 0;
        int visitas[] = indicesActivosBarajados();
        int i = 0;

        while (i < visitas.length) {

            int ref = visitas[i];
            double ev = evaluacionParcial(nClases, K, clases, ref, dMatrix, umbral, datos, real, nominal, nulo, distanceEu);

            // Acceptance is read from the gene itself, not from the sign of ev:
            // a rejected drop that affected no instance returns -0.0, and
            // "-0.0 >= 0" is true in Java, so the sign cannot tell both cases apart.
            if (!cuerpo[ref]) {
                visitas = indicesActivosBarajados();
                i = 0;
            } else {
                i++;
            }

            evaluaciones += Math.abs(ev);
        }

        return evaluaciones;

    }//end-method

    /**
     * Returns the indices of the genes currently set, in random order.
     */
    private int[] indicesActivosBarajados () {

        int visitas[] = new int[this.genesActivos()];

        for (int j = 0, k = 0; j < cuerpo.length; j++) {
            if (cuerpo[j]) {
                visitas[k] = j;
                k++;
            }
        }

        for (int j = 0; j < visitas.length; j++) {
            int pos = Randomize.Randint (j, visitas.length-1);
            int tmp = visitas[j];
            visitas[j] = visitas[pos];
            visitas[pos] = tmp;
        }

        return visitas;

    }//end-method

    /**
     * Performs a partial evaluation
     *
     * Tentatively clears gene ref and recomputes the neighbours of every
     * instance that had ref among its K neighbours. The gain is the number of
     * affected instances that become correctly classified minus the number
     * that stop being correctly classified. If the rounded gain reaches the
     * threshold the drop is kept and the neighbour table and cached fitness
     * values are updated; otherwise gene ref is restored and nothing else
     * changes.
     *
     * @param nClases Number of clases
     * @param K Number of neighbors of the KNN algorithm
     * @param clases Output attribute of the instances
     * @param ref Instance adjusted
     * @param dMatrix Distance matrix
     * @param umbral Current threshold
     * @param datos Reference to the training set
     * @param real  Reference to the training set (real valued)
     * @param nominal  Reference to the training set (nominal valued)
     * @param nulo  Reference to the training set (null values)
     * @param distanceEu True= Euclidean distance; False= HVDM
     *
     * @return Fraction of instances re-evaluated; positive (or +0.0) when the
     *         drop was accepted, negated (possibly -0.0) when it was rejected
     */
    public double evaluacionParcial (int nClases, int K, int clases[], int ref, double dMatrix[][], double umbral, double datos[][], double real[][], int nominal[][], boolean nulo[][], boolean distanceEu) {

        double ganancia = 0;
        int contador = 0;
        int votos[] = new int[nClases];
        int vecinosTemp[][] = new int[cuerpo.length][K];

        cuerpo[ref] = false; // tentative drop, undone below if rejected

        for (int i = 0; i < cuerpo.length; i++) {

            boolean evaluar = false;
            for (int j = 0; j < K; j++) {
                // Once ref is found, this slot and every later one must be
                // recomputed; copying the later slots from the old row would
                // duplicate the instance just promoted into slot j.
                if (evaluar || vecinos[i][j] == ref) {
                    evaluar = true;
                    vecinosTemp[i][j] = obtenerCercano(vecinosTemp[i], j, dMatrix, i, datos, real, nominal, nulo, distanceEu);
                } else {
                    vecinosTemp[i][j] = vecinos[i][j];
                }
            }

            if (evaluar) {
                contador++;

                int maxPosAnterior = claseMasVotada(vecinos[i], K, nClases, clases, votos);
                int maxPosNuevo = claseMasVotada(vecinosTemp[i], K, nClases, clases, votos);

                if (clases[i] == maxPosAnterior && clases[i] != maxPosNuevo) {
                    ganancia--;
                } else if (clases[i] != maxPosAnterior && clases[i] == maxPosNuevo) {
                    ganancia++;
                }
            }
        }

        if (Math.round(ganancia) >= (double)umbral) {
            for (int i = 0; i < cuerpo.length; i++) {
                System.arraycopy(vecinosTemp[i], 0, vecinos[i], 0, K);
            }
            // Accuracy term changes by the gain; reduction term grows by one dropped instance
            fitness += (ganancia*50.0/(double)cuerpo.length + (1.0/(double)cuerpo.length)*50.0);
            fitnessAc += ganancia;
            return ((double)contador)/((double)cuerpo.length);
        } else {
            cuerpo[ref] = true;
            return (((double)contador)/((double)cuerpo.length))*(-1);
        }

    }//end-method

    /**
     * Compare to Method
     *
     * Orders chromosomes by descending fitness.
     *
     * @param o1 Chromosome to compare
     *
     * @return -1 if this chromosome has higher fitness, 1 if lower, 0 otherwise
     */
    public int compareTo (Object o1) {

        double valor1 = this.fitness;
        double valor2 = ((Cromosoma)o1).fitness;

        if (valor1 > valor2) {
            return -1;
        }
        if (valor1 < valor2) {
            return 1;
        }
        return 0;

    }//end-method

    /**
     * Test if two chromosome differ in only one gene
     *
     * @param a Chromosome to compare
     *
     * @return Position of the difference, if only one is found. Otherwise, -1
     */
    public int differenceAtOne (Cromosoma a) {

        int pos = -1;

        for (int i = 0; i < cuerpo.length; i++) {
            if (cuerpo[i] != a.getGen(i)) {
                if (pos >= 0) {
                    return -1; // second difference found
                }
                pos = i;
            }
        }
        return pos;

    }//end-method

    /**
     * To String Method
     *
     * @return String representation of the chromosome: the genes as 0/1
     *         digits, the fitness and the number of genes set
     */
    public String toString() {

        StringBuilder temp = new StringBuilder(cuerpo.length + 32);

        temp.append('[');
        for (int i = 0; i < cuerpo.length; i++) {
            temp.append(cuerpo[i] ? '1' : '0');
        }
        temp.append(", ").append(fitness).append(", ").append(genesActivos()).append(']');

        return temp.toString();

    }//end-method

}//end-class