/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
//
// Cromosoma.java
//
// Salvador García López
//
// Created by Salvador García López 19-7-2004.
// Copyright (c) 2004 __MyCompanyName__. All rights reserved.
//
package keel.Algorithms.Preprocess.Instance_Selection.ZhangTS;
import keel.Algorithms.Preprocess.Basic.*;
import org.core.*;
import java.util.Vector;
/**
 * Bit-string chromosome: gene {@code i} is {@code true} when training
 * instance {@code i} belongs to the selected subset S.
 *
 * Besides the bit string it carries an error rate (computed by
 * {@link #evaluaError}) and a quality value that this class only stores,
 * copies and compares; it is never computed here.
 */
public class Cromosoma implements Comparable {

    /*Cromosome data structure: cuerpo[i] == true means instance i is in S*/
    boolean cuerpo[];

    /*Auxiliary data attached to the cromosome*/
    double calidad;
    boolean cruzado;
    boolean valido;
    double errorRate;

    /**
     * Constructs a random cromosome of the specified size.
     * Each gene is set to false when Randomize.Rand() returns a value below
     * 0.5 and to true otherwise (presumably Rand() is uniform in [0,1) --
     * confirm against org.core.Randomize).
     *
     * @param size number of genes
     */
    public Cromosoma (int size) {
        cuerpo = new boolean[size];
        for (int i = 0; i < size; i++) {
            cuerpo[i] = !(Randomize.Rand() < 0.5);
        }
        cruzado = true;
        valido = true;
    }

    /**
     * Creates a copy of another cromosome: the first {@code size} genes,
     * the quality and the error rate are copied. The copy is flagged as
     * not crossed and valid.
     *
     * @param size number of genes to copy (must not exceed the size of a)
     * @param a    cromosome to copy from
     */
    public Cromosoma (int size, Cromosoma a) {
        cuerpo = new boolean[size];
        System.arraycopy(a.cuerpo, 0, cuerpo, 0, size);
        calidad = a.getCalidad();
        errorRate = a.getErrorRate();
        cruzado = false;
        valido = true;
    }

    /**
     * Constructs a cromosome from a bit array. The array is copied, so
     * later changes to {@code datos} do not affect the cromosome.
     *
     * @param datos gene values
     */
    public Cromosoma (boolean datos[]) {
        cuerpo = datos.clone();
        cruzado = true;
        valido = true;
    }

    /** Returns the value of the gene at position {@code indice}. */
    public boolean getGen (int indice) {
        return cuerpo[indice];
    }

    /** Returns the stored quality value. */
    public double getCalidad () {
        return calidad;
    }

    /** Returns the error rate last stored by {@link #evaluaError} (or copied on construction). */
    public double getErrorRate () {
        return errorRate;
    }

    /**
     * Calculates the error rate of the cromosome and stores it in
     * {@code errorRate}. See {@link #evaluaError2} for the definition.
     */
    public void evaluaError (double datos[][], double real[][], int nominal[][], boolean nulos[][], int clases[], boolean distanceEu) {
        errorRate = evaluaError2(datos, real, nominal, nulos, clases, distanceEu);
    }

    /**
     * Calculates the error rate of the cromosome without storing it.
     * An instance counts as a failure when its nearest selected neighbour
     * (see {@link #nearestSelected}) has a different class, or when it has
     * no such neighbour at all.
     *
     * @return failures divided by the number of instances (NaN when there are no instances)
     */
    private double evaluaError2 (double datos[][], double real[][], int nominal[][], boolean nulos[][], int clases[], boolean distanceEu) {
        int fallos = 0;
        for (int i = 0; i < datos.length; i++) {
            int vecino = nearestSelected(i, datos, real, nominal, nulos, distanceEu);
            if (vecino < 0 || clases[i] != clases[vecino]) {
                fallos++;
            }
        }
        return (double)(fallos)/(double)(datos.length);
    }

    /**
     * Finds, among the instances currently selected in {@code cuerpo}, the
     * one with the smallest non-zero KNN.distancia to instance {@code query}.
     * Zero distances are skipped (presumably to exclude the instance itself
     * and exact duplicates). On ties the lowest index wins.
     *
     * @return index of the nearest selected instance, or -1 if there is none
     */
    private int nearestSelected (int query, double datos[][], double real[][], int nominal[][], boolean nulos[][], boolean distanceEu) {
        int nearest = -1;
        double minDist = Double.POSITIVE_INFINITY;
        for (int j = 0; j < datos.length; j++) {
            if (cuerpo[j]) { //It is in S
                double dist = KNN.distancia(datos[query], real[query], nominal[query], nulos[query], datos[j], real[j], nominal[j], nulos[j], distanceEu);
                if (dist < minDist && dist != 0) {
                    minDist = dist;
                    nearest = j;
                }
            }
        }
        return nearest;
    }

    /**
     * Tie-break between two single-gene moves that yield the same error.
     * On entry gene {@code candidate} must hold {@code flipped} and gene
     * {@code incumbent} must hold {@code !flipped}; both are left in that
     * same state on return.
     *
     * @return true when the move on candidate gives a strictly smaller
     *         {@link #distancia} than the move on incumbent
     */
    private boolean isCloserThan (int candidate, int incumbent, boolean flipped, double datos[][], double real[][], int nominal[][], boolean nulos[][], int clases[], boolean distanceEu) {
        double distCandidate = distancia(datos, real, nominal, nulos, clases, distanceEu);
        //Swap the moves: undo the candidate one, apply the incumbent one
        cuerpo[candidate] = !flipped;
        cuerpo[incumbent] = flipped;
        double distIncumbent = distancia(datos, real, nominal, nulos, clases, distanceEu);
        //Put both genes back as they were on entry
        cuerpo[incumbent] = !flipped;
        cuerpo[candidate] = flipped;
        return distCandidate < distIncumbent;
    }

    /**
     * Returns a copy of this cromosome in which gene {@code index} holds
     * {@code value}. This cromosome itself is left exactly as it was: the
     * previous gene value is restored, so a gene that was already selected
     * is never cleared by accident.
     */
    private Cromosoma copyWithGene (int index, boolean value) {
        boolean previous = cuerpo[index];
        cuerpo[index] = value;
        Cromosoma copy = new Cromosoma (cuerpo.length, this);
        cuerpo[index] = previous;
        return copy;
    }

    /**
     * Returns the best neighbor in N+, i.e. a copy of this cromosome with
     * one additional gene activated. Genes whose index is contained in
     * {@code movs} are not considered.
     *
     * Criterion 1: if activating some gene gives an error lower than the
     * stored {@code errorRate}, the gene with the lowest error is chosen
     * (ties broken by {@link #distancia}).
     * Criterion 2: otherwise only genes that make at least one currently
     * misclassified instance correctly classified are considered, again
     * choosing lowest error then lowest distance. If there is no such gene
     * the best gene of criterion 1 is used, or a random index when no gene
     * was eligible at all.
     *
     * The returned copy carries this cromosome's calidad and errorRate;
     * they are not re-evaluated here.
     *
     * @param movs collection of Integer gene indices that must not be activated
     */
    public Cromosoma getSnextNplus (double datos[][], double real[][], int nominal[][], boolean nulos[][], int clases[], boolean distanceEu, Vector movs) {
        int i, k;
        int best;
        double minError, error;
        Vector <Integer> misclassified = new Vector <Integer>();
        Vector <Integer> NS = new Vector <Integer> ();

        /*Searching for the neighbor with less error rate and distance*/
        best = -1;
        minError = Double.POSITIVE_INFINITY;
        for (i = 0; i < cuerpo.length; i++) {
            if (!cuerpo[i] && !movs.contains(Integer.valueOf(i))) {
                cuerpo[i] = true;
                error = evaluaError2(datos, real, nominal, nulos, clases, distanceEu);
                if (error < minError) {
                    minError = error;
                    best = i;
                } else if (error == minError && isCloserThan(i, best, true, datos, real, nominal, nulos, clases, distanceEu)) {
                    best = i;
                }
                cuerpo[i] = false;
            }
        }

        if (minError < errorRate) { //criterion 1
            return copyWithGene(best, true);
        }

        //criterion 2
        /*Calculating the misclassified instances (instances without any neighbor are not included)*/
        for (i = 0; i < cuerpo.length; i++) {
            int vecino = nearestSelected(i, datos, real, nominal, nulos, distanceEu);
            if (vecino >= 0 && clases[i] != clases[vecino]) {
                misclassified.addElement(Integer.valueOf(i));
            }
        }

        /*Storing the solutions that classify correctly some of the previous misclassified instances*/
        for (k = 0; k < datos.length; k++) {
            if (!cuerpo[k] && !movs.contains(Integer.valueOf(k))) {
                cuerpo[k] = true;
                for (i = 0; i < misclassified.size(); i++) {
                    int instancia = misclassified.elementAt(i).intValue();
                    int vecino = nearestSelected(instancia, datos, real, nominal, nulos, distanceEu);
                    if (vecino >= 0 && clases[instancia] == clases[vecino]) {
                        NS.addElement(Integer.valueOf(k));
                        break; //one recovered instance is enough
                    }
                }
                cuerpo[k] = false;
            }
        }

        if (NS.isEmpty()) {
            /*No gene recovers a misclassified instance: fall back to the best
              gene found above, or to a random one when none was eligible*/
            if (best < 0)
                best = Randomize.Randint (0, cuerpo.length-1);
            return copyWithGene(best, true);
        }

        /*With the instances that have classified, at least, one misclassified instance, get the
          neighbor with less error rate and distance*/
        best = -1;
        minError = Double.POSITIVE_INFINITY;
        for (i = 0; i < NS.size(); i++) {
            int candidato = NS.elementAt(i).intValue();
            cuerpo[candidato] = true;
            error = evaluaError2(datos, real, nominal, nulos, clases, distanceEu);
            if (error < minError) {
                minError = error;
                best = candidato;
            } else if (error == minError && isCloserThan(candidato, best, true, datos, real, nominal, nulos, clases, distanceEu)) {
                best = candidato;
            }
            cuerpo[candidato] = false;
        }
        return copyWithGene(best, true);
    }

    /**
     * Returns the best neighbor in N-, i.e. a copy of this cromosome with
     * one active gene deactivated: the one giving the lowest error, ties
     * broken by the lowest {@link #distancia}. The {@code movs} argument is
     * not consulted by this method.
     *
     * If the cromosome has no active gene there is nothing to remove and
     * an unchanged copy is returned.
     *
     * The returned copy carries this cromosome's calidad and errorRate;
     * they are not re-evaluated here.
     */
    public Cromosoma getSnextNminus (double datos[][], double real[][], int nominal[][], boolean nulos[][], int clases[], boolean distanceEu, Vector movs) {
        int i;
        int best;
        double minError, error;

        /*Searching the neighbor with less error rate and distance*/
        best = -1;
        minError = Double.POSITIVE_INFINITY;
        for (i = 0; i < cuerpo.length; i++) {
            if (cuerpo[i]) {
                cuerpo[i] = false;
                error = evaluaError2(datos, real, nominal, nulos, clases, distanceEu);
                if (error < minError) {
                    minError = error;
                    best = i;
                } else if (error == minError && isCloserThan(i, best, false, datos, real, nominal, nulos, clases, distanceEu)) {
                    best = i;
                }
                cuerpo[i] = true;
            }
        }

        if (best < 0) {
            //No removable gene was found: avoid indexing cuerpo[-1]
            return new Cromosoma (cuerpo.length, this);
        }
        return copyWithGene(best, false);
    }

    /**
     * Calculates the distance between the train set and the cromosome:
     * for every instance, the smallest KNN.distancia to a selected instance
     * of the same class (zero distances are allowed here), summed over all
     * instances. An instance with no selected instance of its class
     * contributes positive infinity.
     */
    public double distancia (double datos[][], double real[][], int nominal[][], boolean nulos[][], int clases[], boolean distanceEu) {
        double suma = 0;
        for (int i = 0; i < datos.length; i++) {
            double minDist = Double.POSITIVE_INFINITY;
            for (int j = 0; j < datos.length; j++) {
                if (cuerpo[j] && clases[j] == clases[i]) { //It is in S and it is of the same class
                    double dist = KNN.distancia(datos[i], real[i], nominal[i], nulos[i], datos[j], real[j], nominal[j], nulos[j], distanceEu);
                    if (dist < minDist) {
                        minDist = dist;
                    }
                }
            }
            suma += minDist;
        }
        return suma;
    }

    /** Returns the number of active (true) genes. */
    public int genesActivos () {
        int suma = 0;
        for (int i = 0; i < cuerpo.length; i++) {
            if (cuerpo[i]) suma++;
        }
        return suma;
    }

    /**
     * Lets cromosomes be sorted by quality in descending order: a higher
     * calidad compares as smaller.
     *
     * @param o1 the other Cromosoma
     */
    public int compareTo (Object o1) {
        Cromosoma otro = (Cromosoma) o1;
        if (this.calidad > otro.calidad)
            return -1;
        else if (this.calidad < otro.calidad)
            return 1;
        else return 0;
    }

    /**
     * Tells whether this cromosome differs from {@code a} in exactly one
     * bit.
     *
     * @return the position of the differing bit, or -1 when the cromosomes
     *         are identical or differ in two or more bits
     */
    public int differenceAtOne (Cromosoma a) {
        int cont = 0, pos = -1;
        for (int i = 0; i < cuerpo.length && cont < 2; i++) {
            if (cuerpo[i] != a.getGen(i)) {
                pos = i;
                cont++;
            }
        }
        return (cont >= 2) ? -1 : pos;
    }

    /**
     * Returns "[bits, calidad,errorRate, activeGenes]" where bits is the
     * gene string written with 1/0.
     */
    @Override
    public String toString() {
        StringBuilder temp = new StringBuilder(cuerpo.length + 48);
        temp.append("[");
        for (int i = 0; i < cuerpo.length; i++) {
            temp.append(cuerpo[i] ? "1" : "0");
        }
        temp.append(", ").append(String.valueOf(calidad));
        temp.append(",").append(String.valueOf(errorRate));
        temp.append(", ").append(String.valueOf(genesActivos())).append("]");
        return temp.toString();
    }
}