/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Salvador Garcia (University of Jaén) 6/06/2009
* @version 0.1
* @since JDK1.5
* </p>
*/
package keel.Algorithms.Hyperrectangles.EHS_CHC;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.Arrays;
import keel.Algorithms.Preprocess.Basic.*;
import keel.Dataset.*;
import org.core.*;
/**
 * EHS_CHC: evolutionary selection of hyperrectangles (generalized exemplars)
 * driven by the CHC genetic algorithm, optionally preceded by a KNN-based
 * noise filter on the reference set. Extends the generic KEEL preprocessing
 * base class {@code Metodo}, which supplies the training/test structures.
 */
public class EHS_CHC extends Metodo {
/* Paths and names of I/O files */
// Path of the reference (search) dataset declared in the script file.
private String ficheroReferencia;
/* Own parameters of the algorithm */
// Seed for the pseudo-random number generator.
private long semilla;
// Fitness weights passed to Cromosoma.evalua -- presumably trade accuracy
// against reduction/coverage; confirm in Cromosoma.
private double alfa;
private double beta;
// CHC divergence (restart) rate: fraction of genes flipped on cataclysm.
private double r;
// Probability of raising a 0 gene to 1 during recombination.
private double prob0to1Rec;
// Probability of raising a 0 gene to 1 during divergence.
private double prob0to1Div;
// Population size of the CHC search.
private int tamPoblacion;
// Evaluation budget (stop condition of the CHC loop).
private int nEval;
// When true, the reference set is edited with a KNN filter before building rules.
public boolean filtering;
// Number of neighbours used by the filtering step.
private int K=3;
/* Data structures */
// Reference instance set: the search space for hyperrectangle construction.
protected InstanceSet referencia;
/* Data matrix */
double datosReferencia[][]; // normalized input values of the reference set
int clasesReferencia[]; // class label of each reference instance
double datosTest[][]; // normalized input values of the test set
int clasesTest[]; // class label of each test instance
double distancias[][]; // cached rule-to-instance distances; -1 marks "not computed yet"
/* Extra */
boolean nulosTest[][]; // missing-value flags (test)
boolean nulosReferencia[][]; // missing-value flags (reference)
int nominalTest[][]; // raw nominal codes before scaling (test)
int nominalReferencia[][]; // raw nominal codes before scaling (reference)
double realTest[][]; // raw real values before normalization (test)
double realReferencia[][]; // raw real values before normalization (reference)
/**
 * Builds the method from a KEEL script file: reads the configuration, then
 * loads and normalizes the training, test and reference datasets. Any read
 * or validation failure prints the exception and terminates the JVM.
 *
 * @param ficheroScript path of the KEEL configuration script
 */
public EHS_CHC(String ficheroScript) {
/* Read of the script file */
leerConfiguracion(ficheroScript);
/* Read of data files */
try {
training = new InstanceSet();
training.readSet(ficheroTraining, true);
/* Normalize and check the data */
normalizarTrain();
} catch (Exception e) {
System.err.println(e);
System.exit(1);
}
try {
test = new InstanceSet();
test.readSet(ficheroTest, false);
/* Normalize the data */
normalizarTest();
} catch (Exception e) {
System.err.println(e);
System.exit(1);
}
try {
referencia = new InstanceSet();
referencia.readSet(ficheroReferencia, false);
/* Normalize the data */
normalizarReferencia();
} catch (Exception e) {
System.err.println(e);
System.exit(1);
}
}
/**
 * Builds the training data matrices from the loaded {@code training} set and
 * normalizes the input values into [0,1]. Missing inputs are zeroed and
 * flagged in {@code nulosTrain}; the raw nominal codes and raw real values
 * are preserved in {@code nominalTrain}/{@code realTrain}. Also captures the
 * attribute metadata and the relation name from the dataset header.
 *
 * @throws CheckException if the dataset has no output attribute, more than
 *         one output attribute, or a real-valued output (i.e. it does not
 *         describe a classification problem)
 */
private void normalizarTrain() throws CheckException {
    int i, j, k;
    Instance temp;
    double caja[];
    StringTokenizer tokens;
    boolean nulls[];

    /* The dataset must describe a classification problem: exactly one
       output attribute, and not a real-valued one. (Messages fixed: the
       originals were garbled/mis-encoded and described the wrong attribute.) */
    if (Attributes.getOutputNumAttributes() < 1) {
        throw new CheckException(
                "This dataset has no output attributes, so it does not correspond to a classification problem.");
    } else if (Attributes.getOutputNumAttributes() > 1) {
        throw new CheckException("This dataset has more than one output attribute.");
    }
    if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
        throw new CheckException(
                "This dataset has a real-valued output attribute, so it does not correspond to a classification problem.");
    }

    entradas = Attributes.getInputAttributes();
    salida = Attributes.getOutputAttribute(0);
    nEntradas = Attributes.getInputNumAttributes();

    /* The relation name is the second token of the header ("@relation <name>"). */
    tokens = new StringTokenizer(training.getHeader(), " \n\r");
    tokens.nextToken();
    relation = tokens.nextToken();

    datosTrain = new double[training.getNumInstances()][Attributes.getInputNumAttributes()];
    clasesTrain = new int[training.getNumInstances()];
    caja = new double[1];
    nulosTrain = new boolean[training.getNumInstances()][Attributes.getInputNumAttributes()];
    nominalTrain = new int[training.getNumInstances()][Attributes.getInputNumAttributes()];
    realTrain = new double[training.getNumInstances()][Attributes.getInputNumAttributes()];

    for (i = 0; i < training.getNumInstances(); i++) {
        temp = training.getInstance(i);
        nulls = temp.getInputMissingValues();
        datosTrain[i] = training.getInstance(i).getAllInputValues();
        /* Missing inputs are zeroed and remembered in nulosTrain. */
        for (j = 0; j < nulls.length; j++)
            if (nulls[j]) {
                datosTrain[i][j] = 0.0;
                nulosTrain[i][j] = true;
            }
        caja = training.getInstance(i).getAllOutputValues();
        clasesTrain[i] = (int) caja[0];
        for (k = 0; k < datosTrain[i].length; k++) {
            if (Attributes.getInputAttribute(k).getType() == Attribute.NOMINAL) {
                /* Nominal: keep the raw code, then scale into [0,1].
                   NOTE(review): divides by (#values - 1); a nominal attribute
                   with a single value would divide by zero -- assumed not to
                   occur in KEEL datasets. */
                nominalTrain[i][k] = (int) datosTrain[i][k];
                datosTrain[i][k] /= Attributes.getInputAttribute(k)
                        .getNominalValuesList().size() - 1;
            } else {
                /* Real: keep the raw value, then min-max normalize into [0,1]. */
                realTrain[i][k] = datosTrain[i][k];
                datosTrain[i][k] -= Attributes.getInputAttribute(k).getMinAttribute();
                datosTrain[i][k] /= Attributes.getInputAttribute(k).getMaxAttribute()
                        - Attributes.getInputAttribute(k).getMinAttribute();
            }
        }
    }
}
/**
 * Builds the reference data matrices from the loaded {@code referencia} set
 * and normalizes the input values into [0,1]. Missing inputs are zeroed and
 * flagged in {@code nulosReferencia}; raw nominal codes and raw real values
 * are preserved in {@code nominalReferencia}/{@code realReferencia}.
 *
 * @throws CheckException if the dataset has no output attribute, more than
 *         one output attribute, or a real-valued output (i.e. it does not
 *         describe a classification problem)
 */
private void normalizarReferencia() throws CheckException {
    int i, j, k;
    Instance temp;
    double caja[];
    boolean nulls[];

    /* The dataset must describe a classification problem (messages fixed:
       the originals were garbled/mis-encoded). */
    if (Attributes.getOutputNumAttributes() < 1) {
        throw new CheckException(
                "This dataset has no output attributes, so it does not correspond to a classification problem.");
    } else if (Attributes.getOutputNumAttributes() > 1) {
        throw new CheckException("This dataset has more than one output attribute.");
    }
    if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
        throw new CheckException(
                "This dataset has a real-valued output attribute, so it does not correspond to a classification problem.");
    }

    datosReferencia = new double[referencia.getNumInstances()][Attributes.getInputNumAttributes()];
    clasesReferencia = new int[referencia.getNumInstances()];
    caja = new double[1];
    nulosReferencia = new boolean[referencia.getNumInstances()][Attributes.getInputNumAttributes()];
    nominalReferencia = new int[referencia.getNumInstances()][Attributes.getInputNumAttributes()];
    realReferencia = new double[referencia.getNumInstances()][Attributes.getInputNumAttributes()];

    for (i = 0; i < referencia.getNumInstances(); i++) {
        temp = referencia.getInstance(i);
        nulls = temp.getInputMissingValues();
        datosReferencia[i] = referencia.getInstance(i).getAllInputValues();
        /* Missing inputs are zeroed and remembered in nulosReferencia. */
        for (j = 0; j < nulls.length; j++)
            if (nulls[j]) {
                datosReferencia[i][j] = 0.0;
                nulosReferencia[i][j] = true;
            }
        caja = referencia.getInstance(i).getAllOutputValues();
        clasesReferencia[i] = (int) caja[0];
        for (k = 0; k < datosReferencia[i].length; k++) {
            if (Attributes.getInputAttribute(k).getType() == Attribute.NOMINAL) {
                /* Nominal: keep the raw code, then scale into [0,1]. */
                nominalReferencia[i][k] = (int) datosReferencia[i][k];
                datosReferencia[i][k] /= Attributes.getInputAttribute(k)
                        .getNominalValuesList().size() - 1;
            } else {
                /* Real: keep the raw value, then min-max normalize into [0,1]. */
                realReferencia[i][k] = datosReferencia[i][k];
                datosReferencia[i][k] -= Attributes.getInputAttribute(k).getMinAttribute();
                datosReferencia[i][k] /= Attributes.getInputAttribute(k).getMaxAttribute()
                        - Attributes.getInputAttribute(k).getMinAttribute();
            }
        }
    }
}
/**
 * Builds the test data matrices from the loaded {@code test} set and
 * normalizes the input values into [0,1]. Missing inputs are zeroed and
 * flagged in {@code nulosTest}; raw nominal codes and raw real values are
 * preserved in {@code nominalTest}/{@code realTest}.
 *
 * @throws CheckException if the dataset has no output attribute, more than
 *         one output attribute, or a real-valued output (i.e. it does not
 *         describe a classification problem)
 */
private void normalizarTest() throws CheckException {
    int i, j, k;
    Instance temp;
    double caja[];
    boolean nulls[];

    /* The dataset must describe a classification problem (messages fixed:
       the originals were garbled/mis-encoded). */
    if (Attributes.getOutputNumAttributes() < 1) {
        throw new CheckException(
                "This dataset has no output attributes, so it does not correspond to a classification problem.");
    } else if (Attributes.getOutputNumAttributes() > 1) {
        throw new CheckException("This dataset has more than one output attribute.");
    }
    if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
        throw new CheckException(
                "This dataset has a real-valued output attribute, so it does not correspond to a classification problem.");
    }

    datosTest = new double[test.getNumInstances()][Attributes.getInputNumAttributes()];
    clasesTest = new int[test.getNumInstances()];
    caja = new double[1];
    nulosTest = new boolean[test.getNumInstances()][Attributes.getInputNumAttributes()];
    nominalTest = new int[test.getNumInstances()][Attributes.getInputNumAttributes()];
    realTest = new double[test.getNumInstances()][Attributes.getInputNumAttributes()];

    for (i = 0; i < test.getNumInstances(); i++) {
        temp = test.getInstance(i);
        nulls = temp.getInputMissingValues();
        datosTest[i] = test.getInstance(i).getAllInputValues();
        /* Missing inputs are zeroed and remembered in nulosTest. */
        for (j = 0; j < nulls.length; j++)
            if (nulls[j]) {
                datosTest[i][j] = 0.0;
                nulosTest[i][j] = true;
            }
        caja = test.getInstance(i).getAllOutputValues();
        clasesTest[i] = (int) caja[0];
        for (k = 0; k < datosTest[i].length; k++) {
            if (Attributes.getInputAttribute(k).getType() == Attribute.NOMINAL) {
                /* Nominal: keep the raw code, then scale into [0,1]. */
                nominalTest[i][k] = (int) datosTest[i][k];
                datosTest[i][k] /= Attributes.getInputAttribute(k)
                        .getNominalValuesList().size() - 1;
            } else {
                /* Real: keep the raw value, then min-max normalize into [0,1]. */
                realTest[i][k] = datosTest[i][k];
                datosTest[i][k] -= Attributes.getInputAttribute(k).getMinAttribute();
                datosTest[i][k] /= Attributes.getInputAttribute(k).getMaxAttribute()
                        - Attributes.getInputAttribute(k).getMinAttribute();
            }
        }
    }
}
/**
 * Runs the complete EHS_CHC procedure:
 * <ol>
 *   <li>optionally edits the reference set with a KNN agreement filter,</li>
 *   <li>builds the initial hyperrectangle database ({@link #composeHyper}),</li>
 *   <li>selects a subset of rules with a CHC genetic search,</li>
 *   <li>classifies the training and test sets with the selected rules and
 *       writes the KEEL result files plus a model report.</li>
 * </ol>
 * Bug fixes versus the original: the filter now compares against the
 * reference labels, the integer-output train pass uses the train
 * nominal/missing companions, and the test distance cache is sized for the
 * test set.
 */
public void ejecutar() {
    int i, l, j, k;
    int nClases;
    Hyper database[];
    String cadena = "";
    Cromosoma poblacion[], newPob[], pobTemp[];
    int baraje[];
    int ev = 0;
    Cromosoma C[];
    int tmp, pos;
    int tamC;
    int d;
    int tamData;
    int stat = 0;
    boolean state[] = new boolean[1];
    boolean marcas[];
    int claseObt;
    int nSel = 0;
    double conjS[][];
    double conjR[][];
    int conjN[][];
    boolean conjM[][];
    int clasesS[];
    long tiempos[];
    tiempos = new long[5];

    /* Number of classes = highest training label + 1 */
    nClases = 0;
    for (i = 0; i < clasesTrain.length; i++)
        if (clasesTrain[i] > nClases)
            nClases = clasesTrain[i];
    nClases++;

    tiempos[0] = System.currentTimeMillis();
    if (filtering) {
        /* Edition filter: keep only reference instances whose class agrees
           with the majority of their K nearest neighbours. */
        marcas = new boolean[datosReferencia.length];
        for (i = 0; i < datosReferencia.length; i++)
            marcas[i] = false;
        for (i = 0; i < datosReferencia.length; i++) {
            /* Apply KNN to the instance */
            claseObt = KNN.evaluacionKNN2(K, datosReferencia, realReferencia, nominalReferencia, nulosReferencia, clasesReferencia, datosReferencia[i], realReferencia[i], nominalReferencia[i], nulosReferencia[i], nClases, true);
            /* BUGFIX: compare against the reference label of instance i; the
               original used clasesTrain[i], which is only valid when the
               reference and training partitions coincide. */
            if (claseObt == clasesReferencia[i]) { // agrees with its neighbourhood: keep it
                marcas[i] = true;
                nSel++;
            }
        }
        /* Build the filtered set S from the flags */
        conjS = new double[nSel][datosReferencia[0].length];
        conjR = new double[nSel][datosReferencia[0].length];
        conjN = new int[nSel][datosReferencia[0].length];
        conjM = new boolean[nSel][datosReferencia[0].length];
        clasesS = new int[nSel];
        for (i = 0, l = 0; i < datosReferencia.length; i++) {
            if (marcas[i]) { // the instance is copied to the solution
                for (j = 0; j < datosReferencia[0].length; j++) {
                    conjS[l][j] = datosReferencia[i][j];
                    conjR[l][j] = realReferencia[i][j];
                    conjN[l][j] = nominalReferencia[i][j];
                    conjM[l][j] = nulosReferencia[i][j];
                }
                clasesS[l] = clasesReferencia[i];
                l++;
            }
        }
        tamData = conjS.length;
        database = new Hyper[tamData];
        database = composeHyper(conjS, conjN, conjM, clasesS, database, nClases);
        d = database.length / 4; // initial CHC incest-prevention threshold
    } else {
        tamData = datosReferencia.length;
        database = new Hyper[tamData];
        database = composeHyper(datosReferencia, nominalReferencia, nulosReferencia, clasesReferencia, database, nClases);
        d = database.length / 4;
    }

    /******************************************************************/
    /*From here, the Hyperrectangle selection algorithm can be coupled*/
    /******************************************************************/
    tiempos[0] = System.currentTimeMillis() - tiempos[0];
    System.out.println("EHS_CHC " + relation + " Cosntruccion de Hiperectangulos realizado en "+ (double) (tiempos[0]) / 1000.0+ "s");
    System.out.println("El numero de Hiperrectangulos es : "+database.length);
    System.out.println("Construyendo el modelo");

    tiempos[1] = System.currentTimeMillis();
    /* Rule-to-instance distance cache shared with Cromosoma.evalua;
       -1 marks a pair not computed yet. */
    distancias = new double[database.length][datosReferencia.length];
    for (int ii = 0; ii < database.length; ii++)
        Arrays.fill(distancias[ii], -1);

    /* Random initialization of the population */
    Randomize.setSeed(semilla);
    poblacion = new Cromosoma[tamPoblacion];
    baraje = new int[tamPoblacion];
    for (i = 0; i < tamPoblacion; i++)
        poblacion[i] = new Cromosoma(database.length);
    /* Initial evaluation of the population */
    for (i = 0; i < tamPoblacion; i++)
        poblacion[i].evalua(datosReferencia, nominalReferencia, nulosReferencia, clasesReferencia, database, distancias, alfa, nClases, beta);

    /* CHC main loop: runs until the evaluation budget is spent */
    while (ev < nEval) {
        C = new Cromosoma[tamPoblacion];
        /* Selection(r) of C(t) from P(t): random shuffle of the indices */
        for (i = 0; i < tamPoblacion; i++)
            baraje[i] = i;
        for (i = 0; i < tamPoblacion; i++) {
            pos = Randomize.Randint(i, tamPoblacion - 1);
            tmp = baraje[i];
            baraje[i] = baraje[pos];
            baraje[pos] = tmp;
        }
        for (i = 0; i < tamPoblacion; i++)
            C[i] = new Cromosoma(database.length, poblacion[baraje[i]]);
        /* Structure recombination in C(t) constructing C'(t) */
        tamC = recombinar(C, d, database.length);
        newPob = new Cromosoma[tamC];
        for (i = 0, l = 0; i < C.length; i++) {
            if (C[i].esValido()) { // the chromosome survives into C'(t)
                newPob[l] = new Cromosoma(database.length, C[i]);
                l++;
            }
        }
        /* Structure evaluation in C'(t) */
        for (i = 0; i < newPob.length; i++) {
            newPob[i].evalua(datosReferencia, nominalReferencia, nulosReferencia, clasesReferencia, database, distancias, alfa, nClases, beta);
            ev++;
        }
        /* Selection(s) of P(t) from C'(t) and P(t-1) */
        Arrays.sort(poblacion);
        Arrays.sort(newPob);
        /* If the best of C' is worse than the worst of P(t-1), nothing
           changes and the incest threshold is decremented */
        if (tamC == 0 || newPob[0].getCalidad() < poblacion[tamPoblacion - 1].getCalidad()) {
            d--;
        } else {
            /* Elitist merge of the two sorted populations */
            pobTemp = new Cromosoma[tamPoblacion];
            for (i = 0, j = 0, k = 0; i < tamPoblacion && k < tamC; i++) {
                if (poblacion[j].getCalidad() > newPob[k].getCalidad()) {
                    pobTemp[i] = new Cromosoma(database.length, poblacion[j]);
                    j++;
                } else {
                    pobTemp[i] = new Cromosoma(database.length, newPob[k]);
                    k++;
                }
            }
            if (k == tamC) { // offspring exhausted: fill the rest with parents
                for (; i < tamPoblacion; i++) {
                    pobTemp[i] = new Cromosoma(database.length, poblacion[j]);
                    j++;
                }
            }
            poblacion = pobTemp;
        }
        /* Cataclysmic divergence: restart around the best chromosome when
           the incest threshold drops below zero */
        if (d < 0) {
            for (i = 1; i < tamPoblacion; i++) {
                poblacion[i].divergeCHC(r, poblacion[0], prob0to1Div);
            }
            for (i = 0; i < tamPoblacion; i++)
                if (!(poblacion[i].estaEvaluado())) {
                    poblacion[i].evalua(datosReferencia, nominalReferencia, nulosReferencia, clasesReferencia, database, distancias, alfa, nClases, beta);
                    ev++;
                }
            /* Reinitialization of the d value */
            d = (int) (r * (1.0 - r) * (double) database.length);
        }
    }

    /* Keep only the rules selected by the best chromosome */
    Arrays.sort(poblacion);
    database = reduceHyper(database, poblacion[0].getBody());
    tiempos[1] = System.currentTimeMillis() - tiempos[1];
    System.out.println("EHS_CHC " + relation + " Modelo construido "+ (double) (tiempos[1]) / 60000.0+ "Min");

    /******************************************************************/
    /*Classification task*/
    /******************************************************************/
    tiempos[2] = System.currentTimeMillis();
    if (salida.getType() == Attribute.INTEGER) {
        int salidaKNN[][];
        int prediccion[][];
        /* Output of the training file */
        salidaKNN = new int[datosTrain.length][1];
        prediccion = new int[datosTrain.length][1];
        for (i = 0; i < salidaKNN.length; i++) {
            salidaKNN[i][0] = clasesTrain[i];
            /* BUGFIX: use the TRAIN nominal/missing companions of
               datosTrain[i]; the original passed nominalTest[i]/nulosTest[i],
               which is wrong data and can overflow when test is smaller. */
            prediccion[i][0] = evaluacionKNNHyper(database, datosTrain[i], nominalTrain[i], nulosTrain[i], nClases);
        }
        tiempos[2] = System.currentTimeMillis() - tiempos[2];
        System.out.println("EHS_CHC " + relation + " Train "+ (double) (tiempos[2]) / 1000.0+ "s");
        Output.escribeSalida(ficheroSalida[0], salidaKNN, prediccion, entradas, salida, nEntradas, relation);
        tiempos[3] = System.currentTimeMillis();
        distancias = new double[database.length][datosTest.length];
        for (int ii = 0; ii < database.length; ii++)
            Arrays.fill(distancias[ii], -1);
        /* Output of the test file.
           NOTE(review): this branch never updates state/stat, so the
           "Examples Covered" figure below is always 0 for integer outputs. */
        salidaKNN = new int[datosTest.length][1];
        prediccion = new int[datosTest.length][1];
        for (i = 0; i < salidaKNN.length; i++) {
            salidaKNN[i][0] = clasesTest[i];
            prediccion[i][0] = evaluacionKNNHyper(database, datosTest[i], nominalTest[i], nulosTest[i], nClases);
        }
        tiempos[3] = System.currentTimeMillis() - tiempos[3];
        System.out.println("EHS_CHC " + relation + " Test "+ (double) (tiempos[3]) / 1000.0+ "s");
        Output.escribeSalida(ficheroSalida[1], salidaKNN, prediccion, entradas, salida, nEntradas, relation);
    } else {
        String salidaKNN[][];
        String prediccion[][];
        /* Output of the training file */
        salidaKNN = new String[datosTrain.length][1];
        prediccion = new String[datosTrain.length][1];
        for (i = 0; i < salidaKNN.length; i++) {
            salidaKNN[i][0] = (String) salida.getNominalValuesList().elementAt(clasesTrain[i]);
            prediccion[i][0] = (String) salida.getNominalValuesList().elementAt(
                    evaluacionKNNHyper(database, datosTrain[i], nominalTrain[i], nulosTrain[i], nClases));
        }
        tiempos[2] = System.currentTimeMillis() - tiempos[2];
        System.out.println("EHS_CHC " + relation + " Train "+ (double) (tiempos[2]) / 1000.0+ "s");
        Output.escribeSalida(ficheroSalida[0], salidaKNN, prediccion, entradas, salida, nEntradas, relation);
        tiempos[3] = System.currentTimeMillis();
        /* Output of the test file.
           BUGFIX: the cache is sized for the TEST set; the original
           allocated datosTrain.length columns here. */
        distancias = new double[database.length][datosTest.length];
        for (int ii = 0; ii < database.length; ii++)
            Arrays.fill(distancias[ii], -1);
        salidaKNN = new String[datosTest.length][1];
        prediccion = new String[datosTest.length][1];
        for (i = 0; i < salidaKNN.length; i++) {
            salidaKNN[i][0] = (String) salida.getNominalValuesList().elementAt(clasesTest[i]);
            prediccion[i][0] = (String) salida.getNominalValuesList().elementAt(
                    evaluacionKNNHyper(database, datosTest[i], nominalTest[i], nulosTest[i], nClases, state));
            if (state[0] == true) { // the example fell inside at least one rule
                stat++;
            }
        }
        tiempos[3] = System.currentTimeMillis() - tiempos[3];
        System.out.println("EHS_CHC " + relation + " Test "+ (double) (tiempos[3]) / 1000.0+ "s");
        Output.escribeSalida(ficheroSalida[1], salidaKNN, prediccion, entradas, salida, nEntradas, relation);
    }

    /* Model report: rule count, coverage ratio and the rules themselves */
    cadena += "Number of rules: "+database.length + "\n";
    cadena += "Examples Covered: "+ (double)stat / (double)datosTest.length + "\n";
    for (i = 0; i < database.length; i++) {
        cadena += database[i] + "\n";
    }
    Fichero.escribeFichero(ficheroSalida[2], cadena);
}
/**
 * CHC recombination (HUX-style with incest prevention). The population is
 * processed in consecutive pairs; a pair is crossed only when half of its
 * Hamming distance exceeds the threshold {@code d}, otherwise both members
 * are discarded. When crossing, each differing gene position is, with
 * probability 0.5, reset on both parents: a 1 becomes 0, and a 0 becomes 1
 * with probability {@code prob0to1Rec}.
 *
 * @param C   shuffled copy of the population, modified in place
 * @param d   incest-prevention threshold
 * @param len chromosome length (number of hyperrectangles)
 * @return number of chromosomes remaining valid in the offspring population
 */
private int recombinar (Cromosoma C[], int d, int len) {
int i, j;
int distHamming;
int tamC = 0;
/* Process the population in consecutive pairs (0,1), (2,3), ... */
for (i=0; i<C.length/2; i++) {
distHamming = 0;
for (j=0; j<len; j++)
if (C[i*2].getGen(j) != C[i*2+1].getGen(j))
distHamming++;
/* Incest prevention: only sufficiently different parents mate */
if ((distHamming/2) > d) {
for (j=0; j<len; j++) {
if ((C[i*2].getGen(j) != C[i*2+1].getGen(j)) && Randomize.Rand() < 0.5) {
if (C[i*2].getGen(j)) C[i*2].setGen(j,false);
else if (Randomize.Rand() < prob0to1Rec) C[i*2].setGen(j,true);
if (C[i*2+1].getGen(j)) C[i*2+1].setGen(j,false);
else if (Randomize.Rand() < prob0to1Rec) C[i*2+1].setGen(j,true);
}
}
tamC += 2;
} else {
/* Pair discarded: mark both chromosomes as invalid */
C[i*2].borrar();
C[i*2+1].borrar();
}
}
return tamC;
}
/**
 * Builds one hyperrectangle per example of {@code train} and removes exact
 * duplicates. For each seed example, the rectangle grows to cover every
 * same-class example lying strictly closer than the nearest enemy (closest
 * example of a different class): real attributes get the [min,max] interval
 * over the covered examples, nominal attributes get the set of seen values.
 *
 * @param train    normalized input values, one row per example
 * @param nominal  raw nominal codes per example/attribute
 * @param missing  missing-value flags per example/attribute
 * @param clases   class label of each example
 * @param database pre-allocated array, one slot per example (overwritten)
 * @param nClases  number of classes (currently unused here)
 * @return the duplicate-free array of hyperrectangles
 */
public Hyper [] composeHyper (double train[][], int nominal[][], boolean missing[][], int clases[], Hyper database[], int nClases) {
int i, j, k;
Vector <Integer> visitados = new Vector <Integer> ();
double minDist, dist;
double min, max;
double x[], y[];
boolean marcas[];
int nSel = database.length;
Hyper datared[];
boolean nom[][];
x = new double[train[0].length];
y = new double[train[0].length];
nom = new boolean[train[0].length][];
marcas = new boolean[database.length];
Arrays.fill(marcas, true);
for (i=0; i<train.length; i++) {
visitados.removeAllElements();
/* Distance to the nearest enemy (closest example of another class) */
minDist = Double.POSITIVE_INFINITY;
for (j=0; j<train.length; j++) {
if (clases[i] != clases[j]) {
dist = EHS_CHC.distancia(train[i], nominal[i], missing[i], train[j], nominal[j], missing[j]);
if (dist < minDist) {
minDist = dist;
}
}
}
/* Collect all same-class examples strictly closer than the nearest enemy */
// We could limit this to K elements and see what happens
for (j=0; j<train.length; j++) {
if (clases[i] == clases[j]) {
dist = EHS_CHC.distancia(train[i], nominal[i], missing[i], train[j], nominal[j], missing[j]);
if (dist < minDist) {
visitados.add(j);
}
}
}
/* Degenerate case: cover at least the seed example itself */
if (visitados.size() == 0) {
visitados.add(i);
}
/* Per-attribute containers: a coverage bitmap for nominal attributes,
   an empty array as the "this is a real attribute" marker */
for (j=0; j<train[0].length; j++) {
if (Attributes.getInputAttribute(j).getType() == Attribute.NOMINAL) {
nom[j] = new boolean[Attributes.getInputAttribute(j).getNumNominalValues()];
Arrays.fill(nom[j], false);
} else {
nom[j] = new boolean[0];
}
}
/* Rectangle bounds: [min,max] over covered examples for real attributes;
   the set of observed values for nominal attributes */
for (j=0; j<train[0].length; j++) {
if (nom[j].length == 0) {
min = train[visitados.elementAt(0)][j];
max = train[visitados.elementAt(0)][j];
for (k=1; k<visitados.size(); k++) {
if (train[visitados.elementAt(k)][j] < min) {
min = train[visitados.elementAt(k)][j];
} else if (train[visitados.elementAt(k)][j] > max) {
max = train[visitados.elementAt(k)][j];
}
}
x[j] = min;
y[j] = max;
} else {
for (k=0; k<visitados.size(); k++) {
nom[j][nominal[visitados.elementAt(k)][j]] = true;
}
}
}
// NOTE(review): the x, y and nom buffers are reused on every iteration;
// this assumes the Hyper constructor copies its arguments -- verify in Hyper.
database[i] = new Hyper(x,y,nom,clases[i]);
}
/*Remove duplicates (the LAST occurrence of each duplicate group is kept)*/
for (i=0; i<database.length; i++) {
for (j=i+1; j<database.length && marcas[i]; j++) {
if (database[i].equalTo(database[j])) {
marcas[i] = false;
nSel--;
}
}
}
datared = new Hyper[nSel];
for (i=0, k=0; i<database.length; i++) {
if (marcas[i]) {
datared[k] = new Hyper(database[i].x, database[i].y, database[i].nom, database[i].clase);
k++;
}
}
return datared;
}
/**
 * Keeps only the hyperrectangles whose selection flag is set, returning
 * fresh {@code Hyper} copies of them in their original order.
 *
 * @param database the full rule set
 * @param marcas   one flag per rule; {@code true} means "keep"
 * @return a new array containing copies of the selected rules
 */
public Hyper [] reduceHyper (Hyper database[], boolean marcas[]) {
    int seleccionados = 0;
    for (boolean marca : marcas) {
        if (marca) {
            seleccionados++;
        }
    }
    Hyper resultado[] = new Hyper[seleccionados];
    int destino = 0;
    for (int idx = 0; idx < marcas.length; idx++) {
        if (marcas[idx]) {
            resultado[destino++] = new Hyper(database[idx].x, database[idx].y, database[idx].nom, database[idx].clase);
        }
    }
    return resultado;
}
/**
 * Parses the KEEL script file: the three input dataset names (reference,
 * training, test), the three output file names (train results, test
 * results, model report) and the algorithm parameters, in the fixed order
 * the KEEL experiment generator writes them.
 *
 * @param ficheroScript path of the configuration script
 */
public void leerConfiguracion(String ficheroScript) {
    ficheroSalida = new String[3];
    String fichero = Fichero.leeFichero(ficheroScript);
    StringTokenizer lineasFichero = new StringTokenizer(fichero, "\n\r");
    lineasFichero.nextToken(); // skip the algorithm-name line

    /* Input data files: reference, training and test, quoted on one line */
    String nombres[] = nombresEntreComillas(valorDeLinea(lineasFichero), 3);
    ficheroReferencia = nombres[0];
    ficheroTraining = nombres[1];
    ficheroTest = nombres[2];

    /* Output files: train results, test results, model report */
    nombres = nombresEntreComillas(valorDeLinea(lineasFichero), 3);
    ficheroSalida[0] = nombres[0];
    ficheroSalida[1] = nombres[1];
    ficheroSalida[2] = nombres[2];

    /* Parameters, one "name = value" line each, in fixed order;
       substring(1) skips the blank that follows the '=' sign */
    semilla = Long.parseLong(valorDeLinea(lineasFichero).substring(1));
    tamPoblacion = Integer.parseInt(valorDeLinea(lineasFichero).substring(1));
    nEval = Integer.parseInt(valorDeLinea(lineasFichero).substring(1));
    alfa = Double.parseDouble(valorDeLinea(lineasFichero).substring(1));
    beta = Double.parseDouble(valorDeLinea(lineasFichero).substring(1));
    r = Double.parseDouble(valorDeLinea(lineasFichero).substring(1));
    prob0to1Rec = Double.parseDouble(valorDeLinea(lineasFichero).substring(1));
    prob0to1Div = Double.parseDouble(valorDeLinea(lineasFichero).substring(1));
    /* Anything other than "false" enables the KNN noise filter */
    filtering = !valorDeLinea(lineasFichero).substring(1).equalsIgnoreCase("false");
}

/** Returns the text that follows the '=' sign of the next script line. */
private static String valorDeLinea(StringTokenizer lineasFichero) {
    StringTokenizer tokens = new StringTokenizer(lineasFichero.nextToken(), "=");
    tokens.nextToken(); // parameter name, discarded
    return tokens.nextToken();
}

/** Extracts {@code cuantos} double-quoted names from {@code token}, in order. */
private static String[] nombresEntreComillas(String token, int cuantos) {
    String nombres[] = new String[cuantos];
    byte line[] = token.getBytes();
    int i = 0;
    for (int n = 0; n < cuantos; n++) {
        while (line[i] != '\"')
            i++;
        i++;
        int j = i;
        while (line[j] != '\"')
            j++;
        nombres[n] = new String(line, i, j - i);
        i = j + 1;
    }
    return nombres;
}
/**
 * Classifies an example with the hyperrectangle database. If at least one
 * non-degenerate rule covers the example (distance 0 and positive number of
 * dimensions), the class with most covering rules wins (lowest index on
 * ties); otherwise the class of the nearest rule is returned.
 *
 * @param database the rule set
 * @param ejemplo  normalized input values of the example
 * @param nominal  raw nominal codes of the example
 * @param missing  missing-value flags of the example
 * @param nClases  number of classes
 * @return the predicted class index
 */
public static int evaluacionKNNHyper (Hyper database[], double ejemplo[], int nominal[], boolean missing[], int nClases) {
    double mejorDist = Double.POSITIVE_INFINITY;
    int masCercano = -1;
    int votos[] = new int[nClases]; // Java zero-initializes the array

    for (int idx = 0; idx < database.length; idx++) {
        double dist = distancia(database[idx], ejemplo, nominal, missing);
        if (dist > 0) {
            if (dist < mejorDist) {
                mejorDist = dist;
                masCercano = idx;
            }
        } else if (database[idx].dimensions() > 0) {
            /* Covered by a non-degenerate rule: switch to majority voting */
            mejorDist = 0;
            votos[database[idx].clase]++;
        }
    }

    if (mejorDist > 0) {
        /* No rule covers the example: fall back to the nearest rule */
        return database[masCercano].clase;
    }
    int ganador = 0;
    for (int c = 1; c < votos.length; c++) {
        if (votos[c] > votos[ganador]) {
            ganador = c;
        }
    }
    return ganador;
}
/**
 * Classifies an example with the hyperrectangle database, additionally
 * reporting whether the example was covered. Among the non-degenerate rules
 * that cover the example, the one with the smallest volume decides the
 * class (earliest such rule on ties); otherwise the nearest rule decides.
 *
 * @param database the rule set
 * @param ejemplo  normalized input values of the example
 * @param nominal  raw nominal codes of the example
 * @param missing  missing-value flags of the example
 * @param nClases  number of classes (kept for signature compatibility)
 * @param state    one-element output: set to {@code true} iff the example
 *                 was covered by at least one non-degenerate rule
 * @return the predicted class index
 */
public static int evaluacionKNNHyper (Hyper database[], double ejemplo[], int nominal[], boolean missing[], int nClases, boolean state[]) {
    double mejorDist = Double.POSITIVE_INFINITY;
    int masCercano = -1;
    Vector<Integer> reglasCubren = new Vector<Integer>();

    for (int idx = 0; idx < database.length; idx++) {
        double dist = distancia(database[idx], ejemplo, nominal, missing);
        if (dist > 0) {
            if (dist < mejorDist) {
                mejorDist = dist;
                masCercano = idx;
            }
        } else if (database[idx].dimensions() > 0) {
            /* Covered by a non-degenerate rule: remember it as a candidate */
            mejorDist = 0;
            reglasCubren.add(idx);
        }
    }

    if (mejorDist > 0) {
        /* Not covered: the nearest rule decides */
        state[0] = false;
        return database[masCercano].clase;
    }

    /* Covered: the smallest-volume covering rule decides */
    state[0] = true;
    int elegido = reglasCubren.elementAt(0);
    double volumenMin = database[elegido].volume();
    for (int k = 1; k < reglasCubren.size(); k++) {
        double vol = database[reglasCubren.elementAt(k)].volume();
        if (vol < volumenMin) {
            volumenMin = vol;
            elegido = reglasCubren.elementAt(k);
        }
    }
    return database[elegido].clase;
}
/**
 * Euclidean-style distance between two examples: squared difference for
 * real attributes, a unit penalty for differing nominal attributes, and no
 * contribution from any attribute missing on either side.
 *
 * @return the square root of the accumulated per-attribute contributions
 */
public static double distancia(double ej1[], int nom1[], boolean mis1[], double ej2[], int nom2[], boolean mis2[]) {
    double acumulado = 0.0;
    for (int a = 0; a < ej1.length; a++) {
        if (mis1[a] || mis2[a]) {
            continue; // missing on either side: attribute ignored
        }
        if (Attributes.getInputAttribute(a).getType() == Attribute.NOMINAL) {
            if (nom1[a] != nom2[a]) {
                acumulado += 1;
            }
        } else {
            double diff = ej1[a] - ej2[a];
            acumulado += diff * diff;
        }
    }
    return Math.sqrt(acumulado);
}
/**
 * Distance from an example to a hyperrectangle: for real attributes, the
 * squared distance to the rule's [x,y] interval (zero when inside); for
 * nominal attributes, a unit penalty when the rule does not cover the
 * example's value. Attributes missing in the example are ignored.
 *
 * @return the square root of the accumulated per-attribute contributions;
 *         0 means the rule covers the example
 */
public static double distancia (Hyper h1, double ej2[], int nom2[], boolean mis2[]) {
    double acumulado = 0.0;
    for (int a = 0; a < ej2.length; a++) {
        if (mis2[a]) {
            continue; // the example has no value for this attribute
        }
        if (h1.nom[a].length == 0) {
            /* Real attribute: distance to the interval's nearest bound */
            if (ej2[a] < h1.x[a]) {
                double diff = h1.x[a] - ej2[a];
                acumulado += diff * diff;
            } else if (ej2[a] > h1.y[a]) {
                double diff = h1.y[a] - ej2[a];
                acumulado += diff * diff;
            }
        } else if (!h1.nom[a][nom2[a]]) {
            /* Nominal value not covered by the rule: unit penalty */
            acumulado += 1;
        }
    }
    return Math.sqrt(acumulado);
}
}