/***********************************************************************

    This file is part of KEEL-software, the Data Mining tool for regression,
    classification, clustering, pattern mining and so on.

    Copyright (C) 2004-2010

    J. Alcalá-Fdez (jalcala@decsai.ugr.es)
    A. Fernández (alberto.fernandez@ujaen.es)
    S. García (sglopez@ujaen.es)
    F. Herrera (herrera@decsai.ugr.es)
    L. Sánchez (luciano@uniovi.es)
    J. Luengo (julianlm@decsai.ugr.es)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

/**
 *
 * File: CoevolutionAlgorithm.java
 *
 * A general framework for Coevolutionary Algorithms.
 * This class contains all common operations in the development of a
 * Coevolutionary algorithm. Any Coevolutionary algorithm can extend this
 * class and, by implementing the abstract "readParameters" method, get most
 * of its data-loading work already done.
 *
 * @author Written by Joaquín Derrac (University of Granada) 13/1/2010
 * @version 1.1
 * @since JDK1.5
 *
 */

package keel.Algorithms.Coevolution;

import java.util.StringTokenizer;

import keel.Dataset.Attribute;
import keel.Dataset.Attributes;
import keel.Dataset.Instance;
import keel.Dataset.InstanceSet;

import org.core.Fichero;

public abstract class CoevolutionAlgorithm {

    //Files

    /** Names of the three output files, as read from the configuration script. */
    protected String[] outFile;
    /** Name of the test data file, as read from the configuration script. */
    protected String testFile;
    /** Name of the training data file, as read from the configuration script. */
    protected String trainFile;
    /** Name of the reference data file, as read from the configuration script. */
    protected String referenceFile;

    //Instance Sets

    protected InstanceSet train;
    protected InstanceSet test;
    protected InstanceSet reference;

    /** Last instance visited while copying a data set (scratch field). */
    protected Instance temp;

    //Data

    /** Number of input attributes, taken from the training set. */
    protected int inputAtt;
    protected Attribute[] inputs;
    protected Attribute output;
    /** Missing-value flags of the last instance visited (scratch field). */
    protected boolean[] nulls;

    /** Scaled input values of the training set, one row per instance. */
    protected double[][] trainData;
    /** Output value (class index) of every training instance. */
    protected int[] trainOutput;
    /** Scaled input values of the test set, one row per instance. */
    protected double[][] testData;
    /** Output value (class index) of every test instance. */
    protected int[] testOutput;
    /** Scaled input values of the reference set, one row per instance. */
    protected double[][] referenceData;
    /** Output value (class index) of every reference instance. */
    protected int[] referenceOutput;

    /** Second token of the training set header. */
    protected String relation;

    /** Number of nominal values of the output attribute. */
    protected int nClasses;
    /** Number of training instances belonging to each class. */
    protected int[] nInstances;

    /** Time stamp (milliseconds) stored by {@link #setInitialTime()}. */
    protected long initialTime;

    // Timing fields; they are not written by this class.
    protected double modelTime;
    protected double trainingTime;
    protected double testTime;

    //Naming

    protected String name;

    //Random seed

    protected long seed;

    /**
     * Reads the configuration and data files, and processes them.
     *
     * The configuration script is parsed first (common part and
     * algorithm-specific parameters); then the training, test and reference
     * sets are loaded and normalized, in that order. Any exception raised
     * while loading is printed to the standard error stream and the JVM is
     * terminated with exit code 1.
     *
     * @param script Name of the configuration script
     *
     */
    protected void readDataFiles(String script) {

        //Read of the script file
        readConfiguracion(script);
        readParameters(script);

        try {
            //Training data: it also fixes the attribute information used
            //by the remaining sets
            train = new InstanceSet();
            train.readSet(trainFile, true);

            inputAtt = Attributes.getInputNumAttributes();
            inputs = Attributes.getInputAttributes();
            output = Attributes.getOutputAttribute(0);

            normalizeTrain();

            //Test data
            test = new InstanceSet();
            test.readSet(testFile, false);
            normalizeTest();

            //Reference data
            reference = new InstanceSet();
            reference.readSet(referenceFile, false);
            normalizeReference();

        } catch (Exception e) {
            //Every failure while loading the data is fatal
            System.err.println(e);
            System.exit(1);
        }

        //Now, the data is loaded and preprocessed

        //Get the number of classes
        nClasses = Attributes.getOutputAttribute(0).getNumNominalValues();

        //And the number of instances on each class
        //(a freshly created int array is already filled with zeros)
        nInstances = new int[nClasses];
        for (int label : trainOutput) {
            nInstances[label]++;
        }

    }//end-method

    /**
     * Reads the configuration script and extracts the file names from it.
     *
     * Empty lines are ignored. The first non-empty line is skipped; the
     * second one must contain, after an '=' sign, the quoted names of the
     * training, reference and test files (in that order); the third one
     * must contain, after an '=' sign, the quoted names of the three
     * output files.
     *
     * @param script Name of the configuration script
     *
     * @throws IllegalArgumentException if one of those lines has no '=' sign
     *         or fewer than three quoted values
     * @throws java.util.NoSuchElementException if the script has fewer than
     *         three non-empty lines
     *
     */
    protected void readConfiguracion(String script) {

        outFile = new String[3];

        String contents = Fichero.leeFichero(script);
        StringTokenizer lines = new StringTokenizer(contents, "\n\r");

        //The first line is not used here
        lines.nextToken();

        //Getting the names of training and test files
        //reference file will be used as comparison
        String[] dataFiles = quotedValues(lines.nextToken(), 3);
        trainFile = dataFiles[0];
        referenceFile = dataFiles[1];
        testFile = dataFiles[2];

        //Getting the names of output files
        String[] resultFiles = quotedValues(lines.nextToken(), outFile.length);
        System.arraycopy(resultFiles, 0, outFile, 0, outFile.length);

    } //end-method

    /**
     * Extracts the first double-quoted values found after the first '='
     * sign of a configuration line.
     *
     * The line is handled as text (no conversion to bytes), so the result
     * does not depend on the platform charset, and '=' characters inside
     * the quoted values are kept.
     *
     * @param line Configuration line, e.g. {@code key = "a" "b" "c"}
     * @param count Number of quoted values to extract
     * @return The extracted values, without their quotes
     *
     * @throws IllegalArgumentException if the line has no '=' sign or
     *         fewer than {@code count} quoted values
     */
    private static String[] quotedValues(String line, int count) {

        int position = line.indexOf('=');
        if (position < 0) {
            throw new IllegalArgumentException(
                "Malformed configuration line (missing '='): " + line);
        }

        String[] values = new String[count];
        for (int k = 0; k < count; k++) {
            int open = line.indexOf('"', position + 1);
            int close = (open < 0) ? -1 : line.indexOf('"', open + 1);
            if (close < 0) {
                throw new IllegalArgumentException(
                    "Malformed configuration line (expected " + count
                    + " quoted values): " + line);
            }
            values[k] = line.substring(open + 1, close);
            position = close;
        }

        return values;
    }

    /**
     * Reads the parameters of the algorithm.
     * Must be implemented in the subclass.
     *
     * @param script Configuration script
     *
     */
    protected abstract void readParameters(String script);

    /**
     * Builds the data matrix for training data and normalizes its input
     * values. It also stores the second token of the training set header
     * in {@link #relation}.
     *
     * @throws CheckException if the data set does not have exactly one
     *         output attribute, or if that attribute is real-valued
     */
    protected void normalizeTrain() throws CheckException {

        checkClassificationProblem();

        StringTokenizer tokens = new StringTokenizer(train.getHeader(), " \n\r");
        tokens.nextToken();
        relation = tokens.nextToken();

        int size = train.getNumInstances();
        trainData = new double[size][];
        trainOutput = new int[size];

        loadAndNormalize(train, trainData, trainOutput);

    } //end-method

    /**
     * Builds the data matrix for test data and normalizes its input values.
     *
     * @throws CheckException if the data set does not have exactly one
     *         output attribute, or if that attribute is real-valued
     */
    protected void normalizeTest() throws CheckException {

        checkClassificationProblem();

        int size = test.getNumInstances();
        testData = new double[size][];
        testOutput = new int[size];

        loadAndNormalize(test, testData, testOutput);

    } //end-method

    /**
     * Builds the data matrix for reference data and normalizes its input
     * values.
     *
     * @throws CheckException if the data set does not have exactly one
     *         output attribute, or if that attribute is real-valued
     */
    protected void normalizeReference() throws CheckException {

        checkClassificationProblem();

        int size = reference.getNumInstances();
        referenceData = new double[size][];
        referenceOutput = new int[size];

        loadAndNormalize(reference, referenceData, referenceOutput);

    } //end-method

    /**
     * Checks that the loaded attribute definitions describe a
     * classification problem: exactly one output attribute, and not a
     * real-valued one.
     *
     * @throws CheckException if any of those conditions does not hold
     */
    private void checkClassificationProblem() throws CheckException {

        int outputs = Attributes.getOutputNumAttributes();

        if (outputs < 1) {
            throw new CheckException("This dataset haven't outputs, so it not corresponding to a classification problem.");
        }
        if (outputs > 1) {
            throw new CheckException("This dataset have more of one output.");
        }
        if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
            throw new CheckException("This dataset have an output attribute with float values, so it not corresponding to a classification problem.");
        }
    }

    /**
     * Copies the instances of a set into the given arrays and scales every
     * input value.
     *
     * Nominal inputs are divided by (number of nominal values - 1) when the
     * attribute has more than one value. The remaining inputs are shifted
     * by the attribute minimum and divided by its range (maximum - minimum).
     * When that range is zero the division is skipped, so that a constant
     * attribute does not produce NaN or infinite values.
     *
     * Missing inputs are replaced by 0.0 before scaling, so they are scaled
     * like any other value.
     *
     * The {@link #temp} and {@link #nulls} fields are overwritten for every
     * instance visited.
     *
     * @param source Set to copy the instances from
     * @param data Destination of the input values; one row per instance
     * @param labels Destination of the output value of each instance
     */
    private void loadAndNormalize(InstanceSet source, double[][] data, int[] labels) {

        //Copy the data
        for (int i = 0; i < data.length; i++) {
            temp = source.getInstance(i);

            //NOTE(review): the row is the array returned by the instance,
            //not a copy of it; if Instance exposes its internal storage,
            //the scaling below also modifies the instance - confirm
            data[i] = temp.getAllInputValues();
            labels[i] = (int) temp.getOutputRealValues(0);
            nulls = temp.getInputMissingValues();

            //Clean missing values
            for (int j = 0; j < nulls.length; j++) {
                if (nulls[j]) {
                    data[i][j] = 0.0;
                }
            }
        }

        //Compute, once per attribute, the shift and the divisor to apply
        double[] shift = new double[inputAtt];
        double[] divisor = new double[inputAtt];

        for (int j = 0; j < inputAtt; j++) {
            Attribute attribute = Attributes.getInputAttribute(j);

            if (attribute.getType() == Attribute.NOMINAL) {
                int numValues = attribute.getNominalValuesList().size();
                shift[j] = 0.0;
                divisor[j] = (numValues > 1) ? (numValues - 1) : 1.0;
            } else {
                shift[j] = attribute.getMinAttribute();
                double range = attribute.getMaxAttribute() - shift[j];
                //A zero range would turn the division into NaN/Infinity
                divisor[j] = (range != 0.0) ? range : 1.0;
            }
        }

        //Both real and nominal data are scaled with the same expression
        for (double[] row : data) {
            for (int j = 0; j < inputAtt; j++) {
                row[j] = (row[j] - shift[j]) / divisor[j];
            }
        }
    }

    /**
     * Calculates the Euclidean distance between two instances.
     *
     * Only the first {@code instance1.length} positions are compared;
     * {@code instance2} must have at least that many values.
     *
     * @param instance1 First instance
     * @param instance2 Second instance
     * @return The Euclidean distance
     *
     */
    protected double euclideanDistance(double instance1[], double instance2[]) {

        double sum = 0.0;

        for (int i = 0; i < instance1.length; i++) {
            double diff = instance1[i] - instance2[i];
            sum += diff * diff;
        }

        return Math.sqrt(sum);

    } //end-method

    /**
     * Checks if two instances are the same.
     *
     * Values are compared with {@code !=} over the first {@code a.length}
     * positions, so {@code b} must have at least as many values as
     * {@code a}, and an instance containing NaN is never equal to anything.
     *
     * @param a First instance
     * @param b Second instance
     * @return True if both instances are equal.
     *
     */
    protected boolean same(double a[], double b[]) {

        for (int i = 0; i < a.length; i++) {
            if (a[i] != b[i]) {
                return false;
            }
        }

        return true;
    }

    /**
     * Generates a string with the contents of the instance.
     *
     * @param instance Instance to print.
     *
     * @return A string with the values of the instance, each one followed
     *         by a blank space
     *
     */
    public static String printInstance(int instance[]) {

        StringBuilder text = new StringBuilder();

        for (int value : instance) {
            text.append(value).append(' ');
        }

        return text.toString();
    }

    /**
     * Sets the time counter to the current system time (in milliseconds).
     *
     */
    protected void setInitialTime() {

        initialTime = System.currentTimeMillis();

    }//end-method

    /**
     * Exception raised when a data set cannot be used as a classification
     * problem.
     */
    class CheckException extends Exception {

        private static final long serialVersionUID = 1L;

        /**
         * Creates a new instance of CheckException
         */
        public CheckException() {
            super();
        }//end CheckException

        /**
         * Creates a new CheckException with the specified message.
         *
         * @param msg is the message of the exception
         */
        public CheckException(String msg) {
            super(msg);
        }//end CheckException

    }//end CheckException

}//end-class