/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * @author Written by Albert Orriols (La Salle University Ram�n Lull, Barcelona) 28/03/2004 * @author Modified by Xavi Sol� (La Salle University Ram�n Lull, Barcelona) 03/12/2008 * @version 1.2 * @since JDK1.2 * </p> */ package keel.Algorithms.Genetic_Rule_Learning.UCS; import java.lang.*; import java.io.*; import java.util.*; public class SSFileEnvironment implements Environment { /** * <p> * This is the base class for all the single step problems environments * that read the examples from a file. It uses the Dataset API to get all * the information needed from the file. * </p> */ /** * It's the number of the examples of the problem. */ private int numOfExamples; /** * It's an array of examples */ private double[][] example; /** * It's the array of the actions associated to the * examples. */ private int [] exampleAction; /** * Represents the current example that is examinated */ private int currentExample; /** * It's the constructor of the class. * * @param fileName is the database file name * @param readAttrInfo indicate if the attribute information has to be read. */ public SSFileEnvironment( String fileName, boolean readAttrInfo ) { String line; // The representation parameters are initalized. initRepresentationParameters( readAttrInfo ); //Declaring a new instance. keel.Dataset.InstanceSet iSet = new keel.Dataset.InstanceSet(); try{ iSet.readSet( fileName, readAttrInfo ); if (keel.Dataset.Attributes.getOutputNumAttributes() != 1){ System.err.println ("The number of ouput attributes is "+keel.Dataset.Attributes.getOutputNumAttributes()); System.err.println (" and it has to be 1 in a classification problem."); System.exit(-1); } }catch (keel.Dataset.HeaderFormatException e){ System.out.println ("READING DATASET ERROR. The format of the header is not correct."); e.printStackTrace(); System.exit(-1); }catch (keel.Dataset.DatasetException e){ System.out.println ("DATASET ERROR."); e.printStackTrace(); System.exit(-1); }catch(Exception e){ System.out.println ("LOADING DATASET ERROR. "); e.printStackTrace(); System.exit(-1); } //Extracting the instance information getInstancesInformation( iSet, readAttrInfo ); //Extracting the attributes information if ( readAttrInfo ) getAttributeInformation(); //Normalizing the data normalizeIntegerValues(); //Getting the majority class in the training data set if ( readAttrInfo ) Config.majorityClass = getMajorityClass (); System.out.println ( " > The majority class in the training data set is: " + Config.majorityClass ); //Finally, we print the environment //if ( !readAttrInfo ) printInformation(); //printInformation(); } // end SSFileEnvironment private int getMajorityClass (){ int i, max, majClass; int []number = new int [ Config.numberOfActions ]; // Getting the number of classes Config.numberOfActions = 2; for ( i=0; i<numOfExamples; i++ ){ if ( exampleAction[i] + 1 > Config.numberOfActions ){ Config.numberOfActions = exampleAction[i] + 1; } } // Initializing for ( i=0; i<Config.numberOfActions; i++ ){ number[i] = 0; } // Counting the number of examples per class for ( i=0; i<numOfExamples; i++ ){ number[ exampleAction[i] ] ++; } // Getting the majority class max = number[0]; majClass = 0; for ( i=1; i<Config.numberOfActions; i++ ){ if ( number[i] > max ){ max = number[i]; majClass = i; } } return majClass; }//end getMajorityClass /** * It extracts all the information from the dataset which is needed * by UCS environment. * * @param iSet is the instanceSet. */ private void getInstancesInformation( keel.Dataset.InstanceSet iSet, boolean readAttrInfo ){ //Getting information about attributes and examples numOfExamples = iSet.getNumInstances(); if ( readAttrInfo ) { Config.clLength = keel.Dataset.Attributes.getInputNumAttributes(); Config.ternaryRep = false; Config.relationName = keel.Dataset.Attributes.getRelationName(); } //Initializing environment variables. System.out.println ("Initializing environment variables"); initVariables( Config.clLength, numOfExamples, readAttrInfo ); //Getting all the instances for (int i=0; i<numOfExamples; i++){ example[i] = iSet.getInstance(i).getNormalizedInputValues(); exampleAction[i] = (int) iSet.getInstance(i).getNormalizedOutputValues()[0]; } System.out.println (" > The attributes number (without counting the class attribute) is: "+Config.clLength); System.out.println (" > The examples number is: "+numOfExamples); System.out.println (" > The number of classes is: "+Config.numberOfActions); }//end getInstancesInformation /** * It does organize the attribute information gotten in the instance set * construction. */ private void getAttributeInformation(){ System.out.println ("Getting input attributes information."); keel.Dataset.Attribute[] inputAttr = keel.Dataset.Attributes.getInputAttributes(); //System.out.println ("Displaying input attributes information."); for (int i=0; i<inputAttr.length; i++){ switch (inputAttr[i].getType()){ case keel.Dataset.Attribute.NOMINAL: Config.typeOfAttributes[i] = "integer"; Config.enumConv[i] = inputAttr[i].getNominalValuesList(); Config.attBounds[i][0] = 0; Config.attBounds[i][1] = Config.enumConv[i].size() - 1; break; case keel.Dataset.Attribute.INTEGER: Config.typeOfAttributes[i] = "integer"; Config.attBounds[i][0] = 0; Config.attBounds[i][1] = inputAttr[i].getMaxAttribute() - inputAttr[i].getMinAttribute(); break; case keel.Dataset.Attribute.REAL: Config.typeOfAttributes[i] = "real"; Config.attBounds[i][0] = inputAttr[i].getMinAttribute(); Config.attBounds[i][1] = inputAttr[i].getMaxAttribute(); break; default: System.out.println ("ERROR in attribute type definition."); break; } } System.out.println ("Getting output attribute information"); keel.Dataset.Attribute outAtt = keel.Dataset.Attributes.getOutputAttributes()[0]; //System.out.println ("Displaying output attribute information"); switch (outAtt.getType()){ case keel.Dataset.Attribute.NOMINAL: Config.classConv = outAtt.getNominalValuesList(); Config.numberOfActions = Config.classConv.size(); System.out.println ( " >> Reading the number of classes in the training data set: " + Config.numberOfActions ); break; case keel.Dataset.Attribute.INTEGER: Config.numberOfActions = (int) (outAtt.getMaxAttribute() - outAtt.getMinAttribute() + 1); Config.classConv = new Vector(); int minimumAct = (int)outAtt.getMinAttribute(); for (int k=0; k<Config.numberOfActions; k++){ Config.classConv.add((new Integer(minimumAct+k)).toString()); } break; case keel.Dataset.Attribute.REAL: System.out.println ("ERROR: The class of the dataset has to be a nominal or an integer."); System.exit(0); break; default: System.out.println ("ERROR in output attribute type definition."); } }//end getAttributeInformation /** * <p> * Does initialize the representation parameters of the enviornment * </p> */ private void initRepresentationParameters( boolean readAttrInfo ){ // Initializations currentExample = 0; if ( readAttrInfo ) { // When reading the descriptor it will be modified if there is a real // or integer attribute Config.ternaryRep = false; // The number of actions will be updated while reading the file. Config.numberOfActions = 0; Config.charVector = new char[3]; Config.charVector[0] = '0'; Config.charVector[1] = '1'; Config.charVector[2] = '#'; Config.numberOfCharacters = 3; } }//end initRepresentationParameters /** * Does reserve memory for all the system variables. * * @param attNum is the number of attributes. * @param exNum is the number of examples. */ private void initVariables( int attNum, int exNum, boolean readAttrInfo ){ try{ // Memory Reservation if ( readAttrInfo ) { System.out.println ( " >> Reserving memory for " + attNum + " attributes to store their type and bounds " ); Config.typeOfAttributes = new String [attNum]; Config.attBounds = new double [attNum][2]; Config.enumConv = new Vector[attNum]; } example = new double[exNum][attNum]; exampleAction = new int[exNum]; }catch(Exception e){ e.printStackTrace(); } }//Init variables /** * <p> * Prints to a file the normalized data * </p> */ private void printNormalizedData(String fileName){ String line = null; String outFileName = fileName +".flt"; try{ // The file where the normalized data will be set is opened PrintWriter fout = new PrintWriter(new BufferedWriter(new FileWriter(outFileName))); // The number of examples is written fout.println (example.length); for (int i=0; i<example.length; i++){ for (int j=0; j<Config.clLength; j++){ if (example[i][j] == -1) fout.print ("null\t"); else fout.print (example[i][j]+" "); } fout.println (exampleAction[i]); } fout.close(); }catch (Exception e){ e.printStackTrace(); } }//end printNormalizedData ///////////////////////////////////////////////////////////////////////// // Functions to interact with the environment // ///////////////////////////////////////////////////////////////////////// /** * The function returns the current state. * * @return a double[] with the current state. */ public double[] getCurrentState() { return example[ currentExample ]; } // end getCurrentState /** * Returns the class of the current example * * @return an integer with the class of the current example */ public int getCurrentClass (){ return exampleAction[ currentExample ]; }//end getCurrentClass /** * Does create a new state of the problem. The UCS have to decide the * action to do. * * @return a double[] with the new situation. */ public double[] newState() { currentExample = (int)(Config.rand() * (double)numOfExamples) ; return example[currentExample]; } // end newState /** * It initializes at the first example. It is used in the file * environment to get the examples sequentially. */ public void beginSequentialExamples(){ currentExample = -1; } /** * It returns the new Example of a single step file environment. * @return a double[] with the current example. */ public double[] getSequentialState(){ boolean found = false; currentExample = (currentExample +1) % numOfExamples; return example[currentExample]; }//getSequentialState /** * It return the number of the examples of the database. It's only * used in the file environments. * * @return an integer with the number of examples in the dataset */ public int getNumberOfExamples(){ return numOfExamples; }//end getNumberOfExamples /** * normalizeData * * It normalizes the data to floats between 0 and 1 */ private void normalizeIntegerValues(){ for (int i=0; i<numOfExamples; i++){ for (int j=0; j<Config.clLength; j++){ if ( example[i][j] != Config.unknownValue ){ if ( Config.typeOfAttributes[j].equals("integer") ){ example[i][j] -= Config.attBounds[j][0]; example[i][j] /= ( Config.attBounds[j][1] - Config.attBounds[j][0] ); }else if ( Config.typeOfAttributes[j].equals("real") ){ //example[i][j] = (example[i][j] - Config.attBounds[j][0]) / (Config.attBounds[j][1] - Config.attBounds[j][0]); } } } } }// end normalizeData /** * Does print the examples of the environment. */ private void printExamplesOfEnvironment(){ System.out.println ("Number of Examples: "+numOfExamples); for (int i=0; i<numOfExamples; i++){ System.out.print ("Exemple "+i+": "); for (int j=0; j<Config.clLength; j++){ System.out.print ("\t"+example[i][j]); } System.out.println ("\tAction: "+exampleAction[i]); } }//end printExamplesOfEnvironment /** * Does print the environment to a file. */ private void printEnvironmentToFile(PrintWriter fout){ fout.println ("Number of Examples: "+numOfExamples); for (int i=0; i<numOfExamples; i++){ fout.print ("Exemple "+i+": "); for (int j=0; j<Config.clLength; j++){ fout.print ("\t"+example[i][j]); } fout.println ("\tAction: "+exampleAction[i]); } }//end printEnvironmentToFile ///////////////////////////////////////////////////////////////////////////////////////////////////////////////// private void printInformation (){ System.out.println (" ============================================ "); System.out.println ("\\nn\n == Attribute's types"); for (int i=0; i<Config.typeOfAttributes.length; i++){ System.out.println ("\t Attribute "+i+": "+Config.typeOfAttributes[i]); } System.out.println ("\n == Attribute's Boundaries"); for (int i=0; i<Config.attBounds.length; i++){ System.out.println ("\t Attribute "+i+": "+Config.attBounds[i][0]+","+Config.attBounds[i][1]); } if (Config.enumConv != null){ System.out.println ("\n == Enumeration information"); System.out.println (" > The length of the vector array is:"+Config.enumConv.length); for (int i=0; i<Config.enumConv.length; i++){ if (Config.enumConv[i] != null && Config.enumConv[i].size() != 0){ System.out.print("\t Attribute "+i+": "); for (int j=0; j<Config.enumConv[i].size(); j++){ System.out.print((String)Config.enumConv[i].elementAt(j) +" "); } } } System.out.println(); } if (Config.classConv != null){ System.out.print ("\n == Values that can be taken by the class: "); for (int i=0; i<Config.classConv.size(); i++){ System.out.print ((String) Config.classConv.elementAt(i)+" "); } System.out.println(); } System.out.println ("\n == Data readen from file: attributers + classes:"); for (int i=0; i<example.length; i++){ for (int j=0; j<example[i].length; j++){ System.out.print(example[i][j]+" "); } System.out.println(" -- "+ exampleAction[i]); } System.out.println (" > Number of classes: " + Config.numberOfActions ); }//end printInformation ///////////////////////////////////////////////////////////////////////////////////////////////////////////////// private void copyAttInfo(){ Config.enumConv = new Vector[Config.enumConv.length]; Config.classConv = (Vector)Config.classConv.clone(); } } // end SSFileEnvironment