/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.Neural_Networks.NNEP_Common.data;

import java.io.BufferedReader;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

import net.sf.jclec.IConfigure;

import org.apache.commons.configuration.Configuration;

/**
 * <p>
 * Set of data of a problem, stored as a transposed matrix of doubles:
 * one row per variable (inputs first, then outputs) and one column per
 * observation.
 * </p>
 *
 * @author Written by Pedro Antonio Gutierrez Penna, Aaron Ruiz Mora (University of Cordoba) 17/07/2007
 * @version 0.1
 * @since JDK1.5
 */
public class DoubleTransposedDataSet implements IConfigure
{
    /////////////////////////////////////////////////////////////////
    // --------------------------------------- Serialization constant
    /////////////////////////////////////////////////////////////////

    /** Generated by Eclipse */
    private static final long serialVersionUID = -7161371989002786655L;

    /** Message used for every malformed-file condition detected by {@link #read()} */
    private static final String ILLEGAL_FORMAT = "Illegal Text Format";

    /** Separator pattern of the text format: a single space or tab */
    private static final String SEPARATOR = "[\\s\\t]";

    /////////////////////////////////////////////////////////////////
    // --------------------------------------------------- Attributes
    /////////////////////////////////////////////////////////////////

    /** Name of the file to extract the observations from */
    String fileName;

    /** Number of observations (matrix columns) */
    protected int nofobservations = -1;

    /** Number of variables (matrix rows); nofvariables = nofinputs + nofoutputs */
    protected int nofvariables = -1;

    /** Number of inputs */
    protected int nofinputs = -1;

    /** Number of outputs */
    protected int nofoutputs = -1;

    /** Array with all data, indexed as array[variable][observation] */
    protected double[][] array;

    /** Array with the mean of each output */
    protected double[] outputMeans;

    /** Maximum distance between data */
    protected double maximumDistance;

    /////////////////////////////////////////////////////////////////
    // -------------------------------------------------- Constructor
    /////////////////////////////////////////////////////////////////

    /**
     * Empty constructor
     */
    public DoubleTransposedDataSet() {
        super();
    }

    /////////////////////////////////////////////////////////////////
    // ------------------------------- Getting and setting attributes
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Returns the filename used to read the observations and parameters
     * </p>
     * @return String Filename
     */
    public String getFileName() {
        return fileName;
    }

    /**
     * <p>
     * Sets the filename used to read the observations and parameters
     * </p>
     * @param fileName Filename
     */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /**
     * <p>
     * Returns the number of inputs of the observations stored in the data set
     * </p>
     * @return int Number of inputs
     */
    public int getNofinputs() {
        return nofinputs;
    }

    /**
     * <p>
     * Sets the number of inputs of the observations stored in the data set.
     * If the number of outputs and of observations are already set
     * (different from -1), the data matrix is re-allocated and its previous
     * content is discarded.
     * </p>
     * @param nofinputs New number of inputs
     */
    public void setNofinputs(int nofinputs) {
        this.nofinputs = nofinputs;
        if (nofoutputs != -1 && nofobservations != -1) {
            init();
        }
    }

    /**
     * <p>
     * Returns the number of observations stored in the data set
     * </p>
     * @return int Number of observations
     */
    public int getNofobservations() {
        return nofobservations;
    }

    /**
     * <p>
     * Sets the number of observations stored in the data set.
     * If the number of inputs and of outputs are already set
     * (different from -1), the data matrix is re-allocated and its previous
     * content is discarded.
     * </p>
     * @param nofobservations New number of observations
     */
    public void setNofobservations(int nofobservations) {
        this.nofobservations = nofobservations;
        if (nofoutputs != -1 && nofinputs != -1) {
            init();
        }
    }

    /**
     * <p>
     * Returns the number of variables stored in the data set
     * </p>
     * @return int Number of variables
     */
    public int getNofvariables() {
        return nofvariables;
    }

    /**
     * <p>
     * Sets the number of variables stored in the data set.
     * Only the counter is updated; the data matrix is not resized.
     * </p>
     * @param nofvariables New number of variables
     */
    public void setNofvariables(int nofvariables) {
        this.nofvariables = nofvariables;
    }

    /**
     * <p>
     * Returns the number of outputs of the observations stored in the data set
     * </p>
     * @return int Number of outputs
     */
    public int getNofoutputs() {
        return nofoutputs;
    }

    /**
     * <p>
     * Sets the number of outputs of the observations stored in the data set.
     * If the number of inputs and of observations are already set
     * (different from -1), the data matrix is re-allocated and its previous
     * content is discarded.
     * </p>
     * @param nofoutputs New number of outputs
     */
    public void setNofoutputs(int nofoutputs) {
        this.nofoutputs = nofoutputs;
        if (nofinputs != -1 && nofobservations != -1) {
            init();
        }
    }

    /**
     * <p>
     * Returns a specified observation (a copy of one matrix column)
     * </p>
     * @param nofobservation Number of observation to return
     * @return double [] Array with the specified observation
     */
    public double[] getObservation(int nofobservation) {
        double[] observation = new double[nofvariables];
        for (int i = 0; i < nofvariables; i++) {
            observation[i] = array[i][nofobservation];
        }
        return observation;
    }

    /**
     * <p>
     * Sets a specified observation (copies the values into one matrix column)
     * </p>
     * @param nofobservation Number of observation
     * @param observation New observation
     */
    public void setObservation(int nofobservation, double[] observation) {
        for (int i = 0; i < nofvariables; i++) {
            array[i][nofobservation] = observation[i];
        }
    }

    /**
     * <p>
     * Returns the outputs of a specified observation
     * </p>
     * @param nofobservation Number of the observation
     * @return double [] Array with the outputs of the observation
     */
    public double[] getOutputs(int nofobservation) {
        double[] outputs = new double[nofoutputs];
        for (int i = nofinputs; i < nofvariables; i++) {
            outputs[i - nofinputs] = array[i][nofobservation];
        }
        return outputs;
    }

    /**
     * <p>
     * Returns a matrix with all the outputs of the dataSet
     * in rows. The rows are the internal rows of the data set, not copies,
     * so changes made through the returned matrix are visible in the data set.
     * </p>
     * @return double [][] Matrix with all the outputs of the dataSet
     */
    public double[][] getAllOutputs() {
        // Only the outer array is new: every row is an alias of an internal row
        double[][] outputs = new double[nofoutputs][];
        for (int i = nofinputs; i < nofvariables; i++) {
            outputs[i - nofinputs] = array[i];
        }
        return outputs;
    }

    /**
     * <p>
     * Returns the inputs of a specified observation
     * </p>
     * @param nofobservation Number of the observation
     * @return double [] Array with the inputs of the observation
     */
    public double[] getInputs(int nofobservation) {
        double[] inputs = new double[nofinputs];
        for (int i = 0; i < nofinputs; i++) {
            inputs[i] = array[i][nofobservation];
        }
        return inputs;
    }

    /**
     * <p>
     * Returns a matrix with all the inputs of the dataSet
     * in rows. The rows are the internal rows of the data set, not copies,
     * so changes made through the returned matrix are visible in the data set.
     * </p>
     * @return double [][] Matrix with all the inputs of the dataSet
     */
    public double[][] getAllInputs() {
        // Only the outer array is new: every row is an alias of an internal row
        double[][] inputs = new double[nofinputs][];
        for (int i = 0; i < nofinputs; i++) {
            inputs[i] = array[i];
        }
        return inputs;
    }

    /**
     * <p>
     * Returns all the values of an output in the data set
     * (the internal row, not a copy)
     * </p>
     * @param nofoutput Number of the output
     * @return double [] Array with all the values of the output
     */
    public double[] getOutput(int nofoutput) {
        return array[nofinputs + nofoutput];
    }

    /**
     * <p>
     * Sets all the values of an output in the data set.
     * The given array is stored as is (it is not copied).
     * </p>
     * @param nofoutput Number of the output
     * @param values Double array with all the values of the output
     */
    public void setOutput(int nofoutput, double[] values) {
        array[nofinputs + nofoutput] = values;
    }

    /**
     * <p>
     * Returns all the values of a variable in the data set
     * (the internal row, not a copy)
     * </p>
     * @param nofvariable Number of the variable
     * @return double [] Array with all the values of the variable
     */
    public double[] getObservationsOf(int nofvariable) {
        return array[nofvariable];
    }

    /**
     * <p>
     * Sets all the values of a variable in the data set.
     * The given array is stored as is (it is not copied).
     * </p>
     * @param nofvariable Number of the variable
     * @param values Double array with all the values of the variable
     */
    public void setObservationsOf(int nofvariable, double[] values) {
        array[nofvariable] = values;
    }

    /**
     * <p>
     * Returns the mean of a specific output, as computed by the last
     * call to {@link #calculateMeans()}
     * </p>
     * @param index Number of output mean to return
     * @return double Output mean
     */
    public double getOutputMean(int index) {
        return outputMeans[index];
    }

    /**
     * <p>
     * Returns the maximum value of a specific variable
     * </p>
     * @param index Number of variable maximum value to return
     * @return double Maximum value
     */
    public double getMaxValueOf(int index) {
        double[] row = array[index];
        double max = row[0];
        for (int i = 1; i < nofobservations; i++) {
            if (row[i] > max) {
                max = row[i];
            }
        }
        return max;
    }

    /**
     * <p>
     * Returns the minimum value of a specific variable
     * </p>
     * @param index Number of variable minimum value to return
     * @return double Minimum value
     */
    public double getMinValueOf(int index) {
        double[] row = array[index];
        double min = row[0];
        for (int i = 1; i < nofobservations; i++) {
            if (row[i] < min) {
                min = row[i];
            }
        }
        return min;
    }

    /**
     * <p>
     * Returns the maximum distance between train data
     * </p>
     * @return double Maximum distance
     */
    public double getMaximumDistance() {
        return maximumDistance;
    }

    /**
     * <p>
     * Sets the maximum distance between train data
     * </p>
     * @param maximumDistance New maximum distance
     */
    public void setMaximumDistance(double maximumDistance) {
        this.maximumDistance = maximumDistance;
    }

    /////////////////////////////////////////////////////////////////
    // ----------------------------------------------- Public methods
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Init the DoubleTransposedDataSet using a normal IDataset.
     * </p>
     * <p>
     * In the schema, 1 marks an input attribute and 2 an output attribute;
     * attributes with any other value are skipped. A categorical attribute
     * is expanded into one 0/1 variable per category (a two-category input
     * is stored as a single 0/1 variable). Instances containing a NaN value
     * are discarded. Means and maximum distance are computed at the end.
     * </p>
     * @param schema Schema of the dataset
     * @param dataset Dataset to read data of
     * @throws DatasetException If the dataset reports an error
     */
    public void read(byte[] schema, IDataset dataset) throws DatasetException {

        // Open dataset
        dataset.open();

        // Reads number of observations
        setNofobservations(dataset.numberOfInstances());

        // Counts one variable for each input / output attribute
        int nOfInputs = 0;
        int nOfOutputs = 0;
        for (int i = 0; i < schema.length; i++) {
            if (schema[i] == 1) {
                nOfInputs++;
            }
            else if (schema[i] == 2) {
                nOfOutputs++;
            }
        }

        // Metadata
        IMetadata metadata = dataset.getMetadata();

        // Categorical attributes need (numberCategories - 1) extra variables
        for (int i = 0; i < metadata.numberOfAttributes(); i++) {
            if (metadata.getAttribute(i).getType() == AttributeType.Categorical) {
                int numberCategories =
                    ((CategoricalAttribute) metadata.getAttribute(i)).getNumberCategories();
                // A binary input is represented with a single variable
                if (numberCategories == 2 && schema[i] == 1) {
                    numberCategories = 1;
                }
                if (schema[i] == 1) {
                    nOfInputs += (numberCategories - 1);
                }
                else if (schema[i] == 2) {
                    nOfOutputs += (numberCategories - 1);
                }
            }
        }

        // Sets number of inputs and outputs (this allocates the matrix)
        setNofinputs(nOfInputs);
        setNofoutputs(nOfOutputs);

        // For each instance
        int inputCounter = 0;
        int outputCounter = 0;
        int instanceCounter = 0;
        while (dataset.next()) {
            IDataset.IInstance instance = dataset.read();
            boolean lostValues = false;
            for (int i = 0; i < schema.length; i++) {
                double value = instance.getValue(i);
                if (Double.isNaN(value)) {
                    lostValues = true;
                }

                if (metadata.getAttribute(i).getType() != AttributeType.Categorical) {
                    if (schema[i] == 1) {
                        array[inputCounter++][instanceCounter] = value;
                    }
                    else if (schema[i] == 2) {
                        array[nOfInputs + (outputCounter++)][instanceCounter] = value;
                    }
                }
                else {
                    CategoricalAttribute attribute =
                        (CategoricalAttribute) metadata.getAttribute(i);
                    int numberCategories = attribute.getNumberCategories();
                    if (numberCategories == 2 && schema[i] == 1) {
                        numberCategories = 1;
                    }

                    if (schema[i] == 1) {
                        for (int j = 1; j <= numberCategories; j++) {
                            array[inputCounter++][instanceCounter] = ((value == j) ? 1 : 0);
                        }
                    }
                    else if (schema[i] == 2) {
                        for (int j = 1; j <= numberCategories; j++) {
                            array[nOfInputs + (outputCounter++)][instanceCounter] = ((value == j) ? 1 : 0);
                        }
                    }
                }
            }

            // An instance with lost values does not advance the column,
            // so the next instance overwrites it
            if (!lostValues) {
                instanceCounter++;
            }
            inputCounter = 0;
            outputCounter = 0;
        }

        // Shrink the matrix if some instances were discarded
        if (instanceCounter < nofobservations) {
            nofobservations = instanceCounter;
            double[][] auxArray = array;
            array = new double[nofvariables][nofobservations];
            for (int i = 0; i < array.length; i++) {
                System.arraycopy(auxArray[i], 0, array[i], 0, array[i].length);
            }
        }

        dataset.close();
        calculateMeans();
    }

    /**
     * <p>
     * Init the array stored in the DataSet reading the text file
     * {@link #getFileName()}.
     * </p>
     * <p>
     * Expected format (fields separated by a single space or tab):
     * first line with number of observations, number of inputs and number
     * of outputs; second line with the schema of the columns (1 = input,
     * 2 = output, any other value = ignored column); then one line per
     * observation. Means and maximum distance are computed at the end.
     * </p>
     * @throws IOException If the file cannot be read, ends too early
     *         or does not follow the expected format
     * @throws NumberFormatException If a field is not a valid number
     */
    public void read() throws IOException, NumberFormatException {

        try {
            BufferedReader reader = new BufferedReader(new FileReader(fileName));
            try {
                readFrom(reader);
            }
            finally {
                // Always release the file handle, also on format errors
                reader.close();
            }
        }
        catch (EOFException e) {
            System.out.println("Illegal Text Format");
            IOException illegalFormat = new IOException(ILLEGAL_FORMAT);
            illegalFormat.initCause(e);
            throw illegalFormat;
        }
        catch (NumberFormatException e) {
            System.out.println("Number format exception");
            throw e;
        }
        catch (FileNotFoundException e) {
            System.out.println("File not found");
            throw e;
        }

        calculateMeans();
    }

    /**
     * <p>
     * Obtain the means of all the outputs and, afterwards, the maximum
     * distance between the input data
     * </p>
     */
    public void calculateMeans() {

        // (Re)allocate when the number of outputs changed since the last call
        if (outputMeans == null || outputMeans.length != nofoutputs) {
            outputMeans = new double[nofoutputs];
        }

        for (int j = 0; j < nofoutputs; j++) {
            outputMeans[j] = 0;
        }

        for (int i = 0; i < nofobservations; i++) {
            for (int j = 0; j < nofoutputs; j++) {
                outputMeans[j] += array[nofinputs + j][i];
            }
        }

        for (int j = 0; j < nofoutputs; j++) {
            outputMeans[j] /= nofobservations;
        }

        // Obtain the maximum distance between data
        obtainMaximumDistance();
    }

    /**
     * <p>
     * Returns a string representation of the DataSet
     * </p>
     * @return String Representation of the DataSet
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();

        sb.append("<DataSet>\n");
        sb.append("<nofobservations>").append(nofobservations).append("</nofobservations>\n");
        sb.append("<nofinputs>").append(nofinputs).append("</nofinputs>\n");
        sb.append("<nofoutputs>").append(nofoutputs).append("</nofoutputs>\n");
        sb.append("<observations>\n");
        // One text line per observation, variables separated by a space
        for (int i = 0; i < nofobservations; i++) {
            for (int j = 0; j < nofvariables; j++) {
                sb.append(array[j][i]).append(" ");
            }
            sb.append("\n");
        }
        sb.append("</observations>\n");
        sb.append("</DataSet>");

        return sb.toString();
    }

    /**
     * <p>
     * Returns a copy of the DataSet. The data matrix is copied value by
     * value; means and maximum distance are recomputed on the copy.
     * </p>
     * @return DataSet Copy of the DataSet
     */
    public DoubleTransposedDataSet copy() {
        DoubleTransposedDataSet result = new DoubleTransposedDataSet();

        // Set the fileName
        result.fileName = this.fileName;

        // Copy the dimensions (the third setter allocates the matrix)
        result.setNofobservations(this.nofobservations);
        result.setNofinputs(this.nofinputs);
        result.setNofoutputs(this.nofoutputs);

        // Copy the array
        for (int i = 0; i < nofvariables; i++) {
            System.arraycopy(this.array[i], 0, result.array[i], 0, nofobservations);
        }

        // Calculate means
        result.calculateMeans();

        return result;
    }

    /**
     * <p>
     * Obtain a boolean array with true at these inputs that are constants
     * (all the observations have the same value)
     * </p>
     * @return boolean [] Constant inputs
     */
    public boolean[] obtainConstantsInputs() {
        boolean[] toRemove = new boolean[nofinputs];

        for (int i = 0; i < nofinputs; i++) {
            double value = array[i][0];
            int j = 1;
            while (j < nofobservations && array[i][j] == value) {
                j++;
            }
            // The whole row was walked without finding a different value
            toRemove[i] = (j == nofobservations);
        }

        return toRemove;
    }

    /**
     * <p>
     * Remove the inputs desired. Output means and maximum distance are
     * not recomputed by this method.
     * </p>
     * @param toRemove Array of Boolean indicating with true the inputs to remove
     * @param newNofinputs New number of inputs of the dataset
     */
    public void removeInputs(boolean[] toRemove, int newNofinputs) {

        // Auxiliary copy, needed because changing the number of inputs
        // re-allocates (and so empties) the matrix
        DoubleTransposedDataSet aux = copy();

        // Remove inputs
        setNofinputs(newNofinputs);

        // Copy back every output and every input that is kept
        for (int i = 0, j = 0; i < aux.nofvariables; i++) {
            if (i >= aux.nofinputs || !toRemove[i]) {
                System.arraycopy(aux.array[i], 0, this.array[j], 0, nofobservations);
                j++;
            }
        }
    }

    /////////////////////////////////////////////////////////////////
    // ---------------------------------------------- Private methods
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Init the array stored in the DataSet
     * </p>
     */
    private void init() {
        setNofvariables(nofinputs + nofoutputs);
        array = new double[nofvariables][nofobservations];
    }

    /**
     * <p>
     * Parses the whole text format from an open reader and fills the matrix
     * </p>
     * @param reader Reader positioned at the first line of the file
     * @throws IOException If the content ends too early or is malformed
     */
    private void readFrom(BufferedReader reader) throws IOException {

        //------------------//
        //Reading first line//
        //------------------//

        String[] values = readRequiredLine(reader).split(SEPARATOR);

        // Check the text format
        if (values.length < 3) {
            throw new IOException(ILLEGAL_FORMAT);
        }

        int observations = Integer.parseInt(values[0]);
        int inputs = Integer.parseInt(values[1]);
        int outputs = Integer.parseInt(values[2]);

        // Negative sizes cannot be allocated
        if (observations < 0 || inputs < 0 || outputs < 0) {
            throw new IOException(ILLEGAL_FORMAT);
        }

        setNofobservations(observations);
        setNofinputs(inputs);
        setNofoutputs(outputs);

        //-------------------//
        //Reading second line//
        //-------------------//

        values = readRequiredLine(reader).split(SEPARATOR);

        // Reads the input schema array, counting inputs and outputs
        byte[] iSchema = new byte[values.length];
        int schemaInputs = 0;
        int schemaOutputs = 0;
        for (int i = 0; i < iSchema.length; i++) {
            iSchema[i] = Byte.parseByte(values[i]);
            if (iSchema[i] == 1) {
                schemaInputs++;
            }
            else if (iSchema[i] == 2) {
                schemaOutputs++;
            }
        }

        // Check the text format: the schema must declare exactly the
        // announced number of inputs and of outputs
        if (schemaInputs != nofinputs || schemaOutputs != nofoutputs) {
            throw new IOException(ILLEGAL_FORMAT);
        }

        //-------------------//
        //Reading other lines//
        //-------------------//

        for (int i = 0; i < nofobservations; i++) {

            values = readRequiredLine(reader).split(SEPARATOR);

            // Check the text format
            if (values.length < iSchema.length) {
                throw new IOException(ILLEGAL_FORMAT);
            }

            int ic = 0; // Input counter of this observation
            int oc = 0; // Output counter of this observation

            for (int j = 0; j < iSchema.length; j++) {
                // Every column is parsed, also the ones that are not stored
                double value = Double.parseDouble(values[j]);

                if (iSchema[j] == 1) {
                    array[ic++][i] = value;
                }
                else if (iSchema[j] == 2) {
                    array[nofinputs + (oc++)][i] = value;
                }
                // Any other schema value: the column is ignored
            }
        }
    }

    /**
     * <p>
     * Reads the next line, failing if the end of the file has been reached
     * </p>
     * @param reader Reader to read the line from
     * @return String Line read (never null)
     * @throws IOException If the line cannot be read; EOFException when
     *         there are no more lines
     */
    private static String readRequiredLine(BufferedReader reader) throws IOException {
        String line = reader.readLine();
        // readLine() signals the end of the stream with null, not with an exception
        if (line == null) {
            throw new EOFException("Unexpected end of file");
        }
        return line;
    }

    /**
     * <p>
     * Obtain the largest (euclidean) distance between the input data
     * </p>
     */
    private void obtainMaximumDistance() {
        maximumDistance = 0;

        for (int i = 0; i < nofobservations; i++) {
            for (int j = i + 1; j < nofobservations; j++) {
                double distance = 0;

                // Distance between observations i and j, reading the matrix
                // directly to avoid building temporary input arrays
                for (int k = 0; k < nofinputs; k++) {
                    distance += Math.pow(array[k][i] - array[k][j], 2.0);
                }
                distance = Math.sqrt(distance);

                // Keep the largest distance
                if (distance > maximumDistance) {
                    maximumDistance = distance;
                }
            }
        }
    }

    /////////////////////////////////////////////////////////////////
    // ---------------------------- Implementing IConfigure interface
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Configuration parameters for this data set are:
     * </p>
     * <ul>
     * <li>
     * <code>[@file-name] (String)</code>
     * File name. Name of the file that stores the necessary information
     * for this data set.
     * </li>
     * </ul>
     * @param settings Configuration object from which the properties are going to be read
     */
    public void configure(Configuration settings) {
        // ----------------------------------------- Setup fileName
        fileName = settings.getString("[@file-name]");
    }
}