/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Neural_Networks.NNEP_Common.data;
import java.io.BufferedReader;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import net.sf.jclec.IConfigure;
import org.apache.commons.configuration.Configuration;
/**
 * <p>
 * Set of data of a problem, stored transposed: every row of the internal
 * matrix holds all the values of one variable (inputs first, outputs after
 * them) and every column holds one observation.
 * </p>
 *
 * @author Written by Pedro Antonio Gutierrez Penna, Aaron Ruiz Mora (University of Cordoba) 17/07/2007
 * @version 0.1
 * @since JDK1.5
 */
public class DoubleTransposedDataSet implements IConfigure
{
    /////////////////////////////////////////////////////////////////
    // --------------------------------------- Serialization constant
    /////////////////////////////////////////////////////////////////

    /** Generated by Eclipse */
    private static final long serialVersionUID = -7161371989002786655L;

    /////////////////////////////////////////////////////////////////
    // --------------------------------------------------- Attributes
    /////////////////////////////////////////////////////////////////

    /** Name of the file to extract the observations from */
    String fileName;

    /** Number of observations (Matrix columns) */
    protected int nofobservations = -1;

    /** Number of variables (Matrix rows); nofvariables = nofinputs+nofoutputs */
    protected int nofvariables = -1;

    /** Number of inputs */
    protected int nofinputs = -1;

    /** Number of outputs */
    protected int nofoutputs = -1;

    /** Array with all data, indexed as [variable][observation] */
    protected double[][] array;

    /** Array with the mean of each output */
    protected double[] outputMeans;

    /** Maximum distance between data */
    protected double maximumDistance;

    /////////////////////////////////////////////////////////////////
    // -------------------------------------------------- Constructor
    /////////////////////////////////////////////////////////////////

    /**
     * Empty constructor
     */
    public DoubleTransposedDataSet() {
        super();
    }

    /////////////////////////////////////////////////////////////////
    // ------------------------------- Getting and setting attributes
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Returns the filename used to read the observations and parameters
     * </p>
     * @return String Filename
     */
    public String getFileName() {
        return fileName;
    }

    /**
     * <p>
     * Sets the filename used to read the observations and parameters
     * </p>
     * @param fileName Filename
     */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /**
     * <p>
     * Returns the number of inputs of the observations stored in the data set
     * </p>
     * @return int Number of inputs
     */
    public int getNofinputs() {
        return nofinputs;
    }

    /**
     * <p>
     * Sets the number of inputs of the observations stored in the data set.
     * Once the number of outputs and observations are also known, the data
     * matrix is allocated again (its previous contents are discarded).
     * </p>
     * @param nofinputs New number of inputs
     */
    public void setNofinputs(int nofinputs) {
        this.nofinputs = nofinputs;
        if(nofoutputs!=-1 && nofobservations!=-1) {
            init();
        }
    }

    /**
     * <p>
     * Returns the number of observations stored in the data set
     * </p>
     * @return int Number of observations
     */
    public int getNofobservations() {
        return nofobservations;
    }

    /**
     * <p>
     * Sets the number of observations stored in the data set.
     * Once the number of inputs and outputs are also known, the data
     * matrix is allocated again (its previous contents are discarded).
     * </p>
     * @param nofobservations New number of observations
     */
    public void setNofobservations(int nofobservations) {
        this.nofobservations = nofobservations;
        if(nofoutputs!=-1 && nofinputs!=-1) {
            init();
        }
    }

    /**
     * <p>
     * Returns the number of variables stored in the data set
     * </p>
     * @return int Number of variables
     */
    public int getNofvariables() {
        return nofvariables;
    }

    /**
     * <p>
     * Sets the number of variables stored in the data set.
     * The data matrix is not resized by this method.
     * </p>
     * @param nofvariables New number of variables
     */
    public void setNofvariables(int nofvariables) {
        this.nofvariables = nofvariables;
    }

    /**
     * <p>
     * Returns the number of outputs of the observations stored in the data set
     * </p>
     * @return int Number of outputs
     */
    public int getNofoutputs() {
        return nofoutputs;
    }

    /**
     * <p>
     * Sets the number of outputs of the observations stored in the data set.
     * Once the number of inputs and observations are also known, the data
     * matrix is allocated again (its previous contents are discarded).
     * </p>
     * @param nofoutputs New number of outputs
     */
    public void setNofoutputs(int nofoutputs) {
        this.nofoutputs = nofoutputs;
        if(nofinputs!=-1 && nofobservations!=-1) {
            init();
        }
    }

    /**
     * <p>
     * Returns a specified observation (a copy of one matrix column)
     * </p>
     * @param nofobservation Number of observation to return
     * @return double [] Array with the specified observation
     */
    public double [] getObservation(int nofobservation) {
        double [] observation = new double[nofvariables];
        for(int i=0; i<nofvariables; i++) {
            observation[i] = array[i][nofobservation];
        }
        return observation;
    }

    /**
     * <p>
     * Sets a specified observation (values are copied into one matrix column)
     * </p>
     * @param nofobservation Number of observation
     * @param observation New observation
     */
    public void setObservation(int nofobservation, double [] observation) {
        for(int i=0; i<nofvariables; i++) {
            array[i][nofobservation] = observation[i];
        }
    }

    /**
     * <p>
     * Returns the outputs of a specified observation (as a new array)
     * </p>
     * @param nofobservation Number of the observation
     * @return double [] Array with the outputs of the observation
     */
    public double [] getOutputs(int nofobservation) {
        double[] outputs = new double[nofoutputs];
        for(int i=nofinputs; i<nofvariables; i++) {
            outputs[i-nofinputs] = array[i][nofobservation];
        }
        return outputs;
    }

    /**
     * <p>
     * Returns a matrix with all the outputs of the dataSet
     * in rows. The rows are the internal rows of the data set
     * (they are not copied), so changes to them affect the data set.
     * </p>
     * @return double [][] Matrix with all the outputs of the dataSet
     */
    public double [][] getAllOutputs() {
        // Only the outer array is needed: every row is a shared reference
        double[][] outputs = new double[nofoutputs][];
        for(int i=nofinputs; i<nofvariables; i++) {
            outputs[i-nofinputs] = array[i];
        }
        return outputs;
    }

    /**
     * <p>
     * Returns the inputs of a specified observation (as a new array)
     * </p>
     * @param nofobservation Number of the observation
     * @return double [] Array with the inputs of the observation
     */
    public double [] getInputs(int nofobservation) {
        double[] inputs = new double[nofinputs];
        for(int i=0; i<nofinputs; i++) {
            inputs[i] = array[i][nofobservation];
        }
        return inputs;
    }

    /**
     * <p>
     * Returns a matrix with all the inputs of the dataSet
     * in rows. The rows are the internal rows of the data set
     * (they are not copied), so changes to them affect the data set.
     * </p>
     * @return double [][] Matrix with all the inputs of the dataSet
     */
    public double [][] getAllInputs() {
        // Only the outer array is needed: every row is a shared reference
        double[][] inputs = new double[nofinputs][];
        for(int i=0; i<nofinputs; i++) {
            inputs[i] = array[i];
        }
        return inputs;
    }

    /**
     * <p>
     * Returns all the values of an output in the data set
     * (the internal row, not a copy)
     * </p>
     * @param nofoutput Number of the output
     * @return double [] Array with all the values of the output
     */
    public double [] getOutput(int nofoutput) {
        return array[nofinputs+nofoutput];
    }

    /**
     * <p>
     * Sets all the values of an output in the data set
     * (the given array is stored, not copied)
     * </p>
     * @param nofoutput Number of the output
     * @param values Double array with all the values of the output
     */
    public void setOutput(int nofoutput, double [] values) {
        array[nofinputs+nofoutput] = values;
    }

    /**
     * <p>
     * Returns all the values of a variable in the data set
     * (the internal row, not a copy)
     * </p>
     * @param nofvariable Number of the variable
     * @return double [] Array with all the values of the variable
     */
    public double [] getObservationsOf(int nofvariable) {
        return array[nofvariable];
    }

    /**
     * <p>
     * Sets all the values of a variable in the data set
     * (the given array is stored, not copied)
     * </p>
     * @param nofvariable Number of the variable
     * @param values Double array with all the values of the variable
     */
    public void setObservationsOf(int nofvariable, double [] values) {
        array[nofvariable] = values;
    }

    /**
     * <p>
     * Returns the mean of a specific number of output, as computed by
     * the last call to {@link #calculateMeans()}
     * </p>
     * @param index Number of output mean to return
     * @return double Output mean
     */
    public double getOutputMean(int index) {
        return outputMeans[index];
    }

    /**
     * <p>
     * Returns the maximum value of a specific variable
     * </p>
     * @param index Number of variable maximum value to return
     * @return double Maximum value
     */
    public double getMaxValueOf(int index) {
        double[] row = array[index];
        double max = row[0];
        // Element 0 is the starting value, so the scan begins at 1
        for(int i=1; i<nofobservations; i++) {
            if(row[i] > max) {
                max = row[i];
            }
        }
        return max;
    }

    /**
     * <p>
     * Returns the minimum value of a specific variable
     * </p>
     * @param index Number of variable minimum value to return
     * @return double Minimum value
     */
    public double getMinValueOf(int index) {
        double[] row = array[index];
        double min = row[0];
        // Element 0 is the starting value, so the scan begins at 1
        for(int i=1; i<nofobservations; i++) {
            if(row[i] < min) {
                min = row[i];
            }
        }
        return min;
    }

    /**
     * <p>
     * Returns the maximum distance between train data
     * </p>
     * @return double Maximum distance
     */
    public double getMaximumDistance() {
        return maximumDistance;
    }

    /**
     * <p>
     * Sets the maximum distance between train data
     * </p>
     * @param maximumDistance New maximum distance
     */
    public void setMaximumDistance(double maximumDistance) {
        this.maximumDistance = maximumDistance;
    }

    /////////////////////////////////////////////////////////////////
    // ----------------------------------------------- Public methods
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Init the DoubleTransposedDataSet using a normal IDataset.
     * </p>
     * <p>
     * Schema value 1 marks an input attribute and 2 an output attribute;
     * attributes with any other value are not stored. A categorical
     * attribute is expanded into one 0/1 row per category, except a
     * two-category input, which uses a single row. Instances containing
     * a NaN value in any attribute are dropped, and the number of
     * observations is reduced accordingly. Output means and the maximum
     * distance are recalculated at the end.
     * </p>
     * @throws DatasetException
     * @param schema Schema of the dataset
     * @param dataset Dataset to read data of
     */
    public void read(byte[] schema, IDataset dataset) throws DatasetException{
        //Open dataset
        dataset.open();

        //Reads number of observations
        setNofobservations(dataset.numberOfInstances());

        //Reads number of inputs and outputs
        int nOfInputs = 0;
        int nOfOutputs = 0;
        for(int i=0; i<schema.length; i++) {
            if(schema[i]==1)
                nOfInputs++;
            else if(schema[i]==2)
                nOfOutputs++;
        }

        //Metadata
        IMetadata metadata = dataset.getMetadata();

        //Transform categorical attributes: each one was already counted
        //once above, so only the extra rows it needs are added here
        for(int i=0; i<metadata.numberOfAttributes(); i++){
            if(metadata.getAttribute(i).getType() == AttributeType.Categorical){
                int numberCategories = ((CategoricalAttribute) metadata.getAttribute(i)).getNumberCategories();
                //A binary input is coded with a single row
                if(numberCategories==2 && schema[i]==1)
                    numberCategories=1;
                if(schema[i]==1)
                    nOfInputs+=(numberCategories-1);
                else if(schema[i]==2)
                    nOfOutputs+=(numberCategories-1);
            }
        }

        //Sets number of outputs and inputs
        setNofinputs(nOfInputs);
        setNofoutputs(nOfOutputs);

        //For each instance
        int inputCounter = 0;
        int outputCounter = 0;
        int instanceCounter = 0;
        while(dataset.next()){
            IDataset.IInstance instancia = dataset.read();
            boolean lostValues = false;
            for(int i=0; i<schema.length; i++){
                double value = instancia.getValue(i);
                if(Double.isNaN(value))
                    lostValues = true;
                if(metadata.getAttribute(i).getType() != AttributeType.Categorical){
                    if(schema[i]==1)
                        array[inputCounter++][instanceCounter] = value;
                    else if(schema[i]==2)
                        array[nOfInputs+(outputCounter++)][instanceCounter] = value;
                }
                else{
                    CategoricalAttribute attribute = (CategoricalAttribute) metadata.getAttribute(i);
                    int numberCategories = attribute.getNumberCategories();
                    if(numberCategories==2 && schema[i]==1)
                        numberCategories=1;
                    //One row per category: 1 when the value equals the
                    //category number (counted from 1), 0 otherwise
                    if(schema[i]==1)
                        for(int j=1; j<=numberCategories; j++)
                            array[inputCounter++][instanceCounter] = ((value == j)?1:0);
                    else if(schema[i]==2)
                        for(int j=1; j<=numberCategories; j++)
                            array[nOfInputs+(outputCounter++)][instanceCounter] = ((value == j)?1:0);
                }
            }
            //An instance with lost values keeps its column index, so the
            //next instance overwrites it
            if(!lostValues)
                instanceCounter++;
            inputCounter = 0;
            outputCounter = 0;
        }

        //Shrink the matrix when some instances were dropped
        if(instanceCounter < nofobservations){
            nofobservations = instanceCounter;
            double[][] auxArray = array;
            array = new double[nofvariables][nofobservations];
            for(int i=0; i<array.length; i++)
                System.arraycopy(auxArray[i],0,array[i],0,array[i].length);
        }

        dataset.close();
        calculateMeans();
    }

    /**
     * <p>
     * Init the array stored in the DataSet reading the text file
     * named by the file name of this data set.
     * </p>
     * <p>
     * Expected format (fields separated by spaces or tabs):
     * first line, number of observations, inputs and outputs;
     * second line, one schema value per column (1 = input, 2 = output,
     * anything else = ignored column); then one line per observation.
     * Output means and the maximum distance are recalculated at the end.
     * </p>
     * @throws IOException If the file cannot be read or its format is illegal
     * @throws NumberFormatException If a field is not a valid number
     */
    public void read() throws IOException, NumberFormatException
    {
        BufferedReader reader = null;
        try{
            reader = new BufferedReader(new FileReader(fileName));

            //------------------//
            //Reading first line//
            //------------------//
            String values[] = tokenize(readRequiredLine(reader));

            //Check the text format
            if(values.length<3)
                throw new IOException("Illegal Text Format");

            int declaredObservations = Integer.parseInt(values[0]);
            int declaredInputs = Integer.parseInt(values[1]);
            int declaredOutputs = Integer.parseInt(values[2]);

            //Negative sizes cannot describe a data matrix
            if(declaredObservations<0 || declaredInputs<0 || declaredOutputs<0)
                throw new IOException("Illegal Text Format");

            setNofobservations(declaredObservations);
            setNofinputs(declaredInputs);
            setNofoutputs(declaredOutputs);

            //-------------------//
            //Reading second line//
            //-------------------//
            values = tokenize(readRequiredLine(reader));

            //Reads the input schema array, counting inputs and outputs
            byte iSchema[] = new byte[values.length];
            int schemaInputs = 0;
            int schemaOutputs = 0;
            for(int i=0; i<iSchema.length; i++){
                iSchema[i] = Byte.parseByte(values[i]);
                if(iSchema[i]==1)
                    schemaInputs++;
                else if(iSchema[i]==2)
                    schemaOutputs++;
            }

            //Check the text format: the schema must agree with the header,
            //otherwise values would be written to the wrong rows
            if(schemaInputs!=nofinputs || schemaOutputs!=nofoutputs)
                throw new IOException("Illegal Text Format");

            //-------------------//
            //Reading other lines//
            //-------------------//
            for(int i=0; i<nofobservations; i++)
            {
                //Read a line
                values = tokenize(readRequiredLine(reader));

                //Check the text format
                if(values.length<iSchema.length)
                    throw new IOException("Illegal Text Format");

                //Input and output counters for this observation
                int ic=0;
                int oc=0;

                //Read values
                for(int j=0; j<iSchema.length; j++){
                    //Every column is parsed, even the ignored ones
                    double value = Double.parseDouble(values[j]);

                    if(iSchema[j]==1){
                        //It is an input
                        array[ic][i] = value;
                        ic++;
                    }
                    else if(iSchema[j]==2){
                        //It is an output
                        array[nofinputs+oc][i] = value;
                        oc++;
                    }
                    //Otherwise the value is ignored
                }
            }
        }
        catch(EOFException e){
            System.out.println("Illegal Text Format");
            IOException illegalFormat = new IOException("Illegal Text Format");
            illegalFormat.initCause(e);
            throw illegalFormat;
        }
        catch(NumberFormatException e){
            System.out.println("Number format exception");
            throw e;
        }
        catch(FileNotFoundException e){
            System.out.println("File not found");
            throw e;
        }
        finally{
            if(reader!=null){
                try{
                    reader.close();
                }
                catch(IOException ignored){
                    //The stream was only read from; a failure while closing
                    //must not hide the result or the original exception
                }
            }
        }

        calculateMeans();
    }

    /**
     * <p>
     * Obtain the means of all the outputs and, after that, the maximum
     * distance between the input data
     * </p>
     */
    public void calculateMeans(){
        //The number of outputs may have changed since the last call
        if(outputMeans==null || outputMeans.length!=nofoutputs)
            outputMeans = new double[nofoutputs];

        //Obtain the means of the outputs
        for(int j=0; j<nofoutputs; j++){
            double[] outputRow = array[nofinputs+j];
            double sum = 0;
            for(int i=0; i<nofobservations; i++)
                sum += outputRow[i];
            outputMeans[j] = sum/nofobservations;
        }

        //Obtain the maximum distance between data
        obtainMaximumDistance();
    }

    /**
     * <p>
     * Returns a string representation of the DataSet
     * (one text line per observation)
     * </p>
     * @return String Representation of the DataSet
     */
    @Override
    public String toString(){
        StringBuilder sb = new StringBuilder();
        sb.append("<DataSet>\n");
        sb.append("<nofobservations>").append(nofobservations).append("</nofobservations>\n");
        sb.append("<nofinputs>").append(nofinputs).append("</nofinputs>\n");
        sb.append("<nofoutputs>").append(nofoutputs).append("</nofoutputs>\n");
        sb.append("<observations>\n");
        for(int i=0; i<nofobservations; i++)
        {
            for(int j=0; j<nofvariables; j++)
                sb.append(array[j][i]).append(" ");
            sb.append("\n");
        }
        sb.append("</observations>\n");
        sb.append("</DataSet>");
        return sb.toString();
    }

    /**
     * <p>
     * Returns a copy of the DataSet. The data matrix is duplicated, and
     * the output means and maximum distance of the copy are recalculated.
     * </p>
     * @return DataSet Copy of the DataSet
     */
    public DoubleTransposedDataSet copy(){
        DoubleTransposedDataSet result = new DoubleTransposedDataSet();

        //Set the fileName
        result.fileName = this.fileName;

        //Copy the dimensions (the last setter allocates the matrix)
        result.setNofobservations(this.nofobservations);
        result.setNofinputs(this.nofinputs);
        result.setNofoutputs(this.nofoutputs);

        //Copy the array
        for(int i=0; i<nofvariables; i++){
            System.arraycopy(this.array[i], 0, result.array[i], 0, nofobservations);
        }

        //Calculate means
        result.calculateMeans();

        return result;
    }

    /**
     * <p>
     * Obtain a boolean array with true at these inputs that are constants
     * (all their observations have the same value)
     * </p>
     * @return boolean [] Constant inputs
     */
    public boolean[] obtainConstantsInputs(){
        boolean[] toRemove = new boolean[nofinputs];

        for(int i=0; i<nofinputs; i++){
            double[] row = array[i];
            double value = row[0];
            //Look for the first observation that differs from the first one
            int j=1;
            while(j<nofobservations && row[j]==value)
                j++;
            toRemove[i] = (j==nofobservations);
        }

        return toRemove;
    }

    /**
     * <p>
     * Remove the inputs desired. The outputs and the remaining inputs are
     * kept in their original order. The output means and the maximum
     * distance are not recalculated by this method.
     * </p>
     * @param toRemove Array of Boolean indicating constant inputs with true
     * @param newNofinputs New number of inputs of the dataset
     */
    public void removeInputs(boolean[] toRemove, int newNofinputs){
        //Keep the previous matrix: setNofinputs allocates a new one,
        //so the old rows stay untouched and no full copy is needed
        double[][] previousArray = this.array;
        int previousNofinputs = this.nofinputs;
        int previousNofvariables = this.nofvariables;

        //Remove inputs
        setNofinputs(newNofinputs);

        //Copy the outputs and the inputs that are kept
        for(int i=0, j=0; i<previousNofvariables; i++){
            if(i>=previousNofinputs || !toRemove[i]){
                System.arraycopy(previousArray[i], 0, this.array[j], 0, nofobservations);
                j++;
            }
        }
    }

    /////////////////////////////////////////////////////////////////
    // ---------------------------------------------- Private methods
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Init the array stored in the DataSet: updates the number of
     * variables and allocates a new (zero filled) data matrix
     * </p>
     */
    private void init(){
        setNofvariables(nofinputs+nofoutputs);
        array = new double[nofvariables][nofobservations];
    }

    /**
     * <p>
     * Obtain the largest (Euclidean) distance between the input data
     * </p>
     */
    private void obtainMaximumDistance() {
        int observations = getNofobservations();
        int inputs = getNofinputs();

        //The square root is monotone, so it is enough to track the largest
        //squared distance and take a single square root at the end
        double maximumSquared = 0;
        for (int i=0; i<observations; i++) {
            for (int j=i+1; j<observations; j++) {
                double squared = 0;
                // Calculate the distance between two data
                for (int k=0; k<inputs; k++) {
                    double[] inputRow = array[k];
                    squared += Math.pow(inputRow[i]-inputRow[j], 2.0);
                }
                // Get the largest distances
                if(squared > maximumSquared)
                    maximumSquared = squared;
            }
        }
        maximumDistance = Math.sqrt(maximumSquared);
    }

    /**
     * <p>
     * Reads one line, failing when the end of the file has been reached
     * </p>
     * @param reader Reader to read the line from
     * @return String Line read (never null)
     * @throws IOException If the line cannot be read (EOFException when
     *         there are no more lines)
     */
    private static String readRequiredLine(BufferedReader reader) throws IOException {
        String line = reader.readLine();
        //readLine reports the end of the stream with null, not an exception
        if(line==null)
            throw new EOFException("Unexpected end of file");
        return line;
    }

    /**
     * <p>
     * Splits a line into its fields, separated by runs of spaces or tabs
     * (leading and trailing white space is ignored)
     * </p>
     * @param line Line to split
     * @return String [] Fields of the line
     */
    private static String[] tokenize(String line) {
        return line.trim().split("\\s+");
    }

    /////////////////////////////////////////////////////////////////
    // ---------------------------- Implementing IConfigure interface
    /////////////////////////////////////////////////////////////////

    /**
     * <p>
     * Configuration parameters for this data set are:
     * </p>
     * <ul>
     * <li>
     * <code>[@file-name] (String)</code>
     * File name. Name of the file that stores the necessary information
     * for this data set.
     * </li>
     * </ul>
     * @param settings Configuration object from which the properties are going to be read
     */
    public void configure(Configuration settings)
    {
        // ----------------------------------------- Setup fileName
        fileName = settings.getString("[@file-name]");
    }
}