/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Complexity_Metrics;
import keel.Dataset.*;
import java.util.*;
/**
* Computes per-class statistics of a data set: the number of instances of each
* class and, for every attribute within every class, the mean, variance,
* maximum and minimum.
*
* @author Written by Nuria Macia (La Salle, Universitat Ramon Llull) 27/05/2010 and
* modified by Albert Orriols (La Salle, Universitat Ramon Llull) 31/05/2010.
* @version 1.1
* @since JDK1.2
*/
public class Statistics {

    /** Data set whose instances are scanned for their nominal class values. */
    keel.Dataset.InstanceSet dSet;

    /** Distinct nominal class values found in {@code dSet}, in order of first appearance. */
    private Vector classValues;

    /** Number of examples counted for each class index. */
    private int[] numInstancesPerClass;

    /** Mean of each attribute within each class, indexed as [attribute][class]. */
    private double[][] mean;

    /** Variance of each attribute within each class, indexed as [attribute][class]. */
    private double[][] variance;

    /** Maximum of each attribute within each class, indexed as [attribute][class]. */
    private double[][] maximum;

    /** Minimum of each attribute within each class, indexed as [attribute][class]. */
    private double[][] minimum;

    /** Number of classes */
    private int numberOfClasses;

    /** Number of attributes (first dimension of the statistics tables) */
    private int numberOfAttributes;

    /**
     * Allocates the statistics tables. Nothing is computed until
     * {@link #run(double[][], int[], int, int)} is called.
     *
     * @param _dSet data set the class values are read from
     * @param _numberOfClasses number of classes (second dimension of the tables)
     */
    Statistics ( InstanceSet _dSet, int _numberOfClasses ) {
        dSet = _dSet;
        numberOfAttributes = Attributes.getNumAttributes() - 1;
        numberOfClasses = _numberOfClasses;
        classValues = new Vector();

        // Java zero-initializes new arrays, so counts, means and variances
        // already start at 0; only the extreme-value tables need sentinels.
        numInstancesPerClass = new int [ numberOfClasses ];
        mean = new double [ numberOfAttributes ][ numberOfClasses ];
        variance = new double [ numberOfAttributes ][ numberOfClasses ];
        maximum = new double [ numberOfAttributes ][ numberOfClasses ];
        minimum = new double [ numberOfAttributes ][ numberOfClasses ];
        resetMinMax();
    } // end Statistics

    /**
     * It computes the statistics for the given parameters. Every call starts
     * from cleared accumulators, so the results reflect only the examples
     * passed to that call.
     *
     * @param example Examples normalized from the KEEL Data set, indexed as [example][attribute]
     * @param classOfExample Class index of each normalized example
     * @param numberOfExamples Number of examples in the data set
     * @param numberOfAttributes Number of attributes
     */
    public void run ( double [][]example, int []classOfExample, int numberOfExamples, int numberOfAttributes ) {
        // Order matters: the mean needs the per-class counts, and the
        // variance needs both the counts and the means.
        runClassValues( example, classOfExample, numberOfExamples, numberOfAttributes );
        runMinMax( example, classOfExample, numberOfExamples, numberOfAttributes );
        runMeanComputation( example, classOfExample, numberOfExamples, numberOfAttributes );
        runVarianceComputation( example, classOfExample, numberOfExamples, numberOfAttributes );
    } // end run

    /**
     * Computes the mean of every attribute within every class. A class without
     * examples ends up with NaN (0.0 divided by a count of 0).
     */
    private void runMeanComputation( double [][]example, int []classOfExample, int numberOfExamples, int numberOfAttributes ) {
        int e, a, c;

        // Clear sums left over from a previous run.
        for ( a = 0; a < mean.length; a++ ) {
            for ( c = 0; c < mean[a].length; c++ ) {
                mean[a][c] = 0.0;
            }
        }

        // Per-class sum of each attribute.
        for ( e = 0; e < numberOfExamples; e++ ) {
            int cls = classOfExample[e];
            for ( a = 0; a < numberOfAttributes; a++ ) {
                mean[a][cls] += example[e][a];
            }
        }

        // Turn the sums into averages.
        for ( a = 0; a < numberOfAttributes; a++ ) {
            for ( c = 0; c < numberOfClasses; c++ ) {
                mean[a][c] /= (double) numInstancesPerClass[c];
            }
        }
    } // end runMeanComputation

    /**
     * Collects the distinct nominal class values of the data set and counts
     * how many examples belong to each class index.
     */
    private void runClassValues( double [][]example, int []classOfExample, int numberOfExamples, int numberOfAttributes ) {
        int i;

        for ( i = 0; i < dSet.getNumInstances(); i++ ) {
            String classValue = dSet.getInstance(i).getOutputNominalValues(0);
            if ( !classValues.contains( classValue ) ) {
                classValues.add( classValue );
            }
        }

        // Clear counts left over from a previous run before counting again.
        for ( i = 0; i < numInstancesPerClass.length; i++ ) {
            numInstancesPerClass[i] = 0;
        }
        for ( i = 0; i < numberOfExamples; i++ ) {
            numInstancesPerClass[ classOfExample[i] ] ++;
        }
    } // end runClassValues

    /**
     * Computes the maximum and minimum of every attribute within every class.
     * A class without examples keeps the sentinels: {@code -Double.MAX_VALUE}
     * as maximum and {@code Double.MAX_VALUE} as minimum.
     */
    private void runMinMax( double [][]example, int []classOfExample, int numberOfExamples, int numberOfAttributes ) {
        int e, a;

        resetMinMax();

        for ( e = 0; e < numberOfExamples; e++ ) {
            int cls = classOfExample[e];
            for ( a = 0; a < numberOfAttributes; a++ ) {
                double value = example[e][a];
                maximum[a][cls] = Math.max( maximum[a][cls], value );
                minimum[a][cls] = Math.min( minimum[a][cls], value );
            }
        }
    } // end runMinMax

    /**
     * Seeds the extreme-value tables so that any real value replaces the seed.
     * The maximum starts at the most negative finite double; note that
     * {@code Double.MIN_VALUE} would be wrong here because it is the smallest
     * POSITIVE double, which would hide every value less than or equal to zero.
     */
    private void resetMinMax() {
        int a, c;
        for ( a = 0; a < maximum.length; a++ ) {
            for ( c = 0; c < maximum[a].length; c++ ) {
                maximum[a][c] = -Double.MAX_VALUE;
                minimum[a][c] = Double.MAX_VALUE;
            }
        }
    } // end resetMinMax

    /**
     * Computes the variance of every attribute within every class, dividing
     * the sum of squared deviations from the class mean by (count - 1).
     * The division is not guarded: a class with a single example yields NaN
     * (0.0 divided by 0).
     */
    private void runVarianceComputation( double [][]example, int []classOfExample, int numberOfExamples, int numberOfAttributes ) {
        int e, a, c;

        // Clear sums left over from a previous run.
        for ( a = 0; a < variance.length; a++ ) {
            for ( c = 0; c < variance[a].length; c++ ) {
                variance[a][c] = 0.0;
            }
        }

        // Per-class sum of squared deviations from the class mean.
        for ( e = 0; e < numberOfExamples; e++ ) {
            int cls = classOfExample[e];
            for ( a = 0; a < numberOfAttributes; a++ ) {
                variance[a][cls] += Math.pow( example[e][a] - mean[a][cls], 2 );
            }
        }

        for ( a = 0; a < numberOfAttributes; a++ ) {
            for ( c = 0; c < numberOfClasses; c++ ) {
                variance[a][c] /= numInstancesPerClass[c] - 1;
            }
        }
    } // end runVarianceComputation

    /**
     * It returns the variance of the given attribute within the given class
     *
     * @param whichAttribute index of the attribute
     * @param whichClass index of the class
     * @return variance of the given attribute within the given class
     */
    public double getVariance ( int whichAttribute, int whichClass ) {
        return variance[ whichAttribute ][ whichClass ];
    } // end getVariance

    /**
     * It returns the mean of the given attribute within the given class
     *
     * @param whichAttribute index of the attribute
     * @param whichClass index of the class
     * @return mean of the given attribute within the given class
     */
    public double getMean ( int whichAttribute, int whichClass ) {
        return mean[ whichAttribute ][ whichClass ];
    } // end getMean

    /**
     * It returns the maximum of the given attribute within the given class
     *
     * @param whichAttribute index of the attribute
     * @param whichClass index of the class
     * @return maximum of the given attribute within the given class
     */
    public double getMax ( int whichAttribute, int whichClass ) {
        return maximum[ whichAttribute ][ whichClass ];
    } // end getMax

    /**
     * It returns the minimum of the given attribute within the given class
     *
     * @param whichAttribute index of the attribute
     * @param whichClass index of the class
     * @return minimum of the given attribute within the given class
     */
    public double getMin ( int whichAttribute, int whichClass ) {
        return minimum[ whichAttribute ][ whichClass ];
    } // end getMin

} // end Statistics