/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
S. Garc�a (sglopez@ujaen.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Genetic_Rule_Learning.PART;
import java.util.Enumeration;
/**
* <p>
* Class to handle a classification of class values.
* </p>
*
* <p>
* @author Written by Crist�bal Romero Morales (University of Oviedo) 01/07/2008
* @author Modified by Xavi Sol� (La Salle, Ram�n Llull University - Barcelona) 03/12/2008
* @version 1.1
* @since JDK1.2
* </p>
*/
public class Classification{
/** Weight of itemsets per class per value. */
private double perClassPerValue[][];
/** Weight of itemsets per value. */
private double perValue[];
/** Weight of itemsets per class. */
private double perClass[];
/** Total weight of itemsets. */
private double total;
/** Function to create and initialize a new classification.
*
* @param numValues Number of values used to make the classification.
* @param numClasses Number of classes used to make the classification.
*/
public Classification( int numValues, int numClasses )
{
int i;
perClassPerValue = new double [numValues][0];
perValue = new double [numValues];
perClass = new double [numClasses];
for ( i = 0; i < numValues; i++ )
perClassPerValue[i] = new double [numClasses];
total = 0;
}
/** Function to create a new classification with only one value.
*
* @param source The dataset.
*
* @throws Exception If cannot read the dataset.
*/
public Classification( MyDataset source ) throws Exception
{
perClassPerValue = new double [1][0];
perValue = new double [1];
total = 0;
perClass = new double [source.numClasses()];
perClassPerValue[0] = new double [source.numClasses()];
Enumeration enum2 = source.enumerateItemsets();
while ( enum2.hasMoreElements() )
add( 0, (Itemset) enum2.nextElement() );
}
/** Function to create a new classification with the given dataset.
*
* @param source The dataset.
* @param model The model selected to make the classification.
*
* @throws Exception If cannot build the classification.
*/
public Classification( MyDataset source, Cut model ) throws Exception
{
int index;
Itemset itemset;
double[] weights;
perClassPerValue = new double [model.numSubsets()][0];
perValue = new double [model.numSubsets()];
total = 0;
perClass = new double [source.numClasses()];
for ( int i = 0; i < model.numSubsets(); i++ )
perClassPerValue[i] = new double [source.numClasses()];
Enumeration enum2 = source.enumerateItemsets();
while ( enum2.hasMoreElements() )
{
itemset = (Itemset) enum2.nextElement();
index = model.whichSubset( itemset );
if ( index != -1 )
add( index, itemset );
else
{
weights = model.weights( itemset );
addWeights( itemset, weights );
}
}
}
/** Function to create a new classification with only one value by merging all �
* values of given classification.
*
* @param toMerge The original classification to merge.
*/
public Classification( Classification toMerge )
{
total = toMerge.total;
perClass = new double [toMerge.numClasses()];
System.arraycopy( toMerge.perClass, 0, perClass, 0, toMerge.numClasses() );
perClassPerValue = new double [1] [0];
perClassPerValue[0] = new double [toMerge.numClasses()];
System.arraycopy( toMerge.perClass,0, perClassPerValue[0], 0, toMerge.numClasses() );
perValue = new double [1];
perValue[0] = total;
}
/** Function to add the given itemset to given the value.
*
* @param valueIndex The index of the value.
* @param itemset The itemset to add.
*/
public final void add( int valueIndex, Itemset itemset )
{
int classIndex;
double weight;
classIndex = (int)itemset.getClassValue();
weight = itemset.getWeight();
perClassPerValue[valueIndex][classIndex] = perClassPerValue[valueIndex][classIndex]+weight;
perValue[valueIndex] = perValue[valueIndex] + weight;
perClass[classIndex] = perClass[classIndex] + weight;
total = total + weight;
}
/** Function to add all itemsets with unknown values for given attribute.
*
* @param source The dataset that contains all the itemsets.
* @param attIndex The index of the attribute with possible unknown values.
*
* @throws Exception
*/
public final void addWithUnknownValue( MyDataset source, int attIndex )
{
double [] probs;
double weight, newWeight;
int classIndex;
Itemset itemset;
int j;
probs = new double [perValue.length];
for ( j = 0; j < perValue.length; j++ )
{
//if ( Comparators.isEqual( total, 0 ) )
if ( total == 0 )
probs[j] = 1.0 / probs.length;
else
probs[j] = perValue[j]/total;
}
Enumeration enum2 = source.enumerateItemsets();
while ( enum2.hasMoreElements() )
{
itemset = (Itemset) enum2.nextElement();
if ( itemset.isMissing( attIndex ) )
{
classIndex = (int)itemset.getClassValue();
weight = itemset.getWeight();
perClass[classIndex] = perClass[classIndex] + weight;
total = total + weight;
for ( j = 0; j < perValue.length; j++ )
{
newWeight = probs[j] * weight;
perClassPerValue[j][classIndex] = perClassPerValue[j][classIndex] + newWeight;
perValue[j] = perValue[j] + newWeight;
}
}
}
}
/** Function to add all itemsets in given range to given value.
*
* @param valueIndex The index of the value.
* @param source The source of the data.
* @param start The index of the first itemset to add.
* @param end The index of the first itemset that will not be added.
*
* @throws Exception
*/
public final void addRange( int valueIndex, MyDataset source, int start, int end )
{
double sumOfWeights = 0;
int classIndex;
Itemset itemset;
int i;
for ( i = start; i < end; i++ )
{
itemset = (Itemset) source.itemset( i );
classIndex = (int)itemset.getClassValue();
sumOfWeights = sumOfWeights+itemset.getWeight();
perClassPerValue[valueIndex][classIndex] += itemset.getWeight();
perClass[classIndex] += itemset.getWeight();
}
perValue[valueIndex] += sumOfWeights;
total += sumOfWeights;
}
/** Funtion to add the given itemset to all values weighting it according to given weights.
*
* @param itemset The itemset to add.
* @param weights The weights of the itemset for every value.
*
*/
public final void addWeights( Itemset itemset, double [] weights )
{
int classIndex;
int i;
classIndex = (int)itemset.getClassValue();
for ( i = 0; i < perValue.length; i++ )
{
double weight = itemset.getWeight() * weights[i];
perClassPerValue[i][classIndex] = perClassPerValue[i][classIndex] + weight;
perValue[i] = perValue[i] + weight;
perClass[classIndex] = perClass[classIndex] + weight;
total = total + weight;
}
}
/** Function to check if at least two values contain a minimum number of itemsets.
*
* @param minItemsets The minimum number of itemsets.
*
* @return True if the condition is satisfied. False otherwise.
*/
public final boolean check( double minItemsets )
{
int counter = 0;
int i;
for (i=0;i<perValue.length;i++)
//
if ( perValue[i] >= minItemsets )
counter++;
if ( counter > 1 )
return true;
else
return false;
}
/** Returns index of value containing maximum number of itemsets.
* @return index of value containing maximum number of itemsets.
*/
public final int maxValue()
{
double max;
int maxIndex;
int i;
max = 0;
maxIndex = -1;
for ( i = 0; i < perValue.length; i++ )
if ( perValue[i] >= max )
{
max = perValue[i];
maxIndex = i;
}
return maxIndex;
}
/** Returns class with highest frequency over all values.
* @return class with highest frequency over all values.
*/
public final int maxClass()
{
double maxCount = 0;
int maxIndex = 0;
int i;
for ( i = 0; i < perClass.length; i++ )
if ( perClass[i] > maxCount )
{
maxCount = perClass[i];
maxIndex = i;
}
return maxIndex;
}
/** Returns class with highest frequency for given value.
*
* @param index The index of the value.
* @return class with highest frequency for given value.
*/
public final int maxClass( int index )
{
double maxCount = 0;
int maxIndex = 0;
int i;
if ( perValue[index] > 0 )
{
for ( i = 0; i < perClass.length; i++ )
if ( perClassPerValue[index][i] > maxCount )
{
maxCount = perClassPerValue[index][i];
maxIndex = i;
}
return maxIndex;
}
else
return maxClass();
}
/** Returns number of values.
* @return number of values.
*/
public final int numValues()
{
return perValue.length;
}
/** Returns number of classes.
* @return number of classes.
*/
public final int numClasses()
{
return perClass.length;
}
/** Returns the weight of all itemsets of the class with highest frequency.
* @return the weight of all itemsets of the class with highest frequency.
*/
public final double numCorrect()
{
return perClass[maxClass()];
}
/** Returns incorrectly classifed
* @return incorrectly classifed
*/
public final double numIncorrect()
{
return total-numCorrect();
}
/** Returns the number of incorrectly classified itemsets for the given value.
*
* @param index The index of the value.
* @return the number of incorrectly classified itemsets for the given value.
*/
public final double numIncorrect( int index )
{
return perValue[index]-numCorrect(index);
}
/** Returns the number of correctly classified itemsets for the given value.
*
* @param index The index of the value.
* @return the number of correctly classified itemsets for the given value.
*/
public final double numCorrect( int index )
{
return perClassPerValue[index][maxClass(index)];
}
/** Returns total weight of itemsets.
* @return total weight of itemsets.
*/
public final double getTotal()
{
return total;
}
/** Returns number of itemsets of given class in given value.
*
* @param valueIndex The index of the value.
* @param classIndex The index of the class.
* @return number of itemsets of given class in given value.
*/
public final double perClassPerValue( int valueIndex, int classIndex )
{
return perClassPerValue[valueIndex][classIndex];
}
/** Returns number of (possibly fractional) itemsets in given value.
*
* @param valueIndex The index of the value.
* @return number of (possibly fractional) itemsets in given value.
*/
public final double perValue( int valueIndex )
{
return perValue[valueIndex];
}
/** Returns number of itemsets of given class.
*
* @param classIndex The index of the class.
* @return number of itemsets of given class.
*/
public final double perClass( int classIndex )
{
return perClass[classIndex];
}
/** Returns relative frequency of class over all values.
*
* @param classIndex The index of the class.
* @return relative frequency of class over all values.
*/
public final double probability( int classIndex )
{
if ( total != 0 )
return perClass[classIndex] / total;
else
return 0;
}
/** Returns relative frequency of class for given value.
*
* @param classIndex The index of the class.
* @param attIndex The index of the attribute.
* @return relative frequency of class for given value.
*/
public final double probability( int classIndex, int attIndex )
{
if ( perValue[attIndex] > 0 )
return perClassPerValue[attIndex][classIndex] / perValue[attIndex];
else
return probability( classIndex );
}
/** Function to shift all itemsets in given range from one value to another.
*
* @param from The minimum value.
* @param to The maximum value.
* @param source The dataset.
* @param start The index of the first itemset to add.
* @param end The index of the first itemset that will not be added.
*/
public final void shiftRange( int from, int to, MyDataset source, int start, int end )
{
int classIndex;
double weight;
Itemset itemset;
int i;
for ( i = start; i < end; i++ )
{
itemset = (Itemset) source.itemset( i );
classIndex = (int)itemset.getClassValue();
weight = itemset.getWeight();
perClassPerValue[from][classIndex] -= weight;
perClassPerValue[to][classIndex] += weight;
perValue[from] -= weight;
perValue[to] += weight;
}
}
}