/***********************************************************************

    This file is part of KEEL-software, the Data Mining tool for regression,
    classification, clustering, pattern mining and so on.

    Copyright (C) 2004-2010

    F. Herrera (herrera@decsai.ugr.es)
    L. Sánchez (luciano@uniovi.es)
    J. Alcalá-Fdez (jalcala@decsai.ugr.es)
    S. García (sglopez@ujaen.es)
    A. Fernández (alberto.fernandez@ujaen.es)
    J. Luengo (julianlm@decsai.ugr.es)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.ImbalancedClassification.Ensembles.C45;

import java.util.*;

/**
 * Keeps track of the (possibly fractional) weight of itemsets, broken down
 * per value, per class, and per value/class pair, together with the overall
 * total weight.
 */
public class Classification {

    /** Weight of itemsets per value and class, indexed as {@code [value][class]}. */
    private final double[][] perClassPerValue;

    /** Weight of itemsets per value. */
    private final double[] perValue;

    /** Weight of itemsets per class. */
    private final double[] perClass;

    /** Total weight of itemsets. */
    private double total;

    /**
     * Creates an empty classification in which every weight is zero.
     *
     * @param numValues Number of values used to make the classification.
     * @param numClasses Number of classes used to make the classification.
     */
    public Classification(int numValues, int numClasses) {
        perClassPerValue = new double[numValues][numClasses];
        perValue = new double[numValues];
        perClass = new double[numClasses];
        total = 0;
    }

    /**
     * Creates a classification with a single value (index 0) and adds every
     * itemset of the dataset to it.
     *
     * @param source The dataset.
     *
     * @throws Exception If the dataset cannot be read.
     */
    public Classification(Dataset source) throws Exception {
        // Assumes numClasses() is a plain getter, so it is read only once.
        final int numClasses = source.numClasses();

        perClassPerValue = new double[1][numClasses];
        perValue = new double[1];
        perClass = new double[numClasses];
        total = 0;

        Enumeration<?> itemsets = source.enumerateItemsets();
        while (itemsets.hasMoreElements()) {
            add(0, (Itemset) itemsets.nextElement());
        }
    }

    /**
     * Creates a classification with one value per subset of the given model
     * and distributes the itemsets of the dataset among those values.
     *
     * <p>An itemset for which {@code model.whichSubset} returns an index other
     * than -1 is added entirely to that value; otherwise it is spread over all
     * values according to {@code model.weights}.
     *
     * @param source The dataset.
     * @param model The model selected to make the classification.
     *
     * @throws Exception If the classification cannot be built.
     */
    public Classification(Dataset source, Cut model) throws Exception {
        // Assumes numSubsets()/numClasses() are plain getters, so each is read once.
        final int numSubsets = model.numSubsets();
        final int numClasses = source.numClasses();

        perClassPerValue = new double[numSubsets][numClasses];
        perValue = new double[numSubsets];
        perClass = new double[numClasses];
        total = 0;

        Enumeration<?> itemsets = source.enumerateItemsets();
        while (itemsets.hasMoreElements()) {
            Itemset itemset = (Itemset) itemsets.nextElement();
            int subset = model.whichSubset(itemset);

            if (subset != -1) {
                add(subset, itemset);
            } else {
                addWeights(itemset, model.weights(itemset));
            }
        }
    }

    /**
     * Creates a classification with only one value by merging all the values
     * of the given classification.
     *
     * @param toMerge The original classification to merge.
     */
    public Classification(Classification toMerge) {
        total = toMerge.total;
        perClass = toMerge.perClass.clone();
        // The single merged value holds exactly the per-class totals.
        perClassPerValue = new double[][] {toMerge.perClass.clone()};
        perValue = new double[] {total};
    }

    /**
     * Adds the weight of the given itemset to the given value.
     *
     * @param valueIndex The index of the value.
     * @param itemset The itemset to add.
     */
    public final void add(int valueIndex, Itemset itemset) {
        final int classIndex = (int) itemset.getClassValue();
        final double weight = itemset.getWeight();

        perClassPerValue[valueIndex][classIndex] += weight;
        perValue[valueIndex] += weight;
        perClass[classIndex] += weight;
        total += weight;
    }

    /**
     * Adds all itemsets whose value for the given attribute is missing,
     * spreading each of them over every value in proportion to the weight the
     * value had when this method was called.
     *
     * @param source The dataset that contains all the itemsets.
     * @param attIndex The index of the attribute with possible unknown values.
     */
    public final void addWithUnknownValue(Dataset source, int attIndex) {
        final int numValues = perValue.length;
        final double[] probs = new double[numValues];

        // Proportions are fixed up front, before any missing itemset is added.
        for (int j = 0; j < numValues; j++) {
            // With no weight at all, fall back to a uniform split.
            probs[j] = (total == 0) ? 1.0 / numValues : perValue[j] / total;
        }

        Enumeration<?> itemsets = source.enumerateItemsets();
        while (itemsets.hasMoreElements()) {
            Itemset itemset = (Itemset) itemsets.nextElement();
            if (!itemset.isMissing(attIndex)) {
                continue;
            }

            int classIndex = (int) itemset.getClassValue();
            double weight = itemset.getWeight();

            perClass[classIndex] += weight;
            total += weight;

            for (int j = 0; j < numValues; j++) {
                double share = probs[j] * weight;
                perClassPerValue[j][classIndex] += share;
                perValue[j] += share;
            }
        }
    }

    /**
     * Adds all itemsets in the given range of the dataset to the given value.
     *
     * @param valueIndex The index of the value.
     * @param source The dataset that contains the itemsets to add.
     * @param start The index of the first itemset to add.
     * @param end The index of the first itemset that will not be added.
     */
    public final void addRange(int valueIndex, Dataset source, int start, int end) {
        double sumOfWeights = 0;

        for (int i = start; i < end; i++) {
            Itemset itemset = (Itemset) source.itemset(i);
            int classIndex = (int) itemset.getClassValue();
            double weight = itemset.getWeight();

            sumOfWeights += weight;
            perClassPerValue[valueIndex][classIndex] += weight;
            perClass[classIndex] += weight;
        }

        perValue[valueIndex] += sumOfWeights;
        total += sumOfWeights;
    }

    /**
     * Adds the given itemset to all values, weighting it according to the
     * given weights.
     *
     * @param itemset The itemset to add.
     * @param weights The weights of the itemset for every value.
     */
    public final void addWeights(Itemset itemset, double[] weights) {
        final int classIndex = (int) itemset.getClassValue();
        final double itemsetWeight = itemset.getWeight();

        for (int i = 0; i < perValue.length; i++) {
            double weight = itemsetWeight * weights[i];

            perClassPerValue[i][classIndex] += weight;
            perValue[i] += weight;
            perClass[classIndex] += weight;
            total += weight;
        }
    }

    /**
     * Checks whether at least two values contain a minimum weight of itemsets.
     *
     * @param minItemsets The minimum number of itemsets.
     *
     * @return True if the condition is satisfied. False otherwise.
     */
    public final boolean check(double minItemsets) {
        int counter = 0;

        for (double weight : perValue) {
            // Stop as soon as a second qualifying value is found.
            if (weight >= minItemsets && ++counter > 1) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the index of the value containing the maximum weight of itemsets.
     *
     * @return The index of the heaviest value; on ties the highest index wins.
     *         Returns -1 when no value has a weight of at least zero.
     */
    public final int maxValue() {
        double max = 0;
        int maxIndex = -1;

        for (int i = 0; i < perValue.length; i++) {
            // ">=" on purpose: a later value with equal weight replaces an earlier one.
            if (perValue[i] >= max) {
                max = perValue[i];
                maxIndex = i;
            }
        }

        return maxIndex;
    }

    /**
     * Returns the class with the highest frequency over all values.
     *
     * @return The index of the heaviest class; on ties the lowest index wins.
     *         Returns 0 when no class has a positive weight.
     */
    public final int maxClass() {
        double maxCount = 0;
        int maxIndex = 0;

        for (int i = 0; i < perClass.length; i++) {
            if (perClass[i] > maxCount) {
                maxCount = perClass[i];
                maxIndex = i;
            }
        }

        return maxIndex;
    }

    /**
     * Returns the class with the highest frequency for the given value.
     *
     * @param index The index of the value.
     *
     * @return The index of the heaviest class within the value, or the result
     *         of {@link #maxClass()} when the value has no positive weight.
     */
    public final int maxClass(int index) {
        if (!(perValue[index] > 0)) {
            return maxClass();
        }

        double maxCount = 0;
        int maxIndex = 0;

        for (int i = 0; i < perClass.length; i++) {
            if (perClassPerValue[index][i] > maxCount) {
                maxCount = perClassPerValue[index][i];
                maxIndex = i;
            }
        }

        return maxIndex;
    }

    /**
     * Returns the number of values.
     *
     * @return The number of values.
     */
    public final int numValues() {
        return perValue.length;
    }

    /**
     * Returns the number of classes.
     *
     * @return The number of classes.
     */
    public final int numClasses() {
        return perClass.length;
    }

    /**
     * Returns the weight of all itemsets of the class with the highest
     * frequency.
     *
     * @return The weight of the class returned by {@link #maxClass()}.
     */
    public final double numCorrect() {
        return perClass[maxClass()];
    }

    /**
     * Returns the weight of the incorrectly classified itemsets, that is, the
     * total weight minus {@link #numCorrect()}.
     *
     * @return The weight of the incorrectly classified itemsets.
     */
    public final double numIncorrect() {
        return total - numCorrect();
    }

    /**
     * Returns the weight of the incorrectly classified itemsets for the given
     * value.
     *
     * @param index The index of the value.
     *
     * @return The weight of the value minus {@link #numCorrect(int)}.
     */
    public final double numIncorrect(int index) {
        return perValue[index] - numCorrect(index);
    }

    /**
     * Returns the weight of the correctly classified itemsets for the given
     * value.
     *
     * @param index The index of the value.
     *
     * @return The weight, within the value, of the class returned by
     *         {@link #maxClass(int)}.
     */
    public final double numCorrect(int index) {
        return perClassPerValue[index][maxClass(index)];
    }

    /**
     * Returns the total weight of itemsets.
     *
     * @return The total weight.
     */
    public final double getTotal() {
        return total;
    }

    /**
     * Returns the weight of itemsets of the given class in the given value.
     *
     * @param valueIndex The index of the value.
     * @param classIndex The index of the class.
     *
     * @return The weight stored for that value/class pair.
     */
    public final double perClassPerValue(int valueIndex, int classIndex) {
        return perClassPerValue[valueIndex][classIndex];
    }

    /**
     * Returns the (possibly fractional) weight of itemsets in the given value.
     *
     * @param valueIndex The index of the value.
     *
     * @return The weight stored for that value.
     */
    public final double perValue(int valueIndex) {
        return perValue[valueIndex];
    }

    /**
     * Returns the weight of itemsets of the given class.
     *
     * @param classIndex The index of the class.
     *
     * @return The weight stored for that class.
     */
    public final double perClass(int classIndex) {
        return perClass[classIndex];
    }

    /**
     * Returns the relative frequency of a class over all values.
     *
     * @param classIndex The index of the class.
     *
     * @return The class weight divided by the total weight, or 0 when the
     *         total weight is zero.
     */
    public final double probability(int classIndex) {
        return (total != 0) ? perClass[classIndex] / total : 0;
    }

    /**
     * Returns the relative frequency of a class for the given value.
     *
     * @param classIndex The index of the class.
     * @param attIndex The index of the value (despite its name, it indexes
     *        values, not attributes).
     *
     * @return The class weight within the value divided by the weight of the
     *         value, or {@link #probability(int)} when the value has no
     *         positive weight.
     */
    public final double probability(int classIndex, int attIndex) {
        if (perValue[attIndex] > 0) {
            return perClassPerValue[attIndex][classIndex] / perValue[attIndex];
        }
        return probability(classIndex);
    }

    /**
     * Shifts all itemsets in the given range of the dataset from one value to
     * another. Only the per-value weights change; per-class weights and the
     * total are left untouched.
     *
     * @param from The index of the value the itemsets are removed from.
     * @param to The index of the value the itemsets are added to.
     * @param source The dataset.
     * @param start The index of the first itemset to shift.
     * @param end The index of the first itemset that will not be shifted.
     */
    public final void shiftRange(int from, int to, Dataset source, int start, int end) {
        for (int i = start; i < end; i++) {
            Itemset itemset = (Itemset) source.itemset(i);
            int classIndex = (int) itemset.getClassValue();
            double weight = itemset.getWeight();

            perClassPerValue[from][classIndex] -= weight;
            perClassPerValue[to][classIndex] += weight;
            perValue[from] -= weight;
            perValue[to] += weight;
        }
    }
}