/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * @author Written by Manuel Moreno (Universidad de C�rdoba) 01/07/2008 * @version 0.1 * @since JDK 1.5 *</p> */ package keel.Algorithms.Decision_Trees.CART.impurities; import keel.Algorithms.Neural_Networks.NNEP_Common.data.DoubleTransposedDataSet; /** * Implementation of GINI impurity Function * * */ public class Gini implements IImpurityFunction { /** Complete Data set of patterns */ private DoubleTransposedDataSet dataset; /** * * It sets the datasets of patters * * @param dataset Complete data set of patterns */ public void setDataset(DoubleTransposedDataSet dataset) { this.dataset = dataset; } /** * * It compute the impurity value associated * * @param patterns index of patterns from dataset associated to node to evaluate * @param cost Associated cost * @return Impurity value associated * @throws Exception * */ public double impurities(int [] patterns, double cost) { int nofoutputs = dataset.getNofoutputs(); int nofpatterns = patterns.length; // Probabilities for each class in current data set portion double []prob_j = new double[nofoutputs]; for (int i=0; i<patterns.length; i++) { int patternIndex = patterns[i]; // Current pattern int patternClass=-1; // Initialize variable // Find which class owns current pattern for (int j=0; j<nofoutputs; j++) { if ( dataset.getAllOutputs()[j][patternIndex] == 1.0) { patternClass = j; break; } } // Increment number of patterns in that class prob_j[patternClass]++; } // calculate real probabilities for (int i=0; i<nofoutputs; i++) prob_j[i] = prob_j[i]/nofpatterns; // Calculate impurities as 2*SUM(SUM( Cost*p(j|node)*p(k|node))) for each class // This can be replaced using 2*SUM(SUM(Cost*p(j|node)*p(k|node))) for j>k double impurities = 0f; for (int j=0; j<nofoutputs -1; j++) { for (int k=j+1; k<nofoutputs; k++) { impurities += cost*prob_j[j]*prob_j[k]; } } impurities = 2*impurities; /* TODO Alternative way of compute impurities double info = 1.0; for (int j=0; j<nofoutputs;j++) info += -Math.pow(prob_j[j],2.0d); */ // Return impurities return impurities; //return info; } }