/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.Genetic_Rule_Learning.M5Rules; /** * Class for handling the impurity values when spliting the instances */ public final class Impurity { double n; // number of total instances int attr; // splitting attribute double nl; // number of instances in the left group double nr; // number of instances in the right group double sl; // sum of the left group double sr; // sum of the right group double s2l; // squared sum of the left group double s2r; // squared sum of the right group double sdl; // standard deviation of the left group double sdr; // standard deviation of the right group double vl; // variance of the left group double vr; // variance of the right group double sd; // overall standard deviation double va; // overall variance double impurity; // impurity value; int order; // order = 1, variance; order = 2, standard deviation; order = 3, the cubic root of the variance; // order = k, the k-th order root of the variance /** * Constructs an Impurity object containing the impurity values of partitioning the instances using an attribute * @param partition the index of the last instance in the left subset * @param attribute the attribute used in partitioning * @param inst instances * @param k the order of the impurity; =1, the variance; =2, the stardard deviation; =k, the k-th order root of the variance */ public Impurity(int partition, int attribute, MyDataset inst, int k) { StatisticsStore values = new StatisticsStore(0, inst.numItemsets() - 1, inst.getClassIndex(), inst); attr = attribute; n = inst.numItemsets(); sd = values.sd; va = values.va; values = new StatisticsStore(0, partition, inst.getClassIndex(), inst); nl = partition + 1; sl = values.sum; s2l = values.sqrSum; values = new StatisticsStore(partition + 1, inst.numItemsets() - 1, inst.getClassIndex(), inst); nr = inst.numItemsets() - partition - 1; sr = values.sum; s2r = values.sqrSum; order = k; this.incremental(0, 0); } /** * Converts an Impurity object to a string * @return the converted string */ public final String toString() { StringBuffer text = new StringBuffer(); text.append("Print impurity values:\n"); text.append(" Number of total instances:\t" + n + "\n"); text.append(" Splitting attribute:\t\t" + attr + "\n"); text.append(" Number of the instances in the left:\t" + nl + "\n"); text.append(" Number of the instances in the right:\t" + nr + "\n"); text.append(" Sum of the left:\t\t\t" + sl + "\n"); text.append(" Sum of the right:\t\t\t" + sr + "\n"); text.append(" Squared sum of the left:\t\t" + s2l + "\n"); text.append(" Squared sum of the right:\t\t" + s2r + "\n"); text.append(" Standard deviation of the left:\t" + sdl + "\n"); text.append(" Standard deviation of the right:\t" + sdr + "\n"); text.append(" Variance of the left:\t\t" + vr + "\n"); text.append(" Variance of the right:\t\t" + vr + "\n"); text.append(" Overall standard deviation:\t\t" + sd + "\n"); text.append(" Overall variance:\t\t\t" + va + "\n"); text.append(" Impurity (order " + order + "):\t\t" + impurity + "\n"); return text.toString(); } /** * Incrementally computes the impurity values * @param value the incremental value * @param type if type=1, value will be added to the left subset; type=-1, to the right subset; type=0, initializes */ public final void incremental(double value, int type) { double y = 0., yl = 0., yr = 0.; switch (type) { case 1: nl += 1; nr -= 1; sl += value; sr -= value; s2l += value * value; s2r -= value * value; break; case -1: nl -= 1; nr += 1; sl -= value; sr += value; s2l -= value * value; s2r += value * value; break; case 0: break; default: M5.errorMsg("wrong type in Impurity.incremental()."); } if (nl <= 0.0) { vl = 0.0; sdl = 0.0; } else { vl = (nl * s2l - sl * sl) / ((double) nl * ((double) nl)); vl = Math.abs(vl); sdl = Math.sqrt(vl); } if (nr <= 0.0) { vr = 0.0; sdr = 0.0; } else { vr = (nr * s2r - sr * sr) / ((double) nr * ((double) nr)); vr = Math.abs(vr); sdr = Math.sqrt(vr); } if (order <= 0) { M5.errorMsg( "Impurity order less than zero in Impurity.incremental()"); } else if (order == 1) { y = va; yl = vl; yr = vr; } else { y = Math.pow(va, 1. / order); yl = Math.pow(vl, 1. / order); yr = Math.pow(vr, 1. / order); } if (nl <= 0.0 || nr <= 0.0) { impurity = 0.0; } else { impurity = y - ((double) nl / (double) n) * yl - ((double) nr / (double) n) * yr; } } }