/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * @author Written by Juli�n Luengo Mart�n 28/11/2006 * @version 0.1 * @since JDK 1.5 * </p> */ package keel.Algorithms.Preprocess.Missing_Values.kmeansImpute; import java.io.*; import java.util.*; import keel.Dataset.*; /** * <p> * This class represents a group of centers (centroids) of a set of clusters * </p> */ public class gCenter { String[][] gravCenters; int numCenters; int[] centerOf; int ndatos; /** Creates a new instance of gCenter */ public gCenter() { gravCenters = null; centerOf = null; numCenters = 0; ndatos = 0; } /** * <p> * Creates a new instance of gCenter with provided number of centers, number of instances of the * data set and number of attributes * </p> * @param k Fixed number of centroids * @param ndatos number of instances in the data set related to this object * @param nvariables number of attributes */ public gCenter(int k, int ndatos, int nvariables) { gravCenters = new String[k][nvariables]; numCenters = k; centerOf = new int[ndatos]; this.ndatos = ndatos; } /** * <p> * Computes the distance between a instances (without previous normalization) and * one clusters (i.e. its centroid). * </p> * @param i The reference instance * @param k The cluster number * @return The Euclidean distance between i and k */ public double distance(Instance i, int k) { double dist = 0; int in = 0; int out = 0; int tipo = 0; int direccion = 0; int nvariables; nvariables = Attributes.getNumAttributes(); for (int l = 0; l < nvariables; l++) { Attribute a = Attributes.getAttribute(l); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !i.getInputMissingValues(in) && gravCenters[k][l].compareTo("<null>")!= 0) { // real value, apply euclidean distance dist += Math.sqrt((i.getInputRealValues(in) - (new Double( gravCenters[k][l]).doubleValue())) * (i.getInputRealValues(in) - (new Double( gravCenters[k][l]).doubleValue()))); } else { if (!i.getInputMissingValues(in) && i.getInputNominalValues(in) != gravCenters[k][l]) dist += 1; } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !i.getOutputMissingValues(out)) { dist += (i.getOutputRealValues(out) - (new Double( gravCenters[k][l]).doubleValue())) * (i.getOutputRealValues(out) - (new Double( gravCenters[k][l]).doubleValue())); } else { if (!i.getOutputMissingValues(out) && i.getOutputNominalValues(out) != gravCenters[k][l]) dist += 1; } out++; } } } return dist; } /** * <p> * Computes the nearest cluster to the given instance * </p> * @param inst The instance we are interested to compare * @return The index of the nearest cluster */ public int nearestCenter(Instance inst) { int nearest = 0; double minDist = this.distance(inst, 0); double distAct; int in = 0; int out = 0; int tipo = 0; int direccion = 0; for (int k = 1; k < numCenters; k++) { distAct = this.distance(inst, k); if (distAct < minDist) { minDist = distAct; nearest = k; } } return nearest; } /** * <p> * this function initializes a center with the values of a given instance. * </p> * @param i the initialization instance * @param c the index of the cluster to be initialized */ public void copyCenter(Instance i, int c) { int in = 0; int out = 0; int tipo = 0; int direccion = 0; int nvariables; nvariables = Attributes.getNumAttributes(); for (int l = 0; l < nvariables; l++) { Attribute a = Attributes.getAttribute(l); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !i.getInputMissingValues(in)) { // real value, apply euclidean distance gravCenters[c][l] = String .valueOf(i.getInputRealValues(in)); } else { if (!i.getInputMissingValues(in)) gravCenters[c][l] = i.getInputNominalValues(in); else{ gravCenters[c][l] = "<null>"; } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !i.getOutputMissingValues(out)) { gravCenters[c][l] = String.valueOf(i .getOutputRealValues(out)); } else { if (!i.getOutputMissingValues(out)) { gravCenters[c][l] = i.getOutputNominalValues(out); } else{ gravCenters[c][l] = "<null>"; } } out++; } } } } /** * <p> * Recalculates all the centroids using a given InstanceSet, in order to reduce the * total sum of distances for each object to the centroid of the cluster, which the object belongs to * </p> * @param IS The reference InstanceSet */ public void recalculateCenters(InstanceSet IS) { int[][] nInst; double tmp; Instance i; int c; int in = 0; int out = 0; int tipo = 0; int direccion = 0; int nvariables; FreqList[][] modes; String[][] oldGC; nvariables = Attributes.getNumAttributes(); modes = new FreqList[numCenters][nvariables]; nInst = new int[numCenters][nvariables]; oldGC = gravCenters; gravCenters = new String[numCenters][nvariables]; for (int a = 0; a < numCenters; a++) { for (int b = 0; b < nvariables; b++) { nInst[a][b] = 0; gravCenters[a][b] = "a"; modes[a][b] = new FreqList(); } } for (int m = 0; m < ndatos; m++) { i = IS.getInstance(m); c = this.getClusterOf(m); in = 0; out = 0; for (int l = 0; l < nvariables; l++) { Attribute a = Attributes.getAttribute(l); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !i.getInputMissingValues(in)) { nInst[c][l]++; if(gravCenters[c][l].compareTo("a") == 0) gravCenters[c][l] = new String("0"); tmp = new Double(gravCenters[c][l]).doubleValue(); tmp += i.getInputRealValues(in); gravCenters[c][l] = String.valueOf(tmp); } else { if (!i.getInputMissingValues(in)){ modes[c][l].AddElement(i.getInputNominalValues(in)); nInst[c][l]++; } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !i.getOutputMissingValues(out)) { nInst[c][l]++; if(gravCenters[c][l].compareTo("a") == 0) gravCenters[c][l] = new String("0"); tmp = new Double(gravCenters[c][l]).doubleValue(); tmp += i.getOutputRealValues(out); gravCenters[c][l] = String.valueOf(tmp); } else { if (!i.getOutputMissingValues(out)) { nInst[c][l]++; modes[c][l].AddElement(i .getOutputNominalValues(out)); } } out++; } } } for (int l = 0; l < nvariables; l++) { Attribute a = Attributes.getAttribute(l); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (tipo == Attribute.NOMINAL) { if(modes[c][l].numElems() > 0){ gravCenters[c][l] = (modes[c][l].mostCommon()).getValue(); } else{ //what do we do if no valid value is available among the instances of this cluster for this attribute? //gravCenters[c][l] = new String("<null>"); //instead of the previous solution, lets leave the old attribute in the centroid as is gravCenters[c][l] = oldGC[c][l]; } } } } // compute the means for real attributes for (int b = 0; b < nvariables; b++) { Attribute at = Attributes.getAttribute(b); tipo = at.getType(); if (tipo != Attribute.NOMINAL) { for (int a = 0; a < numCenters; a++) { if(gravCenters[a][b].compareTo("a") != 0){ tmp = new Double(gravCenters[a][b]).doubleValue(); tmp = tmp / nInst[a][b]; gravCenters[a][b] = String.valueOf(tmp); } else{//what do we do if no valid value is available among the instances of this cluster for this attribute? //gravCenters[a][b] = new String("<null>"); //instead of the previous solution, lets leave the old attribute in the centroid as is gravCenters[a][b] = oldGC[a][b]; } } } } } /** * <p> * Updates the cluster membership of the instance to the nearest cluster * </p> * @param i The considered instance * @param orderOf_i The index of the instance i */ public void setClusterOf(Instance i, int orderOf_i) { centerOf[orderOf_i] = this.nearestCenter(i); } /** * <p> * Returns the cluster to which the given instance belongs to * </p> * @param orderOf_i The index of the instance * @return The index of the cluster to this isntance belongs to. */ public int getClusterOf(int orderOf_i) { return centerOf[orderOf_i]; } /** * <p> * Get the value of an attribute of the indicated centroid * </p> * @param cluster The index of the cluster (centroid) * @param position The attribute (dimension) to be obtained * @return the current value of the dimension of the given cluster */ public String valueAt(int cluster, int position) { return gravCenters[cluster][position]; } }