/***********************************************************************

    This file is part of KEEL-software, the Data Mining tool for regression,
    classification, clustering, pattern mining and so on.

    Copyright (C) 2004-2010

    F. Herrera (herrera@decsai.ugr.es)
    L. Sánchez (luciano@uniovi.es)
    J. Alcalá-Fdez (jalcala@decsai.ugr.es)
    S. García (sglopez@ujaen.es)
    A. Fernández (alberto.fernandez@ujaen.es)
    J. Luengo (julianlm@decsai.ugr.es)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.LQD.preprocess.Expert;

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Vector;

/**
 *
 * File: missing.java
 *
 * Properties and functions when the variables have missing values.
 * The missing values are replaced by the mean, by the most frequent value or
 * by the interval spanned by the minimum and the maximum of the observed values.
 *
 * @author Written by Ana Palacios Jimenez (University of Oviedo) 25/06/2010
 * @version 1.0
 */
public class missing {

    /** Value of {@code m} that selects replacement by the per-column mean. */
    private static final int BY_MEAN = 1;

    /** Value of {@code m} that selects replacement by the per-column minimum and maximum. */
    private static final int BY_RANGE = 2;

    /**
     * Fills in the missing cells of every column of {@code X}, in place.
     *
     * <p>For each column {@code i < dimx} the rows {@code j < nejemplos} are scanned
     * twice. The first pass gathers statistics over the observed cells, i.e. those
     * whose {@code geta()}, {@code getb()} and {@code getd()} all differ from
     * {@code Main.MISSING}. The second pass overwrites every cell for which
     * {@code es_crisp() == 1} and {@code geta() == -2}, using the strategy chosen
     * by {@code m}:
     * <ul>
     *   <li>{@code m == 1}: the mean of each of the four components;</li>
     *   <li>{@code m == 2}: the smallest observed {@code geta()} for the two lower
     *       components and the largest observed {@code getd()} for the two upper
     *       components;</li>
     *   <li>any other value: the most frequent value of each component (the value
     *       that appears first wins a tie).</li>
     * </ul>
     * The components read through {@code geta/getb/getc/getd} are written back through
     * {@code setizd/setcenti/setcentd/setdere} respectively.
     *
     * <p>A column without a single observed cell offers nothing to impute from and is
     * left untouched.
     *
     * @param X         data set indexed as {@code X.get(row).get(column)}; modified in place
     * @param nejemplos number of rows of {@code X} to process
     * @param dimx      number of columns of {@code X} to process
     * @param m         replacement strategy, see above
     * @return the same {@code X} instance that was passed in
     */
    static Vector<Vector<fuzzy>> values_missing(Vector<Vector<fuzzy>> X, int nejemplos, int dimx, int m) {
        // The frequency tables are only needed by the "most frequent value" strategy.
        final boolean byMode = m != BY_MEAN && m != BY_RANGE;

        Map<Float, Integer> countA = new LinkedHashMap<Float, Integer>();
        Map<Float, Integer> countB = new LinkedHashMap<Float, Integer>();
        Map<Float, Integer> countC = new LinkedHashMap<Float, Integer>();
        Map<Float, Integer> countD = new LinkedHashMap<Float, Integer>();

        for (int i = 0; i < dimx; i++) {
            float sumA = 0;
            float sumB = 0;
            float sumC = 0;
            float sumD = 0;
            // Start from the neutral elements of min/max so that columns whose values
            // are all above 1000 or all below -1 still get correct bounds.
            float min = Float.POSITIVE_INFINITY;
            float max = Float.NEGATIVE_INFINITY;
            int observed = 0;
            countA.clear();
            countB.clear();
            countC.clear();
            countD.clear();

            // Pass 1: statistics over the observed cells of column i.
            for (int j = 0; j < nejemplos; j++) {
                fuzzy cell = X.get(j).get(i);
                float a = cell.geta();
                float b = cell.getb();
                float d = cell.getd();
                if (a != Main.MISSING && b != Main.MISSING && d != Main.MISSING) {
                    float c = cell.getc();
                    sumA = sumA + a;
                    sumB = sumB + b;
                    sumC = sumC + c;
                    sumD = sumD + d;
                    observed++;
                    if (byMode) {
                        tally(countA, a);
                        tally(countB, b);
                        tally(countC, c);
                        tally(countD, d);
                    }
                    if (a < min) {
                        min = a;
                    }
                    if (d > max) {
                        max = d;
                    }
                }
            }

            if (observed == 0) {
                // Nothing to impute from: dividing by zero would write NaN into the data.
                continue;
            }

            final float newA;
            final float newB;
            final float newC;
            final float newD;
            if (m == BY_MEAN) {
                newA = sumA / observed;
                newB = sumB / observed;
                newC = sumC / observed;
                newD = sumD / observed;
            } else if (m == BY_RANGE) {
                newA = min;
                newB = min;
                newC = max;
                newD = max;
            } else {
                newA = mostFrequent(countA);
                newB = mostFrequent(countB);
                newC = mostFrequent(countC);
                newD = mostFrequent(countD);
            }

            // Pass 2: overwrite the missing cells of column i.
            // NOTE(review): this pass tests the literal -2 while pass 1 tests
            // Main.MISSING; presumably both denote the same marker - confirm.
            for (int j = 0; j < nejemplos; j++) {
                fuzzy cell = X.get(j).get(i);
                if (cell.es_crisp() == 1 && cell.geta() == -2) {
                    cell.setizd(newA);
                    cell.setcenti(newB);
                    cell.setcentd(newC);
                    cell.setdere(newD);
                }
            }
        }
        return X;
    }

    /** Adds one occurrence of {@code value} to the frequency table {@code counts}. */
    private static void tally(Map<Float, Integer> counts, float value) {
        Integer seen = counts.get(value);
        counts.put(value, seen == null ? 1 : seen + 1);
    }

    /**
     * Returns the key with the highest count; among equally frequent keys the one
     * inserted first wins. Returns 0 for an empty table.
     */
    private static float mostFrequent(Map<Float, Integer> counts) {
        float mode = 0;
        int best = 0;
        for (Map.Entry<Float, Integer> entry : counts.entrySet()) {
            if (entry.getValue() > best) {
                best = entry.getValue();
                mode = entry.getKey();
            }
        }
        return mode;
    }
}