/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.LQD.methods.FGFS_costInstances;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Vector;
/**
*
* File: missing.java
*
* Properties and functions for variables that have missing values.
* Missing values are replaced by the mean, the most frequent value, or
* the mean of the minimum and maximum.
*
* @author Written by Ana Palacios Jimenez (University of Oviedo) 25/006/2010
* @version 1.0
*/
public class missing {

    /**
     * Replaces missing entries of {@code X}, one variable (column) at a time,
     * with a statistic computed from the complete entries of that column.
     *
     * <p>An entry counts as complete when none of {@code geta()},
     * {@code getb()} and {@code getd()} equals {@code Main.MISSING}. For each
     * column the mean and the most frequent value of the four coordinates
     * ({@code geta}, {@code getb}, {@code getc}, {@code getd}) are computed
     * over the complete entries. Ties for the most frequent value are won by
     * the value that appears first in the column.
     *
     * <p>An entry is rewritten only when {@code es_crisp()} returns 1 and
     * {@code geta()} equals {@code Main.MISSING}; this test is narrower than
     * the completeness test used to gather the statistics.
     *
     * <p>If a column has no complete entry, the means are {@code NaN}
     * (floating-point 0/0) and the most frequent values default to 0; those
     * are the values written into the rewritten entries.
     *
     * @param X matrix of values indexed as {@code X[example][variable]};
     *          modified in place
     * @param nejemplos number of examples (rows) to process
     * @param dimx number of variables (columns) to process
     * @param m replacement strategy: 1 writes the column means, any other
     *          value writes the most frequent values
     * @return the same array {@code X} that was passed in
     */
    static fuzzy[][] values_missing(fuzzy X[][],int nejemplos,int dimx, int m)
    {
        // Buffers holding the complete values of the current column; they are
        // cleared and refilled for every column.
        Vector<Float> frequent_iz = new Vector<Float>();
        Vector<Float> frequent_der = new Vector<Float>();
        Vector<Float> frequent_centerb = new Vector<Float>();
        Vector<Float> frequent_centerc = new Vector<Float>();

        for (int i = 0; i < dimx; i++)
        {
            float mean_iz = 0;
            float mean_der = 0;
            float mean_centerb = 0;
            float mean_centerc = 0;
            int cont = 0;

            frequent_iz.clear();
            frequent_der.clear();
            frequent_centerb.clear();
            frequent_centerc.clear();

            // Accumulate sums and collect the values of every complete entry.
            for (int j = 0; j < nejemplos; j++)
            {
                fuzzy value = X[j][i];
                if (value.geta()!=Main.MISSING && value.getb()!=Main.MISSING && value.getd()!=Main.MISSING)
                {
                    mean_iz = mean_iz + value.geta();
                    mean_der = mean_der + value.getd();
                    mean_centerb = mean_centerb + value.getb();
                    mean_centerc = mean_centerc + value.getc();
                    cont++;
                    frequent_iz.add(value.geta());
                    frequent_der.add(value.getd());
                    frequent_centerb.add(value.getb());
                    frequent_centerc.add(value.getc());
                }
            }

            // Most frequent value of each coordinate (0 when nothing was collected).
            float variable_iz = mostFrequent(frequent_iz);
            float variable_der = mostFrequent(frequent_der);
            float variable_ceb = mostFrequent(frequent_centerb);
            float variable_cec = mostFrequent(frequent_centerc);

            // Float division: with cont == 0 these become NaN instead of throwing.
            mean_iz = mean_iz / cont;
            mean_der = mean_der / cont;
            mean_centerc = mean_centerc / cont;
            mean_centerb = mean_centerb / cont;

            // Overwrite the missing entries of this column with the chosen statistic.
            for (int j = 0; j < nejemplos; j++)
            {
                fuzzy value = X[j][i];
                if (value.es_crisp()==1 && value.geta()==Main.MISSING)
                {
                    if (m == 1)
                    {
                        value.setizd(mean_iz);
                        value.setcenti(mean_centerb);
                        value.setcentd(mean_centerc);
                        value.setdere(mean_der);
                    }
                    else
                    {
                        value.setizd(variable_iz);
                        value.setcenti(variable_ceb);
                        value.setcentd(variable_cec);
                        value.setdere(variable_der);
                    }
                }
            }
        }
        return X;
    }

    /**
     * Returns the value that occurs most often in {@code values}.
     *
     * <p>Occurrences are counted in a single pass. When several values share
     * the highest count, the one whose first occurrence comes earliest wins,
     * which is why an insertion-ordered map is used. An empty input yields 0.
     *
     * @param values the values to inspect
     * @return the most frequent value, or 0 if {@code values} is empty
     */
    private static float mostFrequent(Vector<Float> values)
    {
        Map<Float, Integer> counts = new LinkedHashMap<Float, Integer>();
        for (Float value : values)
        {
            Integer seen = counts.get(value);
            counts.put(value, seen == null ? 1 : seen + 1);
        }

        float mode = 0;
        int best = 0;
        for (Map.Entry<Float, Integer> entry : counts.entrySet())
        {
            // Strict comparison keeps the earliest value on ties.
            if (entry.getValue() > best)
            {
                best = entry.getValue();
                mode = entry.getKey();
            }
        }
        return mode;
    }

    /**
     * Lists the examples that contain at least one missing value.
     *
     * <p>A value is treated as missing when it equals {@code -1}.
     * NOTE(review): the sentinel here is the literal -1 while
     * {@code values_missing} compares against {@code Main.MISSING}; confirm
     * that the two agree.
     *
     * @param X matrix of values indexed as {@code X[example][variable]}
     * @param nejemplos number of examples (rows) to inspect
     * @param dimx number of variables (columns) to inspect
     * @return the row indices having a missing value, in ascending order,
     *         each index at most once
     */
    static Vector<Integer> values_missing_test(float X[][],int nejemplos,int dimx)
    {
        Vector<Integer> eliminate = new Vector<Integer>();
        for (int j = 0; j < nejemplos; j++)
        {
            for (int i = 0; i < dimx; i++)
            {
                if (X[j][i] == -1)
                {
                    eliminate.add(j);
                    // One missing value is enough to flag the example.
                    break;
                }
            }
        }
        return eliminate;
    }
}