/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 J. Alcal�-Fdez (jalcala@decsai.ugr.es) A. Fern�ndez (alberto.fernandez@ujaen.es) S. Garc�a (sglopez@ujaen.es) F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * * File: RSTData.java * * RSTData utility class * * @author Written by Joaqu�n Derrac (University of Granada) 13/11/2008 * @version 1.0 * @since JDK1.5 * */ package keel.Algorithms.RST_Learning; import java.util.Arrays; import keel.Algorithms.RST_Learning.Operators; public class RSTData { private static double data[][]; private static int discrete[][]; private static double prob[][][]; private static double stdDev []; private static double mean []; private static int output[]; private static int FS[]; private static int featuresSelected; private static int IS[]; private static int instancesSelected; private static int instances; private static int features; private static int classes; private static double normalizeFactor; private static double alpha; private static boolean nominal []; private static boolean marked [][]; private static int markedF; private static int avc[][][]; private static int av[][]; private static int numValues[]; private static int maxNumValues; public static void setNumValues(){ numValues=new int [features]; for(int i=0;i<features;i++){ numValues[i]=0; } } public static void setNumValue(int value, int position){ numValues[position]=value; maxNumValues=Math.max(maxNumValues, value); } public static void setNominal(boolean vec []){ nominal= new boolean[features]; for(int i=0;i <features;i++){ nominal[i]=vec[i]; } discretize(); computeAVC(); } /** * Loads the training data into the classifier * * @param newData Data represented with continuous values */ public static void setData(double newData[][]){ instances = newData.length; features = newData[0].length; data = new double [instances][features]; for(int i=0;i<instances;i++){ for(int j=0;j<features;j++){ data[i][j]=newData[i][j]; } } FS = new int [features]; IS = new int [instances]; stdDev = new double [features]; mean = new double [features]; Arrays.fill(FS, 1); featuresSelected=features; Arrays.fill(IS, 1); instancesSelected=instances; normalizeFactor=1.0; marked = new boolean [features][instances]; computeStd(); }//end-method private static void computeAVC(){ avc= new int [features][maxNumValues][classes]; av=new int [features][maxNumValues]; int aux; //calcular AVC for(int i=0;i<features;i++){ for(int j=0;j<maxNumValues;j++){ for(int k=0;k<classes;k++){ avc[i][j][k]=0; } av[i][j]=0; } } for(int i=0;i<instances;i++){ aux=output[i]; for(int j=0;j<features;j++){ if(nominal[j]){ avc[j][discrete[i][j]][aux]++; av[j][discrete[i][j]]++; } } } computeProbabilities(); } private static void computeProbabilities(){ double aux; double val1,val2; double max=0.0; prob=new double[features][maxNumValues][maxNumValues]; for(int att=0; att<features; att++){ if(nominal[att]){ for(int i=0;i<maxNumValues;i++){ prob[att][i][i]=0.0; for(int j=i+1;j<maxNumValues;j++){ aux=0.0; for(int c=0;c<classes;c++){ if((av[att][i]!=0)&&(av[att][j]!=0)){ val1=(double)avc[att][i][c]/(double)av[att][i]; val2=(double)avc[att][j][c]/(double)av[att][j]; aux+=Math.abs(val1-val2); } } prob[att][i][j]=aux; prob[att][j][i]=aux; if(max<prob[att][i][j]){ max=prob[att][i][j]; } } } } } //normalize prob matrix for(int att=0; att<features; att++){ for(int i=0;i<maxNumValues;i++){ for(int j=i+1;j<maxNumValues;j++){ prob[att][i][j]/=max; prob[att][j][i]/=max; } } } String texto; for(int att=0;att<features;att++){ texto=""; for(int i=0;i<maxNumValues;i++){ for(int j=0;j<maxNumValues;j++){ texto+=prob[att][i][j]+" "; } texto+="\n"; } //System.out.println(texto); } } private static void discretize(){ discrete= new int[instances][features]; //cada instancia se multiplica for(int j=0;j<features;j++){ if(nominal[j]){ for(int i=0;i<data.length;i++){ discrete[i][j]=discrete(data[i][j],j); } } } } private static int discrete(double val, int att){ int res; res=(int)(val*(double)(numValues[att]-1)); return res; } /** * Loads the training output into the classifier * * @param newOutput Output attribute of the training data */ public static void setOutput(int newOutput[]){ output=new int [data.length]; System.arraycopy(newOutput,0,output, 0, data.length); }//end-method private static void computeStd(){ for(int i=0;i<features;i++){ stdDev[i]=0.0; mean[i]=0.0; } for(int i=0;i<features;i++){ for(int j=0;j<instances;j++){ mean[i]+=data[j][i]; } mean[i]/=(double)instancesSelected; } for(int i=0;i<features;i++){ for(int j=0;j<instances;j++){ stdDev[i]+=((data[j][i]-mean[i])*(data[j][i]-mean[i])); } stdDev[i]/=instancesSelected-1; stdDev[i]=Math.sqrt(stdDev[i]); } } private static double similarity(double a, double b, int att){ double std; double first, second; double dist; if(nominal[att]){ if(a==b){ dist=0.0; }else{ dist=prob[att][discrete(a,att)][discrete(b,att)]; } return 1.0-dist; } else{ std=stdDev[att]; first = (a-b+std)/std; second = (b-a+std)/std; return Math.max(Math.min(first, second),0); } } public static double instanceSimilarity(int a, int b){ double value=1.0; for(int i=0;i<features;i++){ if(FS[i]==1){ //value=Operators.TNormLukasiewicz(value,similarity(data[a][i],data[b][i],i)); value=Operators.TNormMin(value, similarity(data[a][i],data[b][i],i)); //value=Operators.TNormProd(value, similarity(data[a][i],data[b][i],i)); if (value == 0.0){ break; } } } return value; } public static boolean isAttribute(int att){ if(FS[att]==0){ return false; } return true; } public static void setAttribute(int att){ if(FS[att]==0){ FS[att]=1; featuresSelected++; } } public static void setClasses(int val){ classes=val; } public static void unsetAttribute(int att){ if(FS[att]==1){ FS[att]=0; featuresSelected--; } } public static void setAttributes(int [] atts){ featuresSelected=0; for(int i=0;i<features;i++){ FS[i]=atts[i]; if(atts[i]==1){ featuresSelected++; } } } public static void setInstances(int [] ins){ instancesSelected=0; for(int i=0; i< instances;i++){ IS[i]=ins[i]; if(ins[i]==1){ instancesSelected++; } } } public static int [] getAttributes(){ int newFS []; newFS= new int [FS.length]; for(int i=0;i<FS.length;i++){ newFS[i]=FS[i]; } return newFS; } public static void clearAttributes(){ featuresSelected=0; Arrays.fill(FS, 0); } public static double computeGamma(){ double lower; double current; double average=0.0; for(int i=0;i<instances;i++){ if(IS[i]==1){ lower=1.0; for(int j=0;j<instances;j++){ if((IS[i]==1)&&(output[i]!=output[j])){ current=instanceSimilarity(i,j); current=1.0-current; lower=Math.min(current, lower); if (lower == 0.0){ break; } } } average+=lower; } } average/=instancesSelected; average/=normalizeFactor; return average; } private static double computeInnerGamma(){ double lower; double current; double average=0.0; for(int i=0;i<instances;i++){ if(IS[i]>0){ if(IS[i]==2){ average+=1.0; } else{ lower=1.0; for(int j=0;j<instances;j++){ if((IS[i]>0)&&(output[i]!=output[j])){ current=1.0-instanceSimilarity(i,j); lower=Math.min(current, lower); if (lower == 0.0){ break; } } } average+=lower; if (lower == 1.0){ marked[markedF][i]= true; } } } } average/=instancesSelected; average/=normalizeFactor; return average; } public static void setNormalization(double value){ normalizeFactor=value; } public static int getnFeatures(){ return featuresSelected; } public static void computeBestFeatures(){ double best=-1.0; int selected; double newGamma, gamma; clearAttributes(); gamma=0.0; for(int i=0;i<features;i++){ Arrays.fill(marked[i],false); } do{ selected=-1; for(int i=0;i<features;i++){ if(FS[i]==0){ FS[i]=1; markedF=i; newGamma=computeInnerGamma(); FS[i]=0; if((newGamma>=best)){ best=newGamma; selected=i; } } } if(selected>-1){ FS[selected]=1; for(int i=0;i<instances;i++){ if(marked[selected][i]){ IS[i]=2; } } featuresSelected++; gamma=best; } else{ break; } }while((gamma<alpha)&&(featuresSelected<features)); for(int i=0;i<instances;i++){ if(IS[i]==2){ IS[i]=1; } } } public static void setAlpha(double value){ alpha=value; } public static void setAllInstances(){ Arrays.fill(IS, 1); } } //end-class