/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. Sánchez (luciano@uniovi.es) J. Alcalá-Fdez (jalcala@decsai.ugr.es) S. García (sglopez@ujaen.es) A. Fernández (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.LQD.preprocess.Fuzzy_SMOTE; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.util.Vector; /** * * File: Main.java * * Read the parameters given by the user. * Read the training file * Analyze the classes to apply the preprocessing method. This * method is based on SMOTE but now the inputs have meta-information * and the outputs are imprecise (set of values). 
M and N can be obtained
 * with the preprocessing method or can be indicated by the user
 * Replace the missing values for the mean
 *
 * @author Written by Ana Palacios Jimenez (University of Oviedo) 25/06/2010
 * @version 1.0
 */
public class Main {

    /**
     * Marker passed as the three arguments of {@code fuzzy.borrosotriangular}
     * when an attribute is read as "?" (missing) from the training file.
     */
    static float MISSING = -2;

    /**
     * Runs the preprocessing over every partition listed in the parameter file.
     * For each partition index the test file is copied unchanged, the training
     * file is parsed, synthetic instances are generated for the selected
     * classes, and the enlarged training set plus a small "Instances" summary
     * file are written.
     *
     * @param args {@code args[0]} is the path of the parameter file
     * @throws IOException if a file cannot be read or written, or if the
     *         training file ends before all declared instances were read
     */
    public static void main(String[] args) throws IOException {
        // The parameters of the method are read from a single .txt file.
        parameters par = new parameters(args[0]);

        // One pass per partition (the original note speaks of 100 bootstrap files).
        for (int cont = 0; cont < par.files; cont++) {
            copyTestFile(par.original_data + cont + "tst.dat", par.OutputName + cont + "tst.dat");

            String ninput = par.original_data + cont + "tra.dat";
            System.out.println("\n Input File: " + ninput);

            // try/finally so both writers are released even when parsing fails.
            FileWriter fs1 = new FileWriter(par.OutputName + cont + "tra.dat");
            try {
                FileWriter inst = new FileWriter(par.OutputName + "Instances" + cont + ".txt");
                try {
                    balancePartition(par, ninput, fs1, inst);
                } finally {
                    inst.close();
                }
            } finally {
                fs1.close();
            }
        }
    }

    /**
     * Copies the test file line by line, terminating every line with "\n".
     */
    private static void copyTestFile(String source, String target) throws IOException {
        FileWriter ftest = new FileWriter(target);
        try {
            BufferedReader test = new BufferedReader(new FileReader(source));
            try {
                while (test.ready()) {
                    ftest.write(test.readLine() + "\n");
                }
            } finally {
                test.close();
            }
        } finally {
            ftest.close();
        }
    }

    /**
     * Processes one training partition: reads it, fills in missing values,
     * generates the synthetic instances and writes the results.
     *
     * @param par    parameters of the run
     * @param ninput path of the training file
     * @param fs1    writer of the output training file
     * @param inst   writer of the "Instances" summary file
     */
    private static void balancePartition(parameters par, String ninput, FileWriter fs1, FileWriter inst)
            throws IOException {
        int dimx = par.dimx;            // Number of variables
        int instances = par.instances;  // Number of instances
        int nclasses = par.nclasses;

        File fe = new File(ninput);
        if (!fe.exists()) {
            System.out.println("The file does not exist");
            // NOTE(review): exit status 0 on an error path is kept as in the
            // original; confirm whether callers rely on it before changing it.
            System.exit(0);
        }

        // Variables of each example, their (possibly imprecise) outputs, and
        // one [class, occurrences] pair per class seen with a precise output.
        Vector<Vector<fuzzy>> X = new Vector<Vector<fuzzy>>();
        Vector<Vector<Float>> C = new Vector<Vector<Float>>();
        Vector<Vector<Float>> count_classes = new Vector<Vector<Float>>();

        BufferedReader input = new BufferedReader(new FileReader(ninput));
        try {
            // The first 1 + dimx + 4 lines are copied verbatim to the output file.
            int lines = 1 + dimx + 4;
            for (int i = 0; i < lines; i++) {
                fs1.write(input.readLine() + "\n");
            }
            readInstances(input, instances, dimx, X, C, count_classes);
        } finally {
            input.close();
        }

        // Per the original author's note, unknown variables are replaced by the average.
        X = missing.values_missing(X, instances, dimx, 1);

        sortByOccurrences(count_classes);

        // Classes to oversample.
        Vector<Float> minority = new Vector<Float>();
        if (par.N.compareTo("[-1]") == 0) {
            // Not given by the user: every counted class, least frequent first.
            for (int cnt = 0; cnt < count_classes.size(); cnt++) {
                minority.addElement(count_classes.get(cnt).get(0));
            }
        } else {
            // The expert indicates the minority classes, e.g. [0,1,2].
            String[] items = splitList(par.N);
            for (int p = 0; p < items.length; p++) {
                minority.addElement(Float.parseFloat(items[p]));
            }
        }

        // Number of synthetic instances created from each original instance.
        Vector<Integer> N = new Vector<Integer>();
        if (par.M.compareTo("[-1]") == 0) {
            // Not given by the user: ratio against the most frequent class
            // (last entry after sorting); that last class gets 1.
            int last = count_classes.size() - 1;
            for (int cnt = 0; cnt < count_classes.size(); cnt++) {
                if (cnt == last) {
                    N.addElement(1);
                } else {
                    float valor = count_classes.get(last).get(1) / count_classes.get(cnt).get(1);
                    N.addElement((int) (valor + 1));
                }
            }
        } else {
            // The expert indicates the numbers of new instances.
            String[] items = splitList(par.M);
            for (int p = 0; p < items.length; p++) {
                N.addElement(Integer.parseInt(items[p]));
            }
        }

        int k = par.k; // number of neighbours to choose from

        Vector<Vector<fuzzy>> M = new Vector<Vector<fuzzy>>();
        for (int i = 0; i < minority.size(); i++) {
            // Collect the instances whose output set contains this class.
            M.clear();
            for (int e = 0; e < X.size(); e++) {
                for (int classes = 0; classes < C.get(e).size(); classes++) {
                    if (C.get(e).get(classes).compareTo(minority.get(i)) == 0) {
                        M.addElement(X.get(e));
                    }
                }
            }

            for (int min = 0; min < M.size(); min++) {
                // Indices of the other instances of M as ordered by distance().
                Vector<Integer> distancias_vecinos = distance(M, min);

                // Never draw beyond the neighbours that actually exist; the
                // original indexed up to k and failed on small classes.
                // k <= 0 behaves as the original did for k = 0 (index 0).
                int candidatos = Math.min(Math.max(k, 1), distancias_vecinos.size());
                if (candidatos == 0) {
                    // A single instance has no neighbour to interpolate with.
                    continue;
                }

                for (int replicas = 0; replicas < N.get(i); replicas++) {
                    // Math.random() is strictly below 1.0 in double precision, so
                    // the index stays within [0, candidatos - 1]. The former cast
                    // to float could round up to 1.0f.
                    int aleatorio = (int) (Math.random() * candidatos);
                    int elegido = distancias_vecinos.get(aleatorio);

                    // New instance between the current one (min) and the chosen neighbour.
                    Vector<fuzzy> sintetico = new Vector<fuzzy>();
                    for (int atri = 0; atri < dimx; atri++) {
                        fuzzy dif = fuzzy.resta(M.get(elegido).get(atri), M.get(min).get(atri));
                        float gap = (float) Math.random() * 1;
                        fuzzy atributo = fuzzy.suma(M.get(min).get(atri), fuzzy.multinumber(gap, dif));
                        // Stored after fuzzy.neg, as in the original (semantics defined in fuzzy).
                        sintetico.addElement(fuzzy.neg(atributo));
                    }

                    Vector<Float> clas_mino = new Vector<Float>();
                    clas_mino.addElement(minority.get(i));

                    // The new instance joins the data set with a precise output.
                    X.addElement(sintetico);
                    C.addElement(clas_mino);
                }
            }
        }

        // Summary file: variables, total instances, classes (no trailing newline).
        inst.write(dimx + "\n");
        inst.write(X.size() + "\n");
        inst.write(nclasses + "");

        // Enlarged training set: attributes followed by the output set {c1,c2,...}.
        for (int e = 0; e < X.size(); e++) {
            for (int a = 0; a < dimx; a++) {
                fs1.write(fuzzy.fichero(X.get(e).get(a)) + ",");
            }
            fs1.write("{");
            for (int classes = 0; classes < C.get(e).size(); classes++) {
                if (classes != 0) {
                    fs1.write(",");
                }
                fs1.write(C.get(e).get(classes) + "");
            }
            fs1.write("}\n");
        }
    }

    /**
     * Parses {@code instances} examples from {@code input}. Each example is
     * {@code dimx} attributes (each ending in ']' or given as '?') followed by
     * an output set written as {c1,...,cn}.
     *
     * @param X             receives the attributes of every example
     * @param C             receives the output set of every example
     * @param count_classes receives one [class, occurrences] pair per class;
     *                      only examples with a single output are counted
     * @throws IOException if the file ends in the middle of an example
     */
    private static void readInstances(BufferedReader input, int instances, int dimx,
            Vector<Vector<fuzzy>> X, Vector<Vector<Float>> C, Vector<Vector<Float>> count_classes)
            throws IOException {
        // Shared text buffer; it is only cleared where the original cleared it.
        StringBuilder number = new StringBuilder();

        for (int i = 0; i < instances; i++) {
            Vector<fuzzy> atributos = new Vector<fuzzy>();
            for (int j = 0; j < dimx; j++) {
                char character = readChar(input);
                while (character != ']' && character != '\n' && character != '?') {
                    number.append(character);
                    character = readChar(input);
                }
                if (character == ']' || character == '?') {
                    number.append(character);
                    readChar(input); // skip the ',' that follows the attribute
                    String text = number.toString();
                    if (text.equals("?")) {
                        fuzzy nuevo = new fuzzy();
                        nuevo.borrosotriangular(MISSING, MISSING, MISSING);
                        atributos.addElement(nuevo);
                    } else {
                        atributos.addElement(fun_aux.to_fuzzy(text));
                    }
                    number.setLength(0);
                    if (atributos.get(j).geta() > atributos.get(j).getd()) {
                        System.out.println("Incorrect values in the file: Values of the style [4,1]"
                                + atributos.get(j).geta() + " " + atributos.get(j).getd());
                        System.exit(0);
                    }
                }
            }
            X.addElement(atributos);

            // Classes of the instance {1,..,x} (imprecise output).
            char character = readChar(input); // opening '{'
            Vector<Float> salidas_imp = new Vector<Float>();
            while (character != '}') {
                character = readChar(input); // first character of a value
                while (character != ',' && character != '}') {
                    number.append(character);
                    character = readChar(input);
                }
                salidas_imp.addElement(Float.parseFloat(number.toString()));
                number.setLength(0);
            }
            C.add(i, salidas_imp);

            if (salidas_imp.size() == 1) {
                countClass(count_classes, salidas_imp.get(0));
            }

            // Line break after '}'. Read leniently: the last line of the file
            // may legitimately lack it.
            input.read();
            number.setLength(0);
        }
    }

    /**
     * Reads one character, failing instead of returning the end-of-stream
     * marker. The original cast -1 to a char that no parsing loop recognises,
     * so a truncated file made those loops spin forever.
     */
    private static char readChar(BufferedReader input) throws IOException {
        int code = input.read();
        if (code == -1) {
            throw new IOException("Unexpected end of file while reading the training instances");
        }
        return (char) code;
    }

    /**
     * Adds one occurrence of {@code clase} to its [class, occurrences] pair,
     * creating the pair with a count of 1 when the class is new.
     */
    private static void countClass(Vector<Vector<Float>> count_classes, Float clase) {
        boolean existe = false;
        for (int cnt = 0; cnt < count_classes.size(); cnt++) {
            Vector<Float> par_clase = count_classes.get(cnt);
            if (par_clase.get(0).compareTo(clase) == 0) {
                existe = true;
                par_clase.set(1, par_clase.get(1) + 1);
            }
        }
        if (!existe) {
            Vector<Float> first = new Vector<Float>();
            first.addElement(clase);
            first.addElement((float) 1);
            count_classes.addElement(first);
        }
    }

    /**
     * Orders the [class, occurrences] pairs by ascending number of
     * occurrences. The exchange loop of the original is kept unchanged so
     * that classes with equal counts end up in the same relative order.
     */
    private static void sortByOccurrences(Vector<Vector<Float>> count_classes) {
        for (int cnt = 0; cnt < count_classes.size(); cnt++) {
            for (int j = 0; j < count_classes.size(); j++) {
                if (count_classes.get(j).get(1) > count_classes.get(cnt).get(1)) {
                    Vector<Float> temporal = count_classes.get(cnt);
                    count_classes.set(cnt, count_classes.get(j));
                    count_classes.set(j, temporal);
                }
            }
        }
    }

    /**
     * Splits a list written as "[a,b,c]" into its items. The first and last
     * characters are dropped and empty items are kept, so malformed lists
     * still fail in the number parsing done by the caller.
     */
    private static String[] splitList(String bracketed) {
        String contenido = bracketed.substring(1, bracketed.length() - 1);
        return contenido.split(",", -1);
    }

    /**
     * Computes, for instance {@code i} of {@code M}, a distance to every other
     * instance (square root of the sum of squared absolute differences, using
     * fuzzy arithmetic) and returns the indices of those instances reordered
     * together with their distances.
     *
     * @param M instances of one class
     * @param i index in {@code M} of the reference instance
     * @return indices in {@code M} of all instances except {@code i}, in the
     *         order produced by the {@code Ranking.wang} comparisons
     *         (presumably nearest first, since the caller draws from the
     *         leading entries - confirm against Ranking)
     * @throws IOException declared for compatibility with existing callers
     */
    public static Vector<Integer> distance(Vector<Vector<fuzzy>> M, int i) throws IOException {
        Vector<fuzzy> distance = new Vector<fuzzy>();
        Vector<Integer> instance = new Vector<Integer>();

        for (int min = 0; min < M.size(); min++) {
            if (min == i) {
                continue;
            }
            fuzzy sumatorio = new fuzzy(0);
            for (int a = 0; a < M.get(min).size(); a++) {
                fuzzy resta = fuzzy.resta(M.get(i).get(a), M.get(min).get(a));
                sumatorio = fuzzy.suma(sumatorio, fuzzy.pow(fuzzy.abs(resta), 2));
            }
            sumatorio = fuzzy.pow(sumatorio, (float) 0.5);
            distance.addElement(sumatorio);
            instance.addElement(min);
        }

        // Sort the distances, moving the instance indices along with them.
        // A pair is exchanged whenever Ranking.wang returns 0 for it.
        for (int dis = 0; dis < distance.size(); dis++) {
            for (int j = dis + 1; j < distance.size(); j++) {
                if (Ranking.wang(distance.get(dis), distance.get(j)) == 0) {
                    fuzzy temporal = distance.get(dis);
                    distance.set(dis, distance.get(j));
                    distance.set(j, temporal);

                    int eje = instance.get(dis);
                    instance.set(dis, instance.get(j));
                    instance.set(j, eje);
                }
            }
        }
        return instance;
    }
}