/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.LQD.preprocess.Prelabelling; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.Random; import java.util.Vector; import java.io.FileWriter; /** * * File: Main.java * * Apply a prelabelling method, to get that the semi-labelled or unlabelled instances * have only one class. If the example is semi-labelled the expert in the field * is given meta-information about the possible class of the example * * @author Written by Ana Palacios Jimenez (University of Oviedo) 25/006/2010 * @version 1.0 */ public class Main { /** * @param args * @throws IOException */ static float MISSING=-2; public static void main(String[] args) throws IOException { //Read the parameters of the method. This parameters are in one .txt String parameters = args[0]; parameters par = new parameters(args[0]); //Files String ninput=par.original_data+".dat";// System.out.println("\n Input File: "+ninput); FileWriter fs1= new FileWriter(par.OutputName+".dat"); int numlabels=par.partitions; //Number of partitions for the low quality variables int alfa = par.alfa; //number of alfa cuts int dimx=par.dimx; //Number of variables int ncol=dimx+1; //Number of columns in the input file int instances=par.instances; //Number of instances int nclasses = par.nclasses; File fe = new File(ninput); if(fe.exists()==false) { System.out.println("The file does not exist"); System.exit(0); } BufferedReader input = new BufferedReader(new FileReader(ninput)); Character character =null; fuzzy X[][] = new fuzzy[instances][dimx]; //Value of the features in each instance Vector<Vector<fuzzy>> L= new Vector<Vector<fuzzy>>(); //The output will be a set of elements (labelled) Vector<Vector<fuzzy>> U= new Vector<Vector<fuzzy>>(); //The output will be a set of elementes (semi-labelled or unlabelled) Vector<fuzzy> P= new Vector<fuzzy>(); //Cost of the instances Vector<Vector<Float>> C= new Vector<Vector<Float>>(instances); //The output will be a set of elements (classes of instances) Vector<Float> CL= new Vector<Float>(); //Classes of labelled instances Vector<Vector<Float>> CU= new Vector<Vector<Float>>(); //Classes of semi-labelled or unlabelled instances interval rangoL[]= new interval[dimx]; //Minimum and maximum of each variable for(int j=0; j<ncol-1; j++) { interval nuevo= new interval(-1,-1); rangoL[j]= nuevo; } String numero= ""; //Read the input file int lines=1+dimx+4; for(int i=0;i<lines;i++) input.readLine(); boolean leido=false; for(int i=0; i<instances; i++) { for(int j=0; j<ncol-1; j++) { if(leido==false) character = (char)input.read(); //System.out.print( "the character read is "+caracter ); while(character!=']' && character!='\n' && character!='?') { numero= numero + character; character = (char)input.read(); } if(character==']' || character=='?') { numero= numero + character; character = (char)input.read();//read , if(numero.compareTo("?")==0) { fuzzy nuevo= new fuzzy(); nuevo.borrosotriangular(MISSING, MISSING, MISSING); X[i][j]= nuevo; } else { X[i][j]=fun_aux.to_fuzzy(numero); } numero=""; if(X[i][j].geta()>X[i][j].getd()) { System.out.println("Incorrect values in the file: Values of the style [4,1]"+X[i][j].geta()+ " "+X[i][j].getd()); System.exit(0); } } } //Read the classes of the instance {1,..,x} (imprecise output) character = (char)input.read();//read { Vector <Float> salidas_imp= new Vector<Float>(); while(character!='}') { character = (char)input.read(); while(character!=',' && character!='}') { numero= numero + character; character = (char)input.read(); } salidas_imp.addElement(Float.parseFloat(numero)); numero=""; } C.add(i,salidas_imp); if(salidas_imp.size()==1) //instances labelled (only one class) { CL.add(salidas_imp.get(0)); Vector<fuzzy> l = new Vector<fuzzy>(); for(int j=0;j<X[i].length;j++) { l.addElement(X[i][j]); if(i==0 || (rangoL[j].getmax()==-1 && rangoL[j].getmin()==-1)) { interval nuevo = new interval(X[i][j].geta(),X[i][j].getd()); rangoL[j]=nuevo; } if(X[i][j].getd() > rangoL[j].getmax()) rangoL[j].setmax(X[i][j].getd()); if(X[i][j].geta() < rangoL[j].getmin()) rangoL[j].setmin(X[i][j].geta()); } L.addElement(l); P.addElement(new fuzzy(1)); } else //instances semi-labelled or unlabelled (several classes or anything) { CU.add(salidas_imp); Vector<fuzzy> u = new Vector<fuzzy>(); for(int j=0;j<X[i].length;j++) { u.addElement(X[i][j]); } U.addElement(u); } character = (char)input.read();//\n after } character = (char)input.read(); if(character.compareTo('[')==0) leido=true; else leido=false; numero=""; }//for read input.close(); //missing values replaced by one interval (minimum of minumim, maximum of maximum) L= missing.values_missing(L, L.size(), dimx,2); U= missing.values_missing(U, U.size(), dimx,2); Vector<Float> values_classes= new Vector<Float>(nclasses); for(int i=0;i<nclasses;i++)//nclases is the number of classes { values_classes.add(par.classes.get(i)); //System.out.println("classes"+valores_clases.get(i)); } float distancia=(float)1/(float)((8*2)+(1*3)); Vector<fuzzy> costs= new Vector<fuzzy>(nclasses); for(int j=0;j<nclasses;j++)//nclases is the number of classes { float iz=0,ce=distancia,ce2=distancia*2,de=distancia*3; // System.out.println("values "+cont+" iz "+iz+" ce "+ce+" ce2 "+ce2+" de "+de); float etiqueta=(par.costs.get(j)); int conta=1; //System.out.println("values "+cont+" iz "+iz+" ce "+ce+" ce2 "+ce2+" de "+de); if(etiqueta>0 && etiqueta<10) { while(conta<etiqueta) { iz=ce2; ce=de; ce2=de+distancia; de=ce2+distancia; //System.out.println("values "+cont+" iz "+iz+" ce "+ce+" ce2 "+ce2+" de "+de); conta++; } } else if(etiqueta==0) { iz=ce=ce2=de=0; } else if(etiqueta>=10) { iz=ce=ce2=de=1; } fuzzy cost = new fuzzy(); cost.borrosotrapezoidal(iz, ce, ce2, de); costs.add(j,cost); } Vector<Float> relevants = ranking(costs,values_classes); float relevant = relevants.get(relevants.size()-1); /* for(int j=0;j<costes.size();j++) { System.out.println( "cost "+j+" "); costs.get(j).show(); }*/ Vector<Vector<fuzzy>> noclassified= new Vector<Vector<fuzzy>>();//the output will be a set of elements while(U.size()!=0) { //Calculate the partitions from L and the number of labels for(int j=0;j<dimx;j++) { if(L.get(L.size()-1).get(j).getd() > rangoL[j].getmax()) rangoL[j].setmax(L.get(L.size()-1).get(j).getd()); if(L.get(L.size()-1).get(j).geta() < rangoL[j].getmin()) rangoL[j].setmin(L.get(L.size()-1).get(j).geta()); } Vector<fuzzypartition> particione = new Vector<fuzzypartition>(dimx); particione=partitions(L,numlabels,dimx,rangoL); fuzzy actual= new fuzzy(0); int setWinnerU=-1; Vector<Float> semi_label = new Vector<Float>(); Vector<fuzzy> setU= new Vector<fuzzy>(); float label=-1; Vector<Vector<fuzzy>> compatible = new Vector<Vector<fuzzy>>(); for(int i=0;i<U.size();i++) { Vector<fuzzy> comp= new Vector<fuzzy>(); comp.addElement(new fuzzy(-1)); if(contain(U.get(i),noclassified)==false) { fuzzyrule regla= new fuzzyrule(particione,U.get(i)); comp.clear(); for(int j=0;j<L.size();j++) { //Membership of U respect to L //System.out.println("Show the instance L "+j); comp.addElement(fuzzy.multi(P.get(j),regla.match_alpha(L.get(j),particione, alfa))); //comp.get(comp.size()-1).show(); //new BufferedReader(new InputStreamReader(System.in)).readLine(); if(Ranking.wang(actual,comp.get(comp.size()-1))==1) //(actual<membership) { //Save U (more compatible) actual=comp.get(comp.size()-1); setWinnerU=i; //System.out.println("The U is " + i); setU=U.get(i); //new BufferedReader(new InputStreamReader(System.in)).readLine(); semi_label= CU.get(i); } } } //end if contain compatible.addElement(comp); }//end if U //Sort L respect to the compatibility of U obtained, setWinnerU if(setWinnerU!=-1) { //System.out.println("We obtain one U compatible with L "+setWinnerU); int equal=1; Vector<Float> Lcolocados=order(CL,compatible.get(setWinnerU)); int k=3; /*if(k==1) { label=Lcolocados.get(0); System.out.println("the label is "+label); } else {*/ for(int j=1;j<k;j++) { // System.out.println("the "+j+" label is "+Lcolocados.get(j)); if(Lcolocados.get(0).compareTo(Lcolocados.get(j))!=0) { equal=0; break; } } if(equal==1) { label=Lcolocados.get(0); //System.out.println("all k are the same "+label); } else //the label es the most freq. { //if(k>2) //{ if(Lcolocados.get(0).compareTo(Lcolocados.get(1))==0 || Lcolocados.get(0).compareTo(Lcolocados.get(2))==0) label=Lcolocados.get(0); else if(Lcolocados.get(1).compareTo(Lcolocados.get(2))==0) label=Lcolocados.get(1); //} } //} //new BufferedReader(new InputStreamReader(System.in)).readLine(); //Look for if the actual label is contained in the semi-labelled, setWinnerU if(semi_label.contains(label)==true) { //System.out.print("the instance semi-labelled contains the label selected"); U.remove(setWinnerU); CU.remove(setWinnerU); /*for(int i=0;i<U.size();i++) { for(int j=0;j<U.get(i).size();j++) { U.get(i).get(j).show(); } for(int j=0;j<CU.get(i).size();j++) { System.out.println("output is "+CU.get(i).get(j)); } } new BufferedReader(new InputStreamReader(System.in)).readLine();*/ L.addElement(setU); CL.addElement(label); //Obtain the cost of the instance int pos=-1; for(int v_clases=0; v_clases<values_classes.size(); v_clases++) { if(values_classes.get(v_clases).compareTo(label)==0) { pos=v_clases; break; } } P.addElement(costs.get(pos)); //System.out.println("the cost of the instances is "); //P.get(P.size()-1).show(); noclassified.clear(); /* for(int i=0;i<L.size();i++) { for(int j=0;j<L.get(i).size();j++) { L.get(i).get(j).show(); } System.out.println("label is "+CL.get(i)+" and the cost "); P.get(i).show(); }*/ // new BufferedReader(new InputStreamReader(System.in)).readLine(); } else { //System.out.println("Insert U in no classified and the U is "); //for (int c=0;c<setU.size();c++) // setU.get(c).show(); //Insert the U in no classified noclassified.addElement(setU); /*System.out.println("All in no classified are"); for (int c1=0;c1<noclassified.size();c1++) { System.out.println("Is "+c1); for (int c2=0;c2<noclassified.get(c1).size();c2++) noclassified.get(c1).get(c2).show(); } new BufferedReader(new InputStreamReader(System.in)).readLine();*/ } } //if we obtain one U compatible with L else //Non U is compatible with L { noclassified.clear(); for (int u=0;u<U.size();u++) { noclassified.addElement(U.get(u)); } //nocompatibilidad=true; } //no classified equal a U --> Non U is compatible with L or the label //selected is different that the provided by the expert (semi-labelled) if(equal(noclassified,U)==true) { L.addElement(U.get(0)); if(semi_label.contains(relevant)==false) { for(int r=relevants.size()-1; r==0;r--) { if(semi_label.contains(relevants.get(r))==true) { relevant=relevants.get(r); break; } } } CL.addElement(relevant); int pos=-1; for(int v_clases=0; v_clases<values_classes.size(); v_clases++) { if(values_classes.get(v_clases).compareTo(relevant)==0) { pos=v_clases; break; } } P.addElement(costs.get(pos)); if(U.size()>1) { U.remove(0); CU.remove(0); } else { U.clear(); CU.clear(); } noclassified.clear(); } //noclassified==U }// del while fs1.write(dimx+"\n"); fs1.write(L.size()+"\n"); fs1.write(nclasses+""); for(int e=0;e<L.size();e++) { fs1.write("\n"); for(int a=0;a<dimx;a++) { fs1.write(fuzzy.fichero(L.get(e).get(a))+" "); } fs1.write("["+P.get(e).a+","+P.get(e).b+","+P.get(e).c+","+P.get(e).d+"]"+" "); fs1.write("{"); fs1.write(CL.get(e)+""); fs1.write("}"); } fs1.close(); //cont++; //} } public static float freq(Vector<Float> CL, Vector<Float> valores) { int cantidad=0; Vector<Integer> cantidades = new Vector<Integer>(valores.size()); for(int j=0;j<valores.size();j++) { cantidades.add(j,0); } for(int i = 0;i <CL.size(); i++) { for(int j=0;j<valores.size();j++) { if(CL.get(i)==valores.get(j)) { cantidades.set(j, cantidades.get(j)+1); break; } } } for (int i = 0; i < cantidades.size(); i++) { for (int j = 0; j < cantidades.size(); j++) { if(cantidades.get(i)<cantidades.get(j)) { cantidad = cantidades.get(i); cantidades.set(i,cantidades.get(j)); cantidades.set(j,cantidad); } } } return valores.get(0); } public static Vector<Float> ranking (Vector<fuzzy> costes, Vector<Float> valores) throws IOException { fuzzy temporal = new fuzzy(); Vector<fuzzy> coste = new Vector<fuzzy>(); Vector<Float> valor = new Vector<Float>(); for(int i=0;i<valores.size();i++) valor.addElement(valores.get(i)); for (int i = 0; i < costes.size(); i++) { coste.addElement(costes.get(i)); } float tem=-1; for (int i = 0; i < coste.size(); i++) { for (int j = 0; j < coste.size(); j++) { if(Ranking.wang(coste.get(i),coste.get(j))==1) { temporal = coste.get(i); coste.set(i,coste.get(j)); coste.set(j,temporal); tem = valor.get(i); valor.set(i,valor.get(j)); valor.set(j,tem); } } } return valor;//.get(valor.size()-1); } public static Vector<Float> order ( Vector<Float> CL, Vector<fuzzy> comp) throws IOException { Vector<Float> Lcolocado= new Vector<Float>(); for(int i=0;i<CL.size();i++) Lcolocado.addElement(CL.get(i)); fuzzy temporal = new fuzzy(); float tem=-1; for (int i = 0; i < comp.size(); i++) { for (int j = 0; j < comp.size(); j++) { if(Ranking.wang(comp.get(i),comp.get(j))==0) { temporal = comp.get(i); comp.set(i,comp.get(j)); comp.set(j,temporal); tem = Lcolocado.get(i); Lcolocado.set(i,Lcolocado.get(j)); Lcolocado.set(j,tem); } } } return Lcolocado; } public static boolean equal(Vector<Vector<fuzzy>> U, Vector<Vector<fuzzy>> noclassified) throws IOException { Vector<Vector<fuzzy>> copiau= new Vector<Vector<fuzzy>>(); for (int i=0;i<U.size();i++) { Vector<fuzzy> conte = new Vector<fuzzy>(); for(int j=0;j<U.get(i).size();j++) { conte.addElement(U.get(i).get(j)); } copiau.addElement(conte); } boolean contenido=true; if(noclassified.size()==0 || copiau.size()!=noclassified.size()) return false; else { for(int i=0;i<noclassified.size();i++) { for(int u=0;u<noclassified.size();u++) { contenido=true; for(int v=0;v<noclassified.get(i).size();v++) { if(copiau.get(u).get(v).equal(noclassified.get(i).get(v))==false) { contenido=false; v=noclassified.get(i).size(); } } if(contenido==true) { copiau.remove(u); } //} if(contenido==true) break; } } } if(copiau.size()==0) return true; else return false; } public static boolean contain(Vector<fuzzy> U, Vector<Vector<fuzzy>> noclassified) { boolean contenido=true; if(noclassified.size()==0) { return false; } else { for(int i=0;i<noclassified.size();i++) { contenido=true; for(int v=0;v<noclassified.get(i).size();v++) { if(U.get(v).equal(noclassified.get(i).get(v))==false) { contenido=false; } } if(contenido==true) return true; } } return contenido; } public static Vector<fuzzypartition> partitions(Vector<Vector<fuzzy>> L, int numlabels, int dimx,interval rangoL[]) { //obtain the partitions int[] fuzzy= new int[dimx+1]; //zero is crisp and one is fuzzy for(int i=0; i<L.get(0).size(); i++) { boolean es_fuzzy=false; for(int j=0; j<L.size(); j++) { if(L.get(j).get(i).geta()!= L.get(j).get(i).getd()) es_fuzzy=true; } if(es_fuzzy==true) fuzzy[i]=1; } fuzzy[fuzzy.length-1]=0;//is the class // Definition of the partitions Vector<Integer> neparticion = new Vector<Integer>(dimx); Vector<fuzzypartition> particione = new Vector<fuzzypartition>(dimx); for(int i=0; i<fuzzy.length-1; i++) { if(fuzzy[i]==1)//is fuzzy { neparticion.add(i, numlabels); } else//is crisp { Vector <Float> variables = new Vector<Float>(); for(int k=0; k<L.size(); k++) { if(k==0) { variables.addElement(L.get(k).get(i).geta()); } else { boolean existe=false; for(int h=0; h<variables.size();h++) { if(variables.get(h)==L.get(k).get(i).geta()) { existe=true; break; } } if(existe==false) { variables.addElement(L.get(k).get(i).geta()); } } }//for if(variables.size()>4 && numlabels!=0) { neparticion.add(i,numlabels); } else neparticion.add(i, variables.size()); }//else crisp fuzzypartition particion= new fuzzypartition(rangoL[i].getmin(),rangoL[i].getmax(),neparticion.get(i)); particione.add(i,particion); }// for obtain partitions return particione; } }