/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. Sánchez (luciano@uniovi.es) J. Alcalá-Fdez (jalcala@decsai.ugr.es) S. García (sglopez@ujaen.es) A. Fernández (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /* Democratic.java Isaac Triguero Velazquez. Created by Isaac Triguero Velazquez 11-1-2011 Copyright (c) 2008 __MyCompanyName__. All rights reserved. */ package keel.Algorithms.Semi_Supervised_Learning.Democratic; import keel.Algorithms.Semi_Supervised_Learning.Basic.C45.*; import keel.Algorithms.Semi_Supervised_Learning.Basic.HandlerNB; import keel.Algorithms.Semi_Supervised_Learning.Basic.HandlerSMO; import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeSet; import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeGenerator; import keel.Algorithms.Semi_Supervised_Learning.Basic.Prototype; import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeGenerationAlgorithm; import keel.Algorithms.Semi_Supervised_Learning.Basic.Utilidades; import keel.Algorithms.Semi_Supervised_Learning.*; import java.util.*; import keel.Algorithms.Semi_Supervised_Learning.utilities.*; import keel.Algorithms.Semi_Supervised_Learning.utilities.KNN.*; import keel.Dataset.Attribute; import keel.Dataset.Attributes; import keel.Dataset.InstanceAttributes; import keel.Dataset.InstanceSet; import org.core.*; import java.util.StringTokenizer; /** * This class implements the Co-traning wrapper. You can use: Knn, C4.5, SMO and as classifiers. * @author triguero * */ public class DemocraticGenerator extends PrototypeGenerator { /*Own parameters of the algorithm*/ protected int numberOfClassifiers = 3; protected int numberOfPrototypes; // Particle size is the percentage protected int numberOfClass; /** Parameters of the initial reduction process. */ private String[] paramsOfInitialReducction = null; int pre[][] = new int[this.numberOfClassifiers][]; double [][][] probabilities = new double[this.numberOfClassifiers][][]; double average[] = new double[this.numberOfClassifiers]; double deviation[] = new double[this.numberOfClassifiers]; double li[] = new double[this.numberOfClassifiers]; double hi[] = new double[this.numberOfClassifiers]; double wi[] = new double[this.numberOfClassifiers]; /** * Build a new DemocraticGenerator Algorithm * @param t Original prototype set to be reduced. * @param perc Reduction percentage of the prototype set. */ public DemocraticGenerator(PrototypeSet _trainingDataSet, int neigbors,int poblacion, int perc, int iteraciones, double c1, double c2, double vmax, double wstart, double wend) { super(_trainingDataSet); algorithmName="Democratic"; } /** * Build a new DemocraticGenerator Algorithm * @param t Original prototype set to be reduced. * @param unlabeled Original unlabeled prototype set for SSL. * @param params Parameters of the algorithm (only % of reduced set). */ public DemocraticGenerator(PrototypeSet t, PrototypeSet unlabeled, PrototypeSet test, Parameters parameters) { super(t,unlabeled, test, parameters); algorithmName="Democratic"; this.numberOfClassifiers = parameters.getNextAsInt(); //Last class is the Unknown this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size(); // System.out.print("\nIsaacSSL dice: " + this.numberOfClass +"\n"); } /** * This methods implement the voting rule in order to classify unlabeled data with the prediction pre[][] * @param unlabeled * @param pre * @return */ double [] votingRule(PrototypeSet unlabeled, int pre[][]){ double predicho[] = new double[unlabeled.size()]; for(int i=0; i< unlabeled.size(); i++){ // voting rule double perClass[] = new double [this.numberOfClass]; Arrays.fill(perClass, 0); for(int j=0; j< this.numberOfClassifiers; j++){ if(pre[j][i]!=-1) perClass[(int) pre[j][i]]++; } int Maximo = Integer.MIN_VALUE; for (int j=0 ; j< this.numberOfClass; j++){ if(perClass[j]>Maximo){ Maximo =(int) perClass[j]; predicho[i] = j; } } } // End voting Rule return predicho; } public void getSolicitaGarbageColector(){ try{ // System.out.println( "********** INICIO: 'LIMPIEZA GARBAGE COLECTOR' **********" ); Runtime basurero = Runtime.getRuntime(); // System.out.println( "MEMORIA TOTAL 'JVM': " + basurero.totalMemory() ); // System.out.println( "MEMORIA [FREE] 'JVM' [ANTES]: " + basurero.freeMemory() ); basurero.gc(); //Solicitando ... // System.out.println( "MEMORIA [FREE] 'JVM' [DESPUES]: " + basurero.freeMemory() ); //System.out.println( "********** FIN: 'LIMPIEZA GARBAGE COLECTOR' **********" ); } catch( Exception e ){ e.printStackTrace(); } } /** * Classify and calculing intervals of confidence * @param train * @param test * @throws Exception */ public double[] clasificar(PrototypeSet train[], PrototypeSet test) throws Exception{ double predicho[] = new double[test.size()]; for (int i=0; i<this.numberOfClassifiers; i++){ getSolicitaGarbageColector(); if(i%3==0){ // 3NN // System.out.println("Executing KNN"); pre[i] = KNN.classify(train[i], test, 3, probabilities[i]); }else if(i%3==1){ // NB //System.out.println("Executing NB"); HandlerNB nb = new HandlerNB(train[i].prototypeSetTodouble(), train[i].prototypeSetClasses(), test.prototypeSetTodouble(), test.prototypeSetClasses(),this.numberOfClass); pre[i] = nb.getPredictions(); probabilities[i] = nb.getProbabilities(); nb = null; }else if(i%3==2){ //C45 //System.out.println("Executing C45"); InstanceSet uno = train[i].toInstanceSet(); InstanceSet dos = test.toInstanceSet(); C45 c45 = new C45(train[i].toInstanceSet(), test.toInstanceSet()); // C4.5 called pre[i] = c45.getPredictions(); probabilities[i] = c45.getProbabilities(); uno = null; dos = null; c45 = null; } } for (int i=0; i< this.numberOfClassifiers; i++){ average[i]=0; deviation[i]=0; for(int z=0; z<test.size(); z++){ int clase = pre[i][z]; if(clase!=-1) average[i] += probabilities[i][z][clase]; } average[i] /= test.size(); // System.out.println("Average = "+i+" es "+ average[i]); for(int z=0; z<test.size(); z++){ int clase = pre[i][z]; if(clase!=-1) deviation[i] += probabilities[i][z][clase]-average[i]; } deviation[i] /= test.size(); deviation[i] = Math.sqrt(Math.abs(deviation[i])); //System.out.println("Deviation = "+i+" es "+ deviation[i]); } for (int i=0; i< this.numberOfClassifiers; i++){ // computing the 95% -conf int. [li,hi] for each classifier // Calculating the average and standard deviation. li[i] = average[i] -1.96*(deviation[i]/Math.sqrt(test.size())); hi[i] = average[i] +1.96*(deviation[i]/Math.sqrt(test.size())); wi[i] = (li[i]+hi[i])/2.; } predicho = votingRule(test, pre); // in predicho we have the possible label, but we have to contrast this information with the confidence level. getSolicitaGarbageColector(); return predicho; } /** * Apply the DemocraticGenerator method with 3 classifiers: C45, NB, and 3NN * * @return */ public Pair<PrototypeSet, PrototypeSet> applyAlgorithm() throws Exception { System.out.print("\nThe algorithm Democratic-CoLearning is starting...\n Computing...\n"); PrototypeSet labeled; PrototypeSet unlabeled; labeled = new PrototypeSet(trainingDataSet.getAllDifferentFromClass(this.numberOfClass)); // Selecting labeled prototypes from the training set. unlabeled = new PrototypeSet(trainingDataSet.getFromClass(this.numberOfClass)); // establishing the indexes for (int j=0; j< labeled.size();j++){ labeled.get(j).setIndex(j); } for (int j=0; j< unlabeled.size();j++){ unlabeled.get(j).setIndex(j); } // In order to avoid problems with C45 and NB. for(int p=0; p<unlabeled.size(); p++){ unlabeled.get(p).setFirstOutput(0); // todos con un valor válido. } PrototypeSet Li[] = new PrototypeSet[this.numberOfClassifiers]; PrototypeSet LiP[] = new PrototypeSet[this.numberOfClassifiers]; double Ei[] = new double[this.numberOfClassifiers]; double EiP[] = new double[this.numberOfClassifiers]; probabilities = new double[this.numberOfClassifiers][unlabeled.size()][this.numberOfClass]; for(int i=0; i<this.numberOfClassifiers; i++){ Li[i] = new PrototypeSet(labeled.clone()); // labeled data for Ai Ei[i] = 0; // estimate for # mislabeled exs in Li } PrototypeSet tranductive = new PrototypeSet(this.transductiveDataSet.clone()); PrototypeSet test = new PrototypeSet(this.testDataSet.clone()); double traPrediction[] = null; double tstPrediction[] = null; int aciertoTrs = 0; int aciertoTst = 0; probabilities = new double[this.numberOfClassifiers][tranductive.size()][this.numberOfClass]; traPrediction = clasificar(Li, tranductive); int pertenece[][] = new int[tranductive.size()][this.numberOfClass]; double wii[] = new double[this.numberOfClass]; for(int i=0; i<tranductive.size(); i++){ Arrays.fill(pertenece[i], 0); Arrays.fill(wii,0); for(int j=0; j< this.numberOfClassifiers; j++){ for (int z=0; z < this.numberOfClass; z++){ if(pre[j][i]==z && wi[j]>0.5){ // Allocate this calssifier y group Gj. pertenece[i][z]++; wii[z]+=wi[j]; } } } double countGj[] = new double[this.numberOfClass]; double max= Double.MIN_VALUE; int clase =0; for(int j=0; j< this.numberOfClass; j++){ /* Compute group averge mean confidence */ countGj[j] = (pertenece[i][j]+0.5)/ (pertenece[i][j]+1) * ((wii[j])/pertenece[i][j]); if(countGj[j]>max){ max = countGj[j]; clase = j; } } traPrediction[i]=clase; if(tranductive.get(i).getOutput(0) == traPrediction[i]){ aciertoTrs++; } tranductive.get(i).setFirstOutput(traPrediction[i]); } // Test phase probabilities = new double[this.numberOfClassifiers][test.size()][this.numberOfClass]; tstPrediction = clasificar(Li, test); pertenece = new int[test.size()][this.numberOfClass]; for(int i=0; i<test.size(); i++){ Arrays.fill(pertenece[i], 0); Arrays.fill(wii,0); for(int j=0; j< this.numberOfClassifiers; j++){ for (int z=0; z < this.numberOfClass; z++){ if(pre[j][i]==z && wi[j]>0.5){ // Allocate this calssifier y group Gj. pertenece[i][z]++; wii[z]+=wi[j]; } } } double countGj[] = new double[this.numberOfClass]; double max= Double.MIN_VALUE; int clase =0; for(int j=0; j< this.numberOfClass; j++){ /* Compute group averge mean confidence */ countGj[j] = (pertenece[i][j]+0.5)/ (pertenece[i][j]+1) * ((wii[j])/pertenece[i][j]); if(countGj[j]>max){ max = countGj[j]; clase = j; } } tstPrediction[i]=clase; if(test.get(i).getOutput(0) == tstPrediction[i]){ aciertoTst++; } test.get(i).setFirstOutput(tstPrediction[i]); } // System.out.println("Initial-Labeled size "+ Li[1].size()); // System.out.println("Initial % de acierto TRS = "+ (aciertoTrs*100.)/transductiveDataSet.size()); // System.out.println("Initial % de acierto TST = "+ (aciertoTst*100.)/testDataSet.size()); boolean changes = true; while(changes){ changes = false; double predicho[] = new double[unlabeled.size()]; probabilities = new double[this.numberOfClassifiers][unlabeled.size()][this.numberOfClass]; predicho = clasificar(Li, unlabeled); // Choose which exs to propose for labeling for (int i=0; i< this.numberOfClassifiers; i++){ LiP[i] = new PrototypeSet(); // data proposed for adding to Li } // for(int i=0; i< this.numberOfClassifiers; i++){ // System.out.println(wi[i]); //} for(int j=0; j<unlabeled.size(); j++){ // For each unlabeld data. // is the sum of the mean confidence values of the learners in the majority group is greater than the sum of the mean confidence values in the minority group?? double sumWi[] = new double[this.numberOfClass]; Arrays.fill(sumWi, 0); for(int i=0; i< this.numberOfClassifiers; i++){ if(pre[i][j]!=-1) sumWi[pre[i][j]] += wi[i]; } /* for(int i=0; i< this.numberOfClass; i++){ System.out.println(sumWi[i]); } System.out.println("******************************************"); */ // Calculate the maximum condifence with different label to PREDICHO. double Max = Double.MIN_VALUE; for(int i=0; i<this.numberOfClass; i++){ if(i!= (int) predicho[j]){ // different from labeld predicho. if(sumWi[i]> Max){ Max = sumWi[i]; } } } if(sumWi[(int)predicho[j]]> Max){ // second condition to label. for(int i=0; i< this.numberOfClassifiers; i++){ // if the classifier i does not label this X unlabeled as predicho[j], adding in Li. if(pre[i][j]!=predicho[j]){ Prototype anadir = new Prototype(unlabeled.get(j)); anadir.setFirstOutput(predicho[j]); LiP[i].add(anadir); // System.out.println("PREVIOUSAdding"); } } } } // Estimate if adding Li' to Li improves the accuracy // Re-classifying and recalculing confidence interval. PrototypeSet prueba[] = new PrototypeSet[this.numberOfClassifiers]; for (int i=0; i<this.numberOfClassifiers; i++){ PrototypeSet aux = new PrototypeSet(Li[i].clone()); aux.add(LiP[i].clone()); prueba[i] = new PrototypeSet(aux.clone()); } clasificar(prueba, unlabeled); //double sumli[] = new double[this.numberOfClassifiers]; double sumli=0; for (int i=0; i< this.numberOfClassifiers; i++){ // computing the 95% -conf int. [li,hi] for each classifier sumli+=li[i]; //sumli[i] =average[i] -1.96*(deviation[i]/Math.sqrt(unlabeled.size()));; ///sumli[i]/= this.numberOfClass; } sumli/=this.numberOfClass; double qi[] = new double[this.numberOfClassifiers]; double qiP[] = new double[this.numberOfClassifiers]; for (int i=0; i< this.numberOfClassifiers ; i++){ if(LiP[i].size() !=0){ qi[i] = Li[i].size() * Math.pow((1-2*(Ei[i]/Li[i].size())), 2); EiP[i] = LiP[i].size() * (1.-sumli); qiP[i] = (Li[i].size()+LiP[i].size())*(1.-((2.*(Ei[i]+EiP[i]))/(Li[i].size()+LiP[i].size()))); // System.out.println("qi -> "+qi[i]); // System.out.println("qiP -> + " + qiP[i]); if(qiP[i]> qi[i] && Li[i].size()<unlabeled.size()){ // System.out.println("Adding"); changes = true; Li[i].add(LiP[i].clone()); Ei[i] = Ei[i]+ EiP[i]; //****************** tranductive = new PrototypeSet(this.transductiveDataSet.clone()); test = new PrototypeSet(this.testDataSet.clone()); traPrediction = null; tstPrediction = null; aciertoTrs = 0; aciertoTst = 0; probabilities = new double[this.numberOfClassifiers][tranductive.size()][this.numberOfClass]; traPrediction = clasificar(Li, tranductive); pertenece = new int[tranductive.size()][this.numberOfClass]; wii= new double[this.numberOfClass]; for(int m=0; m<tranductive.size(); m++){ Arrays.fill(pertenece[m], 0); Arrays.fill(wii,0); for(int j=0; j< this.numberOfClassifiers; j++){ for (int z=0; z < this.numberOfClass; z++){ if(pre[j][m]==z && wi[j]>0.5){ // Allocate this calssifier y group Gj. pertenece[m][z]++; wii[z]+=wi[j]; } } } double countGj[] = new double[this.numberOfClass]; double max= Double.MIN_VALUE; int clase =0; for(int j=0; j< this.numberOfClass; j++){ /* Compute group averge mean confidence */ countGj[j] = (pertenece[m][j]+0.5)/ (pertenece[m][j]+1) * ((wii[j])/pertenece[m][j]); if(countGj[j]>max){ max = countGj[j]; clase = j; } } traPrediction[m]=clase; if(tranductive.get(m).getOutput(0) == traPrediction[m]){ aciertoTrs++; } tranductive.get(m).setFirstOutput(traPrediction[m]); } // Test phase probabilities = new double[this.numberOfClassifiers][test.size()][this.numberOfClass]; tstPrediction = clasificar(Li, test); pertenece = new int[test.size()][this.numberOfClass]; for(int m=0; m<test.size(); m++){ Arrays.fill(pertenece[m], 0); Arrays.fill(wii,0); for(int j=0; j< this.numberOfClassifiers; j++){ for (int z=0; z < this.numberOfClass; z++){ if(pre[j][m]==z && wi[j]>0.5){ // Allocate this calssifier y group Gj. pertenece[m][z]++; wii[z]+=wi[j]; } } } double countGj[] = new double[this.numberOfClass]; double max= Double.MIN_VALUE; int clase =0; for(int j=0; j< this.numberOfClass; j++){ /* Compute group averge mean confidence */ countGj[j] = (pertenece[m][j]+0.5)/ (pertenece[m][j]+1) * ((wii[j])/pertenece[m][j]); if(countGj[j]>max){ max = countGj[j]; clase = j; } } tstPrediction[m]=clase; if(test.get(m).getOutput(0) == tstPrediction[m]){ aciertoTst++; } test.get(m).setFirstOutput(tstPrediction[m]); } // System.out.println("update-Labeled size "+ Li[i].size()); // System.out.println("update-% de acierto TRS = "+ (aciertoTrs*100.)/transductiveDataSet.size()); // System.out.println("update-% de acierto TST = "+ (aciertoTst*100.)/testDataSet.size()); } } // System.out.println("Li[i] size = "+ Li[i].size()); // System.out.println("LiP[i] size = "+ LiP[i].size()); } } // End while no change! // Combining stage. /* PrototypeSet tranductive = new PrototypeSet(this.transductiveDataSet.clone()); PrototypeSet test = new PrototypeSet(this.testDataSet.clone()); double traPrediction[] = null; double tstPrediction[] = null; int aciertoTrs = 0; int aciertoTst = 0; */ tranductive = new PrototypeSet(this.transductiveDataSet.clone()); test = new PrototypeSet(this.testDataSet.clone()); traPrediction = null; tstPrediction = null; aciertoTrs = 0; aciertoTst = 0; probabilities = new double[this.numberOfClassifiers][tranductive.size()][this.numberOfClass]; traPrediction = clasificar(Li, tranductive); pertenece = new int[tranductive.size()][this.numberOfClass]; wii= new double[this.numberOfClass]; for(int i=0; i<tranductive.size(); i++){ Arrays.fill(pertenece[i], 0); Arrays.fill(wii,0); for(int j=0; j< this.numberOfClassifiers; j++){ for (int z=0; z < this.numberOfClass; z++){ if(pre[j][i]==z && wi[j]>0.5){ // Allocate this calssifier y group Gj. pertenece[i][z]++; wii[z]+=wi[j]; } } } double countGj[] = new double[this.numberOfClass]; double max= Double.MIN_VALUE; int clase =0; for(int j=0; j< this.numberOfClass; j++){ /* Compute group averge mean confidence */ countGj[j] = (pertenece[i][j]+0.5)/ (pertenece[i][j]+1) * ((wii[j])/pertenece[i][j]); if(countGj[j]>max){ max = countGj[j]; clase = j; } } traPrediction[i]=clase; if(tranductive.get(i).getOutput(0) == traPrediction[i]){ aciertoTrs++; } tranductive.get(i).setFirstOutput(traPrediction[i]); } // Test phase probabilities = new double[this.numberOfClassifiers][test.size()][this.numberOfClass]; tstPrediction = clasificar(Li, test); pertenece = new int[test.size()][this.numberOfClass]; for(int i=0; i<test.size(); i++){ Arrays.fill(pertenece[i], 0); Arrays.fill(wii,0); for(int j=0; j< this.numberOfClassifiers; j++){ for (int z=0; z < this.numberOfClass; z++){ if(pre[j][i]==z && wi[j]>0.5){ // Allocate this calssifier y group Gj. pertenece[i][z]++; wii[z]+=wi[j]; } } } double countGj[] = new double[this.numberOfClass]; double max= Double.MIN_VALUE; int clase =0; for(int j=0; j< this.numberOfClass; j++){ /* Compute group averge mean confidence */ countGj[j] = (pertenece[i][j]+0.5)/ (pertenece[i][j]+1) * ((wii[j])/pertenece[i][j]); if(countGj[j]>max){ max = countGj[j]; clase = j; } } tstPrediction[i]=clase; if(test.get(i).getOutput(0) == tstPrediction[i]){ aciertoTst++; } test.get(i).setFirstOutput(tstPrediction[i]); } // System.out.println("Labeled size "+ Li[1].size()); System.out.println("% de acierto TRS = "+ (aciertoTrs*100.)/transductiveDataSet.size()); System.out.println("% de acierto TST = "+ (aciertoTst*100.)/testDataSet.size()); // tranductive.print(); // tranductive.save("outputDemocratic.dat"); return new Pair<PrototypeSet,PrototypeSet>(tranductive,test); } /** * General main for all the prototoype generators * Arguments: * 0: Filename with the training data set to be condensed. * 1: Filename which contains the test data set. * 3: Seed of the random number generator. Always. * ************************** * @param args Arguments of the main function. */ public static void main(String[] args) { } }