/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. Sánchez (luciano@uniovi.es) J. Alcalá-Fdez (jalcala@decsai.ugr.es) S. García (sglopez@ujaen.es) A. Fernández (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /* SETRED.java Isaac Triguero Velazquez. Created by Isaac Triguero Velazquez 11-1-2011 Copyright (c) 2008 __MyCompanyName__. All rights reserved. */ package keel.Algorithms.Semi_Supervised_Learning.SETRED; import keel.Algorithms.Semi_Supervised_Learning.Basic.NormalDistribution; import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeSet; import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeGenerator; import keel.Algorithms.Semi_Supervised_Learning.Basic.Prototype; import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeGenerationAlgorithm; import keel.Algorithms.Semi_Supervised_Learning.*; import java.util.*; import keel.Algorithms.Semi_Supervised_Learning.utilities.*; import keel.Algorithms.Semi_Supervised_Learning.utilities.KNN.*; import keel.Dataset.Attributes; import org.core.*; import org.core.*; import java.util.StringTokenizer; /** * This class implements the Self-traning wrapper. You can use: Knn, C4.5, SMO and Ripper as classifiers. * @author triguero * */ public class SETREDGenerator extends PrototypeGenerator { /*Own parameters of the algorithm*/ private int numberOfselectedExamples; private int MaxIter; private String classifier; private double threshold; protected int numberOfPrototypes; // Particle size is the percentage protected int numberOfClass; /** Parameters of the initial reduction process. */ private String[] paramsOfInitialReducction = null; /** * Build a new SETREDGenerator Algorithm * @param t Original prototype set to be reduced. * @param perc Reduction percentage of the prototype set. */ public SETREDGenerator(PrototypeSet _trainingDataSet, int neigbors,int poblacion, int perc, int iteraciones, double c1, double c2, double vmax, double wstart, double wend) { super(_trainingDataSet); algorithmName="SETRED"; } /** * Build a new SETREDGenerator Algorithm * @param t Original prototype set to be reduced. * @param unlabeled Original unlabeled prototype set for SSL. * @param params Parameters of the algorithm (only % of reduced set). */ public SETREDGenerator(PrototypeSet t, PrototypeSet unlabeled, PrototypeSet test, Parameters parameters) { super(t,unlabeled, test, parameters); algorithmName="SETRED"; this.numberOfselectedExamples = parameters.getNextAsInt(); this.MaxIter = parameters.getNextAsInt(); this.threshold = parameters.getNextAsDouble(); //Last class is the Unknown this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size(); System.out.print("\nIsaacSSL dice: " + this.numberOfselectedExamples+ ", "+ this.numberOfClass +"\n"); } public long factorial (int x){ long factor =1; if(x!=0){ for (int i=1; i<=x ; i++){ factor*=i; } } return factor; } public double bernuilli(double prob, int n, int x){ double bernuilli = 1; double f1 =factorial(n-x), f2 = factorial(x), f3=factorial(n); if(f1!=0 && f2!=0 && f3!=0){ bernuilli *= f3/(f2*f1); bernuilli *= Math.pow(prob, x) * Math.pow(1-prob, n-x); }else{ bernuilli=1; } return bernuilli; } /** * Apply the SelfTrainingGenerator method. * @return */ public Pair<PrototypeSet, PrototypeSet> applyAlgorithm() { System.out.print("\nThe algorithm SELF TRAINING is starting...\n Computing...\n"); PrototypeSet labeled; PrototypeSet unlabeled; labeled = new PrototypeSet(trainingDataSet.getAllDifferentFromClass(this.numberOfClass)); // Selecting labeled prototypes from the training set. unlabeled = new PrototypeSet(trainingDataSet.getFromClass(this.numberOfClass)); // Accuracy with initial labeled data. System.out.println("AccTrs with initial labeled data ="+ KNN.classficationAccuracy(labeled,this.transductiveDataSet,1)*100./this.transductiveDataSet.size()); System.out.println("AccTst with initial labeled data ="+ KNN.classficationAccuracy(labeled,this.testDataSet,1)*100./this.testDataSet.size()); //labeled.print(); //unlabeled.print(); System.out.println("Labeled size = " +labeled.size()); System.out.println("Unlabeled size = " + unlabeled.size()); // kj is the number of prototypes added from class j, that it must be propornotional to its ratio. double kj[] = new double[this.numberOfClass]; double proportion[] = new double[this.numberOfClass]; double minimo = Double.MAX_VALUE; for(int i=0; i<this.numberOfClass; i++){ if(labeled.getFromClass(i).size() == 0){ proportion[i] = 0; }else{ proportion[i] = (labeled.getFromClass(i).size()*1./labeled.size()); } if(proportion[i]<minimo && proportion[i]!=0){ minimo = proportion[i]; } //System.out.println(kj[i]); } double maximoKj = 0; // The minimum ratio is establish to this.numberOfselectedExamples for(int i=0; i<this.numberOfClass; i++){ kj[i] = Math.round(proportion[i]/minimo); maximoKj+=kj[i]; // System.out.println(kj[i]); } for (int i=0; i<this.MaxIter && unlabeled.size()>maximoKj; i++){ //For each class, we select the nearest unlabeled example. PrototypeSet labeledPrima = new PrototypeSet(); double confidence[][] = new double[unlabeled.size()][this.numberOfClass]; boolean condicionFIN = false; double contadorClase[] = new double[this.numberOfClass]; Arrays.fill(contadorClase, 0); while(!condicionFIN){ for (int q=0; q<unlabeled.size(); q++){ // for each unlabeled. Prototype NearClass[] = new Prototype[this.numberOfClass]; double sumatoria = 0; for (int j=0 ; j< this.numberOfClass; j++){ if(labeled.getFromClass(j).size() >0){ NearClass[j] = new Prototype (labeled.getFromClass(j).nearestTo(unlabeled.get(q))); confidence[q][j] = Math.exp(-1*(Distance.absoluteDistance(NearClass[j], unlabeled.get(q)))); sumatoria+= confidence[q][j]; }else{ confidence[q][j] =0; } } for (int j=0 ; j< this.numberOfClass; j++){ confidence[q][j]/=sumatoria; } } // selecting best kj[j] prototypes. // determine who are the best prototypes PrototypeSet best[] = new PrototypeSet[this.numberOfClass]; double maximoClase[] = new double[this.numberOfClass]; int indexClase[] = new int[this.numberOfClass]; Arrays.fill(maximoClase, Double.MIN_VALUE); for (int q=0; q<unlabeled.size(); q++){ // for each unlabeled. for (int j=0 ; j< this.numberOfClass; j++){ if(confidence[q][j]> maximoClase[j]){ maximoClase[j] = confidence[q][j]; indexClase[j] = q; } } } for (int j=0 ; j< this.numberOfClass; j++){ if(contadorClase[j]< kj[j]){ Prototype nearUnlabeled = new Prototype(unlabeled.get(indexClase[j])); Prototype clase = labeled.nearestTo(nearUnlabeled); nearUnlabeled.setFirstOutput(clase.getOutput(0)); labeledPrima.add(new Prototype(nearUnlabeled)); contadorClase[(int)clase.getOutput(0)]++; } } //Then we have to clean the unlabeled have to clean. for (int j=0 ; j< labeledPrima.size(); j++){ unlabeled.removeWithoutClass(labeledPrima.get(j)); } condicionFIN = true; //System.out.println(contadorClase[0]); for(int j=0; j< this.numberOfClass && condicionFIN; j++){ if(contadorClase[j] >= kj[j]){ condicionFIN = true; }else{ condicionFIN = false; } } if (unlabeled.size()< maximoKj){ condicionFIN = true; } } // END CONDITION /* for (int j=0; j<this.numberOfClass; j++){ for (int k=0; k< kj[j]; k++){ Prototype nearUnlabeled = new Prototype(unlabeled.nearestTo(labeled.getFromClass(j))); Prototype clase = labeled.nearestTo(nearUnlabeled); unlabeled.removeWithoutClass(nearUnlabeled); //First, You have to clean. if(clase.getOutput(0)==j){ nearUnlabeled.setFirstOutput(j); labeledPrima.add(nearUnlabeled); } } }// END For each class */ PrototypeSet labeledUnion = new PrototypeSet(labeled.clone()); labeledUnion.add(labeledPrima); //System.out.println("Labeled size = " +labeled.size()); // System.out.println("Unlabeled size = " + unlabeled.size()); //Now, SETRED applies a Data editing technique. //Construction of a neighborhood graph boolean adjacencia [][] = new boolean[labeledUnion.size()][labeledUnion.size()]; for(int l=0; l<labeledUnion.size(); l++){ Arrays.fill(adjacencia[l], false); } //Calculing all the distances: double dist[][] = new double[labeledUnion.size()][labeledUnion.size()]; for(int p=0; p<labeledUnion.size(); p++){ for(int q=0; q<labeledUnion.size(); q++){ if(p!=q){ dist[p][q]=Distance.absoluteDistance(labeledUnion.get(q), labeledUnion.get(p)); } } } //Build a neighborhood graph for(int p=0; p<labeledUnion.size(); p++){ for(int q=0; q<labeledUnion.size(); q++){ if(p!=q){ boolean edge = true; for(int n=0; n<labeledUnion.size() && edge; n++){ if(n!=p && n!=q){ if(dist[p][q]> Math.max(dist[p][n], dist[q][n])){ edge = false; } } } adjacencia[p][q] = edge; } } } //End Graph-Construcction. // For each prototype of L' //weights are 1/(1+dist[p][q]) // In kj[i] we have the proportion of examples of this class. double sumCutEdge[] = new double[labeledPrima.size()]; double sumCutEdgeCuadrado[] = new double[labeledPrima.size()]; double expectation[] = new double[labeledPrima.size()]; double variance[] = new double[labeledPrima.size()]; double observation[] = new double[labeledPrima.size()]; double Z[] = new double[labeledPrima.size()]; double p_value[] = new double[labeledPrima.size()]; int cont =labeledPrima.size()-1; // System.out.println("Número a añadir= "+ labeledPrima.size()); for(int p=labeledUnion.size()-1; p>=(labeledUnion.size()-labeledPrima.size()); p--){ sumCutEdge[cont] = 0; sumCutEdgeCuadrado[cont] = 0; int adjacentes =0; // Calcular Vecindario. Y cutEdges. for(int q=0; q<labeledUnion.size(); q++){ if(adjacencia[p][q]){ // if this instance belongs to its neighborhood adjacentes++; if(labeledUnion.get(p).getOutput(0)!=labeledUnion.get(q).getOutput(0)){ sumCutEdge[cont] += 1./(1+dist[p][q]); sumCutEdgeCuadrado[cont] += (1./(1+dist[p][q])) * (1./(1+dist[p][q])); // System.out.println("Tengo cut-edges"); } } } // adjacentes tiene el tamaño del vecindario. int contador =0; // to determine the number in the neighborhood for(int q=0; q<labeledUnion.size(); q++){ if(adjacencia[p][q]){ // if this instance belongs to its neighborhood contador++; // 1- // System.out.println("Bernuilii -> proportion = " + (1-proportion[(int)labeledUnion.get(p).getOutput(0)])+", N = "+ adjacentes + ", X= " +contador); observation[cont] += (1./(1+dist[p][q]))* bernuilli(1-proportion[(int)labeledUnion.get(p).getOutput(0)], adjacentes, contador); //*Ibernuilli } } // System.out.println("SymCut Edge ->"+ sumCutEdge[cont]); expectation[cont] = sumCutEdge[cont]*(1.-proportion[(int)labeledUnion.get(p).getOutput(0)]); variance[cont] = sumCutEdgeCuadrado[cont]* proportion[(int)labeledUnion.get(p).getOutput(0)]* (1-proportion[(int)labeledUnion.get(p).getOutput(0)]); Z[cont] = (observation[cont]-expectation[cont])/ Math.sqrt(variance[cont]); // System.out.println("Z ->"+ Z[cont]); NormalDistribution normal = new NormalDistribution(); normal.setMean(expectation[cont]); normal.setSigma(Math.sqrt(variance[cont])); p_value[cont] = normal.getTipifiedProbability(Z[cont], false); // System.out.println("P-value= "+ normal.getTipifiedProbability(Z[cont], false)); cont--; } for(int l=0; l<labeledPrima.size(); l++){ if(p_value[l]>this.threshold){ labeled.add(labeledPrima.get(l)); } /*else{ System.out.println("No lo Añado, está a la izquierda"); }*/ } //System.out.println("Labeled size = "+labeled.size()); //System.out.println("UNLabeled size = "+unlabeled.size()); } System.out.println("Labeled size = "+labeled.size()); System.out.println("UNLabeled size = "+unlabeled.size()); PrototypeSet tranductive = new PrototypeSet(this.transductiveDataSet.clone()); PrototypeSet test = new PrototypeSet(this.testDataSet.clone()); //We have to return the classification done. for(int i=0; i<this.transductiveDataSet.size(); i++){ tranductive.get(i).setFirstOutput((labeled.nearestTo(this.transductiveDataSet.get(i))).getOutput(0)); } for(int i=0; i<this.testDataSet.size(); i++){ test.get(i).setFirstOutput((labeled.nearestTo(this.testDataSet.get(i))).getOutput(0)); } // Transductive Accuracy System.out.println("AccTrs ="+KNN.classficationAccuracy(labeled,this.transductiveDataSet,1)*100./this.transductiveDataSet.size()); // test accuracy System.out.println("AccTst ="+KNN.classficationAccuracy(labeled,this.testDataSet,1)*100./this.testDataSet.size()); return new Pair<PrototypeSet,PrototypeSet>(tranductive,test); } /** * General main for all the prototoype generators * Arguments: * 0: Filename with the training data set to be condensed. * 1: Filename which contains the test data set. * 3: Seed of the random number generator. Always. * ************************** * @param args Arguments of the main function. */ public static void main(String[] args) { } }