/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /* ICPL.java Isaac Triguero Velazquez. Created by Isaac Triguero Velazquez 12-3-09 Copyright (c) 2009 __MyCompanyName__. All rights reserved. */ package keel.Algorithms.Instance_Generation.ICPL; import keel.Algorithms.Instance_Generation.Basic.PrototypeSet; import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerator; import keel.Algorithms.Instance_Generation.Basic.Prototype; import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm; import keel.Algorithms.Instance_Generation.*; import java.util.*; import keel.Algorithms.Instance_Generation.utilities.*; import keel.Algorithms.Instance_Generation.utilities.KNN.*; import org.core.*; import java.util.StringTokenizer; /** * * @param numberOfInitialsCentroids * @param k, to use with knn rule in the initialization. * @param GradientStep * @param Temperature * @author Isaac Triguero * @version 1.0 */ public class ICPLGenerator extends PrototypeGenerator { /*Own parameters of the algorithm*/ private int nAlg; // number of ICPL (1,2,3,4) private String method; // number of filtering (ENN, RT2, ACC) private int k; // for k-nn private int Q; // threshold for ACC filtering //others variables. protected int numberOfPrototypes; protected int numberOfClass; /** * Build a new ICPLGenerator Algorithm * */ public ICPLGenerator(PrototypeSet _trainingDataSet, int nalg, String Method, int k, int Q) { super(_trainingDataSet); algorithmName="ICPL"; this.nAlg = nalg; this.method = Method; this.k = k; this.Q = Q; } /** * Build a new RSPGenerator Algorithm * @param t Original prototype set to be reduced. * @param parameters Parameters of the algorithm (only % of reduced set). */ public ICPLGenerator(PrototypeSet t, Parameters parameters) { super(t, parameters); algorithmName="ICPL"; this.nAlg = parameters.getNextAsInt(); this.method = parameters.getNextAsString(); this.k = parameters.getNextAsInt(); this.Q = parameters.getNextAsInt(); this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size(); System.out.println("Isaac dice: nalg= " + this.nAlg+ " method = " + this.method ); System.out.println("Number of class= "+ this.numberOfClass); } /** * ICPL1_Pseudo-code: * ICPL1( T training) 1) C1= abstraccion T 2) C2 = Filtrar T. 3) S = C1 4) Para cada prototipo P en C2 Tmp = S U P. Good = numero de instancias de T correctamente clasificadas por P en Tmp. Bad = numero de instancias de T mal clasificadas por P en Tmp. Si(Good>Bad) S= S u P. 5) Devolver S. */ protected PrototypeSet icpl1_or_3(int num){ PrototypeSet S; PrototypeSet C1, C2,tmp; double Good, Bad; if(num == 1){ System.out.println("Algorithm ICPL 1"); C1 = TPA(); //C1.print(); C2 = filtering(); }else{ System.out.println("Algorithm ICPL 3"); C2 = TPA(); C1 = filtering(); } S = C1.clone(); for(Prototype p: C2){ tmp = S.addPrototype2(p); //obtain the prototype that has p like the NN. Good = 0; Bad = 0; PrototypeSet neig = trainingDataSet.isTheNearPrototype(p); for(Prototype q: neig){ if(q.getOutput(0) == p.getOutput(0)){ Good++; }else Bad++; } //Good = KNN.classficationAccuracy(tmp, trainingDataSet); //Bad = trainingDataSet.size() - Good; if( Good> Bad){ S.add(p); } } return S; } /** * ICPL2 - pseudo-code * ICPL2( T training) 1) C1= abstracci�n T 2) C2 = Filtrar T. 3) S = C1 4) Para cada prototipo P en C2 Tmp = S U P. Good = n�mero de instancias de T correctamente clasificadas por P en Tmp. Bad = n�mero de instancias de T mal clasificadas por P en Tmp. Si(Good>Bad) S= S U P. 5) Para cada prototipo P en C1 Tmp = S\ P. With= n�mero de instancias de T correctamente clasificadas por S Without = n�mero de instancias de T correctament clasificadas por Tmp. Si(Without>With) S= S \ P. 6) Devolver S. */ protected PrototypeSet icpl2_or_4(int num){ PrototypeSet S = new PrototypeSet(); PrototypeSet C1, C2,tmp; double Good, Bad, With, Without; if(num == 2){ C1 = TPA(); C2 = filtering(); }else{ C2 = TPA(); C1 = filtering(); // Change C2 by C1. } S = new PrototypeSet(C1); // S.print(); System.out.println("C1 size =" + C1.size() + " C2size ="+ C2.size()); Prototype p2 = C1.get(1); Good = 0; Bad = 0; for(Prototype p: C2){ tmp = S.addPrototype2(p); Good = 0; Bad = 0; PrototypeSet neig = trainingDataSet.isTheNearPrototype(p); for(Prototype q: neig){ if(q.getOutput(0) == p.getOutput(0)){ Good++; }else Bad++; } //Good = KNN.classficationAccuracy(tmp, trainingDataSet); //Bad = trainingDataSet.size() - Good; if( Good> Bad){ S.add(p); } } for(Prototype p: C1){ tmp = new PrototypeSet(S.without(p)); With = KNN.classficationAccuracy(S, trainingDataSet); Without = KNN.classficationAccuracy(tmp, trainingDataSet); if( Without>= With){ S = new PrototypeSet(tmp); } } return S; } /** * * @param index * @return */ protected double typicality(int index){ double result =0.0; double avgDifClass=0.0; PrototypeSet sameClass, differentClass; Prototype initial = trainingDataSet.get(index); sameClass = trainingDataSet.getFromClass(initial.getOutput(0)).without(initial); //same class (without the prototype) differentClass = trainingDataSet.getAllDifferentFromClass(initial.getOutput(0)); for(Prototype p: sameClass){ result +=1-Distance.d(initial, p); } result/= sameClass.size(); for(Prototype p: differentClass){ avgDifClass +=1- Distance.d(initial,p); } avgDifClass /= differentClass.size(); //System.out.println("typicality index= "+ index + " = " + result/avgDifClass); return result/avgDifClass; } /** * Identify a border by typicaly. * * @return a boolean vector indicate which prototypes are border prototypes. */ protected boolean[] identifyBorder(PrototypeSet pClass[]){ boolean borders[] = new boolean [trainingDataSet.size()]; double typ[] = new double[trainingDataSet.size()]; double Tmean[], Tsd[]; Arrays.fill(borders, false); for(int i = 0; i< trainingDataSet.size();i++){ //trainingDataSet.get(i).setIndex(i); // Establish the index, useful later. typ[i] = typicality(i); } //pClass = new PrototypeSet[this.numberOfClass]; Tmean = new double[this.numberOfClass]; Tsd = new double[this.numberOfClass]; //Arrays.fill(Tmean, 0); //Arrays.fill(Tsd,0); for(int i=0; i< this.numberOfClass; i++){ pClass[i] = new PrototypeSet(trainingDataSet.getFromClass(i)); Arrays.fill(Tmean, 0); Arrays.fill(Tsd,0); for (int m = 0; m < pClass[i].size(); m++){ //System.out.println(pClass[i].get(m).getIndex()); Lo de los index va de PM Tmean[i] += typ[ pClass[i].get(m).getIndex()]; } Tmean[i] /= pClass[i].size(); for(int m=0; m < pClass[i].size(); m++){ double aux= typ[ pClass[i].get(m).getIndex()] - Tmean[i]; aux *= aux; // ^2 Tsd[i] += aux; } Tsd[i] /= pClass[i].size(); System.out.println("Tmean = "+ Tmean[i] + " Tsd = "+ Tsd[i]); //Sort class C instances in descending order of typicality. // Bubble sort. for (int k = 0; k < pClass[i].size(); k++) { for (int j = 0; j < pClass[i].size()-1; j++) { if( typ[pClass[i].get(j).getIndex()] < typ[pClass[i].get(j+1).getIndex()]){ //here we need the index Prototype aux =pClass[i].get(j); pClass[i].set(j, pClass[i].get(j+1)); pClass[i].set(j+1,aux); } } } } // End for each class C. //for(int i=0 ; i< pClass[1].size(); i++){ // System.out.println(typ[pClass[0].get(i).getIndex()]); //} for(int i=0; i< this.numberOfClass; i++){ for(int j= 0; j< pClass[i].size(); j++){ // For each instace I of class i int T. if(typ[pClass[i].get(j).getIndex()]< (Tmean[i]-Tsd[i])){ borders[pClass[i].get(j).getIndex()] = true; }else{ borders[pClass[i].get(j).getIndex()] = false; } } } return borders; } /** * TPA. * */ protected PrototypeSet TPA(){ PrototypeSet S= new PrototypeSet(); boolean borders[]; PrototypeSet pClass[] = new PrototypeSet[this.numberOfClass]; borders = identifyBorder(pClass); //this method has sorted pClass PrototypeSet process = new PrototypeSet(); PrototypeSet merge = new PrototypeSet(); // Need to merge process for (int i=0; i< this.numberOfClass; i++){ //pClass[i] = new PrototypeSet(trainingDataSet.getFromClass(i)); //System.out.println("Class i =" + i ) ; for(int k= 0; k< pClass[i].size(); k++){ Prototype instance = pClass[i].get(k); // First instance with class i. //System.out.println("instance index =" + instance.getIndex()); //System.out.println(typicality(pClass[i].get(k).getIndex())); if(!borders[instance.getIndex()]){ // For each NON-BORDER if(!process.contains(instance)){ process.add(instance); // Denote like process. Prototype P = Merge(S,borders,instance,merge); if(!trainingDataSet.contains(P)){ // if P has been merged, abstract S.add(P); } } } } } System.out.println ( "S size = " + S.size()); System.out.println("Abstracction Accuracy % " +accuracy(S,trainingDataSet)); //System.out.println("abstraction Reduction % " + (100-(S.size()*100)/trainingDataSet.size()) ); System.out.println("data retention rate " + (S.size()*1.)/trainingDataSet.size()); return S; } /** * Merge * @param S * @param borders * @param I * @return */ protected Prototype Merge(PrototypeSet S, boolean borders[], Prototype I, PrototypeSet merge){ Prototype P = new Prototype(); PrototypeSet tData = new PrototypeSet(trainingDataSet); Prototype N, M; double Clase; //PrototypeSet merge = new PrototypeSet(); // We must save the prototypes merge. P = I; N = tData.nearestTo(I); // System.out.println("N index =" + N.getIndex()); //if(N.equals(trainingDataSet.get(N.getIndex()))) System.out.println("Todo OK"); Clase = I.getOutput(0); //int num_merges = 0; //boolean mergeBefore = false; while( (N.getOutput(0)!= Clase) || (!borders[N.getIndex()]) ) { //System.out.println("N index =" + N.getIndex()); if(N.getOutput(0) != Clase){ //System.out.println("Border or noisy"); //It may be either a border point or a noise. // N = next nearest neighbor tData = tData.without(N); N = tData.nearestTo(I); // N.print(); if( N.getOutput(0) != Clase){ // if the next nearest neighbor is different class, it's a border, not noise. // System.out.println("Nun merges hecho en este bucle " + num_merges); return P; // N is a border point } // else, it will be noise, discard N. } if(borders[N.getIndex()]){ return P; }else{ if(!merge.contains(N)){ // here is the mistake. //System.out.println("Entrooo"); //mergeBefore = true; //P.add(N); P = P.avg(N); merge.add(N); // save N like a prototype before merge. // num_merges++; tData = tData.without(N); N = tData.nearestTo(I); }else{ //System.out.println("Entrooo"); if(S.size() > 0){ M= S.nearestTo(N); //M = S.containing(N); P = P.avg(M); // num_merges++; S.remove(M); } // System.out.println("Nun merges hecho en este bucle " + num_merges); return P; } } } //System.out.println("Nun merges hecho en este bucle " + num_merges); return P; } /** * Method to call the appropriate method * @return */ protected PrototypeSet filtering(){ PrototypeSet result = new PrototypeSet(); //System.out.println("metodo="+this.method+"!"); if(this.method.equals("ENN")){ result = ENN(trainingDataSet); }else if(this.method.equals("ACC")){ result = ACC(trainingDataSet); }else if(this.method.equals("RT2") ){ result = RT2(trainingDataSet); } //result.print(); System.out.println("Filtering Accuracy % " +accuracy(result,trainingDataSet)); System.out.println("Filtering Reduction % " + (100-(result.size()*100)/trainingDataSet.size()) ); System.out.println("FILTERING retention rate " + (result.size()*1.)/trainingDataSet.size()); return result; } /** * * Edited nearest neighbor of T. * @return */ protected PrototypeSet ENN (PrototypeSet T) { //T.print(); PrototypeSet Sew = new PrototypeSet (T); //this.k = 7; // Elimination rule kohonen int majority = this.k / 2 + 1; // System.out.println("Mayor�a " + majority); int toClean[] = new int [T.size()]; Arrays.fill(toClean, 0); int pos = 0; for ( Prototype p : T){ double class_p = p.getOutput(0); PrototypeSet neighbors = KNN.knn(p, trainingDataSet, this.k); int counter= 0; for(Prototype q1 :neighbors ){ double class_q1 = q1.getOutput(0); if(class_q1 == class_p){ counter++; } } //System.out.println("Misma clase = "+ counter); if ( counter < majority){ // We must eliminate this prototype. toClean [pos] = 1; // we will clean } pos++; } //Clean the prototypes. PrototypeSet aux= new PrototypeSet(); for(int i= 0; i< toClean.length;i++){ if(toClean[i] == 0) aux.add(T.get(i)); } //Remove aux prototype set Sew = aux; //System.out.println("Result of filtering"); //Sew.print(); return Sew; } /** * Retaining Center Instances * @param T * @return */ protected PrototypeSet ACC (PrototypeSet T) { PrototypeSet result = new PrototypeSet(); int accuracy[] = new int [T.size()]; Arrays.fill(accuracy, 0); int pos = 0; for(Prototype p: T){ Prototype near = T.nearestTo(p); // Without itself //System.out.println("near index = "+ near.getIndex()); if(p.getOutput(0) == near.getOutput(0) ){ accuracy[near.getIndex()]++; } pos++; } for(int i = 0; i< T.size(); i++){ if(accuracy[i] > this.Q){ result.add(T.get(i)); } } return result; } /** * Return all the prototype in (this) that has other like the nearest neighbor * @return */ protected PrototypeSet associatesPrototype(PrototypeSet one,Prototype other){ PrototypeSet result = new PrototypeSet(); for(Prototype p: one){ PrototypeSet nearest = KNN.getNearestNeighbors(p, one, this.k); if(nearest.contains(other)){ // if one of k neares neighbor is the prototype. is an associates. result.add(p); } } return result; } /** * Retaining Border instances * @param T * @return */ protected PrototypeSet RT2 (PrototypeSet T) { PrototypeSet result = new PrototypeSet(); result = ENN(T); // ENN is applied first to filter noise. //System.out.println("Result size tras ENN "+ result.size()); //Stablish index. to result of ENN for(int i = 0; i< result.size(); i++) result.get(i).setIndex(i); // Bubble sort in descening order by the distnce of an instace to its nearest unlike neighbor. double DistunlikeNeighbor[] = new double [result.size()]; int pos =0; for(Prototype p: result){ Prototype nearestUnlike = KNN.getNearestWithDifferentClassAs(p, result); DistunlikeNeighbor[pos] = Distance.d(p, nearestUnlike); pos++; } // sort descending order b distance of an instance to this nearest unlike neigbor. for (int k = 0; k < result.size(); k++) { for (int j = 0; j < result.size()-1; j++) { if(DistunlikeNeighbor[result.get(j).getIndex()] < DistunlikeNeighbor[result.get(j+1).getIndex()] ){ Prototype aux = result.get(j); result.set(j, result.get(j+1)); result.set(j+1,aux); } } } // now we calculate the associates for each PrototypeSet associates[] = new PrototypeSet[result.size()]; pos = 0; for(Prototype p : result){ associates[pos] = associatesPrototype(result,p); pos++; } int majorityK = this.k/2 +1; int toClean[] = new int [result.size()]; Arrays.fill(toClean, 0); for(int i=0; i< result.size();i++){ Prototype p = result.get(i); int majority = associates[i].size()/2 + 1; int asociatesFail = 0; PrototypeSet withoutP = result.without(p); double class_p = p.getOutput(0); // we must to check associates are able to classify correct. for(int j=0; j< associates[i].size(); j++){ PrototypeSet newNeighbors = KNN.getNearestNeighbors(associates[i].get(j), withoutP, this.k); int counter= 0; for(Prototype q1 : newNeighbors ){ double class_q1 = q1.getOutput(0); if(class_q1 == class_p){ counter++; } } if(counter < majorityK){ // it able to classify without p. asociatesFail++; } } if(asociatesFail >= majority){ toClean[i] = 1; } } PrototypeSet clean = new PrototypeSet(); for(int i=0; i< result.size(); i++){ if(toClean[i] == 0){ clean.add(result.get(i)); } } //result = new PrototypeSet(clean); return clean; } @SuppressWarnings({ "unchecked", "static-access" }) public PrototypeSet reduceSet() { System.out.print("\nThe algorithm is starting...\n Computing...\n"); System.out.println("Number of class "+ this.numberOfClass); // trainingDataSet.print(); //Stablish index. for(int i = 0; i< trainingDataSet.size(); i++) trainingDataSet.get(i).setIndex(i); PrototypeSet outputDataSet = new PrototypeSet(); if ( this.nAlg == 1 || this.nAlg == 3) outputDataSet= icpl1_or_3(this.nAlg); else outputDataSet = icpl2_or_4(this.nAlg); // outputDataSet.print(); System.out.println("Accuracy % " +accuracy(outputDataSet,trainingDataSet)); System.out.println("Reduction % " + (100-(outputDataSet.size()*100)/trainingDataSet.size()) ); //RT2(trainingDataSet); /* boolean marcas[]; marcas = new boolean[outputDataSet.size()]; Arrays.fill(marcas, true); double accuracyInic =KNN.classficationAccuracy(outputDataSet, trainingDataSet); double accuracy; for(int i=0; i< outputDataSet.size(); i++){ marcas[i] = false; //At the begining you don't think you can elimante. PrototypeSet leaveOneOut = outputDataSet.without(outputDataSet.get(i)); accuracy = KNN.classficationAccuracy(leaveOneOut, trainingDataSet); if(accuracy > accuracyInic){ marcas[i] = true; // we can eliminate } } //Then we create the result set.. PrototypeSet clean = new PrototypeSet(); for(int i=0; i< marcas.length; i++){ if(!marcas[i]){ clean.add(outputDataSet.get(i)); } } System.out.println("Accuracy % " +accuracy(clean,trainingDataSet)); System.out.println("Reduction % " + (100-(clean.size()*100)/trainingDataSet.size()) ); */ return outputDataSet; } /** * General main for all the prototoype generators * Arguments: * 0: Filename with the training data set to be condensed. * 1: Filename which contains the test data set. * 3: Seed of the random number generator. Always. * ************************** * 4: .Number of blocks * @param args Arguments of the main function. */ public static void main(String[] args) { Parameters.setUse("ICPL", "<seed> <Number of neighbors>\n<Swarm size>\n<Particle Size>\n<MaxIter>\n<DistanceFunction>"); Parameters.assertBasicArgs(args); PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]); PrototypeSet test = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]); long seed = Parameters.assertExtendedArgAsInt(args,2,"seed",0,Long.MAX_VALUE); ICPLGenerator.setSeed(seed); // int blocks =Parameters.assertExtendedArgAsInt(args,10,"number of blocks", 1, Integer.MAX_VALUE); //String[] parametersOfInitialReduction = Arrays.copyOfRange(args, 4, args.length); //System.out.print(" swarm ="+swarm+"\n"); ICPLGenerator generator = new ICPLGenerator(training, 1, "ENN", 4,40); PrototypeSet resultingSet = generator.execute(); //resultingSet.save(args[1]); //int accuracyKNN = KNN.classficationAccuracy(resultingSet, test, k); int accuracy1NN = KNN.classficationAccuracy(resultingSet, test); generator.showResultsOfAccuracy(Parameters.getFileName(), accuracy1NN, test); } }