/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.Instance_Generation.Chen; import keel.Algorithms.Instance_Generation.Basic.PrototypeSet; import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerator; import keel.Algorithms.Instance_Generation.Basic.Prototype; import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm; import keel.Algorithms.Instance_Generation.*; import keel.Algorithms.Instance_Generation.utilities.Distance; import java.util.*; import keel.Algorithms.Instance_Generation.utilities.*; import keel.Algorithms.Instance_Generation.utilities.KNN.*; import org.core.*; /* * To change this template, choose Tools | Templates * and open the template in the editor. */ /** * ChenGenerator prototype reducition algorithm * @author diegoj */ public class ChenGenerator extends PrototypeGenerator { /** Number of prototypes to be generated. */ protected int numberOfPrototypes; /** * Build a new ChenGenerator Algorithm * @param t Original prototype set to be reduced. * @param n_d Number of prototypes to be generated. */ public ChenGenerator(PrototypeSet t, int n_d) { super(t); algorithmName="Chen"; numberOfPrototypes = n_d; } /** * Build a new ChenGenerator Algorithm * @param t Original prototype set to be reduced. * @param percSet Reduction percentage of the prototype set. */ public ChenGenerator(PrototypeSet t, double percSet) { super(t); algorithmName="Chen"; this.numberOfPrototypes = getSetSizeFromPercentage(percSet); } /** * Build a new ChenGenerator Algorithm * @param t Original prototype set to be reduced. * @param params Parameters of the algorithm (only % of reduced set). */ public ChenGenerator(PrototypeSet t, Parameters params) { super(t, params); algorithmName="Chen"; numberOfPrototypes = getSetSizeFromPercentage(params.getNextAsDouble()); } /** * Generate a reduced prototype set by the ChenGenerator method. * @return Reduced set by ChenGenerator's method. */ @Override public PrototypeSet reduceSet() { PrototypeSet D = trainingDataSet.clone();//note: hard-copy for(int i=0; i<D.size(); ++i) D.get(i).setIndex(i); int Np = numberOfPrototypes; ArrayList<PrototypeSet> C = new ArrayList<PrototypeSet>(Np); Prototype P1 = null; Prototype P2 = null; System.err.println("Le he dicho tamaño " + Np); for(int Nc=0; Nc<Np; Nc++) { //Obtiene los 2 prototipos más lejanos Pair<Prototype,Prototype> Pi = D.farthestPrototypes(); P1 = Pi.first(); P2 = Pi.second(); //Haz partición de conjunto en función a la distancia a los prototipos //anteriormente calculados (los más lejanos) Pair<PrototypeSet,PrototypeSet> Di = D.partIntoSubsetsWhichSeedPointsAre(P1,P2); PrototypeSet D1 = Di.first(); PrototypeSet D2 = Di.second(); //System.err.println("Iteración " + Nc + " Tam C " + C.size()); //System.err.println("PRototypos mas lejanos son " + P1.getIndex() + " y " + P2.getIndex()); C.remove(D); C.add(D1); C.add(D2); //Encuentra el conjunto menos homogéneo ArrayList<PrototypeSet> I = null; ArrayList<PrototypeSet> I1 = new ArrayList<PrototypeSet>(); ArrayList<PrototypeSet> I2 = new ArrayList<PrototypeSet>(); //System.out.println("C.size ="+ C.size()); for(PrototypeSet pSet : C) { if(pSet.containsSeveralClasses()) I1.add(pSet); else I2.add(pSet); } I=I1; if(I1.isEmpty()) I = I2; //System.err.println("I1 " + I1.size()); //System.err.println("I2 " + I2.size()); // Encuentra el conjunto en I con los 2 puntos más lejanos double distMax = -1.0; PrototypeSet Qchosen = I.get(0); Pair<Prototype,Prototype> diameterPoints = null; for(PrototypeSet q : I) { if(q.size()>1)//limit-chase. Prototype set with only 1 element { Pair<Prototype,Prototype> farthest = q.farthestPrototypes(); double curDist = Distance.d(farthest.first().formatear(), farthest.second().formatear()); if(distMax < curDist) { distMax = curDist; Qchosen = q; diameterPoints = farthest; } } } D = Qchosen; P1 = diameterPoints.first(); P2 = diameterPoints.second(); }//loop-for-end PrototypeSet result = new PrototypeSet(Np); for(int i=0; i<Np; ++i) { Prototype averaged = C.get(i).avg(); double averagedClass = C.get(i).mostFrequentClass(); averaged.setLabel(averagedClass); result.add(averaged.formatear()); //System.out.println("Prototipo " + i + " tiene clase " + averagedClass); } //System.err.println("% de acierto en training " + ChenGenerator.accuracy(result, trainingDataSet) ); return result; } /** * General main for all the prototoype generators * Arguments: * 0: Filename with the training data set to be condensed. * 1: Filename wich contains the test data set. * 3: Seed of the random number generator. * 4: Number of prototypes to be generated. * @param args Arguments of the main function. */ public static void main(String[] args) { Parameters.setUse("Chen", "<seed> <percentage of prototypes>"); Parameters.assertBasicArgs(args); PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]); PrototypeSet test = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]); long seed = Parameters.assertExtendedArgAsInt(args,2,Parameters.SEED_TXT,0,Long.MAX_VALUE); double percSize = Parameters.assertExtendedArgAsDouble(args,3, Parameters.PERC_SIZE_TXT, 0, 100); ChenGenerator.setSeed(seed); ChenGenerator generator = new ChenGenerator(training, percSize); PrototypeSet resultingSet = generator.execute(); //resultingSet.save("resultados_chen.txt"); //String filename = args[0] +"_"+ resultingSet.size()+"_CHEN.sel"; //resultingSet.save(filename); //System.out.println(resultingSet.toString()); //int accuracyKNN = KNN.classficationAccuracy(resultingSet, test, k); int accuracy1NN = KNN.classficationAccuracy(resultingSet, test); generator.showResultsOfAccuracy(Parameters.getFileName(), accuracy1NN, test); //generator.showResultsOfAccuracy(accuracyKNN, accuracy1NN, k, test); } }