/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.Instance_Generation.GENN; import keel.Algorithms.Instance_Generation.Basic.PrototypeSet; import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerator; import keel.Algorithms.Instance_Generation.Basic.Prototype; import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm; import keel.Algorithms.Instance_Generation.*; import keel.Algorithms.Instance_Generation.utilities.KNN.*; import org.core.*; import java.util.*; import keel.Algorithms.Instance_Generation.utilities.*; /** * Generalized Edited Nearest Neighbor * @author diegoj */ public class GENNGenerator extends PrototypeGenerator { /** Number of neighbors selected in the underlying KNN. */ protected int k = 4; // Index of the parameter k. //public static final int IK = 2; /** * Constructor of GENNGenerator objects. * @param tr Training Data Set. * @param k Number of neighbors selected in the underlying KNN */ public GENNGenerator(PrototypeSet tr, int k) { super(tr); algorithmName = "GENN";//Name of the algorithm this.k = k; } /** * Constructor of GENNGenerator objects. * @param tr Training Data Set. * @param param Parameters needed of GENNGenerator. */ public GENNGenerator(PrototypeSet tr, Parameters param) { super(tr, param); algorithmName = "GENN";//Name of the algorithm this.k = param.getNextAsInt(); } /** * Informs if there are a majority of prototypes whose class is the same as other prototype. * @param current Prototype which class is used. * @param protSet Set which the search will be performed. * @return TRUE if the number of prototypes whith current's class is equal or greather than 50%. */ protected boolean majorityOfSameClass(Prototype current, PrototypeSet protSet) { double currentLabel = current.label(); int protSet_size = protSet.size(); int count = 0; for(Prototype p : protSet) if(currentLabel == p.label()) ++count; return count > protSet_size/2; } /** * Reduce the set by the GENNGenerator method. * @return Reduced data set by the GENNGenerator method. */ @Override public PrototypeSet reduceSet() { int _size = trainingDataSet.size(); PrototypeSet result = new PrototypeSet(_size); result.add(trainingDataSet); KNN.setK(k); //Groups of k prototypes int numberOfGroups = _size / k; int i = 0; int index = RandomGenerator.Randint(0, _size); while(i<numberOfGroups)//O(n_g*k*n_e) { //WARNING! //The current implementation depends on the extracting order //of the algorithm. In other case, use trainingDataSet.get(index) //and KNN.knn(current, trainingDataSet) Prototype current = result.get(index); PrototypeSet neighbors = KNN.knn(current, result); //There are majority class. All group is classified as this frequent class if(majorityOfSameClass(current, neighbors)) { double maxFreqClass = neighbors.mostFrequentClass(); //Debug.errorln("MaxFreqClass " + maxFreqClass); //Sets each element class to the maximum frequent class for(Prototype p : neighbors) p.setLabel(maxFreqClass); current.setLabel(maxFreqClass); } else //There are no majority class. All the group is deleted { //System.out.println("REMOVE"); result.remove(index); for(Prototype p: neighbors)// O(k*n) result.remove(p);// } index = RandomGenerator.Randint(0, result.size()-1); ++i; } return result; } /** * General main for GENNGenerator prototoype generators * Arguments: * 0: Filename with the training data set to be condensed. * 1: Filename wich contains the test data set. * 2: Seed used in the random generator. * 3: k (size of neighborhood in KNN). * @param args Arguments of the main function. */ public static void main(String[] args) { Parameters.setUse("GENN", "<seed> <k (size of neighborhood in KNN)>"); Parameters.assertBasicArgs(args); //System.out.println("Using k = " + k); //Parameters param = new Parameters(args); long seed = Parameters.assertExtendedArgAsInt(args,2,"seed",0,Long.MAX_VALUE); GENNGenerator.setSeed(seed); int k = Parameters.assertExtendedArgAsInt(args, 3, "size of the neighborhood", 1, 10); PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]); PrototypeSet test = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]); GENNGenerator generator = new GENNGenerator(training, k); //resultingSet.save(args[1]); PrototypeSet resultingSet = generator.execute(); //System.out.println(resultingSet.toString()); //KNN.setK(3); //int accuracyKNN = KNN.classficationAccuracy(resultingSet, test); int accuracy1NN = KNN.classficationAccuracy1NN(resultingSet, test); generator.showResultsOfAccuracy(Parameters.getFileName(), accuracy1NN, test); //generator.showResultsOfAccuracy(accuracyKNN, accuracy1NN, KNN.k(), test); } }// end of the GENNGenerator class