/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /* * To change this template, choose Tools | Templates * and open the template in the editor. */ package keel.Algorithms.Instance_Generation.BTS3; import keel.Algorithms.Instance_Generation.Basic.PrototypeSet; import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerator; import keel.Algorithms.Instance_Generation.Basic.Prototype; import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm; import keel.Algorithms.Instance_Generation.*; import keel.Algorithms.Instance_Generation.utilities.*; import org.core.*; //import java.util.ArrayList; import keel.Algorithms.Instance_Generation.utilities.KNN.*; /** * Prototoype generator by the Boostrap algorithm (BST3) * @author diegoj */ public class BTS3Generator extends PrototypeGenerator { /** Size of the generated set. */ protected int numberOfPrototypesGenerated = 5; /** Number of bootstrapings of the algorithm. */ protected int randomTrials = 10; /** Size of the merged data set. */ protected int nearestNeighbors = 1; /** * Constructor * @param _trainingDataSet Traning data set. * @param parameters Parameters of the algorithm. */ public BTS3Generator(PrototypeSet _trainingDataSet, Parameters parameters) { super(_trainingDataSet, parameters); algorithmName="BTS3"; this.numberOfPrototypesGenerated = this.getSetSizeFromPercentage(parameters.getNextAsDouble()); this.nearestNeighbors = parameters.getNextAsInt(); this.randomTrials = parameters.getNextAsInt(); } /** * Constructor * @param _trainingDataSet Traning data set. * @param Np Number of prototypes to be generated. * @param nearestNeighbors Nearest-neighbors used to assign class to each prototype generated. * @param trials Random trials to be performed during execution. */ public BTS3Generator(PrototypeSet _trainingDataSet,int Np, int nearestNeighbors, int trials) { super(_trainingDataSet); algorithmName="BTS3"; this.numberOfPrototypesGenerated = Np; this.nearestNeighbors = nearestNeighbors; this.randomTrials = trials; } /** * Constructor * @param _trainingDataSet Traning data set. * @param percSize Percentage of the size of training set that will be the reduced set. * @param nearestNeighbors Nearest-neighbors used to assign class to each prototype generated. * @param trials Random trials to be performed during execution. */ public BTS3Generator(PrototypeSet _trainingDataSet, double percSize, int nearestNeighbors, int trials) { super(_trainingDataSet); algorithmName="BTS3"; this.numberOfPrototypesGenerated = this.getSetSizeFromPercentage(percSize); this.nearestNeighbors = nearestNeighbors; this.randomTrials = trials; } /** * Returns a new data set with each prototype is a centroid of their nearest-neighbors with itself. * @param original Prototype set to be condensed. * @return Merged, condensed prototype set of the original set. */ PrototypeSet mergePrototypesWithNNMethod(PrototypeSet original) { PrototypeSet merged = new PrototypeSet(); for(Prototype p : original) { PrototypeSet neighborsOfP = KNN.getNearestNeighborsWithSameClassAs(p,original,nearestNeighbors); neighborsOfP.add(p); Prototype mixed = neighborsOfP.avg().formatear();//media de todos los vecinos + p merged.add(mixed); } return merged; } /** * Reduce the training data set by the Hamamoto et al. Bootstrap method * @return Reduced prototype data set that is the condensed input data set. */ @Override public PrototypeSet reduceSet() { boolean useApriori = true; PrototypeSet best = null; int bestAccuracy = -1; //for each random trial used for(int i=0; i<randomTrials; ++i) { //1. Select a random sample (with a-priory probability????) PrototypeSet selected = super.selecRandomSet(numberOfPrototypesGenerated, useApriori); //2. Merge each point (in selected set) with its k nearest-neighbors PrototypeSet merged = mergePrototypesWithNNMethod(selected); //3. Do randomTrials test with the 1-NN rule on the original data set //to see which class is the best to each prototype of the merged prototype set PrototypeSet finalSet = new PrototypeSet(); for(Prototype p : merged) { Prototype nearest = KNN._1nn(p, trainingDataSet); finalSet.add(nearest); } int finalSetAccuracy = absoluteAccuracy(finalSet, trainingDataSet); //Debug.println("ANTES " + finalSetAccuracy+" vs "+bestAccuracy); if(finalSetAccuracy > bestAccuracy) { //Debug.println(finalSetAccuracy+" vs "+bestAccuracy); bestAccuracy = finalSetAccuracy; best = finalSet; } //Debug.println("Trial "+i); } //Debug.endsIf(best==null, "best is null"); return best; } /** * General main for all the prototoype generators * Arguments: * 0: Filename with the training data set to be condensed. * 1: Filename wich will contain the test data set * 2: Seed of the random generator. * 3: Number of prototypes to be generated. * 4: Nearest-Neighbors used in the internal KNN use. * 5: Random Trials (number of bootstrappings performed). * @param args Arguments of the main function. */ public static void main(String[] args) { Parameters.setUse("BTS3", "<seed> <percentage of prototypes generated> <Nearest-neightbors (size of the merged sets)> <random trials>"); Parameters.assertBasicArgs(args); //Debug.setStdDebugMode(false); PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]); PrototypeSet test = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]); long seed = Parameters.assertExtendedArgAsInt(args,2,Parameters.SEED_TXT,0,Long.MAX_VALUE); BTS3Generator.setSeed(seed); double percNprot = Parameters.assertExtendedArgAsDouble(args,3,Parameters.PERC_SIZE_TXT, 0, 100); int k = Parameters.assertExtendedArgAsInt(args,4,"Nearest-neighbors used in the merging process", 1, 5); int randomTrials = Parameters.assertExtendedArgAsInt(args,5,"number of random trials", 1, Integer.MAX_VALUE); BTS3Generator generator = new BTS3Generator(training, percNprot, k, randomTrials); PrototypeSet resultingSet = generator.execute(); //int accuracyKNN = KNN.classficationAccuracy(resultingSet, test); int accuracy1NN = KNN.classficationAccuracy(resultingSet, test); generator.showResultsOfAccuracy(Parameters.getFileName(), accuracy1NN, test); //generator.showResultsOfAccuracy(accuracyKNN, accuracy1NN, KNN.k(), test); } }