/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.Instance_Generation.VQ; import keel.Algorithms.Instance_Generation.Basic.PrototypeSet; import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerator; import keel.Algorithms.Instance_Generation.Basic.Prototype; import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm; import keel.Algorithms.Instance_Generation.*; import keel.Algorithms.Instance_Generation.utilities.*; import keel.Algorithms.Instance_Generation.LVQ.*; import keel.Algorithms.Instance_Generation.utilities.KNN.*; import java.util.*; /** * AVQ prototype generator. * @author diegoj */ public class AVQGenerator extends PrototypeGenerator { /** Partition of the training data set used as training. */ protected PrototypeSet T = null; /** Partition of the training data set used as validation set. */ protected PrototypeSet V = null; /** Reduced data set. */ protected PrototypeSet reduced = null; /** Percentage of the original set used in the initial partition. */ protected double percentageInitPartition = 80.0; /** Epsilon parameter of the LBG partition algorithm. */ double epsilonLBG = 0.4; /** Number of iterations of the AVQGenerator. */ protected int numberOfIterations = 1000; /** * Constructor of the AVQGenerator. * @param tDataSet Training data set. * @param parameters Parameters of the algorithm. */ public AVQGenerator(PrototypeSet tDataSet, Parameters parameters) { super(tDataSet, parameters); algorithmName = "AVQ"; this.percentageInitPartition = parameters.getNextAsDouble(); this.numberOfIterations = parameters.getNextAsInt(); this.epsilonLBG = parameters.getNextAsDouble(); //Debug.errorln("% init part " +percentageInitPartition); //Debug.errorln("num_iter " +this.numberOfIterations); //Debug.errorln("epsilonLBG " +this.epsilonLBG); } /** * Constructor of the AVQGenerator. * @param tDataSet Training data set. * @param percentPart Percentage of the first set in the initial partition of training data set. * @param numIterations Number of iterations of the method. * @param epsilonLBG Epsilon parameter of the LBG. */ public AVQGenerator(PrototypeSet tDataSet, double percentPart, int numIterations, double epsilonLBG) { super(tDataSet); this.algorithmName = "AVQ"; this.percentageInitPartition = percentPart; this.numberOfIterations = numIterations; this.epsilonLBG = epsilonLBG; } /** * Count prototypes whose nearest prototype is the given. * @param center Given prototype. * @param set Set to be tested. * @return Number of prototypes of the set whose nearest neighbor is center. */ protected static int countPrototypesWhichNearestIs(Prototype center, PrototypeSet set) { int count = 0; for(Prototype p : set) { double dCenter = Distance.d(p, center); Prototype nearest = set.nearestTo(p); double dNearest = Distance.d(p, nearest); if(dCenter <= dNearest) ++count; } return count; } /** * Generate the R-count: prototypes which its centroid is its nearest prototypes. * @param c Cluster to be examinated. * @return Number of clusters which its centroid its the nearest prototypes. */ protected int R(Cluster c) { PrototypeSet setC = c.getPrototypeSet(); //Debug.errorln("setC"); int count = 0; for(Prototype p : setC) if(c.isCentroidItsNearestPrototoype(p)) ++count; return count; } /** * Generate the Q-count: prototypes which its nearest is the center of the cluster, and not any of the other reduced-prototypes. * @param reduced Reduced data set. * @param center Center of the prototype set. * @return Number of clusters which its centroid its the nearest prototypes. */ protected int Q(PrototypeSet reduced, Prototype center) { int count = 0; //PrototypeSet Tc = T.getFromClass(center.label()); PrototypeSet Tc = T; for(Prototype p : Tc) { Prototype nearest = reduced.nearestTo(p); double dNearest = Distance.d(p, nearest); double dCenter = Distance.d(p, center); if(dCenter < dNearest) ++count; } return count; } /** * Performs Q - R for a cluster. * @param c Cluster to be computed Q - R. */ protected int incorrectlyClassifiedSamples(Cluster c) { return R(c) - Q(reduced, c.center()); } /** * Reduce the data set by the AVQGenerator method. * @return Reduced data set by the AVQGenerator method. */ @Override public PrototypeSet reduceSet() { boolean forcedEnd = false; ArrayList<Double> classes = Prototype.possibleValuesOfOutput(); reduced = new PrototypeSet(classes.size());//final reduced set Pair<PrototypeSet,PrototypeSet> parted = trainingDataSet.makePartition(percentageInitPartition); T = parted.first(); V = parted.second(); ArrayList<PrototypeSet> classPartition = T.classPartition(); ArrayList<Cluster> clusters = new ArrayList<Cluster>(classPartition.size()); for(PrototypeSet ps : classPartition) { //Debug.errorln("Añado al cluster: center"); Prototype center_ps = ps.avg(); reduced.add(center_ps); clusters.add(new Cluster(center_ps, ps)); } //Now each class has got a cluster with a centroid boolean end = false; int it=0; int i = 0; double errAnt = Double.NEGATIVE_INFINITY; while(!end) { int Emax = Integer.MIN_VALUE; Cluster Cmax = null; //Debug.errorln("Iteración " + it); //Debug.errorln("I " + (i++)); //Buscamos el máximo Ec for(Cluster c : clusters) { //Debug.errorln("Cluster "); int Ec = incorrectlyClassifiedSamples(c); //Debug.errorln("Ec es " + Ec); if(Emax < Ec) { Emax = Ec; Cmax = c; } } //Debug.endsIf(Cmax == null, "Cmax es null"); reduced.remove(Cmax.center()); //Debug.errorln("LBG"); //Pair<Cluster, Cluster> pair = Cmax.divideByLBG(); int Cmax_size = Cmax.size(); if(Cmax_size > 2) { Pair<Prototype,Prototype> newCenters = Cmax.centersOfLBGCLuster(epsilonLBG); Prototype newCenter1 = newCenters.first(); Prototype newCenter2 = newCenters.second(); //Prototype newCenter1 =Cmax.getPrototypeSet().getRandom(); //Prototype newCenter2 =Cmax.getPrototypeSet().getRandom(); reduced.uniqueAdd(newCenter1); reduced.uniqueAdd(newCenter2); } else if(Cmax_size==2) { reduced.uniqueAdd(Prototype.avg(Cmax.get(0), Cmax.get(1))); } else { reduced.uniqueAdd(Cmax.get(0)); forcedEnd = true; } //clusters.add(pair.first()); //clusters.add(pair.second()); int err = AVQGenerator.absoluteAccuracyAndError(reduced, V).second(); //Debug.errorln(err +">=?"+errAnt+" : " + (err >= errAnt)); if(err >= errAnt) it++; else { it=0; errAnt = err; } end = (it >= numberOfIterations) || forcedEnd; } return reduced; } /** * General main for all the prototoype generators * Arguments: * 0: Filename with the training data set to be condensed. * 1: Filename wich contains the test data set * 2: Seed of the random generator. * 3: Number of prototypes to be generated. * @param args Arguments of the main function. */ public static void main(String[] args) { Parameters.setUse("AVQ", "<seed> <number of prototypes>"); Parameters.assertBasicArgs(args); PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]); PrototypeSet test = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]); long seed = Parameters.assertExtendedArgAsInt(args,2,"seed",0,Long.MAX_VALUE); double percentPart = Parameters.assertExtendedArgAsDouble(args,3,"percentage of partition",0,100); int n = Parameters.assertExtendedArgAsInt(args,4,"number of iterations", 1, Integer.MAX_VALUE); double eLBG = Parameters.assertExtendedArgAsDouble(args,5,"epsilon of the LBG partition algorithm", 0, 1); AVQGenerator.setSeed(seed); AVQGenerator generator = new AVQGenerator(training, percentPart, n, eLBG); PrototypeSet resultingSet = generator.execute(); resultingSet.save("resultados_avq.txt"); //System.err.println(resultingSet.toString()); //System.err.println("-------------------------------------------------"); int accuracy1NN = KNN.classficationAccuracy(resultingSet, test); generator.showResultsOfAccuracy(Parameters.getFileName(), accuracy1NN, test); } }//end-of-AVQGenerator