/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
S. Garc�a (sglopez@ujaen.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Instance_Generation.PNN;
import keel.Algorithms.Instance_Generation.Basic.PrototypeSet;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerator;
import keel.Algorithms.Instance_Generation.Basic.Prototype;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm;
import keel.Algorithms.Instance_Generation.utilities.KNN.KNN;
import keel.Algorithms.Instance_Generation.*;
import keel.Algorithms.Instance_Generation.utilities.*;
import org.core.*;
import java.util.*;
/**
* Prototypes for Nearest Neighbor Classifiers (Chang para los colegas)
* @author diegoj
*/
public class PNNGenerator extends PrototypeGenerator
{
/** Informs if the algorithm must generate a specified number of prototypes. */
protected boolean useNumberOfPrototypes = false;
/** Number of prototypes to be generated. */
protected int numberOfPrototypes = 10;
/**
* Build a new algorithm PNNGenerator that will reduce a prototype set.
* @param tSet Traning data set to be reduced.
*/
public PNNGenerator(PrototypeSet tSet)
{
super(tSet);
this.algorithmName="PNN";
useNumberOfPrototypes = false;
}
/**
* Build a new algorithm PNNGenerator that will reduce a prototype set.
* @param tSet Traning data set to be reduced.
* @param numberOfProts Number of prototypes to be generated.
*/
public PNNGenerator(PrototypeSet tSet, int numberOfProts)
{
super(tSet);
useNumberOfPrototypes = true;
Debug.force(numberOfProts < tSet.size(), "Number of prototypes desired is bigger than actual size of the training data set");
numberOfPrototypes = numberOfProts;
this.algorithmName="PNN";
}
/**
* Build a new algorithm PNNGenerator that will reduce a prototype set.
* @param tSet Traning data set to be reduced.
* @param percentageOfPrototypes Percentage of prototypes of training to be generated.
*/
public PNNGenerator(PrototypeSet tSet, double percentageOfPrototypes)
{
super(tSet);
useNumberOfPrototypes = true;
numberOfPrototypes = this.getSetSizeFromPercentage(percentageOfPrototypes);
this.algorithmName="PNN";
}
/**
* Build a new algorithm PNNGenerator that will reduce a prototype set.
* @param parameters Parameters needed for the algoritm, in this case, random seedDefaultValueList only.
*/
public PNNGenerator(PrototypeSet _trainingDataSet, Parameters parameters)
{
super(_trainingDataSet, parameters);
this.algorithmName="PNN";
useNumberOfPrototypes=false;
if(parameters.existMore())
{
useNumberOfPrototypes=true;
numberOfPrototypes = parameters.getNextAsInt();
}
}
/**
* Returns the two nearest prototypes in two different sets.
* @param A Set which first prototype belongs to.
* @param B Set which second prototype belongs to.
* @return A pair which elements are the nearest prototypes in A and B (first and second, resp.).
*/
protected Pair<Prototype,Prototype> nearestPrototypesIn(PrototypeSet A, PrototypeSet B, MatrixOfDistances m)
{
double minimumDist = Double.MAX_VALUE;
Pair<Prototype, Prototype> nearest = new Pair<Prototype,Prototype>(A.get(0),B.get(0));
for(Prototype a : A)
for(Prototype b : B)
{
double dist = m.get(a, b);
if (dist < minimumDist)
{
minimumDist = dist;
nearest = new Pair<Prototype, Prototype>(a, b);
}
}
return nearest;
}
/**
* Performs the maximum reduction of the training data set by the PNNGenerator (aka Chang) method.
* @return Reduced prototype set by Chang's method.
*/
protected PrototypeSet maximumReduction()
{
PrototypeSet A = new PrototypeSet();
PrototypeSet B = trainingDataSet.copy();
//Weight used in the centroid operation
HashMap<Prototype,Double> W = new HashMap<Prototype,Double>();
for(Prototype b : B)
W.put(b, 1.0);
int counterOfMerges = 0;
//int i=0, k=0;
//int currentAccuracy = absoluteAccuracy(B, trainingDataSet);
do
{
Prototype arbitraryPoint = B.removeRandom();
A.add(arbitraryPoint);
//Debug.println("Iteración " + (i++));
counterOfMerges = 0;
//k=0;
MatrixOfDistances dist = new MatrixOfDistances(A,B);
while(B.size()>0)
{
//Debug.println("SubIteración " + (k++));
Pair<Prototype,Prototype> nearest = nearestPrototypesIn(A,B,dist);
Prototype p = nearest.first();
Prototype q = nearest.second();
if(p.label() == q.label())
{
Prototype pStar = Prototype.avg(p, W.get(p), q, W.get(q));
//Debug.endsIf(A==null, "es null");
//PrototypeSet X = A.join(q);
//Debug.endsIf(X==null, "X es null");
int currentAccuracy = absoluteAccuracy(A.join(q), trainingDataSet);
int newAccuracy = absoluteAccuracy(A.join(pStar), trainingDataSet);
//currentAccuracy = newAccuracy;
//Debug.errorln("Mejora? " + (newAccuracy >= currentAccuracy));
if(newAccuracy < currentAccuracy)
{
A.add(q);
B.remove(q);
}
else
{
W.put(pStar, W.get(p)+W.get(q));
A.remove(p);
B.remove(q);
dist.removeFromA(p);
dist.removeFromB(q);
A.add(pStar);
dist.addToA(pStar);
++counterOfMerges;
}
}
else
{
A.add(q);
B.remove(q);
}
}//del while(B.size()>0)
if(counterOfMerges>0)
{
B = A;
A = new PrototypeSet();
}
}
while(counterOfMerges>0);
return A;
}
/**
* Performs a reduction of the training data set by the PNNGenerator (aka Chang) method. Stopped by reaching a specified number of prototypes.
* @return Reduced prototype set by Chang's method limited by the number of prototypes.
*/
protected PrototypeSet controlledReduction()
{
PrototypeSet A = new PrototypeSet();
PrototypeSet B = trainingDataSet.copy();
//Weight used in the centroid operation
HashMap<Prototype,Double> W = new HashMap<Prototype,Double>();
for(Prototype b : B)
W.put(b, 1.0);
int counterOfMerges = 0;
//int i=0, k=0;
//int currentAccuracy = absoluteAccuracy(B, trainingDataSet);
boolean sizeReached = false;
do
{
Prototype arbitraryPoint = B.removeRandom();
A.add(arbitraryPoint);
//Debug.println("Iteración " + (i++));
counterOfMerges = 0;
//k=0;
MatrixOfDistances dist = new MatrixOfDistances(A,B);
while(B.size()>0 && !sizeReached)
{
//Debug.println("SubIteración " + (k++));
//Debug.errorln("A.size(): " + A.size() + " de " + numberOfPrototypes);
Pair<Prototype,Prototype> nearest = nearestPrototypesIn(A,B,dist);
Prototype p = nearest.first();
Prototype q = nearest.second();
if(p.label() == q.label())
{
Prototype pStar = Prototype.avg(p, W.get(p), q, W.get(q));
W.put(pStar, W.get(p)+W.get(q));
//Debug.endsIf(A==null, "es null");
//PrototypeSet X = A.join(q);
//Debug.endsIf(X==null, "X es null");
int currentAccuracy = absoluteAccuracy(A.join(q), trainingDataSet);
int newAccuracy = absoluteAccuracy(A.join(pStar), trainingDataSet);
//currentAccuracy = newAccuracy;
//Debug.errorln("Mejora? " + (newAccuracy >= currentAccuracy));
if(newAccuracy < currentAccuracy)
{
A.add(q);
B.remove(q);
}
else
{
A.remove(p);
B.remove(q);
dist.removeFromA(p);
dist.removeFromB(q);
A.add(pStar);
dist.addToA(pStar);
++counterOfMerges;
}
}
else
{
A.add(q);
B.remove(q);
}
sizeReached = (A.size() == numberOfPrototypes);
}//del while(B.size()>0 && !sizeReached)
if(counterOfMerges>0 && !sizeReached)
{
B = A;
A = new PrototypeSet();
}
}
while(counterOfMerges>0 && !sizeReached);
//Debug.errorln("Acurracy of PNNGenerator " + PNNGenerator.accuracy(A, trainingDataSet));
return A;
}
/**
* Performs a reduction of the training data set by the PNNGenerator (aka Chang) method. It can use early stopping of the method.
* @return Reduced prototype set by Chang's method limited by the number of prototypes.
*/
@Override
public PrototypeSet reduceSet()
{
PrototypeSet reduced = null;
if(this.useNumberOfPrototypes)
reduced = controlledReduction();
else
reduced = maximumReduction();
return reduced;
}
/**
* General main for all the prototoype generators
* Arguments:
* 0: Filename with the training data set to be condensed.
* 1: Filename wich contains the test data set.
* 3: Seed of the Random Number Generator.
* 4: number of prototypes to be generated (OPTIONAL)
* @param args Arguments of the main function.
*/
public static void main(String[] args)
{
Debug.setStdDebugMode(false);
Parameters.setUse("PNN", "<seed> [percentageOfPrototypes]");
Parameters.assertBasicArgs(args);
//Debug.set(true);
PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]);
PrototypeSet test = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]);
long seed = Parameters.assertExtendedArgAsInt(args,2,"seed",0,Long.MAX_VALUE);
PNNGenerator.setSeed(seed);
PNNGenerator generator = null;
if(args.length >= 4)
{
double pc = Parameters.assertExtendedArgAsDouble(args,3,"percentage of prototypes",0,100);
//Debug.errorln("Use " + num + " prototypes");
generator = new PNNGenerator(training, pc);
}
else
generator = new PNNGenerator(training);
PrototypeSet resultingSet = generator.execute();
int accuracy1NN = KNN.classficationAccuracy1NN(resultingSet, test);
generator.showResultsOfAccuracy(Parameters.getFileName(), accuracy1NN, test);
//generator.showResultsOfAccuracy(accuracyKNN, accuracy1NN, KNN.k(), test);
}
}