/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/*
RSP.java
Isaac Triguero Velazquez.
Created by Isaac Triguero Velazquez 2-3-09
Copyright (c) 2009 __MyCompanyName__. All rights reserved.
*/
package keel.Algorithms.Instance_Generation.RSP;
import keel.Algorithms.Instance_Generation.Basic.PrototypeSet;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerator;
import keel.Algorithms.Instance_Generation.Basic.Prototype;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm;
import keel.Algorithms.Instance_Generation.Chen.ChenGenerator;
import keel.Algorithms.Instance_Generation.HYB.HYBGenerator;
import keel.Algorithms.Instance_Generation.*;
import java.util.*;
import keel.Algorithms.Instance_Generation.utilities.*;
import keel.Algorithms.Instance_Generation.utilities.KNN.*;
import java.util.StringTokenizer;
/**
*
* Implements the RSP (Reduction by Space Partitioning) prototype generator.
* RSP1/RSP2 split the training data into a fixed number of blocks, choosing the
* next block to split by its overlapping degree or by its diameter, while RSP3
* (numberOfBlocks = 0) keeps splitting until every block is class-homogeneous.
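*
* A minimal usage sketch (the file name and block count are hypothetical),
* mirroring what {@code main} does:
* <pre>
* PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet("train.dat");
* RSPGenerator.setSeed(1);
* RSPGenerator generator = new RSPGenerator(training, 20, "diameter");
* PrototypeSet reduced = generator.execute();
* </pre>
*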
* @author Isaac Triguero
* @version 1.0
*/
public class RSPGenerator extends PrototypeGenerator {
/* Own parameters of the algorithm */
/** Number of blocks to build (0 selects RSP3: split until every block is homogeneous). */
private int numberOfBlocks;
/** Size of the generated (reduced) prototype set. */
protected int numberOfPrototypes;
/** Parameters of the initial reduction process. */
private String[] paramsOfInitialReducction = null;
/** Criterion used to select the next block to split: "diameter" or overlapping. */
private String Subset_choice = "diameter";
/**
* Builds a new RSPGenerator algorithm.
* @param _trainingDataSet Original prototype set to be reduced.
* @param blocks Number of blocks (0 selects RSP3).
* @param choice Criterion for choosing the next block to split ("diameter" or overlapping).
*/
public RSPGenerator(PrototypeSet _trainingDataSet, int blocks, String choice)
{
super(_trainingDataSet);
algorithmName="RSP";
this.numberOfBlocks = blocks;
this.Subset_choice = choice;
}
/**
* Builds a new RSPGenerator algorithm.
* @param t Original prototype set to be reduced.
* @param parameters Parameters of the algorithm (number of blocks and subset choice).
*/
public RSPGenerator(PrototypeSet t, Parameters parameters)
{
super(t, parameters);
algorithmName="RSP";
this.numberOfBlocks = parameters.getNextAsInt();
this.Subset_choice= parameters.getNextAsString();
System.out.println("Isaac dice: numberOFblock= " + this.numberOfBlocks + " choice = " + this.Subset_choice);
}
/**
* Edited Nearest Neighbor (ENN) filter of T: a prototype is removed when fewer
* than 2 of its 3 nearest neighbors in the training set share its class.
* @param T Prototype set to be edited.
* @return Edited copy of T.
*/
protected PrototypeSet ENN (PrototypeSet T)
{
//T.print();
PrototypeSet Sew = new PrototypeSet (T);
//this.k = 7;
// Majority threshold for k = 3 neighbors: 3/2 + 1 = 2 (integer division).
int majority = 3/2 + 1;
// System.out.println("Majority " + majority);
int toClean[] = new int [T.size()];
Arrays.fill(toClean, 0);
int pos = 0;
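// For each prototype, count how many of its 3 nearest neighbors in the
// training set share its class and mark it for removal if that count is
// below the majority threshold.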
for ( Prototype p : T){
double class_p = p.getOutput(0);
PrototypeSet neighbors = KNN.knn(p, trainingDataSet, 3);
int counter= 0;
for(Prototype q1 :neighbors ){
double class_q1 = q1.getOutput(0);
if(class_q1 == class_p){
counter++;
}
}
//System.out.println("Misma clase = "+ counter);
if ( counter < majority){ // We must eliminate this prototype.
toClean [pos] = 1; // we will clean
}
pos++;
}
//Clean the prototypes.
PrototypeSet aux= new PrototypeSet();
for(int i= 0; i< toClean.length;i++){
if(toClean[i] == 0)
aux.add(T.get(i));
}
//The surviving prototypes become the edited set.
Sew = aux;
//System.out.println("Result of filtering");
//Sew.print();
return Sew;
}
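/**
* Generates a reduced prototype set by the RSP method: the training set is first
* edited with ENN, then recursively split into blocks, and finally one centroid
* per class present in each block is returned.
* @return Reduced set produced by RSP.
*/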
@SuppressWarnings({ "unchecked", "static-access" })
public PrototypeSet reduceSet()
{
System.out.print("\nThe algorithm is starting...\n Computing...\n");
System.out.print("\nEditing algorithm is needed...\n Computing...\n");
trainingDataSet = new PrototypeSet(ENN(trainingDataSet));
/*
RSP3:
In this approach the initial parameter 'b' is not needed. As long as a block
contains a mixture of two or more classes, that block will be split, so the
final number of blocks is not known a priori.
For example, suppose a block holds 5 examples of class A and 4 of class B.
You split it, compute the two farthest prototypes, and obtain one sub-block
with 3 examples of class A plus the 4 of class B, while the other keeps the
rest (2 of class A). The first sub-block will have to be split again (when its
turn comes, depending on its overlapping and/or diameter compared with the
others); the second one is already homogeneous.
*/
if(this.numberOfBlocks == 0){
System.out.println("Executing RSP3 with " + this.Subset_choice);
}else if("diameter".equals(this.Subset_choice)){
System.out.println("Executing RSP2");
}else{
System.out.println("Executing RSP1");
}
// Variables.
int bc ; // Current number of subsets of T.
int i ;
ArrayList<PrototypeSet> C = new ArrayList<PrototypeSet>(this.numberOfBlocks);// To save the subsets.
PrototypeSet B = new PrototypeSet( trainingDataSet.clone());//note: hard-copy --> B = T
for( i=0; i<B.size(); ++i)
B.get(i).setIndex(i);
//for ( bc=1; bc < this.numberOfBlocks; bc++){
// C.add(new PrototypeSet());
// }
i=0;
Prototype p1,p2;
Pair<Prototype,Prototype> farthestPair = B.farthestPrototypes(); // compute once
p1 = farthestPair.first();
p2 = farthestPair.second();
boolean everyHomogenity = false;
boolean rsp3 = false;
if(this.numberOfBlocks == 0){ //RSP3
rsp3 = true; // Always
}
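// Main splitting loop: split the current block B at its two farthest prototypes,
// then choose the next block to split (preferring blocks that still mix classes)
// according to the selected criterion (diameter or overlapping degree).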
// In RSP3 mode the loop ends only when every subset is homogeneous.
for ( bc=1; (bc <this.numberOfBlocks || (rsp3 && !everyHomogenity)); bc++){ //
//System.out.println("PRototypos mas lejanos son " + p1.getIndex() + " y " + p2.getIndex());
Pair<PrototypeSet,PrototypeSet> Di = B.partIntoSubsetsWhichSeedPointsAre(p1.formatear(),p2.formatear());
C.remove(B);
PrototypeSet D1 = Di.first();
PrototypeSet D2 = Di.second();
C.add(D1);
C.add(D2);
//Before the next iteration, if rsp3, check whether every subset is homogeneous.
if(rsp3){
everyHomogenity = true;
for(PrototypeSet pSet : C)
{
if(!pSet.homogeneity())
everyHomogenity = false;
}
}
// Candidate subsets: I1 holds subsets containing several classes, I2 the homogeneous ones.
ArrayList<PrototypeSet> I = null;
ArrayList<PrototypeSet> I1 = new ArrayList<PrototypeSet>();
ArrayList<PrototypeSet> I2 = new ArrayList<PrototypeSet>();
//System.out.println("C.size ="+ C.size());
for(PrototypeSet pSet : C)
{
if(pSet.containsSeveralClasses())
I1.add(pSet);
else
I2.add(pSet);
}
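// Prefer splitting subsets that still mix several classes (I1); fall back to
// the homogeneous ones (I2) only when no mixed subset remains.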
if(I1.size() != 0)
I=I1;
else
I =I2;
double distMax = -1.0;
PrototypeSet Cj =(PrototypeSet) I.get(0);
Pair<Prototype,Prototype> diameterPoints = null;
/* This only changes the selection of the most suitable block among those
available (step 9 of the algorithm).
Choose between overlapping degree or the largest farthest-pair distance.
*/
for(PrototypeSet q : I)
{
if(q.size()>1)// Edge case: skip subsets with only one prototype.
{
Pair<Prototype,Prototype> farthest = q.farthestPrototypes();
double curDist;
if ("diameter".equals(this.Subset_choice)){
curDist= Distance.d(farthest.first().formatear(), farthest.second().formatear());
}else{ // If you use overlapping.
curDist = q.Overlapping();
//System.out.println ("Over = "+ curDist);
}
if(distMax < curDist)
{
distMax = curDist;
Cj = q;
diameterPoints = farthest;
}
}
}
B = Cj;
if(diameterPoints != null){
p1 = diameterPoints.first();
p2 = diameterPoints.second();
}else{
// Edge case: no splittable subset remains, so finish.
everyHomogenity = true;
}
}
//Build the reduced set: one centroid per class present in each final subset.
int numberOfClass = trainingDataSet.get(0).possibleValuesOfOutput().size();
PrototypeSet result = new PrototypeSet();
for(i=0; i<bc; ++i)
{
for( int j= 0; j< numberOfClass; j++){
PrototypeSet aux = C.get(i).getFromClass(j);
//Calculate centroid.
if(aux.size()>0){ // Check that this class is present in the subset.
Prototype averaged = aux.avg();
result.add(averaged.formatear()); // Format the averaged prototype here.
}
}
}
PrototypeSet nominalPopulation;
nominalPopulation = new PrototypeSet();
nominalPopulation.formatear(result);
numberOfPrototypes = result.size();
System.err.println("\nTraining accuracy (nominal population): " + RSPGenerator.accuracy(nominalPopulation, trainingDataSet) );
// nominalPopulation.print();
System.out.println("Reduction %, result set = "+((trainingDataSet.size()-numberOfPrototypes)*100)/trainingDataSet.size()+ "\n");
return nominalPopulation;
/*
System.out.println("% de acierto en training " + RSPGenerator.accuracy(result, trainingDataSet) );
numberOfPrototypes = result.size();
return result;
*/
}
/**
* General main for all the prototype generators.
* Arguments:
* 0: Filename with the training data set to be condensed.
* 1: Filename which contains the test data set.
* 2: Seed of the random number generator.
* 10: Number of blocks.
* @param args Arguments of the main function.
*/
public static void main(String[] args)
{
Parameters.setUse("RSP", "<seed>\n<Number of blocks>");
Parameters.assertBasicArgs(args);
PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]);
PrototypeSet test = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]);
long seed = Parameters.assertExtendedArgAsInt(args,2,"seed",0,Long.MAX_VALUE);
RSPGenerator.setSeed(seed);
int blocks =Parameters.assertExtendedArgAsInt(args,10,"number of blocks", 1, Integer.MAX_VALUE);
//String[] parametersOfInitialReduction = Arrays.copyOfRange(args, 4, args.length);
//System.out.print(" swarm ="+swarm+"\n");
RSPGenerator generator = new RSPGenerator(training,blocks , "diameter");
PrototypeSet resultingSet = generator.execute();
//resultingSet.save(args[1]);
//int accuracyKNN = KNN.classficationAccuracy(resultingSet, test, k);
int accuracy1NN = KNN.classficationAccuracy(resultingSet, test);
generator.showResultsOfAccuracy(Parameters.getFileName(), accuracy1NN, test);
}
}