/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
S. Garc�a (sglopez@ujaen.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/*
ENPC.java
Isaac Triguero Velazquez.
Created by Isaac Triguero Velazquez 20-3-09
Copyright (c) 2009 __MyCompanyName__. All rights reserved.
*/
package keel.Algorithms.Instance_Generation.ENPC;
import keel.Algorithms.Instance_Generation.Basic.PrototypeSet;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerator;
import keel.Algorithms.Instance_Generation.Basic.Prototype;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm;
import keel.Algorithms.Instance_Generation.Chen.ChenGenerator;
import keel.Algorithms.Instance_Generation.HYB.HYBGenerator;
import keel.Algorithms.Instance_Generation.*;
import java.util.*;
import keel.Algorithms.Instance_Generation.utilities.*;
import keel.Algorithms.Instance_Generation.utilities.KNN.*;
import org.core.*;
import org.core.*;
import java.util.StringTokenizer;
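/**
 * ENPC prototype generator. Starting from a single training instance, it
 * iteratively applies the mutation, reproduction, fight, move and die
 * operators to a set of prototypes.
 * Own parameters: k (number of neighbours examined by the fight operator)
 * and the maximum number of iterations of the main loop.
 * @author Isaac Triguero
 * @version 1.0
 */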
public class ENPCGenerator extends PrototypeGenerator {
/*Own parameters of the algorithm*/
// We need the variable K to use with k-NN rule
private int k;
private int MaxIter;
//others variables.
protected int numberOfPrototypes;
protected int numberOfClass;
/**
 * Builds a new ENPCGenerator from explicit parameter values.
 *
 * @param _trainingDataSet Training prototype set to be reduced.
 * @param k Number of neighbours examined when a prototype looks for an opponent in the fight operator.
 * @param max Maximum number of iterations of the main loop.
 */
public ENPCGenerator(PrototypeSet _trainingDataSet, int k, int max)
{
    super(_trainingDataSet);
    algorithmName="ENPC";
    this.k = k;
    this.MaxIter = max;
    // Same initialisation as the Parameters-based constructor. Without it
    // numberOfClass stays 0, every per-class array built by this class has
    // length 0, and indexing those arrays by class label fails.
    this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size();
}
/**
* Build a new RSPGenerator Algorithm
* @param t Original prototype set to be reduced.
* @param parameters Parameters of the algorithm (only % of reduced set).
*/
public ENPCGenerator(PrototypeSet t, Parameters parameters)
{
super(t, parameters);
algorithmName="ENPC";
this.k = parameters.getNextAsInt();
this.MaxIter = parameters.getNextAsInt();
this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size();
System.out.println("Isaac dice: k= " + this.k );
System.out.println("Number of class= "+ this.numberOfClass);
}
/**
 * Function regions, see equation (2).
 * Counts the prototypes of S whose first output equals sj.
 *
 * @param sj Class label to look for.
 * @param S Prototype set to scan.
 * @return Number of prototypes of S labelled sj.
 */
protected int regions(double sj, PrototypeSet S){
    int matches = 0;
    final int total = S.size();
    for (int pos = 0; pos < total; pos++) {
        if (S.get(pos).getOutput(0) == sj) {
            matches++;
        }
    }
    return matches;
}
/**
 * Expectation of class sj in the prototype set S, see equation (3):
 * size of S divided by the number of prototypes of S labelled sj.
 * The division is done in floating point, so a zero count does not throw.
 *
 * @param sj Class label.
 * @param S Prototype set.
 * @return |S| / regions(sj, S).
 */
protected double expectation(double sj, PrototypeSet S){
    final double total = S.size();
    final int labelled = regions(sj, S);
    return total / labelled;
}
/**
 * Mutation operator.
 * Relabels each prototype with the most populated class of its region.
 * For prototype i the class sets V[i][j] are scanned in order. A set becomes
 * the current winner when it is strictly larger than the running maximum
 * (which starts at 1), or when it ties the maximum and j is not the
 * prototype's current class. The prototype is relabelled only if the winning
 * size differs from the size of its current class's set.
 *
 * @param classifier Prototypes to relabel (modified in place).
 * @param V Pattern sets, V[i][j] = patterns of class j assigned to prototype i.
 */
protected void mutation(PrototypeSet classifier, PrototypeSet V[][]){
    final int total = classifier.size();
    for (int idx = 0; idx < total; idx++) {
        final Prototype current = classifier.get(idx);
        final double ownLabel = current.getOutput(0);
        final PrototypeSet[] perClass = V[idx];

        int best = 1;
        double bestLabel = ownLabel;
        for (int c = 0; c < perClass.length; c++) {
            final boolean strictlyLarger = perClass[c].size() > best;
            if (strictlyLarger || (perClass[c].size() == best && c != ownLabel)) {
                best = perClass[c].size();
                // Take the label from any pattern of the set; they all share the class.
                bestLabel = perClass[c].get(0).getOutput(0);
            }
        }

        // Only relabel when the winner's size differs from the current class's size.
        final int ownCount = perClass[(int) ownLabel].size();
        if (best != ownCount) {
            current.setFirstOutput(bestLabel);
        }
    }
}
/**
 * Reproduction operator.
 * Each prototype present on entry gets one chance to introduce a new
 * prototype. A class is drawn by roulette wheel with weights |V[i][j]|. If
 * the drawn class differs from the prototype's own class, the centroid of
 * V[i][class] is appended to the classifier and V is rebuilt with one extra row.
 *
 * In the rebuilt V, existing rows are copied with clone(). The new last row
 * only has the drawn class slot populated (the other slots are null), and the
 * parent's slot for that class is set to null.
 *
 * @param classifier Current prototypes; new prototypes are appended to it.
 * @param V Pattern sets, V[i][j] = patterns of class j assigned to prototype i.
 *          The array passed in is not modified.
 * @return The pattern sets after reproduction (the same array if nothing reproduced).
 */
protected PrototypeSet[][] reproduction(PrototypeSet classifier, PrototypeSet V[][]){
    // Only the prototypes that exist now may reproduce; offspring are not revisited.
    int initialSize = classifier.size();
    for(int i=0; i< initialSize; i++){
        // Roulette wheel: the total weight is the number of patterns in the region.
        double limiteRuleta = 0;
        for(int j=0; j<this.numberOfClass; j++){
            limiteRuleta+= V[i][j].size();
        }
        // Drawn unconditionally so the random sequence does not depend on the guard below.
        double aleatorio= RandomGenerator.Randdouble(0, limiteRuleta);
        double clase =0.0;
        double suma =0;
        boolean encontrado = false;
        for(int j=0; j< this.numberOfClass && !encontrado; j++){
            suma += V[i][j].size();
            if( aleatorio < suma){
                clase = j*1.0;
                encontrado =true;
            }
        }
        // If the wheel selected nothing (for example an empty region, where every
        // weight is 0), there is no class to reproduce from. Previously clase
        // silently stayed at 0 and an empty set could be averaged.
        if(encontrado && clase != classifier.get(i).getOutput(0)){ // Reproduction executes.
            int elegida = (int) clase;
            classifier.add(V[i][elegida].avg()); // keep the centroid of the drawn class
            // Rebuild the pattern sets with one extra row for the new prototype.
            int last = classifier.size()-1;
            PrototypeSet nuevoV[][] = new PrototypeSet[classifier.size()][];
            for(int j=0; j< last; j++){
                nuevoV[j]= new PrototypeSet[this.numberOfClass];
                for(int m=0; m<this.numberOfClass; m++){
                    nuevoV[j][m] = (V[j][m] != null) ? V[j][m].clone() : null; // hard copy
                }
            }
            nuevoV[last]= new PrototypeSet[this.numberOfClass];
            nuevoV[last][elegida] = V[i][elegida].clone();
            nuevoV[i][elegida] = null; // the parent hands those patterns over
            V = nuevoV;
        }
    }
    return V;
}
/**
 * Fight operator.
 * Every prototype i looks at its k nearest prototypes (itself excluded) and
 * selects the one over which it has the largest quality advantage. If a fight
 * takes place:
 * <ul>
 * <li>different classes (cooperation): i receives the opponent's patterns of i's class;</li>
 * <li>same class (competition): a roulette weighted by both qualities decides
 *     which of the two receives the other's patterns of that class.</li>
 * </ul>
 * The slot that gives its patterns away is set to null.
 *
 * @param classifier Current prototypes; their index is reset to their position.
 * @param V Pattern sets, V[i][j] = patterns of class j assigned to prototype i (modified in place).
 * @param quality Quality of each prototype, indexed like classifier.
 */
protected void fight(PrototypeSet classifier, PrototypeSet V[][], double quality[]){
    // Establish the index so that neighbours can be mapped back to rows of V and quality.
    for (int i=0;i<classifier.size();i++){
        classifier.get(i).setIndex(i);
    }
    for(int i=0; i< classifier.size(); i++){
        PrototypeSet neighbors = KNN.getNearestNeighbors(classifier.get(i), classifier, k);
        neighbors.remove(classifier.get(i));
        // Which neighbour do we fight?
        // NOTE(review): Double.MIN_VALUE is the smallest POSITIVE double, so only
        // neighbours of strictly lower quality can be selected. Kept as is; confirm
        // whether Double.NEGATIVE_INFINITY was intended.
        double max= Double.MIN_VALUE;
        Prototype select = null; // no opponent until a neighbour qualifies
        for(Prototype p: neighbors){
            double ventaja = quality[i] - quality[p.getIndex()];
            if(ventaja > max ){
                select = p;
                max = ventaja;
            }
        }
        // Drawn unconditionally so the random sequence does not depend on the guard below.
        // NOTE(review): an integer draw is compared against a quality difference;
        // confirm the bounds of Randint and whether Randdouble(0, 1) was intended.
        double aleatorio= RandomGenerator.Randint(0, 1);
        if(select == null || !(max < aleatorio)){
            // Without an opponent there is nobody to fight. Previously a blank
            // placeholder Prototype could be used as the opponent here.
            continue;
        }
        double si= classifier.get(i).getOutput(0);
        int claseSi = (int) si;
        int rival = select.getIndex();
        if (select.getOutput(0) != si ){
            // cooperation
            transferPatterns(V, rival, i, claseSi);
        } else{
            // competition
            double limiteRuleta = quality[i]+quality[rival];
            aleatorio= RandomGenerator.Randdouble(0, limiteRuleta);
            if (aleatorio < quality[i]){
                // si wins
                transferPatterns(V, rival, i, claseSi);
            }else{
                // si' wins
                transferPatterns(V, i, rival, claseSi);
            }
        }
    }
}

/**
 * Moves the patterns of class clase from row from to row to and empties the
 * source slot. A slot that an earlier fight already set to null counts as empty.
 */
private static void transferPatterns(PrototypeSet V[][], int from, int to, int clase){
    PrototypeSet moved = V[from][clase];
    if(moved != null){
        if(V[to][clase] == null){
            V[to][clase] = moved;
        }else{
            V[to][clase].add(moved);
        }
    }
    V[from][clase] = null;
}
/**
 * Move operator. Relocates every prototype to the centroid of the patterns
 * of its own class that are assigned to it. Prototypes with no such patterns
 * are left where they are.
 *
 * @param classifier Prototypes to move (modified in place).
 * @param V Pattern sets, V[i][j] = patterns of class j assigned to prototype i.
 */
protected void move(PrototypeSet classifier, PrototypeSet V[][]){
    for( int i=0; i< classifier.size(); i++){
        int clase = (int) classifier.get(i).getOutput(0);
        PrototypeSet propios = V[i][clase];
        // The fight operator sets the slot of a prototype that lost its patterns
        // to null, so null has to be handled like an empty set here.
        if(propios != null && propios.size() >0){
            classifier.get(i).set(propios.avg());
        }
    }
}
/**
 * Die operator. Removes prototypes of poor quality.
 * A prototype with quality above 0.5 never dies; otherwise it dies with
 * probability 1 - 2*quality. One random number is drawn per prototype.
 *
 * @param classifier Current prototypes (not modified).
 * @param quality Quality of each prototype, indexed like classifier.
 * @return A new set holding the surviving prototypes, in their original order.
 */
protected PrototypeSet die(PrototypeSet classifier, double quality[]){
    final PrototypeSet survivors = new PrototypeSet();
    final int total = classifier.size();
    for (int idx = 0; idx < total; idx++) {
        final double q = quality[idx];
        final double deathProbability = (q > 0.5) ? 0.0 : 1 - 2 * q;
        final double draw = RandomGenerator.Randdouble(0, 1);
        // Written as a negated '<' on purpose: a NaN probability must keep the prototype.
        final boolean dies = draw < deathProbability;
        if (!dies) {
            survivors.add(classifier.get(idx));
        }
    }
    return survivors;
}
/**
 * Assigns every prototype of initial to the region of its nearest prototype in
 * outputDataSet. A pattern that is itself (the same object as) a prototype is
 * assigned to the nearest other prototype. If there is no other prototype, it
 * is assigned to itself.
 *
 * As a side effect, the index of every non-null prototype of outputDataSet is
 * set to its position.
 *
 * @param initial Patterns to distribute.
 * @param outputDataSet Prototypes defining the regions.
 * @return region[i] = patterns whose nearest prototype is outputDataSet.get(i).
 */
PrototypeSet[] nearPrototype(PrototypeSet initial, PrototypeSet outputDataSet)
{
    PrototypeSet region[] = new PrototypeSet[outputDataSet.size()];
    for(int i=0; i< outputDataSet.size(); i++){
        region[i] = new PrototypeSet();
        if(outputDataSet.get(i)!=null)
            outputDataSet.get(i).setIndex(i); // Establish the index.
    }
    // For each training pattern.
    for(Prototype p : initial)
    {
        // Reset per pattern. Previously 'nearest' leaked from the previous pattern,
        // or was still null for the first pattern, which caused an NPE.
        double dMin = Double.POSITIVE_INFINITY;
        Prototype nearest = null;
        Prototype self = null;
        for(Prototype q : outputDataSet){ // find the closest prototype of the classifier
            if(q == null){
                continue;
            }
            if(q == p){
                self = q; // remembered only as a fallback
                continue;
            }
            double d = Distance.d(q, p);
            if(d < dMin)
            {
                dMin = d;
                nearest = q;
            }
        }
        if(nearest == null){
            nearest = self;
        }
        if(nearest != null){
            region[nearest.getIndex()].add(p); // add it to the region it belongs to
        }
    }
    return region;
}
/**
 * Builds the pattern sets V[i][j]: the patterns of initial whose class is j and
 * whose nearest prototype in outputDataSet is prototype i. A pattern that is
 * itself (the same object as) a prototype is assigned to the nearest other
 * prototype. If there is no other prototype, it is assigned to itself.
 *
 * As a side effect, the index of every non-null prototype of outputDataSet is
 * set to its position.
 *
 * @param initial Patterns to distribute.
 * @param outputDataSet Prototypes defining the regions.
 * @return region[i][j] as described above; every slot is a non-null (possibly empty) set.
 */
PrototypeSet[][] nearPrototypeWithClass(PrototypeSet initial, PrototypeSet outputDataSet)
{
    PrototypeSet region[][] = new PrototypeSet[outputDataSet.size()][];
    for(int i=0; i< outputDataSet.size(); i++){
        region[i] = new PrototypeSet[this.numberOfClass];
        for(int j=0; j< this.numberOfClass; j++)
            region[i][j] = new PrototypeSet();
        if(outputDataSet.get(i)!=null)
            outputDataSet.get(i).setIndex(i); // Establish the index.
    }
    // For each training pattern.
    for(Prototype p : initial)
    {
        // Reset per pattern. Previously 'nearest' leaked from the previous pattern,
        // or was still null for the first pattern, which caused an NPE.
        double dMin = Double.POSITIVE_INFINITY;
        Prototype nearest = null;
        Prototype self = null;
        for(Prototype q:outputDataSet){ // find the closest prototype of the classifier
            if(q == null){
                continue;
            }
            if(q == p){
                self = q; // remembered only as a fallback
                continue;
            }
            double d = Distance.d(q, p);
            if(d < dMin)
            {
                dMin = d;
                nearest = q;
            }
        }
        if(nearest == null){
            nearest = self;
        }
        if(nearest != null){
            region[nearest.getIndex()][(int)p.getOutput(0)].add(p); // add it to the Vij it belongs to
        }
    }
    return region;
}
/**
 * Generate a reduced prototype set by the ENPCGenerator method.
 * The classifier starts with one randomly chosen training instance and, for
 * MaxIter iterations, goes through: information gathering (regions, pattern
 * sets and prototype quality), mutation, reproduction, fight, move and die.
 *
 * @return Reduced set by ENPCGenerator's method.
 */
@SuppressWarnings({ "unchecked", "static-access" })
public PrototypeSet reduceSet()
{
    System.out.print("\nThe algorithm is starting...\n Computing...\n");
    System.out.println("Number of class "+ this.numberOfClass);
    PrototypeSet outputDataSet = new PrototypeSet();
    // Initialization: a single training instance chosen at random.
    int aleatory = RandomGenerator.Randint(0, trainingDataSet.size()-1);
    outputDataSet.add(trainingDataSet.get(aleatory));
    int iter =0 ;
    PrototypeSet Sj[] = new PrototypeSet[this.numberOfClass];
    // Main loop.
    while (iter<this.MaxIter){
        /*
         * Getting information, equations (1), (4) and (6).
         * - Sj: the patterns of class sj (equation 1).
         * - Ri: the patterns whose nearest prototype is ri, whatever their
         *   class (equation 6).
         * - Vij: the patterns of class sj whose nearest prototype is ri, that
         *   is, the intersection of Ri and Sj.
         */
        for(int i=0; i< this.numberOfClass; i++){
            Sj[i] = new PrototypeSet(trainingDataSet.getFromClass(i));
        }
        PrototypeSet R[] = nearPrototype(trainingDataSet,outputDataSet);
        PrototypeSet V[][] = nearPrototypeWithClass(trainingDataSet,outputDataSet);
        // Quality is computed before mutation, that is, with the labels the
        // prototypes had at the start of the iteration.
        double quality[] = computeQuality(outputDataSet, R, V, Sj);
        int initialSize = outputDataSet.size();
        // Mutation.
        mutation(outputDataSet,V);
        // Reproduction.
        V = reproduction(outputDataSet,V);
        // If some prototype reproduced, everything has to be computed again.
        if(outputDataSet.size()> initialSize){
            R = nearPrototype(trainingDataSet,outputDataSet);
            // Drop the prototypes whose region is empty.
            PrototypeSet clean = new PrototypeSet();
            for(int i=0; i< outputDataSet.size();i++){
                if(R[i].size() != 0){
                    clean.add(outputDataSet.get(i));
                }
            }
            outputDataSet = new PrototypeSet(clean);// Copy again
            // Recompute regions, pattern sets and qualities.
            R = nearPrototype(trainingDataSet,outputDataSet);
            V = nearPrototypeWithClass(trainingDataSet,outputDataSet);
            quality = computeQuality(outputDataSet, R, V, Sj);
        }
        // Fight
        fight(outputDataSet,V, quality);
        // Move operator
        move(outputDataSet, V);
        // Die operator
        outputDataSet = new PrototypeSet(die(outputDataSet,quality));
        iter++;
    }
    System.out.println("Accuracy % " +accuracy(outputDataSet,trainingDataSet));
    System.out.println("Reduction % " + (100-(outputDataSet.size()*100)/trainingDataSet.size()) );
    // A disabled leave-one-out cleaning pass over the result used to follow
    // here as commented-out code; it was never executed.
    return outputDataSet;
}

/**
 * Computes the quality of every prototype: min(1, accuracy * apportation), where
 * accuracy is |V[i][c]| / |R[i]| and apportation is |V[i][c]| / (expectation / 2),
 * c being the prototype's class.
 * A prototype with an empty region, or with a null pattern set for its class,
 * gets quality 0. Without this, 0/0 produced NaN, and a NaN quality can never
 * satisfy the comparisons that make a prototype die.
 * NOTE(review): expectation is evaluated on Sj[c] itself. If getFromClass(c)
 * returns only patterns labelled c, it always yields 1 for a non-empty class.
 * Confirm against equation (3).
 */
private double[] computeQuality(PrototypeSet classifier, PrototypeSet R[], PrototypeSet V[][], PrototypeSet Sj[]){
    double quality[] = new double[classifier.size()];
    for(int i=0; i< classifier.size();i++){
        int clase =(int) classifier.get(i).getOutput(0);
        double accuracy = 0;
        double apportation = 0;
        if(V[i][clase] != null && R[i].size() > 0){
            accuracy = V[i][clase].size()*1./ R[i].size();
            apportation = V[i][clase].size()/ (this.expectation(clase, Sj[clase])/2);
        }
        quality[i] = Math.min(1, accuracy*apportation);
    }
    return quality;
}
/**
 * Command-line entry point.
 * Arguments read by this method:
 * 0: Filename with the training data set to be condensed.
 * 1: Filename which contains the test data set.
 * 2: Seed of the random number generator.
 * 10: Number of blocks (validated, but not used afterwards).
 * The generator is always run with k = 3 and 250 iterations.
 * @param args Arguments of the main function.
 */
public static void main(String[] args)
{
    Parameters.setUse("ENPC", "<seed> <Number of neighbors>\n<Swarm size>\n<Particle Size>\n<MaxIter>\n<DistanceFunction>");
    Parameters.assertBasicArgs(args);

    final PrototypeSet trainingSet = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]);
    final PrototypeSet testSet = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]);

    final long randomSeed = Parameters.assertExtendedArgAsInt(args,2,"seed",0,Long.MAX_VALUE);
    ENPCGenerator.setSeed(randomSeed);

    // The argument is still checked even though its value is not needed.
    Parameters.assertExtendedArgAsInt(args,10,"number of blocks", 1, Integer.MAX_VALUE);

    final ENPCGenerator enpc = new ENPCGenerator(trainingSet, 3, 250);
    final PrototypeSet reduced = enpc.execute();

    final int hits1NN = KNN.classficationAccuracy(reduced, testSet);
    enpc.showResultsOfAccuracy(Parameters.getFileName(), hits1NN, testSet);
}
}