/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/*
SGP.java
Isaac Triguero Velazquez.
Created by Isaac Triguero Velazquez 11-3-09
Copyright (c) 2009 __MyCompanyName__. All rights reserved.
*/
package keel.Algorithms.Instance_Generation.SGP;
import keel.Algorithms.Instance_Generation.Basic.PrototypeSet;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerator;
import keel.Algorithms.Instance_Generation.Basic.Prototype;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm;
import keel.Algorithms.Instance_Generation.Chen.ChenGenerator;
import keel.Algorithms.Instance_Generation.HYB.HYBGenerator;
import keel.Algorithms.Instance_Generation.*;
import java.util.*;
import keel.Algorithms.Instance_Generation.utilities.*;
import keel.Algorithms.Instance_Generation.utilities.KNN.*;
import java.util.StringTokenizer;
/* TODO: we still need to include this (Jama linear algebra dependency).
import Jama.EigenvalueDecomposition;
import Jama.Matrix;
import Jama.Matrix.*;
import Jama.SingularValueDecomposition;
* */
/**
* SGPGenerator implements the SGP (Self-Generating Prototypes) algorithm, a prototype
* generation method that builds groups of training patterns per class, iteratively splits
* them, and represents each group by its mean; the SGP2 variant adds merging and pruning.
*
* @author Isaac Triguero
* @version 1.0
*/
public class SGPGenerator extends PrototypeGenerator {
/* Own parameters of the algorithm */
// SGP1 or SGP2: whether or not to apply the merging and pruning steps.
private int method;
// Thresholds Rmin and Rmis.
protected int Rmin;
protected int Rmis;
protected int numberOfClass;
/**
* Builds a new SGPGenerator algorithm.
* @param _trainingDataSet Training data set to be reduced.
* @param method SGP variant: 1 = SGP, 2 = SGP2 (with merging and pruning).
* @param Rmin Rmin threshold of the algorithm.
* @param Rmis Rmis threshold of the algorithm.
*/
public SGPGenerator(PrototypeSet _trainingDataSet, int method, int Rmin, int Rmis)
{
super(_trainingDataSet);
algorithmName="SGP";
this.method = method;
this.Rmin = Rmin;
this.Rmis = Rmis;
this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size(); // keep numberOfClass initialized when this constructor is used
}
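/**
* Swaps the prototype groups stored at two positions of an array of groups.
* @param v Array of prototype groups.
* @param pos1 First position.
* @param pos2 Second position.
*/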
void intercambiar(PrototypeSet v[],int pos1,int pos2){
PrototypeSet aux = v[pos2];
v[pos2] = v[pos1];
v[pos1] = aux;
}
/**
* Builds a new SGPGenerator algorithm.
* @param t Original prototype set to be reduced.
* @param parameters Parameters of the algorithm (method, Rmin and Rmis).
*/
public SGPGenerator(PrototypeSet t, Parameters parameters)
{
super(t, parameters);
algorithmName="SGP";
this.method = parameters.getNextAsInt();
this.Rmin = parameters.getNextAsInt();
this.Rmis = parameters.getNextAsInt();
this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size();
System.out.println("Isaac dice: method SGP" + this.method+ " Rmin = " + this.Rmin + " Rmis= "+ this.Rmis );
System.out.println("Number of class= "+ this.numberOfClass);
}
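/**
* Generates the reduced prototype set using the SGP procedure: every class starts as a single
* group represented by its mean, and groups are iteratively split (steps 7-9) while some of
* their patterns are closer to another group's representative. When method 2 (SGP2) is
* selected, a pairwise merging step is also applied.
* @return Reduced prototype set produced by the algorithm.
*/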
@SuppressWarnings({ "unchecked", "static-access" })
public PrototypeSet reduceSet()
{
System.out.print("\nThe algorithm is starting...\n Computing...\n");
System.out.println("Number of class "+ this.numberOfClass);
PrototypeSet G[] = new PrototypeSet [this.numberOfClass*100]; // enough memory.
PrototypeSet outputDataSet = new PrototypeSet(this.numberOfClass*5000);
PrototypeSet realoutput;
int k,M;
// 1. Set Gk.
// 2. Compute initial prototypes Pk = means(Gk), outputDataSet == Pk
for( int i = 0; i< this.numberOfClass; i++){
G[i] = new PrototypeSet(trainingDataSet.getFromClass(i));
//System.out.println("G[ "+i+"] . size ="+ G[i].size());
if(G[i].size()>0) // check the class is not empty.
outputDataSet.add(G[i].avg());
}
PrototypeSet nominalPopulation;
nominalPopulation = new PrototypeSet();
nominalPopulation.formatear(outputDataSet);
//
System.out.println("Initial Accuracy % " +accuracy(nominalPopulation,trainingDataSet));
System.out.println("Initial reduction % " + (100-(outputDataSet.size()*100.)/trainingDataSet.size()) );
//3)
k=0;
//M= this.numberOfClass;// initially the number of groups is numberOfclass
M = outputDataSet.size();
//4. Compute the distance
boolean cambio = true;
while (cambio){
//cambio = false;
// Distance from each prototype xj of G to the different group means.
double distance[][];
distance = new double[G[k].size()][];
double min = Double.MAX_VALUE;
//int indexNN[][] = new int[M][]; // index of the nearest prototype
int indexNN[] = new int[M];
int pkNN[][]= new int[G[k].size()][];
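// distance[j][i]: distance from pattern j of group k to the representative (mean) of group i.
// pkNN[j][k]: index of the group whose representative is nearest to pattern j of group k.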
// Initialize the best index to its own group.
for(int j=0; j< M;j++){
indexNN[j]=k;
}
// indexNN[i] = new int[G[k].size()];
for(int j=0; j< G[k].size(); j++){ // for each prototype of the group
distance[j] = new double[M];
pkNN [j]= new int[M];
Prototype xj = G[k].get(j);
min = Double.MAX_VALUE;
for(int i = 0; i< M; i++){ // for each group
//System.out.println(outputDataSet.size());
// System.out.println("**********************");
//outputDataSet.get(i).print();
if(outputDataSet.get(i) != null){
distance[j][i]= Distance.d(xj.formatear(), outputDataSet.get(i).formatear()); // distance between the prototype and the group mean.
if(distance[j][i] < min){
min = distance[j][i];
indexNN[k] = j; // keep the prototype of the group
pkNN[j][k] = i; // keep the group, Pk
}
}
else{
distance[j][i] = Double.MAX_VALUE;
}
}
}
// Check whether, for every pattern of the group, the nearest prototype is the group's own mean.
// First case: if, for all patterns of a group, the closest prototype is the group's prototype,
// then no modification is performed.
boolean continuar = false;
for(int i=0; i< G[k].size() && !continuar;i++){
//System.out.print(pkNN[i][k]+" ");
if(pkNN[i][k]!=k){
continuar = true; // go to step 4
}
}
boolean paso7 = true;
if(continuar){ // if we can continue, go to step 7.
//System.out.println("Continuo");
for(int i=0; i<G[k].size() && paso7;i++){
// Pk and Pij
if((outputDataSet.get(k).getOutput(0) != outputDataSet.get(pkNN[i][k]).getOutput(0)) && G[k].size()>1){ //
paso7 = true;
}else{
// System.out.println("falla paso 7" );
paso7 = false; // si falla una vez, paramos no se hace el paso 7.
}
}
if(paso7){
// 7, C(Pij*) != C(Pk)
// System.out.println("Paso 7") ;
paso7 = true;
//System.out.println("Splitting Gk = "+ k + " size= "+ G[k].size());
//Split G(k)
// Mean point of Gk.
Prototype mean = G[k].avg();
PrototypeSet zi = new PrototypeSet(G[k].size());
// Center the values at the origin (0,0).
for (int l=0; l<G[k].size(); l++){
zi.add( G[k].get(l).sub(mean));
}
//zi.print();
// Still pending: compute the principal eigenvector (alpha) and split Gk along it.
//uncomment Matrix alpha= new Matrix(zi.prototypeSetTodouble());
//EigenvalueDecomposition autovalores = new EigenvalueDecomposition(alpha);
// alpha.
//uncomment SingularValueDecomposition val = new SingularValueDecomposition(alpha);
//uncomment Matrix aux =val.getV();
//System.out.println("Rows*Columns: " + aux.getRowDimension() + " " + aux.getColumnDimension());
double autoval[] = new double[G[k].get(0).numberOfInputs()];
//uncomment for(int i=0; i<aux.getColumnDimension(); i++)
//uncomment autoval[i] = aux.get(0,i);
// zi*alpha >= 0 goes to one group, zi*alpha < 0 to the other.
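// A minimal commented-out sketch of how autoval could be approximated without Jama, using
// power iteration on the centered patterns to estimate the principal eigenvector. It assumes,
// as the commented Jama code above does, that zi.prototypeSetTodouble() returns the centered
// patterns as a double[n][d] matrix; this is an illustrative alternative, not the original method.
/*
double data[][] = zi.prototypeSetTodouble(); // n x d matrix of centered patterns
int d = autoval.length;
double v[] = new double[d];
java.util.Arrays.fill(v, 1.0 / Math.sqrt(d)); // arbitrary initial direction
for (int it = 0; it < 50; it++) { // v <- normalize((Z^T Z) v)
double w[] = new double[d];
for (double[] row : data) {
double proj = 0;
for (int c = 0; c < d; c++) proj += row[c] * v[c];
for (int c = 0; c < d; c++) w[c] += proj * row[c];
}
double norm = 0;
for (int c = 0; c < d; c++) norm += w[c] * w[c];
norm = Math.sqrt(norm);
if (norm == 0) break; // all patterns identical: no preferred direction
for (int c = 0; c < d; c++) v[c] = w[c] / norm;
}
autoval = v; // approximate principal direction defining the splitting hyperplane
*/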
double output[] = new double[1];
output[0] = 0;
Prototype alpha2 = new Prototype(autoval,output);
// Hyperplane split.
PrototypeSet removed = new PrototypeSet();
for(int l=0; l< G[k].size();l++){
//zi.get(l).print();
//alpha2.print();
//System.out.println("Pro escalar = " + alpha2.mulEscalar(zi.get(l)));
if(alpha2.mulEscalar(zi.get(l))>0) {
removed.add(G[k].get(l));
}
}
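// 'removed' now holds the patterns lying on the positive side of the hyperplane; they are taken out of Gk and, if numerous enough (Rmin), form a new group G[M].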
//System.out.println("Removed size= "+ removed.size());
for(int i=0; i<removed.size();i++)
G[k].remove(removed.get(i));
// We must take Rmin into account:
// if the split-off set does not reach a minimum number of prototypes, it can be discarded.
if(removed.size()>Rmin){
G[M] = new PrototypeSet(removed.size());
for(int i=0; i<removed.size();i++)
G[M].add(removed.get(i));
outputDataSet.set(k, G[k].avg());
// System.out.println("Tama�o output = "+ outputDataSet.size());
outputDataSet.add(G[M].avg());
//Establish the class too.
//double ClassK= outputDataSet.get(k).getOutput(0);
outputDataSet.get(M).setFirstOutput(outputDataSet.get(k).getOutput(0));
/* System.err.println(" Gk ");
G[k].print();
System.err.println(" GM");
G[M].print();
System.err.println(" ****");
*/
M++;
}else{
paso7 =false;
}
// System.out.println("G[k] size = "+ G[k].size()+"\n**************\n");
//G[k].print();
//cambio = false;
} // End of step 7.
// Step 8: C(Pij*) == C(Pk) and Pij* != Pk for some xj in Gk.
if(!paso7 && G[k].size()>1){
PrototypeSet removed = new PrototypeSet();
boolean paso8 = false;
for(int i=0; i<G[k].size();i++){
if((outputDataSet.get(k).getOutput(0) == outputDataSet.get(pkNN[i][k]).getOutput(0)) && (pkNN[i][k]!=k)){
//System.out.println("Paso 8") ;
paso8=true;
removed.add(G[k].get(i));
// add to the Group ij*.
G[pkNN[i][k]].add(G[k].get(i));
outputDataSet.set(pkNN[i][k], G[pkNN[i][k]].avg());
}
}
// Remove them from G[k].
for(int i=0; i<removed.size();i++)
G[k].remove(removed.get(i));
if(paso8){
outputDataSet.set(k, G[k].avg());
}
//System.out.println("Paso 8 -> G[k] size = "+ G[k].size()+"\n**************\n");
removed = new PrototypeSet();
boolean paso9 = false;
// Step 9: C(Pij*) != C(Pk) and Pij* != Pk for some xj in Gk.
for(int i=0; i<G[k].size();i++){
if(outputDataSet.get(k).getOutput(0) != outputDataSet.get(pkNN[i][k]).getOutput(0) && (pkNN[i][k]!=k)){ //
paso9=true;
// System.out.println("Gk size = " + G[k].size());
removed.add( G[k].get(i));
}
}
// If removed has the same size as G[k], we are not going to delete everything.
if(removed.size() < G[k].size()){
for(int i=0; i<removed.size();i++)
G[k].remove(removed.get(i));
//System.out.println("Paso 9-> G[k] size = "+ G[k].size()+"\n************** " + removed.size());
if(paso9){
//System.out.println("Paso 9") ;
G[M] = new PrototypeSet();
for(int i=0; i<removed.size();i++)
G[M].add(removed.get(i));
outputDataSet.set(k, G[k].avg());
outputDataSet.add(M, G[M].avg());
M++;
}
}
}
} // End of the 'continuar' branch.
//System.out.println(k);
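// Step 10: if no split was performed, move on to the next group, or finish once every group has been processed.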
if(!paso7 || !continuar){
//System.out.println("M = " + M);
if(k==M-1){
cambio = false; //END
}else if(k != M-1){
k++;
cambio =true; // go back to step 4.
}
}
// When we finish the SGP algorithm, we check whether the selected method is SGP2;
// if so, we introduce the merging and pruning steps now.
if(this.method == 2){
System.out.println("Method SGP2");
// Merging step. We check groups pairwise.
for(int i=0; i <outputDataSet.size(); i++){
for(int j=0; j< outputDataSet.size(); j++){
if(i!=j){
if(outputDataSet.get(i).getOutput(0) == outputDataSet.get(j).getOutput(0)){
// A == i, B == j
//System.out.println("Clases Son iguales " +i+","+j);
boolean continuar2 = true;
// if, for every prototype of group i, the second nearest representative is j.
for(int m=0; m< G[i].size() && continuar2;m++){
// obtain the index of the second nearest neighbor
int indexSNN =outputDataSet.IndexSecondNearestTo(G[i].get(m));
//int prueba = outputDataSet.IndexNearestTo(G[i].get(m));
if (indexSNN != j){
//System.out.println("prueba = "+prueba+" index "+ indexSNN+ ", "+i+","+j);
continuar2 = false;
}
}
if (continuar2)
{
// Now we check the opposite situation.
System.out.println("Pre-Merging");
for(int m=0; m< G[j].size() && continuar2;m++){ // for each prototype of group j
// obtain the index of the second nearest neighbor
int indexSNN =outputDataSet.IndexSecondNearestTo(G[j].get(m));
if (indexSNN != i){
continuar2 = false;
}
}
if(continuar2){
// Merging.
System.out.println("Merging");
G[i].add(G[j]);
outputDataSet.get(i).set(G[i].avg());
intercambiar(G,j,M-1);
M--; // the absorbed group has been swapped to the end of the array
//outputDataSet.remove(j);
}
}
}
}
}
}
// Pruning step.
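// (The pruning step of SGP2 is not applied at this point; a leave-one-out cleaning routine is kept commented out after the main loop.)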
} // End Method 2.
} // End of the main while loop.
realoutput = new PrototypeSet(M); // with M prototypes, M groups.
for(int i=0; i< M; i++){
if(outputDataSet.get(i)!=null)
realoutput.add(outputDataSet.get(i));
}
nominalPopulation = new PrototypeSet();
nominalPopulation.formatear(realoutput);
/*if( M<1){
realoutput = new PrototypeSet(this.numberOfClass);
for(int i=0; i< this.numberOfClass; i++){
realoutput.add(trainingDataSet.getFromClass(i).getRandom());
}
}
*/
System.out.println("Accuracy % " +accuracy(nominalPopulation,trainingDataSet));
System.out.println("Reduction % " + (100-(realoutput.size()*100.)/trainingDataSet.size()) );
// Clean up!
/*
boolean marcas[];
marcas = new boolean[realoutput.size()];
Arrays.fill(marcas, true);
double accuracyInic =KNN.classficationAccuracy(realoutput, trainingDataSet);
double accuracy;
for(int i=0; i< realoutput.size(); i++){
marcas[i] = false; // At the beginning, assume this prototype cannot be eliminated.
PrototypeSet leaveOneOut = realoutput.without(realoutput.get(i));
accuracy = KNN.classficationAccuracy(leaveOneOut, trainingDataSet);
if(accuracy > accuracyInic){
marcas[i] = true; // we can eliminate
}
}
//Then we create the result set..
PrototypeSet clean = new PrototypeSet();
for(int i=0; i< marcas.length; i++){
if(!marcas[i]){
clean.add(realoutput.get(i));
}
}
System.out.println("Accuracy % " +accuracy(clean,trainingDataSet));
System.out.println("Reduction % " + (100-(clean.size()*100)/trainingDataSet.size()) );
*/
return nominalPopulation;
}
/**
* General main for all the prototype generators.
* Arguments:
* 0: Filename with the training data set to be condensed.
* 1: Filename which contains the test data set.
* 2: Seed of the random number generator.
* @param args Arguments of the main function.
*/
public static void main(String[] args)
{
Parameters.setUse("SGP", "<seed> <method (1=SGP, 2=SGP2)>\n<Rmin>\n<Rmis>");
Parameters.assertBasicArgs(args);
PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]);
PrototypeSet test = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]);
long seed = Parameters.assertExtendedArgAsInt(args,2,"seed",0,Long.MAX_VALUE);
SGPGenerator.setSeed(seed);
// int blocks =Parameters.assertExtendedArgAsInt(args,10,"number of blocks", 1, Integer.MAX_VALUE);
//String[] parametersOfInitialReduction = Arrays.copyOfRange(args, 4, args.length);
//System.out.print(" swarm ="+swarm+"\n");
SGPGenerator generator = new SGPGenerator(training, 1, 1, 1);
PrototypeSet resultingSet = generator.execute();
//resultingSet.save(args[1]);
//int accuracyKNN = KNN.classficationAccuracy(resultingSet, test, k);
int accuracy1NN = KNN.classficationAccuracy(resultingSet, test);
generator.showResultsOfAccuracy(Parameters.getFileName(), accuracy1NN, test);
}
}