/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package keel.Algorithms.Instance_Generation.GMCA;
import keel.Algorithms.Instance_Generation.Basic.PrototypeSet;
import keel.Algorithms.Instance_Generation.Basic.Prototype;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm;
import keel.Algorithms.Instance_Generation.*;
import keel.Algorithms.Instance_Generation.MCA.*;
import keel.Algorithms.Instance_Generation.PNN.*;
import keel.Algorithms.Instance_Generation.utilities.KNN.*;
import org.core.*;
import java.util.*;
import keel.Algorithms.Instance_Generation.utilities.*;
/**
 * Implements the GMCA prototype generation algorithm (presumably the
 * "Generalized Modified Chang Algorithm"; confirm against the KEEL docs).
 * <p>
 * The training set is first split into small same-class clusters
 * ({@link #initClusters(PrototypeSet)}). Then {@link #reduceSet()} repeatedly
 * merges pairs of nearest same-class clusters, keeping track of the set of
 * cluster representatives, which is the result of the reduction.
 * @author diegoj
 */
public class GMCAGenerator extends MCAGenerator
{
    /** Clusters of the prototype set. */
    ClusterSet clusters;

    /** Set of cluster representatives; built by initClusters and returned by reduceSet. */
    PrototypeSet R = null;

    /**
     * Basic constructor.
     * @param _trainingDataSet Prototype training data set.
     */
    public GMCAGenerator(PrototypeSet _trainingDataSet)
    {
        super(_trainingDataSet);
        algorithmName="GMCA";
    }

    /**
     * Constructor.
     * @param _trainingDataSet Prototype training data set.
     * @param parameters Parameters of the method.
     */
    public GMCAGenerator(PrototypeSet _trainingDataSet, Parameters parameters)
    {
        super(_trainingDataSet, parameters);
        algorithmName="GMCA";
    }

    /**
     * Builds the initial clusters and the initial representatives set.
     * <p>
     * For every non-void class of T, the prototypes of that class are shuffled
     * and consumed in small groups: the first remaining prototype plus the
     * result of a nearest-neighbor query around it. Each group becomes a
     * {@link Cluster}; its representative is appended to {@link #R}.
     * Both {@link #clusters} and {@link #R} are re-created by this call.
     * @param T Prototype set to be partitioned into clusters.
     */
    protected void initClusters(PrototypeSet T)
    {
        R = new PrototypeSet(); // representatives set
        clusters = new ClusterSet();
        ArrayList<Double> classes = T.nonVoidClasses();
        for (double k : classes)
        {
            PrototypeSet Tk = T.getFromClass(k);
            Tk.randomize();
            while (Tk.size() > 0)
            {
                // When exactly three prototypes remain, ask for three neighbors
                // instead of two (presumably so that no prototype is left alone
                // for the next iteration; confirm against KNN.getNearestNeighbors).
                int neighbors = 2;
                if (Tk.size() == 3)
                {
                    neighbors = 3;
                }
                PrototypeSet clusterSet = KNN.getNearestNeighbors(Tk.get(0), Tk, neighbors);
                clusterSet.add(Tk.get(0));
                Cluster newCluster = new Cluster(clusterSet);
                clusters.add(newCluster);
                R.add(newCluster.getRepresentative());
                // Consume the clustered prototypes so the loop eventually ends.
                for (Prototype p : clusterSet)
                {
                    Tk.remove(p);
                }
            }
        }
    }

    /**
     * Hard-checking consistency method.
     * <p>
     * Compares the absolute accuracy obtained with the given set over the
     * training data set against the stored current accuracy.
     * @param modified Set whose consistency is tested.
     * @return TRUE if the accuracy of the set is at least the current accuracy,
     * FALSE otherwise.
     */
    protected boolean isPrototypeConsistent(PrototypeSet modified)
    {
        int accuracyWithPStar = absoluteAccuracy(modified, trainingDataSet);
        return accuracyWithPStar >= currentAccuracy;
    }

    /**
     * Shorthand for the distance between two prototypes.
     * @param a First prototype.
     * @param b Second prototype.
     * @return Value of {@code Distance.d(a, b)}.
     */
    protected static double d(Prototype a, Prototype b)
    {
        return Distance.d(a,b);
    }

    /**
     * Checks whether a freshly merged cluster is consistent with the clusters
     * of the other classes, relocating prototypes between clusters when needed.
     * <p>
     * Let pStar be the representative of the merged cluster and rStar its
     * radius. For every class k other than the class of pStar whose nearest
     * prototype (in {@code modified}) lies within twice the larger of rStar and
     * the maximum cluster radius of class k, each prototype s of class k that
     * lies within twice the larger of rStar and the radius of its own cluster
     * is examined:
     * <ul>
     * <li>every x of the merged cluster that is at least as far from pStar as
     * from s must have a neighbor in the merged cluster strictly closer than
     * s; x is then scheduled to move to that neighbor's cluster;</li>
     * <li>every y of the cluster of s that is at least as far from s as from
     * pStar must have a neighbor of class k strictly closer than pStar;
     * y is then scheduled to move to that neighbor's cluster.</li>
     * </ul>
     * If any such x or y has no closer neighbor the method returns FALSE
     * immediately. Otherwise the scheduled moves are applied to
     * {@link #clusters} before the next s is examined.
     * <p>
     * NOTE(review): a FALSE result can be returned after moves for previously
     * examined prototypes have already been applied; those moves are not
     * undone here. Confirm that this is intended.
     * @param mix Cluster resulting from the merge under test.
     * @param modified Prototype set used to look up the prototypes of each class.
     * @return TRUE if no inconsistency was found, FALSE otherwise.
     */
    protected boolean isConsistent(Cluster mix, PrototypeSet modified)
    {
        ArrayList<Double> classes = modified.nonVoidClasses();
        Prototype pStar = mix.getRepresentative();
        PrototypeSet setStar = mix.getPrototypeSet();
        double kStar = pStar.label();
        double rStar = mix.getRadiusLength();

        // Nearest prototype to pStar for each present class.
        HashMap<Double,Prototype> sK = new HashMap<Double,Prototype>();
        for (double k : classes)
        {
            Prototype p = modified.nearestToWithClass(pStar, k);
            sK.put(k, p);
        }

        for (double k : classes)
        {
            if (k == kStar)
            {
                continue; // only clusters of other classes can conflict
            }
            double maxRadius = clusters.maxRadiusLengthOfClass(k);
            // Class-level filter: skip the class if even its nearest prototype is too far.
            if ( d(pStar, sK.get(k)) <= 2 * Math.max(rStar, maxRadius) )
            {
                PrototypeSet Pk = modified.getFromClass(k);
                for (Prototype s : Pk)
                {
                    Cluster clusterOfs = clusters.get(s);
                    PrototypeSet setOfs = clusterOfs.getPrototypeSet();
                    double rs = clusterOfs.getRadiusLength();
                    if ( d(pStar, s) <= 2*Math.max(rStar, rs) )
                    {
                        // Pairs (prototype, neighbor): the first element moves to the
                        // cluster of the second one. Moves are only recorded during the
                        // scan and applied afterwards; do not call clusters.moveTo here.
                        ArrayList<Pair<Prototype,Prototype>> setStarMoves = new ArrayList<Pair<Prototype,Prototype>>();
                        for (Prototype x : setStar)
                        {
                            if ( d(pStar, x) >= d(s, x) )
                            {
                                Prototype nx = setStar.nearestTo(x);
                                if ( d(nx,x) < d(s,x) )
                                {
                                    setStarMoves.add(new Pair<Prototype,Prototype>(x, nx));
                                }
                                else
                                {
                                    return false; // x cannot be kept away from s: reject
                                }
                            }
                        }

                        ArrayList<Pair<Prototype,Prototype>> setOfsMoves = new ArrayList<Pair<Prototype,Prototype>>();
                        for (Prototype y : setOfs)
                        {
                            if ( d(pStar, y) <= d(s, y) )
                            {
                                Prototype ny = Pk.nearestTo(y);
                                if ( d(ny, y) < d(pStar, y) )
                                {
                                    setOfsMoves.add(new Pair<Prototype,Prototype>(y, ny));
                                }
                                else
                                {
                                    return false; // y cannot be kept away from pStar: reject
                                }
                            }
                        }

                        // Perform the recorded movements: first the merged cluster,
                        // then the cluster of s.
                        applyClusterMoves(setStarMoves);
                        applyClusterMoves(setOfsMoves);
                    }
                }
            }
        }
        // An additional accuracy-based test ("Proposition 4",
        // isPrototypeConsistent(modified)) is intentionally not applied here.
        return true;
    }

    /**
     * Applies a list of recorded movements to {@link #clusters}: every first
     * element is moved, at most once, to the cluster of its paired second element.
     * @param moves Pairs (prototype to move, prototype whose cluster is the target).
     */
    private void applyClusterMoves(ArrayList<Pair<Prototype,Prototype>> moves)
    {
        ArrayList<Prototype> alreadyMoved = new ArrayList<Prototype>();
        for (Pair<Prototype,Prototype> move : moves)
        {
            Prototype source = move.first();
            if (!alreadyMoved.contains(source))
            {
                alreadyMoved.add(source);
                Prototype target = move.second();
                clusters.moveTo(source, clusters.getClusterOf(target));
            }
        }
    }

    /**
     * Reduces the training data set by merging clusters of the same class.
     * <p>
     * After the initial clustering, each pass asks for the nearest same-class
     * cluster pairs and merges them one by one until a merge is found
     * consistent (see {@link #isConsistent(Cluster, PrototypeSet)}). An
     * accepted merge replaces the two old representatives in {@link #R} with
     * the representative of the merged cluster. Passes are repeated while a
     * merge was accepted and there are still more clusters than classes.
     * <p>
     * NOTE(review): {@code clusters.merge} is invoked before the consistency
     * test and nothing in this method reverts it when the test fails; confirm
     * whether ClusterSet is expected to handle that.
     * @return Set of representatives of the final clusters.
     */
    @Override
    public PrototypeSet reduceSet()
    {
        int counterOfMerges = 0;
        PrototypeSet V = trainingDataSet.copy();
        int numClasses = V.nonVoidClasses().size();
        initClusters(V);
        currentAccuracy = absoluteAccuracy(V, trainingDataSet);
        do
        {
            counterOfMerges = 0;
            ArrayList<Pair<Cluster,Cluster>> nearest = clusters.nearestClustersWithSameClass();
            int nearestSize = nearest.size();
            boolean foundBetter = false;
            // Try candidate pairs in order until one merge is accepted.
            for (int i = 0; !foundBetter && i < nearestSize; ++i)
            {
                clusters.test(V);
                Cluster Cp = nearest.get(i).first();
                Cluster Cq = nearest.get(i).second();
                Prototype p = Cp.getRepresentative();
                Prototype q = Cq.getRepresentative();
                Cluster mix = clusters.merge(Cp, Cq);

                // Candidate set: a copy of V without the two old representatives.
                PrototypeSet modified = new PrototypeSet(V);
                modified.remove(p);
                modified.remove(q);
                foundBetter = isConsistent(mix, modified);
                if (foundBetter)
                {
                    Prototype avg = mix.getRepresentative();
                    clusters.assignment.put(avg, mix);
                    // Only R is updated; V itself is left untouched.
                    R.remove(p);
                    R.remove(q);
                    R.add(avg);
                    ++counterOfMerges;
                    // One cluster per class left: force the outer loop to stop.
                    if (clusters.size() == numClasses)
                    {
                        counterOfMerges = 0;
                    }
                }
            }
        } while (counterOfMerges > 0);
        return R;
    }

    /**
     * General main for all the prototype generators.
     * Arguments:
     * 0: Filename with the training data set to be condensed.
     * 1: Filename which contains the test data set.
     * 2: Seed of the random number generator.
     * @param args Arguments of the main function.
     */
    public static void main(String[] args)
    {
        Debug.setStdDebugMode(false);
        Parameters.setUse("GMCA", "<seed>");
        Parameters.assertBasicArgs(args);
        PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]);
        PrototypeSet test = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]);
        long seed = Parameters.assertExtendedArgAsInt(args,2,"seed",0,Long.MAX_VALUE);
        GMCAGenerator.setSeed(seed);
        GMCAGenerator generator = new GMCAGenerator(training);
        PrototypeSet resultingSet = generator.execute();
        int accuracy1NN = KNN.classficationAccuracy1NN(resultingSet, test);
        generator.showResultsOfAccuracy(Parameters.getFileName(), accuracy1NN, test);
    }
}