/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/*
MSE.java
Isaac Triguero Velazquez.
Created by Isaac Triguero Velazquez 5-3-09
Copyright (c) 2009 __MyCompanyName__. All rights reserved.
*/
package keel.Algorithms.Instance_Generation.MSE;
import keel.Algorithms.Instance_Generation.Basic.PrototypeSet;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerator;
import keel.Algorithms.Instance_Generation.Basic.Prototype;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm;
import keel.Algorithms.Instance_Generation.Chen.ChenGenerator;
import keel.Algorithms.Instance_Generation.HYB.HYBGenerator;
import keel.Algorithms.Instance_Generation.*;
import java.util.*;
import keel.Algorithms.Instance_Generation.utilities.*;
import keel.Algorithms.Instance_Generation.utilities.KNN.*;
import org.core.*;
import java.util.StringTokenizer;
/**
 * Prototype generator registered under the algorithm name "MSE".
 * <p>
 * The generator works in three stages, all visible in this class:
 * <ol>
 * <li>{@link #initDataSet()} builds an initial prototype set by clustering each
 *     class separately and pruning the resulting centroids with two
 *     elimination rules;</li>
 * <li>{@link #reduceSet()} repeatedly moves every prototype along a gradient
 *     step (with momentum) computed from one training instance per iteration,
 *     while halving the gradient step and cooling the temperature;</li>
 * <li>a final leave-one-out pass discards prototypes whose removal improves
 *     the classification accuracy on the training set.</li>
 * </ol>
 * Configuration: number of neighbours {@code k} used by the k-NN based
 * elimination rule, number of initial centroids per class, initial gradient
 * step and initial temperature.
 *
 * @author Isaac Triguero
 * @version 1.0
 */
public class MSEGenerator extends PrototypeGenerator {

    /** Momentum factor (mu) applied to the previous increment of a prototype. */
    private static final double MOMENTUM = 0.9;

    /** Factor applied to the gradient step after every iteration. */
    private static final double GRADIENT_STEP_DECAY = 0.5;

    /** Factor applied to the temperature after every iteration. */
    private static final double TEMPERATURE_DECAY = 0.9;

    /* Own parameters of the algorithm */

    /** Number of neighbours used by the k-NN majority vote in {@link #initDataSet()}. */
    private int k;

    /** Number of centroids requested per class in the initial clustering. */
    private int numberOfInitialCentroids;

    /** Current gradient step; halved after every iteration of {@link #reduceSet()}. */
    private double GradientStep;

    /** Current temperature; multiplied by 0.9 after every iteration of {@link #reduceSet()}. */
    private double Temperature;

    /* Other variables */

    /** Not used inside this class; kept because it is visible to subclasses. */
    protected int numberOfPrototypes;

    /** Number of distinct output values (classes) of the training set. */
    protected int numberOfClass;

    /**
     * Builds a new MSEGenerator from explicit parameter values.
     *
     * @param _trainingDataSet training set to be reduced
     * @param k number of neighbours used by the k-NN elimination rule
     * @param centroid number of initial centroids per class
     * @param gradStep initial gradient step
     * @param temp initial temperature
     */
    public MSEGenerator(PrototypeSet _trainingDataSet, int k, int centroid, double gradStep, double temp)
    {
        super(_trainingDataSet);
        algorithmName = "MSE";
        this.k = k;
        this.numberOfInitialCentroids = centroid;
        this.GradientStep = gradStep;
        this.Temperature = temp;
        // Without this the per-class loop of initDataSet() runs zero times and
        // the initial prototype set is empty. The Parameters-based constructor
        // already performed the same initialisation.
        this.numberOfClass = _trainingDataSet.getPosibleValuesOfOutput().size();
    }

    /**
     * Builds a new MSEGenerator reading its configuration from a parameter object.
     * The values are consumed in this order: k, number of initial centroids,
     * gradient step, temperature.
     *
     * @param t original prototype set to be reduced
     * @param parameters parameters of the algorithm
     */
    public MSEGenerator(PrototypeSet t, Parameters parameters)
    {
        super(t, parameters);
        algorithmName = "MSE";
        this.k = parameters.getNextAsInt();
        this.numberOfInitialCentroids = parameters.getNextAsInt();
        this.GradientStep = parameters.getNextAsDouble();
        this.Temperature = parameters.getNextAsDouble();
        this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size();

        System.out.println("Isaac dice: k= " + this.k + " cent = " + this.numberOfInitialCentroids + " gs= " + this.GradientStep + " t ="+ this.Temperature);
        System.out.println("Number of class= "+ this.numberOfClass);
    }

    /**
     * Computes exp(-d(x, p_index) / T) divided by the sum of exp(-d(x, p_j) / T)
     * over the selected prototypes p_j, where T is the current temperature.
     * <p>
     * The smallest selected distance is subtracted from every distance before
     * exponentiating. The quotient is mathematically unchanged, but at least one
     * term of the denominator is then exp(0) = 1, so the result no longer becomes
     * 0/0 = NaN when every exp(-d/T) underflows to zero at low temperature.
     *
     * @param x instance whose membership is evaluated
     * @param conjunto prototypes acting as cluster representatives
     * @param index position in {@code conjunto} of the prototype of interest
     * @param sameClassOnly when true, only prototypes whose first output equals
     *        the first output of {@code x} take part in the sum
     * @return the normalised weight of prototype {@code index}
     */
    private double softMembership(Prototype x, PrototypeSet conjunto, int index, boolean sameClassOnly) {
        final int size = conjunto.size();
        final double[] distances = new double[size];
        final boolean[] counted = new boolean[size];
        double nearest = Double.POSITIVE_INFINITY;

        for (int j = 0; j < size; j++) {
            Prototype candidate = conjunto.get(j);
            counted[j] = !sameClassOnly || x.getOutput(0) == candidate.getOutput(0);
            if (counted[j]) {
                distances[j] = Distance.d(x, candidate);
                if (distances[j] < nearest) {
                    nearest = distances[j];
                }
            }
        }

        double denominator = 0.0;
        for (int j = 0; j < size; j++) {
            if (counted[j]) {
                denominator += Math.exp(-(distances[j] - nearest) / this.Temperature);
            }
        }
        double numerator = Math.exp(-(distances[index] - nearest) / this.Temperature);
        return numerator / denominator;
    }

    /**
     * Membership probability Z_index(x): the weight exp(-d(x, p_index) / T)
     * normalised over all prototypes of {@code conjunto}.
     *
     * @param x instance whose membership is evaluated
     * @param conjunto prototype set
     * @param index position of the prototype of interest in {@code conjunto}
     * @return probability that {@code x} belongs to the cluster of prototype {@code index}
     */
    protected double probabilityBelongCluster(Prototype x, PrototypeSet conjunto, int index){
        return softMembership(x, conjunto, index, false);
    }

    /**
     * Desired probability Z*_index(x): zero when prototype {@code index} has a
     * different first output (class) than {@code x}; otherwise the weight
     * exp(-d(x, p_index) / T) normalised over the prototypes that share the
     * class of {@code x}.
     *
     * @param x instance whose desired membership is evaluated
     * @param conjunto prototype set
     * @param index position of the prototype of interest in {@code conjunto}
     * @return desired probability for prototype {@code index}
     */
    protected double desiredProbabilities(Prototype x, PrototypeSet conjunto, int index){
        if (x.getOutput(0) != conjunto.get(index).getOutput(0)) {
            return 0.0;
        }
        return softMembership(x, conjunto, index, true);
    }

    /**
     * Cost of the current prototypes for one instance: half the temperature
     * times the sum, over all prototypes, of the squared difference between the
     * desired and the actual membership probability.
     *
     * @param X training instance
     * @param conjunto current prototypes
     * @return value of the cost function for {@code X}
     */
    protected double costFunction (Prototype X, PrototypeSet conjunto){
        double squaredErrors = 0.0;
        for (int i = 0; i < conjunto.size(); i++) {
            double desired = desiredProbabilities(X, conjunto, i);
            double actual = probabilityBelongCluster(X, conjunto, i);
            squaredErrors += (desired - actual) * (desired - actual);
        }
        return squaredErrors * 0.5 * this.Temperature;
    }

    /**
     * Moves one prototype according to the update rule
     * P_i(t+1) = P_i(t) + dP_i(t+1), where
     * dP_i(t+1) = step * (X - P_i) * Z_i(X) * Sum_j (Z*_j(X) - Z_j(X)) * (delta_ij - Z_j(X))
     *             + 0.9 * dP_i(t).
     * Both {@code vectors.get(index)} and {@code lastIncrements[index]} are
     * updated in place.
     *
     * @param X instance taken from the training set
     * @param vectors current prototypes
     * @param lastIncrements increment applied to each prototype in the previous call
     * @param index position of the prototype to correct
     */
    protected void modifyLocation(Prototype X, PrototypeSet vectors, Prototype lastIncrements[], int index){
        Prototype target = vectors.get(index);
        double zIndex = probabilityBelongCluster(X, vectors, index);

        // (X - Pi) * Zi(X) * step, multiplied in the same order as before.
        Prototype scaledDifference = X.sub(target).mul(zIndex).mul(this.GradientStep);

        double sumatory = 0.0;
        for (int j = 0; j < vectors.size(); j++) {
            // Z_j(X) is needed twice per term; evaluate it once (and reuse Z_index).
            double zj = (j == index) ? zIndex : probabilityBelongCluster(X, vectors, j);
            double kroneckerDelta = (index == j) ? 1.0 : 0.0;
            sumatory += (desiredProbabilities(X, vectors, j) - zj) * (kroneckerDelta - zj);
        }

        Prototype increment = scaledDifference.mul(sumatory);
        Prototype momentumTerm = lastIncrements[index].mul(MOMENTUM);
        Prototype incrementFinal = increment.add(momentumTerm);
        // NOTE(review): thresholds are applied to the increment itself, as in the
        // original code; confirm that applyThresholds() does not discard negative
        // displacements.
        incrementFinal.applyThresholds();

        // Pi(t+1) = Pi(t) + increment
        target.set(target.add(incrementFinal));
        // Remember the increment for the momentum term of the next call.
        lastIncrements[index].set(incrementFinal);
    }

    /**
     * Leave-one-out pruning shared by {@link #initDataSet()} and
     * {@link #reduceSet()}: a prototype is dropped when the 1-NN accuracy on the
     * training set obtained without it is strictly greater than the accuracy
     * obtained with the complete candidate set. Every prototype is judged
     * against the complete set; {@code candidates} itself is not modified.
     * <p>
     * NOTE(review): the original comments describe removing prototypes whose
     * absence causes "no modification" of the accuracy, which would be
     * {@code >=}; the strict comparison of the original code is kept.
     *
     * @param candidates prototypes to examine
     * @return new set holding the prototypes that were kept, in their original order
     */
    private PrototypeSet discardHarmfulPrototypes(PrototypeSet candidates) {
        double baseline = KNN.classficationAccuracy(candidates, trainingDataSet);
        PrototypeSet kept = new PrototypeSet();
        for (int i = 0; i < candidates.size(); i++) {
            Prototype current = candidates.get(i);
            PrototypeSet leaveOneOut = candidates.without(current);
            double accuracyWithout = KNN.classficationAccuracy(leaveOneOut, trainingDataSet);
            if (!(accuracyWithout > baseline)) {
                kept.add(current);
            }
        }
        return kept;
    }

    /**
     * Builds the initial prototype set in three steps:
     * <ol>
     * <li>clustering: for every class with at least
     *     {@code numberOfInitialCentroids} examples, C-means is run on the
     *     examples of that class and the resulting centres become prototypes of
     *     that class;</li>
     * <li>Kohonen elimination rule: a prototype is dropped unless more than
     *     half of its k nearest training neighbours share its class;</li>
     * <li>Van de Merckt elimination rule: leave-one-out pruning, see
     *     {@link #discardHarmfulPrototypes(PrototypeSet)}.</li>
     * </ol>
     *
     * @return initial prototype set
     */
    protected PrototypeSet initDataSet()
    {
        PrototypeSet initial = new PrototypeSet();

        // --- Clustering: standard C-means applied to each class separately.
        for (int label = 0; label < this.numberOfClass; label++) {
            PrototypeSet members = trainingDataSet.getFromClass(label).clone();
            // Classes with fewer examples than requested centroids are skipped;
            // the emptiness test protects the samples[0] access below.
            if (members.size() == 0 || members.size() < this.numberOfInitialCentroids) {
                continue;
            }
            double[][] samples = members.prototypeSetTodouble();
            double[][] centers = new double[this.numberOfInitialCentroids][samples[0].length];

            PrototypeSet centroids = new PrototypeSet();
            // Cmeans writes the computed centres into 'centers' (per the original
            // comment); the returned cluster assignment was never used.
            centroids.Cmeans(samples, this.numberOfInitialCentroids, centers);
            centroids.doubleToprototypeSet(centers, label);
            initial.add(centroids);
        }

        // --- Kohonen elimination rule.
        int majority = this.k / 2 + 1;
        PrototypeSet survivors = new PrototypeSet();
        for (Prototype q : initial) {
            double classOfQ = q.getOutput(0);
            PrototypeSet neighbors = KNN.knn(q, trainingDataSet, this.k);
            int sameClass = 0;
            for (Prototype neighbor : neighbors) {
                if (neighbor.getOutput(0) == classOfQ) {
                    sameClass++;
                }
            }
            if (sameClass >= majority) {
                survivors.add(q);
            }
        }
        initial = survivors.clone();

        // --- Van de Merckt elimination rule.
        // NOTE(review): the original comments mention sorting the prototypes by
        // the size of their clusters before this step; no sorting is performed.
        PrototypeSet clean = discardHarmfulPrototypes(initial);

        System.out.println("Initial size = " + initial.size());
        System.out.println("Clean size = " + clean.size());
        return clean;
    }

    /**
     * Generates the reduced prototype set.
     * <p>
     * Starting from {@link #initDataSet()}, each iteration takes one training
     * instance (following the index order stored in {@code dsort}, cyclically),
     * corrects every prototype with {@link #modifyLocation} and evaluates
     * {@link #costFunction} for that instance. Iteration stops as soon as the
     * cost does not decrease or the gradient step has reached zero. After each
     * iteration the gradient step is halved and the temperature is multiplied
     * by 0.9; the configured values are restored when the loop ends so that the
     * generator can be run again. Finally the prototypes are thresholded and
     * pruned with {@link #discardHarmfulPrototypes(PrototypeSet)}.
     *
     * @return the pruned prototype set, or the unpruned one if pruning would
     *         leave no prototype
     */
    @SuppressWarnings("static-access")
    public PrototypeSet reduceSet()
    {
        System.out.print("\nThe algorithm is starting...\n Computing...\n");
        System.out.println("Number of class "+ this.numberOfClass);

        PrototypeSet outputDataSet = initDataSet();

        System.out.println("Accuracy % " +accuracy(outputDataSet,trainingDataSet));
        System.out.println("Reduction % " + (100-(outputDataSet.size()*100)/trainingDataSet.size()) );

        // Visiting order of the training instances (presumably initialised and
        // shuffled by the inherited helpers - confirm in PrototypeGenerator).
        int dsort[] = new int [trainingDataSet.size()];
        inic_vector(dsort);
        desordenar_vector(dsort);

        // Initially there are no previous increments: one all-zero prototype each.
        Prototype increments[] = new Prototype[outputDataSet.size()];
        for (int i = 0; i < increments.length; i++) {
            increments[i] = new Prototype(trainingDataSet.get(0).numberOfInputs(), 1);
            for (int j = 0; j < increments[i].numberOfInputs(); j++) {
                increments[i].setInput(j, 0);
            }
        }

        final double configuredGradientStep = this.GradientStep;
        final double configuredTemperature = this.Temperature;
        int it = 0;
        try {
            double error = Double.POSITIVE_INFINITY;
            boolean improving = true;
            while (improving) {
                Prototype instance = trainingDataSet.get(dsort[it % trainingDataSet.size()]);
                for (int i = 0; i < outputDataSet.size(); i++) {
                    modifyLocation(instance, outputDataSet, increments, i);
                }
                double newError = costFunction(instance, outputDataSet);

                // Finish when the error function stabilises or the step has
                // vanished. The previous test (newError < error || step == 0)
                // kept iterating forever once the step underflowed to zero.
                improving = newError < error && this.GradientStep > 0;
                if (improving) {
                    error = newError;
                }
                ++it;

                // Deterministic annealing.
                this.GradientStep *= GRADIENT_STEP_DECAY;
                this.Temperature *= TEMPERATURE_DECAY;
            }
        } finally {
            // Do not leave the decayed values behind in the configuration fields.
            this.GradientStep = configuredGradientStep;
            this.Temperature = configuredTemperature;
        }

        // Make sure all values are inside the interval [0,1].
        outputDataSet.applyThresholds();

        System.out.println("Iterations = "+ it);
        System.out.println("Accuracy % " +accuracy(outputDataSet,trainingDataSet));
        System.out.println("Reduction % " + (100-(outputDataSet.size()*100)/trainingDataSet.size()) );

        // 7. Detect and eliminate the inactive prototypes.
        PrototypeSet clean = discardHarmfulPrototypes(outputDataSet);
        if (clean.size() == 0) {
            // Never hand back an empty classifier; keep the unpruned prototypes.
            clean = outputDataSet;
        }

        System.out.println("Accuracy % " +accuracy(clean,trainingDataSet));
        System.out.println("Reduction % " + (100-(clean.size()*100)/trainingDataSet.size()) );

        // The pruned set (whose statistics were just reported) is the result;
        // previously it was computed and then discarded.
        return clean;
    }

    /**
     * Command-line entry point.
     * Arguments read by this method:
     * <ul>
     * <li>0: file name of the training data set to be condensed;</li>
     * <li>1: file name of the test data set;</li>
     * <li>2: seed of the random number generator;</li>
     * <li>10: number of blocks (validated only, the value is not used).</li>
     * </ul>
     * The generator itself is created with fixed values: k = 3, 20 initial
     * centroids, gradient step 0.05 and temperature 50.
     * <p>
     * NOTE(review): the usage text registered below lists swarm/particle
     * parameters and does not match the parameters of this algorithm, and the
     * "number of blocks" index (10) differs from the position given in the
     * original documentation (4) - confirm both against the launcher.
     *
     * @param args arguments of the main function
     */
    public static void main(String[] args)
    {
        Parameters.setUse("MSE", "<seed> <Number of neighbors>\n<Swarm size>\n<Particle Size>\n<MaxIter>\n<DistanceFunction>");
        Parameters.assertBasicArgs(args);

        PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]);
        PrototypeSet test = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]);

        long seed = Parameters.assertExtendedArgAsInt(args,2,"seed",0,Long.MAX_VALUE);
        MSEGenerator.setSeed(seed);

        // Kept for its argument check; the parsed value was never used.
        Parameters.assertExtendedArgAsInt(args,10,"number of blocks", 1, Integer.MAX_VALUE);

        MSEGenerator generator = new MSEGenerator(training, 3,20,0.05,50);
        PrototypeSet resultingSet = generator.execute();

        int accuracy1NN = KNN.classficationAccuracy(resultingSet, test);
        generator.showResultsOfAccuracy(Parameters.getFileName(), accuracy1NN, test);
    }
}