/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/*
MixtGauss.java
Isaac Triguero Velazquez.
Created by Isaac Triguero Velazquez 2-3-09
Copyright (c) 2009 __MyCompanyName__. All rights reserved.
*/
package keel.Algorithms.Instance_Generation.MixtGauss;
import keel.Algorithms.Instance_Generation.Basic.PrototypeSet;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerator;
import keel.Algorithms.Instance_Generation.Basic.Prototype;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm;
import keel.Algorithms.Instance_Generation.Chen.ChenGenerator;
import keel.Algorithms.Instance_Generation.HYB.HYBGenerator;
import keel.Algorithms.Instance_Generation.*;
import java.util.*;
import keel.Algorithms.Instance_Generation.utilities.*;
import keel.Algorithms.Instance_Generation.utilities.KNN.*;
import java.util.StringTokenizer;
/**
* MixtGauss prototype generator: each class of the training set is modelled by a
* mixture of Gaussians whose means and deviations are refined with an
* Expectation-Maximisation loop; the resulting means form the reduced set.
*
* @author Isaac Triguero
* @version 1.0
*/
public class MixtGaussGenerator extends PrototypeGenerator {
/* Own parameters of the algorithm */
private int numberOfGaussians; // Number of Gaussians per class in the condensed set.
protected int numberOfPrototypes; // Size of the condensed set, derived from the percentage parameter.
protected int numberOfClass; // Number of classes in the training data.
/**
* Builds a new MixtGaussGenerator algorithm.
* @param _trainingDataSet Original prototype set to be reduced.
* @param blocks Number of blocks (unused in this constructor).
* @param choice Selection choice (unused in this constructor).
*/
public MixtGaussGenerator(PrototypeSet _trainingDataSet, int blocks, String choice)
{
super(_trainingDataSet);
algorithmName="MixtGauss";
}
/**
* Builds a new MixtGaussGenerator algorithm.
* @param t Original prototype set to be reduced.
* @param parameters Parameters of the algorithm (only the % size of the reduced set).
*/
public MixtGaussGenerator(PrototypeSet t, Parameters parameters)
{
super(t, parameters);
algorithmName="MixtGauss";
this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size();
this.numberOfGaussians = parameters.getNextAsInt(); // Read from the parameter file.
this.numberOfPrototypes = getSetSizeFromPercentage(this.numberOfGaussians); // Convert the percentage into a set size.
// Compute the number of Gaussians per class.
// System.out.println("Number of prototypes = "+ this.numberOfPrototypes);
this.numberOfGaussians = this.numberOfPrototypes / this.numberOfClass;
if(this.numberOfGaussians == 0) this.numberOfGaussians = 1;
// System.out.println("Number of Gaussians = "+ this.numberOfGaussians);
}
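/*
 * A worked example of the sizing above, with hypothetical numbers and assuming
 * getSetSizeFromPercentage() maps the percentage onto the training-set size:
 * with 200 training instances, 2 classes and a parameter of 10, the condensed
 * set holds 20 prototypes, i.e. 20 / 2 = 10 Gaussians per class; if the integer
 * division yields 0, a single Gaussian per class is kept.
 */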
/**
* Standard normal probability density function, N(x) = exp(-x*x/2) / sqrt(2*pi).
* @param x Point at which the density is evaluated.
* @return Value of the standard normal PDF at x.
*/
public double pdfNormal(double x){
double result;
result = 1/ Math.sqrt(2*Math.PI);
result *= Math.exp(-0.5*(x*x));
return result;
}
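/*
 * Reference values of the standard normal density computed by pdfNormal
 * (plain arithmetic, shown only as a sanity check):
 *   pdfNormal(0) = 1/sqrt(2*pi)          ~ 0.3989
 *   pdfNormal(1) = exp(-0.5)/sqrt(2*pi)  ~ 0.2420
 */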
/**
* Density of a normal distribution with mean mu and deviation sigma,
* f(x) = (1/sigma) * N((x - mu) / sigma).
* @param x Point at which the density is evaluated.
* @param mu Mean of the Gaussian.
* @param sigma Deviation of the Gaussian.
* @return Value of the density at x.
*/
public double f_x(double x, double mu, double sigma){
double result = 1./sigma;
return result*pdfNormal( (x-mu)/sigma);
}
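/*
 * Worked example for f_x with hypothetical inputs: x = 1.0, mu = 0.5 and
 * sigma = 0.25 gives (1/0.25) * pdfNormal((1.0 - 0.5)/0.25)
 * = 4 * pdfNormal(2) ~ 4 * 0.0540 ~ 0.216.
 */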
/**
* Computes the per-class accuracy of a prototype set over the training data.
* @param actual Prototype set to be evaluated.
* @return Accuracy per class.
*/
public double[] CalculateAccuracy(PrototypeSet actual){
double current_accuracy[] = new double[this.numberOfClass];
for (int i=0; i< this.numberOfClass; i++){
if(actual.getFromClass(i).size() >0){
current_accuracy[i] = accuracy(actual.getFromClass(i),trainingDataSet.getFromClass(i));
}else{
current_accuracy[i] = 0;
}
}
return current_accuracy;
}
/**
* Expectation-Maximisation step, applied independently to each class.
* @param actual Current set of Gaussian means (one subset per class).
* @param SD Current set of Gaussian deviations, aligned with actual.
* @return Pair containing the updated means and deviations.
*/
public Pair<PrototypeSet, PrototypeSet> EMstep(PrototypeSet actual, PrototypeSet SD){
// Establish the index of each prototype, just in case.
for(int j=0;j<actual.size();j++){
//Now, I establish the index of each prototype.
actual.get(j).setIndex(j);
}
// The algorithm is applied independently to each class.
double pdfs[][] = new double[this.numberOfGaussians][];
double pdfsNum[][] = new double[this.numberOfGaussians][];
double sumPDFS[];
double acc = accuracy(actual, trainingDataSet);
double acc2 = acc-1;
// Initial AlfaMJ
double alfaMj[] = new double[this.numberOfGaussians];
for(int j=0; j< this.numberOfGaussians; j++){
alfaMj[j] = 1./this.numberOfGaussians;
}
for(int i=0; i<this.numberOfClass; i++){
PrototypeSet perClass = trainingDataSet.getFromClass(i);
if(perClass.size() > 0){
acc2 = acc-1; // Initially..
while(acc > acc2){ // Iterative process..
acc2 = acc;
//E step
sumPDFS = new double[perClass.size()];
// Initialisation.
for(int j=0; j< this.numberOfGaussians; j++){
pdfs[j] = new double[perClass.size()]; // Save the probability Pm(xt, Cj)
pdfsNum[j] = new double[perClass.size()];
}
for(int t=0; t< perClass.size(); t++){ // Iterate over every training instance of this class.
sumPDFS[t] =0;
for(int j=0; j< this.numberOfGaussians; j++){
double productorio = 1;
for(int k=0; k< perClass.get(0).numberOfInputs(); k++){
double value =f_x(perClass.get(t).getInput(k), actual.getFromClass(i).get(j).getInput(k), SD.getFromClass(i).get(j).getInput(k));
// System.out.println("f_x = " + value+ " ,"+ SD.get(i).getInput(k));
productorio *= value;
}
pdfsNum[j][t] = (alfaMj[j]* productorio*1.);
sumPDFS[t] += pdfsNum[j][t];
}// Numerators computed.
for(int j=0; j< this.numberOfGaussians; j++){
pdfs[j][t] = pdfsNum[j][t]/sumPDFS[t];
// System.out.println("PDF j = " +j+ " t= "+t+ " => " +pdfs[j][t]);
} // pdfs completed.
}
//M step
//Compute the new alfaMj and the means mu_kj.
double sum =0;
for(int j=0; j< this.numberOfGaussians; j++){
sum =0;
alfaMj[j] = 1./perClass.size();
for(int t=0; t< perClass.size(); t++){
sum += pdfs[j][t];
}
alfaMj[j] *= sum;
}
//--------- Compute the means.
PrototypeSet nuevo = new PrototypeSet(actual);
double denominator =0;
double numerator[][] = new double[perClass.get(0).numberOfInputs()][this.numberOfGaussians];
for(int k =0; k< perClass.get(0).numberOfInputs(); k++)
for(int j=0; j< this.numberOfGaussians; j++)
numerator[k][j] =0;
for(int j=0; j< this.numberOfGaussians; j++){
denominator =0;
for(int t=0; t< perClass.size(); t++){
for(int k =0; k< perClass.get(0).numberOfInputs(); k++){
numerator[k][j]+= pdfs[j][t]*perClass.get(t).getInput(k);
}
denominator += pdfs[j][t];
}
// System.out.println("Denominator = "+ denominator);
Prototype element = new Prototype(perClass.get(0));
//Building the mean prototype...
for(int k =0; k< perClass.get(0).numberOfInputs(); k++){
double media = numerator[k][j]/(denominator*1.);
// System.out.print(" , "+ media);
//System.out.print(", "+ numerator[k][j]);
element.setInput(k, media);
}
//System.out.println(" ");
int index = actual.getFromClass(i).get(j).getIndex();
nuevo.set(index, element);
}
// Compute the sigmas (deviations).
PrototypeSet newSD = new PrototypeSet(SD);
for(int k =0; k< perClass.get(0).numberOfInputs(); k++)
for(int j=0; j< this.numberOfGaussians; j++)
numerator[k][j] =0;
for(int j=0; j< this.numberOfGaussians; j++){
denominator =0;
for(int t=0; t< perClass.size(); t++){
for(int k =0; k< perClass.get(0).numberOfInputs(); k++){
int index = actual.getFromClass(i).get(j).getIndex();
numerator[k][j]+= pdfs[j][t]* Math.pow(perClass.get(t).getInput(k)- nuevo.get(index).getInput(k),2);
}
denominator += pdfs[j][t];
}
// System.out.println("Denominator = "+ denominator);
Prototype element = new Prototype(perClass.get(0));
//Building the deviation prototype...
for(int k =0; k< perClass.get(0).numberOfInputs(); k++){
double media = numerator[k][j]/(denominator*1.);
// System.out.print(" , "+ media);
//System.out.print(", "+ numerator[k][j]);
element.setInput(k, media);
}
//System.out.println(" ");
int index = actual.getFromClass(i).get(j).getIndex();
newSD.set(index, element); // Store the new deviation in newSD; it is adopted below only if accuracy improves.
}
// Keep iterating?
acc = accuracy(nuevo , trainingDataSet);
if(acc > acc2){
//System.out.println("Improvement");
actual = new PrototypeSet(nuevo);
for(int j=0;j<actual.size();j++){
//Now, I establish the index of each prototype.
actual.get(j).setIndex(j);
}
SD = new PrototypeSet(newSD);
}
//nuevo.print();
} //End While
}//end if
}//End for
Pair <PrototypeSet, PrototypeSet> salida = new Pair<PrototypeSet,PrototypeSet> (actual,SD);
return salida;
}
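/*
 * Summary of the updates performed by EMstep, written as plain equations
 * (t indexes the training instances of one class, j the Gaussians, k the
 * input attributes):
 *   E step:  p(j|t) = alfaMj[j] * prod_k f_x(x_tk, mu_jk, sigma_jk)
 *                     / sum_j' ( alfaMj[j'] * prod_k f_x(x_tk, mu_j'k, sigma_j'k) )
 *   M step:  alfaMj[j] = (1/T) * sum_t p(j|t)
 *            mu_jk     = sum_t p(j|t) * x_tk / sum_t p(j|t)
 *            sigma_jk  = sum_t p(j|t) * (x_tk - mu_jk)^2 / sum_t p(j|t)
 * Note that the sigma update stores the accumulated squared deviation (a
 * variance-like quantity); no square root is taken before f_x reuses it.
 */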
/**
* Generate a reduced prototype set by the MixtGaussGenerator method.
* @return Reduced set by MixtGaussGenerator's method.
*/
@SuppressWarnings({ "unchecked", "static-access" })
public PrototypeSet reduceSet()
{
System.out.print("\nThe algorithm MixtGauss is starting...\n Computing...\n");
PrototypeSet result = new PrototypeSet(); // In the condensed set, each class is represented by the same number of prototypes.
// Initialisation Process.
Prototype mean = new Prototype();
PrototypeSet SD = new PrototypeSet();
for(int i=0; i< this.numberOfClass; i++){
PrototypeSet classi = trainingDataSet.getFromClass(i);
if(classi.size() >0){
mean = classi.avg();
for(int j=0; j< this.numberOfGaussians ; j++){
Prototype Perturbance = new Prototype(mean);
Prototype sdP = new Prototype(mean);
for(int k=0; k< Perturbance.numberOfInputs(); k++){
Perturbance.setInput(k, mean.getInput(k)+RandomGenerator.Randdouble(-0.01, 0.01));
sdP.setInput(k,0.1); // Sigma is initially set to 0.1.
//double Rg = mean[i].getInput(k)*RandomGenerator.RandGaussian() + 0.1;
//Perturbance.setInput(k, Rg/500. + RandomGenerator.Randdouble(0, 1) ); // Normal distribution
}
result.add(Perturbance);
SD.add(sdP); // Gaussian. Mean and sigma..
}
}
}
result.applyThresholds();
// result.print();
// Iterative Optimisation
double current_accuracy[] = new double[this.numberOfClass];
current_accuracy = CalculateAccuracy(result);
double classes_improve = -1;
PrototypeSet PreviousGaussians = new PrototypeSet();
double previous_accuracy[] = new double[this.numberOfClass];
while(classes_improve !=0){
PreviousGaussians = new PrototypeSet(result);
Pair <PrototypeSet,PrototypeSet > salidaR = EMstep(PreviousGaussians, SD);
result = salidaR.first();
SD = salidaR.second();
previous_accuracy = current_accuracy;
current_accuracy = CalculateAccuracy(result);
classes_improve = 0;
for( int c=0; c< this.numberOfClass; c++){
if( current_accuracy[c] > previous_accuracy[c]){
classes_improve += 1;
}else if(current_accuracy[c] < previous_accuracy[c]){
for(int g=0; g< this.numberOfGaussians; g++){
int index = c*this.numberOfGaussians + g;
result.set(index, PreviousGaussians.get(index));
}
}
}
}
//Print Result.
PrototypeSet nominalPopulation = new PrototypeSet();
nominalPopulation.formatear(result);
System.err.println("\nTraining accuracy (nominal) % " + KNN.classficationAccuracy(nominalPopulation,trainingDataSet,1)*100./trainingDataSet.size() );
//result.print();
return result;
}
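/*
 * Layout assumption used by the per-class rollback inside reduceSet above: the
 * Gaussians of class c occupy positions c*numberOfGaussians up to
 * c*numberOfGaussians + numberOfGaussians - 1 of the result, which holds as
 * long as every class is represented in the training data (an empty class
 * would shift the indices).
 */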
/**
* General main for all the prototype generators.
* Arguments:
* 0: Filename with the training data set to be condensed.
* 1: Filename which contains the test data set.
* 2: Seed of the random number generator.
* Remaining arguments: algorithm parameters (the number of blocks is read from the extended argument list).
* @param args Arguments of the main function.
*/
public static void main(String[] args)
{
Parameters.setUse("MixtGauss", "<seed> <Number of neighbors>\n<Swarm size>\n<Particle Size>\n<MaxIter>\n<DistanceFunction>");
Parameters.assertBasicArgs(args);
PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]);
PrototypeSet test = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]);
long seed = Parameters.assertExtendedArgAsInt(args,2,"seed",0,Long.MAX_VALUE);
MixtGaussGenerator.setSeed(seed);
int blocks =Parameters.assertExtendedArgAsInt(args,10,"number of blocks", 1, Integer.MAX_VALUE);
//String[] parametersOfInitialReduction = Arrays.copyOfRange(args, 4, args.length);
//System.out.print(" swarm ="+swarm+"\n");
MixtGaussGenerator generator = new MixtGaussGenerator(training,blocks , "diameter");
PrototypeSet resultingSet = generator.execute();
//resultingSet.save(args[1]);
//int accuracyKNN = KNN.classficationAccuracy(resultingSet, test, k);
int accuracy1NN = KNN.classficationAccuracy(resultingSet, test);
generator.showResultsOfAccuracy(Parameters.getFileName(), accuracy1NN, test);
}
}