/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/*
CoBCGenerator.java
Isaac Triguero Velazquez.
Created by Isaac Triguero Velazquez 11-1-2011
*/
package keel.Algorithms.Semi_Supervised_Learning.CoBC;
import keel.Algorithms.Semi_Supervised_Learning.Basic.C45.*;
//import keel.Algorithms.Semi_Supervised_Learning.Basic.HandlerAdaBoost;
import keel.Algorithms.Semi_Supervised_Learning.Basic.HandlerNB;
import keel.Algorithms.Semi_Supervised_Learning.Basic.HandlerSMO;
import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeSet;
import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeGenerator;
import keel.Algorithms.Semi_Supervised_Learning.Basic.Prototype;
import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeGenerationAlgorithm;
import keel.Algorithms.Semi_Supervised_Learning.Basic.Utilidades;
import keel.Algorithms.Semi_Supervised_Learning.*;
import java.util.*;
import keel.Algorithms.Semi_Supervised_Learning.utilities.*;
import keel.Algorithms.Semi_Supervised_Learning.utilities.KNN.*;
import keel.Dataset.Attribute;
import keel.Dataset.Attributes;
import keel.Dataset.InstanceAttributes;
import keel.Dataset.InstanceSet;
import org.core.*;
import java.util.StringTokenizer;
/**
* This class implements the CoBC (Co-Training by Committee) wrapper. The committee can use NN (3-NN), NB, C4.5 or SMO as base classifiers.
* @author triguero
*
*/
public class CoBCGenerator extends PrototypeGenerator {
/*Own parameters of the algorithm*/
protected int numberOfClassifiers;
private int MaxIter;
protected int numberOfPrototypes;
protected int numberOfClass;
/** Parameters of the initial reduction process. */
private String[] paramsOfInitialReducction = null;
int pre[][]; // predictions of each committee member
double [][][] probabilities; // class posteriors given by each committee member
String EnsembleLearn = "Bagging"; // ensemble learning scheme
String BaseLearn = "NN"; // base classifier: NN, NB, C45 or SMO
int poolU = 100; // size of the unlabeled pool U' sampled in each round
/**
* Builds a new CoBCGenerator. This constructor keeps the generic parameter list shared by
* other prototype generators; only the training set is used here.
* @param _trainingDataSet Original prototype set to be reduced.
*/
public CoBCGenerator(PrototypeSet _trainingDataSet, int neigbors,int poblacion, int perc, int iteraciones, double c1, double c2, double vmax, double wstart, double wend)
{
super(_trainingDataSet);
algorithmName="CoBC";
}
/**
* Builds a new CoBCGenerator.
* @param t Original labeled prototype set.
* @param unlabeled Original unlabeled prototype set for SSL.
* @param test Test prototype set.
* @param parameters Parameters of the algorithm (maximum number of iterations, committee size, ensemble scheme, base learner and pool size).
*/
public CoBCGenerator(PrototypeSet t, PrototypeSet unlabeled, PrototypeSet test, Parameters parameters)
{
super(t,unlabeled, test, parameters);
algorithmName="CoBC";
this.MaxIter = parameters.getNextAsInt();
this.numberOfClassifiers = parameters.getNextAsInt();
this.EnsembleLearn = parameters.getNextAsString();
this.BaseLearn = parameters.getNextAsString();
this.poolU = parameters.getNextAsInt();
pre = new int[this.numberOfClassifiers][];
probabilities = new double[this.numberOfClassifiers][][];
//Last class is the Unknown
this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size();
System.out.print("\nIsaacSSL dice: " + this.numberOfClass +"\n");
}
/**
* This method implements the majority voting rule used to classify unlabeled data from the predictions pre[][].
* @param unlabeled Prototype set to be classified.
* @param pre Predictions of each committee member for every prototype (-1 means no prediction).
* @return Predicted class label for each prototype of the given set.
*/
double [] votingRule(PrototypeSet unlabeled, int pre[][]){
double predicho[] = new double[unlabeled.size()];
for(int i=0; i< unlabeled.size(); i++){ // voting rule
double perClass[] = new double [this.numberOfClass];
Arrays.fill(perClass, 0);
for(int j=0; j< this.numberOfClassifiers; j++){
if(pre[j][i]!=-1)
perClass[(int) pre[j][i]]++;
}
int Maximo = Integer.MIN_VALUE;
for (int j=0 ; j< this.numberOfClass; j++){
if(perClass[j]>Maximo){
Maximo =(int) perClass[j];
predicho[i] = j;
}
}
} // End voting Rule
return predicho;
}
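/*
Illustrative example of the voting rule above (made-up values, not taken from any dataset):
with numberOfClassifiers = 5 and numberOfClass = 3, if the committee predicts
pre[.][i] = {1, 1, 0, 2, 1} for prototype i, the per-class counts are [1, 3, 1],
so the majority vote assigns class 1 to that prototype. Members with no prediction
(pre[j][i] == -1) simply do not vote.
*/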
/**
* Classifies a prototype set with the committee of base classifiers and combines their outputs by majority voting.
* @param train Training (labeled) prototype set of each committee member.
* @param test Prototype set to be classified.
* @return Predicted class label for each prototype of test.
* @throws Exception if a base classifier fails.
*/
public double[] clasificar(PrototypeSet train[], PrototypeSet test) throws Exception{
double predicho[] = new double[test.size()];
for (int i=0; i<this.numberOfClassifiers; i++){
getSolicitaGarbageColector();
if(this.BaseLearn.equalsIgnoreCase("NN")){ // 3NN
// System.out.println("Executing KNN");
pre[i] = KNN.classify(train[i], test, 3, probabilities[i]);
}else if(this.BaseLearn.equalsIgnoreCase("NB")){ // NB
//System.out.println("Executing NB");
HandlerNB nb = new HandlerNB(train[i].prototypeSetTodouble(), train[i].prototypeSetClasses(), test.prototypeSetTodouble(), test.prototypeSetClasses(),this.numberOfClass);
pre[i] = nb.getPredictions();
probabilities[i] = nb.getProbabilities();
nb = null;
}else if(this.BaseLearn.equalsIgnoreCase("C45")){ //C45
//System.out.println("Executing C45");
InstanceSet uno = train[i].toInstanceSet();
InstanceSet dos = test.toInstanceSet();
C45 c45 = new C45(uno, dos); // C4.5 called on the already-built InstanceSets
pre[i] = c45.getPredictions();
probabilities[i] = c45.getProbabilities();
uno = null;
dos = null;
c45 = null;
}else if(this.BaseLearn.equalsIgnoreCase("SMO")){ //SMO
InstanceSet uno = train[i].toInstanceSet();
InstanceSet dos = test.toInstanceSet();
HandlerSMO SMO = new HandlerSMO(uno,dos, this.numberOfClass,String.valueOf(this.SEED)); // SMO
pre[i] = SMO.getPredictions(0);
probabilities[i] = SMO.getProbabilities();
// probabilities = SMO.getProbabilities();
uno = null;
dos = null;
SMO = null;
}
getSolicitaGarbageColector();
}
predicho = votingRule(test, pre); // in predicho we have the possible label, but we have to contrast this information with the confidence level.
return predicho;
}
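/*
Note on the two training routines: clasificar() above and EnsembleLearn() below both train one base
classifier per committee member (NN, NB, C4.5 or SMO) and combine the member outputs with votingRule().
The class posteriors produced by each member are stored in probabilities[i] and are reused later to
rank the unlabeled examples by confidence.
*/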
/**
* Trains the committee on the given labeled sets following the selected ensemble scheme (currently Bagging)
* and classifies the given prototype set by majority voting.
* @param Labeled Labeled prototype set of each committee member.
* @param test Prototype set to be classified.
* @return Predicted class label for each prototype of test.
* @throws Exception if a base classifier fails.
*/
public double [] EnsembleLearn(PrototypeSet Labeled[], PrototypeSet test) throws Exception{
double predicho[] = new double[test.size()];
if(this.EnsembleLearn.equalsIgnoreCase("Bagging")){
for(int i=0; i< this.numberOfClassifiers; i++){
getSolicitaGarbageColector();
if(this.BaseLearn.equalsIgnoreCase("NN")){ // 3NN
// System.out.println("Executing KNN");
pre[i] = KNN.classify(Labeled[i], test, 3, probabilities[i]);
}else if(this.BaseLearn.equalsIgnoreCase("NB")){ // NB
//System.out.println("Executing NB");
HandlerNB nb = new HandlerNB(Labeled[i].prototypeSetTodouble(), Labeled[i].prototypeSetClasses(), test.prototypeSetTodouble(), test.prototypeSetClasses(),this.numberOfClass);
pre[i] = nb.getPredictions();
probabilities[i] = nb.getProbabilities();
nb = null;
}else if(this.BaseLearn.equalsIgnoreCase("C45")){ //C45
//System.out.println("Executing C45");
InstanceSet uno = Labeled[i].toInstanceSet();
InstanceSet dos = test.toInstanceSet();
C45 c45 = new C45(uno, dos); // C4.5 called
pre[i] = c45.getPredictions();
probabilities[i] = c45.getProbabilities();
uno = null;
dos = null;
c45 = null;
}else if(this.BaseLearn.equalsIgnoreCase("SMO")){ //SMO
InstanceSet uno = Labeled[i].toInstanceSet();
InstanceSet dos = test.toInstanceSet();
HandlerSMO SMO = new HandlerSMO(uno,dos, this.numberOfClass,String.valueOf(this.SEED)); // SMO
pre[i] = SMO.getPredictions(0);
probabilities[i] = SMO.getProbabilities();
// probabilities = SMO.getProbabilities();
uno = null;
dos = null;
SMO = null;
}
getSolicitaGarbageColector();
} // End training.
predicho = votingRule(test, pre); // in predicho we have the possible label, but we have to contrast this information with the confidence level.
}else if(this.EnsembleLearn.equalsIgnoreCase("AdaBoost")){
/*
for(int i=0; i< this.numberOfClassifiers; i++){
getSolicitaGarbageColector();
HandlerAdaBoost adaboost = new HandlerAdaBoost(Labeled[i].toInstanceSet(), test.toInstanceSet(), this.numberOfClassifiers, this.numberOfClass);
predicho = adaboost.getPredictions();
// probabilities[i] = adaboost.getProbabilities();
adaboost = null;
getSolicitaGarbageColector();
}
*/
}
return predicho;
}
/**
* Explicitly asks the JVM for a garbage collection between classifier trainings to reduce memory pressure.
*/
public void getSolicitaGarbageColector(){
try{
Runtime basurero = Runtime.getRuntime();
basurero.gc(); // request garbage collection
}
catch( Exception e ){
e.printStackTrace();
}
}
/**
* Applies the CoBC algorithm: it builds a committee of N base classifiers (NN, NB, C4.5 or SMO) trained on
* bootstrap replicas of the labeled data and iteratively enlarges each labeled set with the most
* confidently predicted unlabeled examples.
* @return A pair with the labeled transductive set and the labeled test set.
* @throws Exception if a base classifier fails.
*/
public Pair<PrototypeSet, PrototypeSet> applyAlgorithm() throws Exception
{
System.out.print("\nThe algorithm CoBC is starting...\n Computing...\n");
PrototypeSet labeled;
PrototypeSet unlabeled;
labeled = new PrototypeSet(trainingDataSet.getAllDifferentFromClass(this.numberOfClass)); // Selecting labeled prototypes from the training set.
unlabeled = new PrototypeSet(trainingDataSet.getFromClass(this.numberOfClass));
// establishing the indexes
for (int j=0; j< labeled.size();j++){
labeled.get(j).setIndex(j);
}
for (int j=0; j< unlabeled.size();j++){
unlabeled.get(j).setIndex(j);
}
// In order to avoid problems with C4.5 and NB, give every unlabeled prototype a valid (dummy) class value.
for(int p=0; p<unlabeled.size(); p++){
unlabeled.get(p).setFirstOutput(0); // all with a valid value
}
// 1), 2) Compute the class prior probabilities and, from them, the per-class growth rate kj.
double kj[] = new double[this.numberOfClass];
double minimo = Double.MAX_VALUE;
for(int i=0; i<this.numberOfClass; i++){
if(labeled.getFromClass(i).size() == 0){
kj[i] = 0;
}else{
kj[i] = (labeled.getFromClass(i).size()*1./labeled.size());
}
if(kj[i]<minimo && kj[i]!=0){
minimo = kj[i];
}
//System.out.println(kj[i]);
}
// Each kj[i] is normalized by the smallest non-zero prior and rounded, so the least frequent class grows by one example per round.
// maximoKj accumulates the total number of examples that may be labeled per committee member in each iteration.
double maximoKj = 0;
for(int i=0; i<this.numberOfClass; i++){
kj[i] = Math.round(kj[i]/minimo);
maximoKj+=kj[i];
//System.out.println((int)kj[i]);
}
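/*
Illustrative example of the growth-rate computation above (made-up figures, not from any dataset):
with three classes whose priors in L are 0.6, 0.3 and 0.1, the smallest non-zero prior is 0.1, so
kj = {round(0.6/0.1), round(0.3/0.1), round(0.1/0.1)} = {6, 3, 1} and maximoKj = 10. Each committee
member may therefore label up to 6 + 3 + 1 = 10 new examples per iteration, roughly preserving the
class distribution of L, and self-labeling stops once no more than maximoKj unlabeled examples remain.
*/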
// 3) construct an initial committee of N classifiers, Ensemble Learn (L,BaseLearner, N),
PrototypeSet labeledBoostrapped[] = new PrototypeSet[this.numberOfClassifiers]; // one bootstrap replica per classifier.
for(int i=0; i< this.numberOfClassifiers; i++){
labeledBoostrapped[i] = new PrototypeSet(labeled.resample()); //L_i <-- Bootstrap(L)
}
PrototypeSet tranductive = new PrototypeSet(this.transductiveDataSet.clone());
PrototypeSet test = new PrototypeSet(this.testDataSet.clone());
double traPrediction[] = null;
double tstPrediction[] = null;
int aciertoTrs = 0;
int aciertoTst = 0;
probabilities = new double[this.numberOfClassifiers][tranductive.size()][this.numberOfClass];
traPrediction = clasificar(labeledBoostrapped, tranductive);
for(int i=0; i<tranductive.size(); i++){
if(tranductive.get(i).getOutput(0) == traPrediction[i]){
aciertoTrs++;
}
tranductive.get(i).setFirstOutput(traPrediction[i]);
}
// Test phase
probabilities = new double[this.numberOfClassifiers][test.size()][this.numberOfClass];
tstPrediction = clasificar(labeledBoostrapped, test);
for(int i=0; i<test.size(); i++){
if(test.get(i).getOutput(0) == tstPrediction[i]){
aciertoTst++;
}
test.get(i).setFirstOutput(tstPrediction[i]);
}
System.out.println("Initial Labeled size "+ labeledBoostrapped[1].size());
System.out.println("Initial % de acierto TRS = "+ (aciertoTrs*100.)/transductiveDataSet.size());
System.out.println("Initial% de acierto TST = "+ (aciertoTst*100.)/testDataSet.size());
// 6) Starting the learning process.
for (int i=0; i<this.MaxIter && unlabeled.size()>maximoKj; i++){
PrototypeSet labeledPrima[] = new PrototypeSet[this.numberOfClassifiers]; // 7) L't <- 0
double confidence[][] = new double[unlabeled.size()][this.numberOfClass];
PrototypeSet UnlabeledPrima[] = new PrototypeSet[this.numberOfClassifiers];
for(int j=0; j< this.numberOfClassifiers && unlabeled.size()>maximoKj; j++){ // for each committee.
labeledPrima[j] = new PrototypeSet();
UnlabeledPrima[j] = new PrototypeSet(); // 10) RandomSubsample (U, poolU).
unlabeled.randomize(); // shuffle the unlabeled set.
//System.out.println("UNLabeled size BEFORE = "+unlabeled.size());
for(int z=0; z< this.poolU && unlabeled.size()>0; z++){
Prototype remove = unlabeled.remove(0); // always take the head of the shuffled set, so exactly poolU prototypes are sampled (or fewer if U runs out).
UnlabeledPrima[j].add(remove);
}
for (int z=0; z< UnlabeledPrima[j].size();z++){
UnlabeledPrima[j].get(z).setIndex(z);
}
// System.out.println("UNLabeled size AFTER = "+unlabeled.size());
// 11) Classify UnlabeledPrima with the ensembleLearn...
probabilities = new double[this.numberOfClassifiers][UnlabeledPrima[j].size()][this.numberOfClass];
double prediction[] = EnsembleLearn(labeledBoostrapped,UnlabeledPrima[j]);
PrototypeSet S = new PrototypeSet(); // most confident prototypes selected from U'.
// 12) Select most confident examples
double maximoClase[][] = new double[this.numberOfClass][];
int indexClase[][] = new int[this.numberOfClass][];
for (int z=0 ; z< this.numberOfClass; z++){
// maximoClase[j] = new double[(int) kj[j]];
indexClase[z] = new int[(int) kj[z]];
//Arrays.fill(maximoClase[j], Double.MIN_VALUE);
Arrays.fill(indexClase[z], -1);
}
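// indexClase[z] will hold the positions (within U'_j) of the kj[z] most confident predictions for class z; -1 marks unfilled slots.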
for (int z=0; z< this.numberOfClass; z++){
// for each class, sort the probability vector in ascending order, keeping track of positions.
double [] aOrdenar = new double[UnlabeledPrima[j].size()];
int [] position = new int [UnlabeledPrima[j].size()];
for(int q=0;q<UnlabeledPrima[j].size(); q++){
aOrdenar[q] = probabilities[j][q][z];
position[q] = q;
}
Utilidades.quicksort(aOrdenar, position); // ascending order!
/*
for(int q=0; q<unlabeled.size(); q++){
System.out.print(position[q]+", ");
}
*/
//System.out.println(" ");
for(int w=UnlabeledPrima[j].size()-1; w>=UnlabeledPrima[j].size()-kj[z] && w>=0;w--){
indexClase[z][(UnlabeledPrima[j].size()-1)-w] = position[w];
}
}
// indexClase now points to the most confident prototypes of each class.
// Set U' = U'-S, and L= L U S.
PrototypeSet toClean = new PrototypeSet();
for (int z=0 ; z< this.numberOfClass; z++){
//if(contadorClase[j]< kj[j]){
for(int w=0; w<kj[z];w++){
// Assign a pseudo-label to the selected prototype and keep it only if the label matches the target class z.
if(indexClase[z][w]!=-1){
Prototype nearUnlabeled = new Prototype(UnlabeledPrima[j].get(indexClase[z][w]));
if(this.BaseLearn.equalsIgnoreCase("NN")){
Prototype clase = labeled.nearestTo(nearUnlabeled);
nearUnlabeled.setFirstOutput(clase.getOutput(0));
if(clase.getOutput(0)==z){
labeledPrima[j].add(new Prototype(nearUnlabeled));
}else{
toClean.add(UnlabeledPrima[j].get(indexClase[z][w]));
}
// contadorClase[(int)clase.getOutput(0)]++;
}else if(this.BaseLearn.equalsIgnoreCase("C45") || this.BaseLearn.equalsIgnoreCase("NB") || this.BaseLearn.equalsIgnoreCase("SMO")){
nearUnlabeled.setFirstOutput(pre[j][indexClase[z][w]]);
if(pre[j][indexClase[z][w]]==z){
labeledPrima[j].add(new Prototype(nearUnlabeled));
}else{
toClean.add(UnlabeledPrima[j].get(indexClase[z][w]));
}
// contadorClase[pre[indexClase[j]]]++;
}
}
}
}
// Then remove from U' the prototypes that were either added to L' or discarded.
for (int z=0 ; z< labeledPrima[j].size(); z++){
//unlabeled.removeWithoutClass(labeledPrima.get(j));
UnlabeledPrima[j].borrar(labeledPrima[j].get(z).getIndex());
}
for (int z=0 ; z<toClean.size(); z++){
// unlabeled.remove(toClean.get(j));
UnlabeledPrima[j].borrar(toClean.get(z).getIndex());
}
// Replenish U with the rest of prototypes of U'.
for(int z=0; z< UnlabeledPrima[j].size();z++){
unlabeled.add(UnlabeledPrima[j].get(z));
}
} // End For (for each committee).
for(int j=0; j< this.numberOfClassifiers; j++){
// System.out.println("Added Li prototypes: "+ labeledPrima[j].size());
if(labeledPrima[j]!=null) labeledBoostrapped[j].add(labeledPrima[j]); // U may have been exhausted before this member's turn, so labeledPrima[j] can be null.
}
// Update the accuracy estimates (the per-iteration evaluation below is disabled).
tranductive = new PrototypeSet(this.transductiveDataSet.clone());
test = new PrototypeSet(this.testDataSet.clone());
traPrediction = null;
tstPrediction = null;
aciertoTrs = 0;
aciertoTst = 0;
/*
probabilities = new double[this.numberOfClassifiers][tranductive.size()][this.numberOfClass];
traPrediction = clasificar(labeledBoostrapped, tranductive);
for(int m=0; m<tranductive.size(); m++){
if(tranductive.get(m).getOutput(0) == traPrediction[m]){
aciertoTrs++;
}
tranductive.get(m).setFirstOutput(traPrediction[m]);
}
// Test phase
probabilities = new double[this.numberOfClassifiers][test.size()][this.numberOfClass];
tstPrediction = clasificar(labeledBoostrapped, test);
for(int m=0; m<test.size(); m++){
if(test.get(m).getOutput(0) == tstPrediction[m]){
aciertoTst++;
}
test.get(m).setFirstOutput(tstPrediction[m]);
}
System.out.println("update Labeled size "+ labeledBoostrapped[1].size());
System.out.println("update - % de acierto TRS = "+ (aciertoTrs*100.)/transductiveDataSet.size());
System.out.println("update -% de acierto TST = "+ (aciertoTst*100.)/testDataSet.size());
*/
} // end FOR iterations.
// Combining stage.
/*
PrototypeSet tranductive = new PrototypeSet(this.transductiveDataSet.clone());
PrototypeSet test = new PrototypeSet(this.testDataSet.clone());
double traPrediction[] = null;
double tstPrediction[] = null;
int aciertoTrs = 0;
int aciertoTst = 0;
*/
tranductive = new PrototypeSet(this.transductiveDataSet.clone());
test = new PrototypeSet(this.testDataSet.clone());
traPrediction = null;
tstPrediction = null;
aciertoTrs = 0;
aciertoTst = 0;
probabilities = new double[this.numberOfClassifiers][tranductive.size()][this.numberOfClass];
traPrediction = clasificar(labeledBoostrapped, tranductive);
for(int i=0; i<tranductive.size(); i++){
if(tranductive.get(i).getOutput(0) == traPrediction[i]){
aciertoTrs++;
}
tranductive.get(i).setFirstOutput(traPrediction[i]);
}
// Test phase
probabilities = new double[this.numberOfClassifiers][test.size()][this.numberOfClass];
tstPrediction = clasificar(labeledBoostrapped, test);
for(int i=0; i<test.size(); i++){
if(test.get(i).getOutput(0) == tstPrediction[i]){
aciertoTst++;
}
test.get(i).setFirstOutput(tstPrediction[i]);
}
System.out.println("Labeled size "+ labeledBoostrapped[1].size());
System.out.println("Final - % de acierto TRS = "+ (aciertoTrs*100.)/transductiveDataSet.size());
System.out.println("Final -% de acierto TST = "+ (aciertoTst*100.)/testDataSet.size());
return new Pair<PrototypeSet,PrototypeSet>(tranductive,test);
}
/**
* General main for all the prototype generators.
* Arguments:
* 0: Filename with the training data set to be condensed.
* 1: Filename which contains the test data set.
* 3: Seed of the random number generator. Always.
* This generator is normally launched through the KEEL experiment framework, so this main is intentionally empty.
* @param args Arguments of the main function.
*/
public static void main(String[] args)
{ }
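/*
A minimal usage sketch (hypothetical: the prototype sets and the Parameters object are assumed to be
prepared by the KEEL framework from the CoBC configuration file; they are not built here):

PrototypeSet labeledTrain = ...;   // labeled portion of the training data
PrototypeSet unlabeledTrain = ...; // unlabeled portion, marked with the "unknown" class
PrototypeSet testSet = ...;        // test data
Parameters parameters = ...;       // maximum iterations, committee size, ensemble scheme, base learner, pool size

CoBCGenerator generator = new CoBCGenerator(labeledTrain, unlabeledTrain, testSet, parameters);
Pair<PrototypeSet, PrototypeSet> result = generator.applyAlgorithm();
// result holds the transductive set and the test set, both relabeled by the final committee.
*/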
}