/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/*
SNNRCE.java
Isaac Triguero Velazquez.
Created by Isaac Triguero Velazquez 11-1-2011
Copyright (c) 2008 __MyCompanyName__. All rights reserved.
*/
package keel.Algorithms.Semi_Supervised_Learning.SNNRCE;
import keel.Algorithms.Semi_Supervised_Learning.Basic.NormalDistribution;
import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeSet;
import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeGenerator;
import keel.Algorithms.Semi_Supervised_Learning.Basic.Prototype;
import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeGenerationAlgorithm;
import keel.Algorithms.Semi_Supervised_Learning.*;
import java.util.*;
import keel.Algorithms.Semi_Supervised_Learning.utilities.*;
import keel.Algorithms.Semi_Supervised_Learning.utilities.KNN.*;
import keel.Dataset.Attributes;
import org.core.*;
import org.core.*;
import java.util.StringTokenizer;
/**
 * Implements the SNNRCE semi-supervised algorithm (self-training nearest
 * neighbour rule using cut edges).
 * <p>
 * The training set holds labeled and unlabeled prototypes together; the
 * unlabeled ones carry the class index {@code numberOfClass}. The algorithm:
 * <ol>
 * <li>labels every unlabeled prototype whose relative-neighbourhood-graph
 *     neighbours among the labeled data all share one class;</li>
 * <li>runs a nearest-neighbour self-training loop bounded per class by the
 *     initial class proportions;</li>
 * <li>relabels labeled prototypes whose weighted cut-edge ratio is above a
 *     critical value derived from {@code threshold};</li>
 * <li>labels the remaining unlabeled prototypes with the 1-NN rule.</li>
 * </ol>
 * @author triguero
 */
public class SNNRCEGenerator extends PrototypeGenerator {

    /* Own parameters of the algorithm */
    private int numberOfselectedExamples;
    private String classifier;
    private double threshold;
    protected int numberOfPrototypes; // Particle size is the percentage
    protected int numberOfClass;

    /** Parameters of the initial reduction process. */
    private String[] paramsOfInitialReducction = null;

    /**
     * Build a new SNNRCEGenerator Algorithm.
     * Only {@code _trainingDataSet} is used; every other argument is accepted
     * for signature compatibility and ignored.
     * @param _trainingDataSet Original prototype set.
     * @param neigbors ignored.
     * @param poblacion ignored.
     * @param perc ignored.
     * @param iteraciones ignored.
     * @param c1 ignored.
     * @param c2 ignored.
     * @param vmax ignored.
     * @param wstart ignored.
     * @param wend ignored.
     */
    public SNNRCEGenerator(PrototypeSet _trainingDataSet, int neigbors,int poblacion, int perc, int iteraciones, double c1, double c2, double vmax, double wstart, double wend)
    {
        super(_trainingDataSet);
        algorithmName="SNNRCE";
    }

    /**
     * Build a new SNNRCEGenerator Algorithm.
     * @param t Original (labeled + unlabeled) training prototype set.
     * @param unlabeled Original unlabeled prototype set for SSL.
     * @param test Test prototype set.
     * @param parameters Parameters of the algorithm; read in order: the number
     *        of selected examples (int) and the relabelling threshold (double).
     */
    public SNNRCEGenerator(PrototypeSet t, PrototypeSet unlabeled, PrototypeSet test, Parameters parameters)
    {
        super(t,unlabeled, test, parameters);
        algorithmName="SNNRCE";
        this.numberOfselectedExamples = parameters.getNextAsInt();
        this.threshold = parameters.getNextAsDouble();

        // Last class is the Unknown: index numberOfClass marks unlabeled data.
        this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size();

        System.out.print("\nIsaacSSL dice: " + this.numberOfselectedExamples+ ", "+ this.numberOfClass +"\n");
    }

    /**
     * Runs SNNRCE on the training set and classifies the transductive and
     * test sets with the resulting labeled set (1-NN rule).
     * @return pair (transductive set, test set), both copies of the original
     *         sets with their first output replaced by the predicted class.
     */
    public Pair<PrototypeSet, PrototypeSet> applyAlgorithm()
    {
        System.out.print("\nThe algorithm SELF TRAINING is starting...\n Computing...\n");

        // Split the training set: class index numberOfClass means "unlabeled".
        PrototypeSet labeled = new PrototypeSet(trainingDataSet.getAllDifferentFromClass(this.numberOfClass));
        PrototypeSet unlabeled = new PrototypeSet(trainingDataSet.getFromClass(this.numberOfClass));

        // Accuracy with initial labeled data.
        System.out.println("AccTrs with initial labeled data ="+ KNN.classficationAccuracy(labeled,this.transductiveDataSet,1)*100./this.transductiveDataSet.size());
        System.out.println("AccTst with initial labeled data ="+ KNN.classficationAccuracy(labeled,this.testDataSet,1)*100./this.testDataSet.size());

        System.out.println("Labeled size = " +labeled.size());
        System.out.println("Unlabeled size = " + unlabeled.size());

        // Steps 1-2: class proportions and the total per-round quota.
        double[] proportion = classProportions(labeled);
        double maximoKj = totalQuota(proportion);

        // Step 3: label unlabeled data that has no cut edge.
        labelByCutEdges(labeled, unlabeled);

        // Step 4: standard self-training.
        PrototypeSet selfLabeled = selfTrain(labeled, unlabeled, proportion, maximoKj);
        labeled.add(selfLabeled);

        // Steps 6-7: relabel stage.
        relabelByCutEdgeRatio(labeled);

        // Step 8: apply the NN rule to the rest of the unlabeled prototypes.
        for (int p = 0; p < unlabeled.size(); p++) {
            Prototype nearest = labeled.nearestTo(unlabeled.get(p));
            unlabeled.get(p).setFirstOutput(nearest.getOutput(0));
        }
        labeled.add(unlabeled.clone());

        // Results: return the classification done on copies of both sets.
        PrototypeSet tranductive = new PrototypeSet(this.transductiveDataSet.clone());
        PrototypeSet test = new PrototypeSet(this.testDataSet.clone());

        for (int i = 0; i < this.transductiveDataSet.size(); i++) {
            tranductive.get(i).setFirstOutput((labeled.nearestTo(this.transductiveDataSet.get(i))).getOutput(0));
        }
        for (int i = 0; i < this.testDataSet.size(); i++) {
            test.get(i).setFirstOutput((labeled.nearestTo(this.testDataSet.get(i))).getOutput(0));
        }

        // Transductive accuracy
        System.out.println("AccTrs ="+KNN.classficationAccuracy(labeled,this.transductiveDataSet,1)*100./this.transductiveDataSet.size());
        // Test accuracy
        System.out.println("AccTst ="+KNN.classficationAccuracy(labeled,this.testDataSet,1)*100./this.testDataSet.size());

        return new Pair<PrototypeSet,PrototypeSet>(tranductive,test);
    }

    /** Returns, per class, the fraction of labeled prototypes in that class (0 for empty classes). */
    private double[] classProportions(PrototypeSet labeled)
    {
        double[] proportion = new double[this.numberOfClass];
        for (int i = 0; i < this.numberOfClass; i++) {
            int inClass = labeled.getFromClass(i).size();
            proportion[i] = (inClass == 0) ? 0 : (inClass * 1. / labeled.size());
        }
        return proportion;
    }

    /**
     * Returns the sum over classes of round(proportion / smallest non-zero
     * proportion); used as the lower bound on the unlabeled-set size below
     * which self-training stops.
     */
    private double totalQuota(double[] proportion)
    {
        double minimo = Double.MAX_VALUE;
        for (int i = 0; i < proportion.length; i++) {
            if (proportion[i] != 0 && proportion[i] < minimo) {
                minimo = proportion[i];
            }
        }
        double total = 0;
        for (int i = 0; i < proportion.length; i++) {
            total += Math.round(proportion[i] / minimo);
        }
        return total;
    }

    /** Returns the full distance matrix of a set; the diagonal is left at 0. */
    private static double[][] pairwiseDistances(PrototypeSet set)
    {
        int n = set.size();
        double[][] d = new double[n][n];
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                if (i != j) {
                    d[i][j] = Distance.absoluteDistance(set.get(i), set.get(j));
                }
            }
        }
        return d;
    }

    /**
     * Relative-neighbourhood test between a point P and labeled point q:
     * there is an edge unless some other labeled point n is closer to both,
     * i.e. d(P,q) &gt; max(d(P,n), d(q,n)).
     * @param distFromP distances from P to every labeled point.
     * @param distFromQ distances from q to every labeled point.
     * @param q index of the candidate neighbour.
     * @param skip index of P when P is itself in the labeled set, or -1.
     */
    private static boolean isRelativeNeighbor(double[] distFromP, double[] distFromQ, int q, int skip)
    {
        double d = distFromP[q];
        for (int n = 0; n < distFromP.length; n++) {
            if (n == q || n == skip) {
                continue;
            }
            if (d > Math.max(distFromP[n], distFromQ[n])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Step 3: every unlabeled prototype whose relative neighbours among the
     * original labeled data all share one class is copied into
     * {@code labeled} with that class and removed from {@code unlabeled}.
     */
    private void labelByCutEdges(PrototypeSet labeled, PrototypeSet unlabeled)
    {
        int originalLabeledSize = labeled.size();
        int unlabeledSize = unlabeled.size();

        // Distances from each unlabeled example to each labeled one.
        double[][] toLabeled = new double[unlabeledSize][originalLabeledSize];
        for (int p = 0; p < unlabeledSize; p++) {
            for (int q = 0; q < originalLabeledSize; q++) {
                toLabeled[p][q] = Distance.absoluteDistance(unlabeled.get(p), labeled.get(q));
            }
        }
        // The neighbourhood test also needs labeled-to-labeled distances;
        // indexing the unlabeled-to-labeled matrix with a labeled row index
        // would compare against the wrong point (and can run out of bounds).
        double[][] amongLabeled = pairwiseDistances(labeled);

        for (int p = 0; p < unlabeledSize; p++) {
            boolean cutEdge = false;
            int neighbors = 0;
            double clase = 0;

            for (int q = 0; q < originalLabeledSize && !cutEdge; q++) {
                if (!isRelativeNeighbor(toLabeled[p], amongLabeled[q], q, -1)) {
                    continue;
                }
                neighbors++;
                double label = labeled.get(q).getOutput(0);
                if (neighbors == 1) {
                    clase = label;
                } else if (label != clase) {
                    cutEdge = true; // neighbourhood mixes classes
                }
            }

            if (!cutEdge && neighbors > 0) {
                // Confident example: label it with the class of its neighbourhood.
                Prototype confident = new Prototype(unlabeled.get(p));
                confident.setFirstOutput(clase);
                labeled.add(confident);
            }
        }

        // Removal is deferred so indices into unlabeled stay valid above.
        for (int p = originalLabeledSize; p < labeled.size(); p++) {
            unlabeled.removeWithoutClass(labeled.get(p));
        }
    }

    /**
     * Step 4: self-training. Each round picks, for every class still under
     * its quota, the unlabeled prototype with the highest normalised
     * confidence for that class, and labels it with the class of its nearest
     * labeled prototype. Stops when every class reached its quota or the
     * unlabeled set is smaller than {@code maximoKj}.
     * @return the newly labeled prototypes (not yet added to {@code labeled}).
     */
    private PrototypeSet selfTrain(PrototypeSet labeled, PrototypeSet unlabeled, double[] proportion, double maximoKj)
    {
        // Nmax for each class.
        double[] nmax = new double[this.numberOfClass];
        for (int i = 0; i < this.numberOfClass; i++) {
            nmax[i] = proportion[i] * unlabeled.size();
            System.out.println(nmax[i]);
        }

        // labeled is not modified inside the loop, so the per-class subsets
        // can be built once instead of per unlabeled example per round.
        PrototypeSet[] byClass = new PrototypeSet[this.numberOfClass];
        for (int j = 0; j < this.numberOfClass; j++) {
            byClass[j] = labeled.getFromClass(j);
        }

        PrototypeSet labeledPrima = new PrototypeSet();
        double[][] confidence = new double[unlabeled.size()][this.numberOfClass];
        double[] contadorClase = new double[this.numberOfClass];
        boolean finished = false;

        while (!finished) {
            // Confidence of each unlabeled example for each class.
            for (int q = 0; q < unlabeled.size(); q++) {
                Prototype candidate = unlabeled.get(q);
                double sumatoria = 0;
                for (int j = 0; j < this.numberOfClass; j++) {
                    if (byClass[j].size() > 0) {
                        Prototype near = new Prototype(byClass[j].nearestTo(candidate));
                        confidence[q][j] = Math.exp(-1 * (Distance.absoluteDistance(near, candidate)));
                        sumatoria += confidence[q][j];
                    } else {
                        confidence[q][j] = 0;
                    }
                }
                // Skip normalisation when everything underflowed, to avoid 0/0 = NaN.
                if (sumatoria > 0) {
                    for (int j = 0; j < this.numberOfClass; j++) {
                        confidence[q][j] /= sumatoria;
                    }
                }
            }

            // Most confident unlabeled example per class.
            double[] maximoClase = new double[this.numberOfClass];
            int[] indexClase = new int[this.numberOfClass];
            for (int j = 0; j < this.numberOfClass; j++) {
                // Double.MIN_VALUE is the smallest positive double, not a lower bound.
                maximoClase[j] = Double.NEGATIVE_INFINITY;
            }
            for (int q = 0; q < unlabeled.size(); q++) {
                for (int j = 0; j < this.numberOfClass; j++) {
                    if (confidence[q][j] > maximoClase[j]) {
                        maximoClase[j] = confidence[q][j];
                        indexClase[j] = q;
                    }
                }
            }

            for (int j = 0; j < this.numberOfClass; j++) {
                if (contadorClase[j] < nmax[j]) {
                    Prototype nearUnlabeled = new Prototype(unlabeled.get(indexClase[j]));
                    Prototype clase = labeled.nearestTo(nearUnlabeled);
                    nearUnlabeled.setFirstOutput(clase.getOutput(0));
                    labeledPrima.add(new Prototype(nearUnlabeled));
                    // The counter follows the assigned class, which may differ from j.
                    contadorClase[(int) clase.getOutput(0)]++;
                }
            }

            // Then clean the unlabeled set.
            for (int j = 0; j < labeledPrima.size(); j++) {
                unlabeled.removeWithoutClass(labeledPrima.get(j));
            }

            // Finished when every class reached its quota ...
            finished = true;
            for (int j = 0; j < this.numberOfClass; j++) {
                if (contadorClase[j] < nmax[j]) {
                    finished = false;
                    break;
                }
            }
            // ... or too few unlabeled examples remain.
            if (unlabeled.size() < maximoKj) {
                finished = true;
            }
        }
        return labeledPrima;
    }

    /**
     * Steps 6-7: builds the relative neighbourhood graph over
     * {@code labeled}, computes for each prototype the ratio between the
     * weights of its cut edges (neighbour of another class) and of its
     * non-cut edges, with weight 1/(1+distance), and gives every prototype
     * whose ratio exceeds the critical value the class of its nearest
     * prototype of a different class.
     */
    private void relabelByCutEdgeRatio(PrototypeSet labeled)
    {
        int size = labeled.size();
        double[][] dist = pairwiseDistances(labeled);

        double[] ratio = new double[size];
        double muRatio = 0;
        double sigmaRatio = 0;

        for (int p = 0; p < size; p++) {
            double sumCutEdge = 0;   // Ji
            double sumNoCutEdge = 0; // Ii
            for (int q = 0; q < size; q++) {
                if (q == p || !isRelativeNeighbor(dist[p], dist[q], q, p)) {
                    continue;
                }
                double weight = 1. / (1 + dist[p][q]);
                if (labeled.get(p).getOutput(0) != labeled.get(q).getOutput(0)) {
                    sumCutEdge += weight;
                } else {
                    sumNoCutEdge += weight;
                }
            }
            ratio[p] = sumCutEdge / sumNoCutEdge;
            if (isUsable(ratio[p])) {
                muRatio += ratio[p];
            }
        }
        // NOTE(review): mean and variance divide by the full set size, also
        // counting the excluded non-finite ratios; kept as in the original.
        muRatio /= size;

        for (int p = 0; p < size; p++) {
            if (isUsable(ratio[p])) {
                sigmaRatio += (ratio[p] - muRatio) * (ratio[p] - muRatio);
            }
        }
        sigmaRatio /= size;

        NormalDistribution normal = new NormalDistribution();
        normal.setMean(muRatio);
        normal.setSigma(Math.sqrt(sigmaRatio));

        double Ucritic = 1 - (this.threshold / 2.);
        double InvNormal = normal.inverseNormalDistribution(Ucritic);
        double RatioCritical = muRatio + InvNormal * Math.sqrt(sigmaRatio);

        // Step 7: relabel. Changes apply in place, so later iterations see them.
        for (int p = 0; p < size; p++) {
            double label = labeled.get(p).getOutput(0);
            if (ratio[p] > RatioCritical
                    && labeled.getFromClass(label).size() > 1
                    && labeled.getAllDifferentFromClass(label).size() > 1) {
                Prototype nearWithDifferent = new Prototype(labeled.nearestToWithDifferentClass(labeled.get(p), label));
                labeled.get(p).setFirstOutput(nearWithDifferent.getOutput(0));
            }
        }
    }

    /** A ratio enters the mean/variance only if it is neither infinite nor NaN (0/0). */
    private static boolean isUsable(double value)
    {
        return !Double.isInfinite(value) && !Double.isNaN(value);
    }

    /**
     * General main for all the prototype generators.
     * Currently a no-op.
     * @param args Arguments of the main function (unused).
     */
    public static void main(String[] args)
    { }
}