/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/*
CLCC.java
Isaac Triguero Velazquez.
Created by Isaac Triguero Velazquez 4/3/2011
Copyright (c) 2008 __MyCompanyName__. All rights reserved.
*/
package keel.Algorithms.Semi_Supervised_Learning.CLCC;
import keel.Algorithms.Semi_Supervised_Learning.Basic.C45.*;
import keel.Algorithms.Semi_Supervised_Learning.Basic.HandlerNB;
import keel.Algorithms.Semi_Supervised_Learning.Basic.HandlerSMO;
import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeSet;
import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeGenerator;
import keel.Algorithms.Semi_Supervised_Learning.Basic.Prototype;
import keel.Algorithms.Semi_Supervised_Learning.Basic.PrototypeGenerationAlgorithm;
import keel.Algorithms.Semi_Supervised_Learning.Basic.Utilidades;
import keel.Algorithms.Semi_Supervised_Learning.*;
import java.util.*;
import keel.Algorithms.Semi_Supervised_Learning.utilities.*;
import keel.Algorithms.Semi_Supervised_Learning.utilities.KNN.*;
import keel.Dataset.Attribute;
import keel.Dataset.Attributes;
import keel.Dataset.InstanceAttributes;
import keel.Dataset.InstanceSet;
import org.core.*;
import org.core.*;
import java.util.StringTokenizer;
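/**
* This class implements the CLCC semi-supervised classification algorithm.
* The original note lists k-NN, C4.5, SMO and NB as usable classifiers; the code in
* this file only instantiates RandomTree classifiers (co-forest step) and nearest-center
* (1-NN) voting over the selected cluster-center sets.
* @author triguero
*
*/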
public class CLCCGenerator extends PrototypeGenerator {
/*Own parameters of the algorithm*/
private int numberOfselectedExamples;
private int MaxIter;
private int num_classifier;
private double threshold = 0.75;
private double beta = 0.4;
private int initialCluster = 2;
private int frequency = 3;
private int set_num = 6;
private boolean optionalPart = true;
private int m_numOriginalLabeledInsts = 0;
/** Number of features to consider in random feature selection.
If less than 1 will use int(logM+1) ) */
protected int m_numFeatures = 0;
/** Final number of features that were considered in last build. */
protected int m_KValue = 0;
private int [][] predictions;
private double [][] confidence;
// private String final_classifier;
protected int numberOfPrototypes; // Particle size is the percentage
protected int numberOfClass;
/** Parameters of the initial reduction process. */
private String[] paramsOfInitialReducction = null;
RandomTree [] m_classifiers;
/**
 * Builds a CLCC generator from a training set only.
 * <p>
 * Only {@code _trainingDataSet} is used: it is forwarded to the superclass constructor.
 * Every other argument is accepted but ignored by this constructor, so the algorithm's
 * own fields keep their declared defaults.
 *
 * @param _trainingDataSet training prototypes handed to the superclass.
 * @param neigbors ignored.
 * @param poblacion ignored.
 * @param perc ignored.
 * @param iteraciones ignored.
 * @param c1 ignored.
 * @param c2 ignored.
 * @param vmax ignored.
 * @param wstart ignored.
 * @param wend ignored.
 */
public CLCCGenerator(PrototypeSet _trainingDataSet, int neigbors, int poblacion, int perc, int iteraciones, double c1, double c2, double vmax, double wstart, double wend)
{
    super(_trainingDataSet);
    this.algorithmName = "CLCC";
}
/**
* Build a new CLCCGenerator Algorithm
* @param t Original prototype set to be reduced.
* @param unlabeled Original unlabeled prototype set for SSL.
* @param params Parameters of the algorithm (only % of reduced set).
*/
public CLCCGenerator(PrototypeSet t, PrototypeSet unlabeled, PrototypeSet test, Parameters parameters)
{
super(t,unlabeled, test, parameters);
algorithmName="CLCC";
this.predictions = new int[6][];
this.num_classifier = parameters.getNextAsInt();
this.threshold = parameters.getNextAsDouble();
this.beta = parameters.getNextAsDouble();
this.initialCluster = parameters.getNextAsInt();
this.frequency = parameters.getNextAsInt();
this.set_num = parameters.getNextAsInt();
if(parameters.getNextAsString().equalsIgnoreCase("true")){
this.optionalPart = true;
}else{
this.optionalPart = false;
}
//this.final_classifier = parameters.getNextAsString();
//Last class is the Unknown
this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size();
if(this.initialCluster < this.numberOfClass || this.initialCluster > 2* this.numberOfClass) // if the value is not correctly specified
{
this.initialCluster = RandomGenerator.Randint(this.numberOfClass, 2*this.numberOfClass);
}
System.out.println("Initial Cluster = "+this.initialCluster);
System.out.print("\nIsaacSSL dice: " + this.numberOfselectedExamples+ ", "+ this.numberOfClass +"\n");
}
/**
 * Computes the class distribution of an instance using only the trees for which the
 * instance is out-of-bag, additionally leaving out one tree.
 *
 * @param inst instance to classify.
 * @param idxInst position of {@code inst} in the set the in-bag flags refer to.
 * @param inbags {@code inbags[t][k]} is true when instance {@code k} was sampled for tree {@code t}.
 * @param idExcluded index of the tree that must not vote.
 * @return the summed per-class distribution, normalized to sum 1 unless every vote was zero
 *         (in which case the all-zero vector is returned).
 * @throws Exception if a tree fails to produce its distribution.
 */
private double[] outOfBagDistributionForInstanceExcluded(Prototype inst, int idxInst, boolean[][] inbags, int idExcluded) throws Exception
{
    final double[] votes = new double[this.numberOfClass];

    for (int tree = 0; tree < this.num_classifier; tree++) {
        boolean outOfBag = !inbags[tree][idxInst];
        if (outOfBag && tree != idExcluded) {
            double[] treeDistr = m_classifiers[tree].distributionForInstance(inst);
            if (treeDistr != null) {
                for (int c = 0; c < this.numberOfClass; c++) {
                    votes[c] += treeDistr[c];
                }
            }
        }
    }

    double total = 0;
    for (int c = 0; c < votes.length; c++) {
        total += votes[c];
    }

    // Normalize only when there is something to normalize (avoids 0/0).
    if (total != 0) {
        for (int c = 0; c < votes.length; c++) {
            votes[c] /= total;
        }
    }
    return votes;
}
/**
 * Estimates the weighted out-of-bag error of the ensemble with one tree left out.
 * <p>
 * Only the first {@code m_numOriginalLabeledInsts} instances of {@code data} are examined,
 * and an instance only counts when the winning class probability exceeds the confidence
 * threshold. The result is (weight of misclassified counted instances) / (weight of counted
 * instances); when no instance is counted this is 0/0, i.e. {@code NaN}.
 *
 * @param data instances to evaluate.
 * @param inbags in-bag flags per tree and instance.
 * @param id index of the tree excluded from the vote.
 * @return the weighted error ratio, or {@code NaN} when no instance passed the threshold.
 * @throws Exception if a tree fails to produce its distribution.
 */
private double measureError(PrototypeSet data, boolean[][] inbags, int id) throws Exception
{
    double wrongWeight = 0;
    double judgedWeight = 0;
    final int limit = Math.min(data.size(), m_numOriginalLabeledInsts);

    for (int idx = 0; idx < limit; idx++) {
        Prototype example = data.get(idx);
        double[] oob = outOfBagDistributionForInstanceExcluded(example, idx, inbags, id);

        // Arg-max over the distribution; entries must exceed Double.MIN_VALUE to be picked.
        double best = Double.MIN_VALUE;
        int bestClass = 0;
        for (int c = 0; c < oob.length; c++) {
            if (oob[c] > best) {
                best = oob[c];
                bestClass = c;
            }
        }

        // Low-confidence predictions take no part in the estimate.
        if (!(best > this.threshold)) {
            continue;
        }

        judgedWeight += example.getWeight();
        if (bestClass != example.getOutput(0)) {
            wrongWeight += example.getWeight();
        }
    }

    return wrongWeight / judgedWeight;
}
/**
 * Draws a bootstrap sample of {@code data.size()} prototypes, where each prototype is
 * picked with probability proportional to its weight.
 * <p>
 * The returned set contains only the drawn prototypes (as copies, each with weight 1);
 * the input set and its prototypes are not modified.
 *
 * @param data the original data set.
 * @param id index of the classifier the sample is drawn for (not used by the sampling itself).
 * @param sampled output parameter with at least {@code data.size()} entries;
 *        {@code sampled[l]} is set to true when prototype {@code l} was drawn at least once.
 * @return the resampled set; empty when {@code data} is empty.
 * @throws IllegalArgumentException if any prototype has a negative weight.
 */
public final PrototypeSet resampleWithWeights(PrototypeSet data, int id, boolean[] sampled)
{
    final int n = data.size();

    // Start from an EMPTY set: only drawn prototypes belong in the sample, and the k-th
    // element of the result must be the k-th drawn prototype.
    PrototypeSet newData = new PrototypeSet();
    if (n == 0) {
        return newData;
    }

    double[] weights = new double[n];
    double sumOfWeights = 0;
    for (int i = 0; i < n; i++) {
        weights[i] = data.get(i).getWeight();
        if (weights[i] < 0) {
            throw new IllegalArgumentException("Weights have to be positive.");
        }
        sumOfWeights += weights[i];
    }

    // Cumulative sums of random numbers give n ascending thresholds, which are then
    // rescaled so that they span [0, sumOfWeights].
    double[] probabilities = new double[n];
    double sumProbs = 0;
    for (int i = 0; i < n; i++) {
        sumProbs += Randomize.Rand();
        probabilities[i] = sumProbs;
    }
    final double scale = sumProbs / sumOfWeights;
    for (int i = 0; i < n; i++) {
        probabilities[i] /= scale;
    }
    // Make sure that rounding errors don't mess things up
    probabilities[n - 1] = sumOfWeights;

    // Walk the cumulative weights; every threshold that falls inside prototype l's
    // weight interval yields one copy of prototype l.
    int k = 0;
    double cumulativeWeight = 0;
    for (int l = 0; l < n && k < n; l++) {
        cumulativeWeight += weights[l];
        while (k < n && probabilities[k] <= cumulativeWeight) {
            Prototype drawn = new Prototype(data.get(l));
            drawn.setWeight(1);
            newData.add(drawn);
            sampled[l] = true;
            k++;
        }
    }
    return newData;
}
/**
 * Computes the class distribution of an instance from every tree of the ensemble
 * except one.
 *
 * @param inst instance to classify.
 * @param idExcluded index of the tree that must not vote.
 * @return the summed per-class distribution, normalized to sum 1 unless every vote was zero
 *         (in which case the all-zero vector is returned instead of NaN values).
 * @throws Exception if a tree fails to produce its distribution.
 */
private double[] distributionForInstanceExcluded(Prototype inst, int idExcluded) throws Exception
{
    double[] distr = new double[this.numberOfClass];

    for (int i = 0; i < this.num_classifier; i++) {
        if (i == idExcluded) {
            continue;
        }
        double[] d = m_classifiers[i].distributionForInstance(inst);
        // A tree may return no distribution; skip it, as the other voting methods do.
        if (d != null) {
            for (int iClass = 0; iClass < this.numberOfClass; iClass++) {
                distr[iClass] += d[iClass];
            }
        }
    }

    // Normalize, guarding against a zero sum (which would turn every entry into NaN).
    double sum = 0;
    for (int i = 0; i < distr.length; i++) {
        sum += distr[i];
    }
    if (sum != 0) {
        for (int i = 0; i < distr.length; i++) {
            distr[i] /= sum;
        }
    }
    return distr;
}
/**
 * Judges whether the ensemble (with one tree excluded) is confident enough about an
 * instance, according to the confidence threshold. When it is, the instance receives
 * the winning class as its label and the winning probability as its weight.
 *
 * @param inst the instance to judge; modified only when the result is true.
 * @param idExcluded index of the tree that must be left out of the ensemble.
 * @return true when the winning class probability exceeds the threshold.
 * @throws Exception if a tree fails to produce its distribution.
 */
protected boolean isHighConfidence(Prototype inst, int idExcluded) throws Exception
{
    final double[] ensemble = distributionForInstanceExcluded(inst, idExcluded);

    // Arg-max over the distribution; entries must exceed Double.MIN_VALUE to be picked.
    int winner = 0;
    double winnerProb = Double.MIN_VALUE;
    for (int c = 0; c < ensemble.length; c++) {
        if (ensemble[c] > winnerProb) {
            winnerProb = ensemble[c];
            winner = c;
        }
    }

    if (!(winnerProb > this.threshold)) {
        return false;
    }

    inst.setFirstOutput((double) winner); // assign label
    inst.setWeight(winnerProb);           // set instance weight
    return true;
}
/**
 * Classifies an instance by summing the class distributions of all trees and picking
 * the class with the highest total. The normalized totals are also stored in
 * {@code this.confidence} at the row given by the instance's index.
 *
 * @param inst instance to classify; its index must be a valid row of {@code this.confidence}.
 * @return index of the winning class (the lowest index wins ties; 0 when no tree voted).
 * @throws Exception if a tree fails to produce its distribution.
 */
public int votingRule(Prototype inst) throws Exception {
    double[] res = new double[this.numberOfClass];

    for (int j = 0; j < this.num_classifier; j++) {
        double[] distr = m_classifiers[j].distributionForInstance(inst); // Probability of each class.
        if (distr != null) {
            for (int z = 0; z < res.length; z++) {
                res[z] += distr[z];
            }
        }
    }

    // Normalize res. A zero sum (no tree produced a vote) is left as all zeros:
    // dividing would store NaN confidences, which make every later objective value NaN.
    double sum = 0;
    for (int j = 0; j < res.length; j++) {
        sum += res[j];
    }
    if (sum != 0) {
        for (int j = 0; j < res.length; j++) {
            res[j] /= sum;
        }
    }

    this.confidence[inst.getIndex()] = res.clone(); // kept for the clustering steps of CLCC

    // Determine the class with the maximum value.
    int maxIndex = 0;
    for (int j = 1; j < res.length; j++) {
        if (res[j] > res[maxIndex]) {
            maxIndex = j;
        }
    }
    return maxIndex;
}
/**
 * Applies the co-forest procedure and fills the {@code this.confidence} matrix.
 * <p>
 * An ensemble of random trees is trained on bootstrap samples of {@code labeled}. Then,
 * repeatedly, each tree may be retrained on the labeled data plus the unlabeled prototypes
 * that the rest of the ensemble labels with high confidence, for as long as its estimated
 * error keeps allowing it. Finally a copy of the transductive set is labeled by ensemble vote.
 *
 * @param labeled labeled prototypes; must not be empty (the first one is queried for the number of inputs).
 * @param unlabeled unlabeled prototypes; it is shuffled and, for confident prototypes, relabeled
 *        and reweighted in place by this method.
 * @return a copy of the transductive data set whose prototypes carry their position as index
 *         and the ensemble's prediction as first output.
 * @throws Exception if a tree fails to build or to classify.
 */
public PrototypeSet co_forest_sim(PrototypeSet labeled, PrototypeSet unlabeled) throws Exception {
    final int numTrees = this.num_classifier;
    double[] err = new double[numTrees];       // e_i
    double[] err_prime = new double[numTrees]; // e'_i
    double[] s_prime = new double[numTrees];   // l'_i
    boolean[][] inbags = new boolean[numTrees][];

    m_numOriginalLabeledInsts = labeled.size(); // from the original labeled data set

    // Number of random features per split: int(log2(M)) + 1 unless explicitly configured.
    m_KValue = m_numFeatures;
    if (m_KValue < 1) {
        m_KValue = (int) (Math.log(labeled.get(0).numberOfInputs()) / Math.log(2)) + 1;
    }

    // One distinct seed per tree; a tree keeps its seed when it is retrained.
    // presumably Randomize.Rand() is uniform in [0,1) -- TODO confirm
    int[] randSeeds = new int[numTrees];
    for (int i = 0; i < numTrees; i++) {
        randSeeds[i] = (int) (Randomize.Rand() * Integer.MAX_VALUE);
    }

    m_classifiers = new RandomTree[numTrees];
    for (int i = 0; i < numTrees; i++) {
        m_classifiers[i] = new RandomTree();
        m_classifiers[i].setKValue(m_KValue);
        m_classifiers[i].setSeed(randSeeds[i]);

        inbags[i] = new boolean[labeled.size()];
        PrototypeSet bootstrap = resampleWithWeights(labeled, i, inbags[i]);
        m_classifiers[i].buildClassifier(bootstrap);

        err_prime[i] = 0.5;
        s_prime[i] = 0; // l'_i <-- 0
    }

    boolean bChanged = true;
    /** repeat until none of the h_i changes */
    while (bChanged) {
        bChanged = false;
        boolean[] bUpdate = new boolean[numTrees];
        PrototypeSet[] Li = new PrototypeSet[numTrees];

        for (int i = 0; i < numTrees; i++) {
            err[i] = measureError(labeled, inbags, i);
            Li[i] = new PrototypeSet();

            /** if (e_i < e'_i) -- false when the error is NaN, so such a tree is never updated */
            if (err[i] < err_prime[i]) {
                if (s_prime[i] == 0) {
                    s_prime[i] = Math.min(unlabeled.sumOfWeights() / 10, 100);
                }

                /** Subsample U for each h_i */
                double weight = 0;
                unlabeled.randomize(this.SEED);
                int numWeightsAfterSubsample = (int) Math.ceil(err_prime[i] * s_prime[i] / err[i] - 1);
                for (int k = 0; k < unlabeled.size(); k++) {
                    weight += unlabeled.get(k).getWeight();
                    if (weight > numWeightsAfterSubsample) {
                        break;
                    }
                    // NOTE(review): the prototype is shared with 'unlabeled', so the label and
                    // weight assigned by isHighConfidence() also change the unlabeled set.
                    Li[i].add(unlabeled.get(k));
                }

                /** for every x in U' do -- including position 0, walking backwards so removals are safe */
                for (int j = Li[i].size() - 1; j >= 0; j--) {
                    Prototype curInst = Li[i].get(j);
                    if (!isHighConfidence(curInst, i)) { // in which the label is assigned
                        Li[i].remove(j);
                    }
                }

                if (s_prime[i] < Li[i].size()
                        && err[i] * Li[i].sumOfWeights() < err_prime[i] * s_prime[i]) {
                    bUpdate[i] = true;
                }
            }
        } // end of for i

        // Retrain the trees that earned an update.
        for (int i = 0; i < numTrees; i++) {
            if (!bUpdate[i]) {
                continue;
            }
            double size = Li[i].sumOfWeights();
            bChanged = true;

            // The replacement tree gets the same configuration as the initial one.
            RandomTree retrained = new RandomTree();
            retrained.setKValue(m_KValue);
            retrained.setSeed(randSeeds[i]);
            m_classifiers[i] = retrained;

            for (int j = 0; j < labeled.size(); j++) { // Combine labeled and Li.
                Li[i].add(new Prototype(labeled.get(j)));
            }
            m_classifiers[i].buildClassifier(Li[i]);

            err_prime[i] = err[i];
            s_prime[i] = size;
        }
    } // end of while

    // Label a copy of the transductive set and record the per-class confidences.
    PrototypeSet tranductive = new PrototypeSet(this.transductiveDataSet.clone());
    this.confidence = new double[tranductive.size()][this.numberOfClass]; // for the next step.
    int aciertoTrs = 0;
    for (int i = 0; i < tranductive.size(); i++) {
        Prototype current = tranductive.get(i);
        current.setIndex(i); // votingRule() stores the confidences under this index

        int predicted = this.votingRule(current);
        if (current.getOutput(0) == predicted) {
            aciertoTrs++;
        }
        current.setFirstOutput(predicted);
    }
    System.out.println("% de acierto TRS Coforest = "+ (aciertoTrs*100.)/transductiveDataSet.size());
    return tranductive;
}
/**
 * Penalty term of the objective function for using more clusters than classes.
 *
 * @param numCluster number of clusters.
 * @param n number of prototypes being clustered.
 * @return sqrt((numCluster - numberOfClass) / n) when there are at least as many clusters
 *         as classes, 0 otherwise.
 */
public double penalty(int numCluster, int n) {
    if (numCluster < this.numberOfClass) {
        return 0.;
    }
    int surplus = numCluster - this.numberOfClass;
    return Math.sqrt(surplus / (n * 1.));
}
/**
 * Objective function of a clustering: for every clustered prototype, one minus its stored
 * confidence for the class of its cluster's center, divided by the size of {@code Lstar},
 * plus {@code beta} times the penalty for the number of clusters.
 *
 * @param cluster clusters, one per center; each member's index must be a row of {@code this.confidence}.
 * @param Lstar the clustered set; only its size is used.
 * @param centers cluster centers, where {@code centers.get(j)} belongs to {@code cluster[j]}.
 * @return the objective value (lower is better).
 */
public double objetiveFunction(PrototypeSet[] cluster, PrototypeSet Lstar, PrototypeSet centers) {
    double accumulated = 0;
    final int n = Lstar.size();

    for (int c = 0; c < cluster.length; c++) {
        // Every member is scored against the class of its cluster's center.
        int centerClass = (int) centers.get(c).getOutput(0);
        PrototypeSet members = cluster[c];
        for (int m = 0; m < members.size(); m++) {
            double[] memberConfidence = this.confidence[members.get(m).getIndex()];
            accumulated += (1. - memberConfidence[centerClass]) / (n * 1.);
        }
    }

    return accumulated + this.beta * penalty(cluster.length, n);
}
/**
 * Builds the clusters induced by a set of centers: every prototype of {@code Lstar} is
 * copied into the cluster of its nearest center.
 *
 * @param clusterCenters the centers; each center's index must equal its position in this set,
 *        because that index selects the destination cluster.
 * @param Lstar the prototypes to distribute.
 * @return one prototype set per center, in center order (possibly empty sets).
 */
public PrototypeSet[] createCluster(PrototypeSet clusterCenters, PrototypeSet Lstar) {
    final int numCenters = clusterCenters.size();
    PrototypeSet[] groups = new PrototypeSet[numCenters];
    for (int g = 0; g < numCenters; g++) {
        groups[g] = new PrototypeSet();
    }

    for (int p = 0; p < Lstar.size(); p++) {
        // The nearest center carries the position of its cluster as index.
        int owner = clusterCenters.nearestTo(Lstar.get(p)).getIndex();
        groups[owner].add(new Prototype(Lstar.get(p)));
    }
    return groups;
}
/**
 * Center-based supervised clustering of {@code Lstar}, repeated {@code frequency} times.
 * <p>
 * Each repetition starts from randomly chosen centers and runs a greedy search: in every
 * step one candidate adds a random non-center prototype as a new center, another removes a
 * random center (only while there are more centers than classes), and the better candidate
 * replaces the current centers if it improves the objective function (or ties with fewer
 * clusters). The repetition ends at the first step without improvement.
 *
 * @param Lstar the labeled set to cluster; its prototypes' indexes must be rows of {@code this.confidence}.
 * @return one set of centers per repetition; inside each set a center's index is its position.
 */
public PrototypeSet[] localClusterCenter(PrototypeSet Lstar) {
    // step 1: in each iteration the best centers are stored in CX.
    PrototypeSet[] CX = new PrototypeSet[this.frequency];

    for (int run = 0; run < this.frequency; run++) { // step 2
        // 3) Select the initial centers randomly from Lstar.
        PrototypeSet clusterCenters = new PrototypeSet(); // CMS
        ArrayList<Integer> lista = RandomGenerator.generateDifferentRandomIntegers(0, Lstar.size() - 1);
        // Never ask for more centers than there are candidate positions.
        int initialCenters = Math.min(this.initialCluster, lista.size());
        for (int j = 0; j < initialCenters; j++) {
            clusterCenters.add(new Prototype(Lstar.get(lista.get(j))));
            clusterCenters.get(j).setIndex(j); // establish its index.
        }

        // 4) Clusters are created.
        PrototypeSet[] clusters = createCluster(clusterCenters, Lstar); // CR

        // Greedy search.
        boolean change = true;
        while (change) { // step 5
            PrototypeSet clusterCentersAdd = new PrototypeSet(clusterCenters.clone()); // CMSadd
            PrototypeSet clusterCentersRe = new PrototypeSet(clusterCenters.clone());  // CMSre

            // 6a) Add a single non-center object of Lstar to the set of centers.
            // First make sure such an object exists; otherwise the random search below
            // could never succeed and would loop forever.
            boolean candidateExists = false;
            for (int p = 0; p < Lstar.size() && !candidateExists; p++) {
                boolean isCenter = false;
                for (int c = 0; c < clusterCenters.size() && !isCenter; c++) {
                    isCenter = Lstar.get(p).equals(clusterCenters.get(c));
                }
                candidateExists = !isCenter;
            }
            boolean inserted = !candidateExists; // nothing to insert: the "add" candidate equals the current centers
            while (!inserted) {
                Prototype random = Lstar.getRandom();
                inserted = true;
                for (int c = 0; c < clusterCenters.size() && inserted; c++) {
                    if (random.equals(clusterCenters.get(c))) {
                        inserted = false;
                    }
                }
                if (inserted) {
                    clusterCentersAdd.add(new Prototype(random));
                    int last = clusterCentersAdd.size() - 1;
                    clusterCentersAdd.get(last).setIndex(last);
                }
            }
            PrototypeSet[] clustersAdd = createCluster(clusterCentersAdd, Lstar); // CRadd

            // 6b) Remove a center object from the set of centers.
            // The random position is always drawn, even when no removal takes place.
            int borrar = RandomGenerator.Randint(0, clusterCenters.size() - 1);
            if (clusterCenters.size() > this.numberOfClass) { // only remove while there are enough clusters
                clusterCentersRe.remove(borrar);
                // re-establish the indexes so that index == position again.
                for (int j = 0; j < clusterCentersRe.size(); j++) {
                    clusterCentersRe.get(j).setIndex(j);
                }
            }
            PrototypeSet[] clustersRe = createCluster(clusterCentersRe, Lstar); // CRre

            // 7) Select the best candidate between "add" and "remove".
            double Eadd = objetiveFunction(clustersAdd, Lstar, clusterCentersAdd);
            double Ere = objetiveFunction(clustersRe, Lstar, clusterCentersRe);
            PrototypeSet clusterCentersNew; // CMSnew
            PrototypeSet[] clustersNew;     // CRnew
            double Enew;
            if (Eadd < Ere) {
                clusterCentersNew = clusterCentersAdd;
                clustersNew = clustersAdd;
                Enew = Eadd;
            } else {
                clusterCentersNew = clusterCentersRe;
                clustersNew = clustersRe;
                Enew = Ere;
            }

            // 8) Accept the candidate when it is better, or equally good with fewer clusters.
            double Eold = objetiveFunction(clusters, Lstar, clusterCenters);
            boolean better = Enew < Eold;
            boolean equalButSmaller = (Enew == Eold) && clustersNew.length < clusters.length;
            if (better || equalButSmaller) {
                clusterCenters = new PrototypeSet(clusterCentersNew.clone()); // CMS = CMSnew
                clusters = clustersNew.clone();
            } else {
                change = false;
                CX[run] = new PrototypeSet(clusterCenters.clone()); // CX = CX U CMS
            }
        }
    }
    return CX;
}
/**
 * Refines the labeled set using the best set of centers in {@code CX}.
 * <p>
 * The center set with the lowest objective value is selected. Inside each of its clusters
 * the members are ranked by the average of (1 - confidence for the cluster's class) and the
 * distance to the center. The best-ranked third is relabeled with the cluster's class (and
 * its stored confidence for that class set to 1), the worst-ranked third is removed and the
 * remaining members are kept as they are.
 *
 * @param CX candidate sets of centers.
 * @param Lstar the labeled set the clusters are built from.
 * @return the union of the processed clusters of the best center set; an unmodified copy of
 *         {@code Lstar} when no center set has a comparable objective value (empty {@code CX}
 *         or only NaN objectives).
 */
public PrototypeSet ProcessCluster(PrototypeSet[] CX, PrototypeSet Lstar) {
    // 1) Identify the best set of cluster centers.
    double bestObjective = Double.MAX_VALUE;
    int bestObject = -1;
    PrototypeSet[] bestClusters = null; // CR of the best center set
    for (int i = 0; i < CX.length; i++) {
        PrototypeSet[] candidate = createCluster(CX[i], Lstar);
        double objective = objetiveFunction(candidate, Lstar, CX[i]);
        if (objective < bestObjective) {
            bestObjective = objective;
            bestObject = i;
            bestClusters = candidate;
        }
    }
    if (bestObject < 0) {
        // Nothing to process: hand back the labeled set untouched instead of indexing CX[-1].
        return new PrototypeSet(Lstar.clone());
    }
    final PrototypeSet bestCenters = CX[bestObject];

    // 2) For each center of the best set, process its cluster.
    for (int i = 0; i < bestCenters.size(); i++) {
        PrototypeSet members = bestClusters[i];
        final int clusterSize = members.size();
        // The cluster is split into three segments of clusterSize / 3 objects;
        // only the first and the last one are acted upon.
        final int examplesPerSegment = clusterSize / 3;
        final int clusterClass = (int) bestCenters.get(i).getOutput(0); // class of the center

        // Rank members: low values mean high confidence and closeness to the center.
        double[] combinacion = new double[clusterSize];
        for (int j = 0; j < clusterSize; j++) {
            Prototype member = members.get(j);
            double confianza = this.confidence[member.getIndex()][clusterClass];
            double distancia = Distance.d(member, bestCenters.get(i));
            combinacion[j] = ((1 - confianza) + distancia) / 2; // we have to minimize this value.
        }
        int[] position = Utils.stableSort(combinacion);

        // step 4a: relabel the objects of the first segment with the cluster's class.
        for (int z = 0; z < examplesPerSegment; z++) {
            Prototype relabeled = members.get(position[z]);
            relabeled.setFirstOutput(clusterClass);
            this.confidence[relabeled.getIndex()][clusterClass] = 1;
        }

        // step 4b: remove the objects of the last segment. Positions are removed from the
        // highest to the lowest so that the remaining positions stay valid.
        int[] aBorrar = new int[examplesPerSegment];
        for (int z = 0; z < examplesPerSegment; z++) {
            aBorrar[z] = position[clusterSize - examplesPerSegment + z];
        }
        Arrays.sort(aBorrar);
        for (int z = examplesPerSegment - 1; z >= 0; z--) {
            members.remove(aBorrar[z]);
        }
    }

    // Gather what is left of every cluster.
    PrototypeSet LabeledStarStar = new PrototypeSet();
    for (int i = 0; i < bestCenters.size(); i++) {
        LabeledStarStar.add(new PrototypeSet(bestClusters[i].clone()));
    }
    return LabeledStarStar;
}
/**
 * Applies the CLCC method.
 * <p>
 * Steps: (1) co-forest labels a copy of the transductive set and fills the confidence
 * matrix; (2) center-based supervised clustering produces candidate center sets;
 * (3, 4) optionally the clusters are processed and the clustering is repeated on the result;
 * (5) the center sets with the lowest objective values are selected; (6) each selected set
 * acts as a 1-NN classifier and the final label of every transductive and test prototype is
 * the majority vote of those classifiers.
 *
 * @return the labeled copies of the transductive and the test data sets, in that order.
 * @throws Exception if the co-forest step fails.
 */
public Pair<PrototypeSet, PrototypeSet> applyAlgorithm() throws Exception
{
    System.out.print("\nThe algorithm CLCC is starting...\n Computing...\n");

    // Obtain labeled and unlabeled data and establish indexes: the class value
    // numberOfClass marks the unlabeled prototypes of the training set.
    PrototypeSet labeled = new PrototypeSet(trainingDataSet.getAllDifferentFromClass(this.numberOfClass));
    PrototypeSet unlabeled = new PrototypeSet(trainingDataSet.getFromClass(this.numberOfClass));
    for (int j = 0; j < labeled.size(); j++) {
        labeled.get(j).setIndex(j);
    }
    for (int j = 0; j < unlabeled.size(); j++) {
        unlabeled.get(j).setIndex(j);
        unlabeled.get(j).setFirstOutput(0); // give all of them a valid class value
    }

    // Step 1: applying co-forest-sim (the confidence vectors are stored in this.confidence).
    PrototypeSet Lstar = new PrototypeSet(co_forest_sim(labeled, unlabeled).clone());
    for (int j = 0; j < Lstar.size(); j++) {
        Lstar.get(j).setIndex(j);
    }

    // Step 2: center-based supervised clustering is trained in Lstar.
    System.out.println("****");
    PrototypeSet[] CX = localClusterCenter(Lstar);

    if (this.optionalPart) { // steps 3 and 4 are optional.
        // Step 3: process cluster.
        PrototypeSet LstarStar = ProcessCluster(CX, Lstar);
        // Step 4: center-based supervised clustering is trained again, on the processed set.
        CX = localClusterCenter(LstarStar);
    }

    // Step 5: rank the center sets by the value of the objective function.
    double[] E = new double[CX.length];
    System.out.println("Cx length "+ CX.length);
    for (int i = 0; i < CX.length; i++) {
        E[i] = objetiveFunction(createCluster(CX[i], Lstar), Lstar, CX[i]);
    }
    System.out.println("*************");
    int[] position = Utils.stableSort(E); // sort in ascending order.

    // Step 6: build one 1-NN classifier per selected center set. No more sets can be
    // selected than were generated, so set_num is capped by the number of candidates.
    final int numSets = Math.min(this.set_num, CX.length);
    PrototypeSet[] finalSet = new PrototypeSet[numSets];
    for (int i = 0; i < numSets; i++) {
        PrototypeSet chosen = CX[position[i]];
        finalSet[i] = new PrototypeSet();
        for (int j = 0; j < chosen.size(); j++) {
            finalSet[i].add(chosen.get(j));
        }
        System.out.println(finalSet[i].size());
    }

    // Transductive and test phases.
    PrototypeSet tranductive = new PrototypeSet(this.transductiveDataSet.clone());
    PrototypeSet test = new PrototypeSet(this.testDataSet.clone());
    int aciertoTrs = voteWithCenterSets(finalSet, tranductive);
    int aciertoTst = voteWithCenterSets(finalSet, test);

    System.out.println("% de acierto TRS = "+ (aciertoTrs*100.)/transductiveDataSet.size());
    System.out.println("% de acierto TST = "+ (aciertoTst*100.)/testDataSet.size());
    return new Pair<PrototypeSet,PrototypeSet>(tranductive,test);
}

/**
 * Labels every prototype of {@code data} with the majority vote of the 1-NN classifiers
 * defined by {@code centerSets} (the lowest class index wins ties; class 0 when nobody votes).
 *
 * @param centerSets one set of centers per classifier.
 * @param data prototypes to label; their first output is overwritten with the prediction.
 * @return how many prototypes already carried the predicted class before being overwritten.
 */
private int voteWithCenterSets(PrototypeSet[] centerSets, PrototypeSet data)
{
    int hits = 0;
    for (int i = 0; i < data.size(); i++) {
        Prototype example = data.get(i);

        // Each classifier votes for the class of its nearest center.
        int[] votes = new int[this.numberOfClass];
        for (int j = 0; j < centerSets.length; j++) {
            Prototype cercano = centerSets[j].nearestTo(example);
            votes[(int) cercano.getOutput(0)]++;
        }

        // Determine the class.
        int predicted = 0;
        for (int c = 1; c < votes.length; c++) {
            if (votes[c] > votes[predicted]) {
                predicted = c;
            }
        }

        if (example.getOutput(0) == predicted) {
            hits++;
        }
        example.setFirstOutput(predicted);
    }
    return hits;
}
/**
 * Entry-point placeholder. The body is empty: running this class directly does nothing,
 * and no command-line argument is read.
 * @param args Arguments of the main function (unused).
 */
public static void main(String[] args)
{ }
}