/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/*
POC.java
Isaac Triguero Velazquez.
Created by Isaac Triguero Velazquez 11-3-09
Copyright (c) 2009 __MyCompanyName__. All rights reserved.
*/
package keel.Algorithms.Instance_Generation.POC;
import keel.Algorithms.Genetic_Rule_Learning.Globals.Rand;
import keel.Algorithms.Instance_Generation.Basic.PrototypeSet;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerator;
import keel.Algorithms.Instance_Generation.Basic.Prototype;
import keel.Algorithms.Instance_Generation.Basic.PrototypeGenerationAlgorithm;
import keel.Algorithms.Instance_Generation.Chen.ChenGenerator;
import keel.Algorithms.Instance_Generation.HYB.HYBGenerator;
import keel.Algorithms.Instance_Generation.*;
import java.util.*;
import keel.Algorithms.Instance_Generation.utilities.*;
import keel.Algorithms.Instance_Generation.utilities.KNN.*;
import org.core.*;
import java.util.StringTokenizer;
/**
 * POCGenerator: prototype reduction by the POC-NN (Pairwise Opposite Class -
 * Nearest Neighbor) algorithm.
 *
 * Its parameters are the reduction method ("selecting" or "replacing") and the
 * acceptance-interval ratio (alpha) used to discard boundary outliers.
 *
 * @author Isaac Triguero
 * @version 1.0
 */
public class POCGenerator extends PrototypeGenerator {

    /* Own parameters of the algorithm */

    /** Acceptance-interval ratio (alpha): a misclassified pattern whose distance to the
     *  separating hyperplane is within alfaRatio*dist(Xp1,Xp2) is treated as an outlier. */
    private double alfaRatio;
    /** Reduction method: "selecting" keeps original prototypes; any other value applies
     *  the replacing (averaging) variant. */
    private String method;

    // Other variables.
    protected int numberOfPrototypes;
    protected int numberOfClass;

    /**
     * Builds a new POCGenerator algorithm.
     *
     * @param _trainingDataSet training set to be reduced.
     * @param ratio acceptance-interval ratio (alpha).
     * @param Method reduction method, "selecting" or "replacing".
     */
    public POCGenerator(PrototypeSet _trainingDataSet, double ratio, String Method)
    {
        super(_trainingDataSet);
        algorithmName = "POC";
        this.alfaRatio = ratio;
        this.method = Method;
        // Keep numberOfClass consistent with the Parameters-based constructor; without
        // this, reduceSet() saw 0 classes when the generator was built from main().
        this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size();
    }

    /**
     * Builds a new POCGenerator algorithm from a Parameters object.
     *
     * @param t original prototype set to be reduced.
     * @param parameters parameters of the algorithm (method and alpha ratio).
     */
    public POCGenerator(PrototypeSet t, Parameters parameters)
    {
        super(t, parameters);
        algorithmName = "POC";
        this.method = parameters.getNextAsString();
        this.alfaRatio = parameters.getNextAsDouble();
        this.numberOfClass = trainingDataSet.getPosibleValuesOfOutput().size();

        System.out.println("Isaac dice: ratio= " + this.alfaRatio + " method = " + this.method);
        System.out.println("Number of class= " + this.numberOfClass);
    }

    /**
     * Finds a POC-NN (pairwise opposite class nearest neighbor) pair in S.
     * The idea is to find training points that are close to the decision boundary
     * and on the correct side of it; the remaining patterns can be discarded.
     *
     * @param S training set composed of the two subsets of classes class1 and class2.
     * @param class1 first class label.
     * @param class2 second class label.
     * @return the pair (Xp1 from class1, Xp2 from class2); either element may be
     *         null when the corresponding class subset is empty.
     */
    protected Pair<Prototype, Prototype> finding_poc_nn(PrototypeSet S, double class1, double class2) {
        // Obtain the prototypes with class 1 and 2 (pairwise decomposition).
        PrototypeSet S1 = S.getFromClass(class1);
        PrototypeSet S2 = S.getFromClass(class2);

        Prototype Xm, Xp1, Xp2;

        // Start from the mean of the larger subset, hop to the nearest
        // opposite-class pattern and back: the pair ends up near the boundary.
        if (S1.size() >= S2.size()) {
            Xm = S1.avg();
            Xp2 = S2.nearestTo(Xm);
            Xp1 = S1.nearestTo(Xp2);
        } else {
            Xm = S2.avg();
            Xp1 = S1.nearestTo(Xm);
            Xp2 = S2.nearestTo(Xp1);
        }

        return new Pair<Prototype, Prototype>(Xp1, Xp2);
    }

    /**
     * Counts the misclassified patterns of region R (class different from regionClass)
     * lying outside the acceptance interval, and removes from R, in place, the
     * misclassified patterns inside the interval (considered outliers).
     *
     * @param R region to examine; outliers are removed from it.
     * @param regionClass class label the patterns of R are expected to have.
     * @param w normalized direction of the separating hyperplane.
     * @param b offset of the hyperplane (h: w*x - b = 0).
     * @param dist distance between the POC-NN pair; scales the acceptance interval.
     * @return number of misclassified patterns outside the acceptance interval.
     */
    private int countMisclassifiedRemovingOutliers(PrototypeSet R, double regionClass,
                                                   Prototype w, double b, double dist) {
        int misclassified = 0;
        boolean outlier[] = new boolean[R.size()];
        Arrays.fill(outlier, false);

        for (int p = 0; p < R.size(); p++) {
            if (R.get(p).getOutput(0) != regionClass) { // misclassification
                double margin = Math.abs(w.mulEscalar(R.get(p)) - b);
                if (margin > this.alfaRatio * dist) {
                    misclassified++;
                } else {
                    outlier[p] = true; // inside the acceptance interval: drop it below
                }
            }
        }

        // Remove marked outliers from the end so pending indices stay valid.
        // Fixed: iterate down to index 0 (the original loops stopped at p > 0,
        // so an outlier at position 0 was never removed).
        for (int p = R.size() - 1; p >= 0; p--) {
            if (outlier[p]) {
                R.remove(p);
            }
        }

        return misclassified;
    }

    /**
     * Prototype selection by the POC-NN algorithm for a TWO-class problem.
     * Recursively finds POC-NN pairs near the decision boundary, keeps them, and
     * splits the data by the induced separating hyperplane until every region is,
     * up to outliers, correctly classified.
     *
     * @param S set of patterns of classes class1 and class2.
     * @param class1 first class label.
     * @param class2 second class label.
     * @return selected prototypes, or null if no POC-NN pair exists.
     */
    protected PrototypeSet selecting_poc_nn(PrototypeSet S, double class1, double class2) {
        PrototypeSet pocNNset = new PrototypeSet(S.size());

        // First find a POC-NN pair in S.
        Pair<Prototype, Prototype> pocnn = finding_poc_nn(S, class1, class2);
        Prototype Xp1 = pocnn.first();
        Prototype Xp2 = pocnn.second();

        if (Xp1 == null || Xp2 == null) {
            return null;
        }

        // Center point of the pair.
        Prototype center = Xp1.add(Xp2).mul(0.5);

        // Separating hyperplane h: { x | w*x - b = 0 }, w = (Xp1-Xp2)/||Xp1-Xp2||.
        Prototype w = Xp1.sub(Xp2);
        w = w.mul(1. / w.module());
        double b = w.mulEscalar(center);

        // Keep the POC-NN pair.
        pocNNset.add(Xp1);
        pocNNset.add(Xp2);

        // Divide all patterns of S into the two half-spaces R1 and R2.
        PrototypeSet R1 = new PrototypeSet();
        PrototypeSet R2 = new PrototypeSet();
        for (int i = 0; i < S.size(); i++) {
            Prototype Xi = S.get(i);
            if (w.mulEscalar(Xi) - b >= 0) {
                R1.add(Xi);
            } else {
                R2.add(Xi);
            }
        }

        // Acceptance interval is proportional to the distance between the pair.
        double dist = Distance.d(Xp1, Xp2);
        int misR1 = countMisclassifiedRemovingOutliers(R1, Xp1.getOutput(0), w, b, dist);
        int misR2 = countMisclassifiedRemovingOutliers(R2, Xp2.getOutput(0), w, b, dist);

        // Recurse on any region that still contains misclassified patterns.
        if (misR1 > 0 && R1.size() > 0) {
            pocNNset.add(selecting_poc_nn(R1, class1, class2));
        }
        if (misR2 > 0 && R2.size() > 0) {
            pocNNset.add(selecting_poc_nn(R2, class1, class2));
        }

        return pocNNset;
    }

    /**
     * Prototype replacement (generation) by POC-NN for a TWO-class problem.
     * Works like selecting_poc_nn, but each region that is correctly classified
     * (up to outliers) is replaced by its average prototype instead of keeping
     * the original patterns.
     *
     * @param S set of patterns of classes class1 and class2.
     * @param class1 first class label.
     * @param class2 second class label.
     * @return generated prototypes, or null if no POC-NN pair exists.
     */
    protected PrototypeSet replacing_poc_nn(PrototypeSet S, double class1, double class2) {
        PrototypeSet morNNset = new PrototypeSet();

        // First find a POC-NN pair in S.
        Pair<Prototype, Prototype> pocnn = finding_poc_nn(S, class1, class2);
        Prototype Xp1 = pocnn.first();
        Prototype Xp2 = pocnn.second();

        if (Xp1 == null || Xp2 == null) {
            return null;
        }

        // Separating hyperplane through the center of the pair.
        Prototype center = Xp1.add(Xp2).mul(0.5);
        Prototype w = Xp1.sub(Xp2);
        w = w.mul(1. / w.module());
        double b = w.mulEscalar(center);

        // Divide all patterns of S into the two half-spaces R1 and R2.
        PrototypeSet R1 = new PrototypeSet();
        PrototypeSet R2 = new PrototypeSet();
        for (int i = 0; i < S.size(); i++) {
            Prototype Xi = S.get(i);
            if (w.mulEscalar(Xi) - b >= 0) {
                R1.add(Xi);
            } else {
                R2.add(Xi);
            }
        }

        double dist = Distance.d(Xp1, Xp2);
        int misR1 = countMisclassifiedRemovingOutliers(R1, Xp1.getOutput(0), w, b, dist);
        int misR2 = countMisclassifiedRemovingOutliers(R2, Xp2.getOutput(0), w, b, dist);

        // Impure regions are refined recursively; pure regions are replaced by their
        // average prototype. Fixed two defects: the misR2 branch recursed into
        // selecting_poc_nn instead of replacing_poc_nn, and a trailing
        // misR1==0 && misR2==0 block re-added the region averages, duplicating
        // prototypes in the result.
        if (misR1 > 0 && R1.size() > 0) {
            morNNset.add(replacing_poc_nn(R1, class1, class2));
        } else if (R1.size() != 0) {
            morNNset.add(R1.avg());
        }

        if (misR2 > 0 && R2.size() > 0) {
            morNNset.add(replacing_poc_nn(R2, class1, class2));
        } else if (R2.size() != 0) {
            morNNset.add(R2.avg());
        }

        return morNNset;
    }

    /**
     * Executes the POC-NN reduction over the training set.
     * Binary problems are handled directly; multi-class problems are decomposed
     * into every pairwise binary problem. A final leave-one-out pass prunes
     * prototypes whose removal improves training accuracy.
     *
     * @return reduced prototype set.
     */
    @SuppressWarnings({ "unchecked", "static-access" })
    public PrototypeSet reduceSet()
    {
        System.out.print("\nThe algorithm is starting...\n Computing...\n");
        System.out.println("Number of class " + this.numberOfClass);

        PrototypeSet result = new PrototypeSet();
        PrototypeSet sal;

        if (this.numberOfClass == 2) {
            if (this.method.equals("selecting")) {
                result = selecting_poc_nn(trainingDataSet, 0.0, 1.0);
            } else {
                result = replacing_poc_nn(trainingDataSet, 0.0, 1.0);
            }
        } else {
            // Obtain all possible pairwise combinations of classes.
            for (int i = 0; i < this.numberOfClass - 1; i++) {
                for (int j = i + 1; j < this.numberOfClass; j++) {
                    PrototypeSet pairwise = trainingDataSet.getFromClass(i);
                    pairwise.add(trainingDataSet.getFromClass(j));

                    if (this.method.equals("selecting")) {
                        sal = selecting_poc_nn(pairwise, i, j);
                    } else {
                        sal = replacing_poc_nn(pairwise, i, j);
                    }
                    if (sal != null) {
                        result.add(sal);
                    }
                }
            }
        }

        if (result.size() == 0) {
            // Degenerate case: fall back to one random prototype per class.
            // Fixed: start at class 0 (it was skipped before) and bound the random
            // index by size()-1, since RandintClosed is inclusive on both ends and
            // aux.get(aux.size()) would be out of range.
            for (int i = 0; i < this.numberOfClass; i++) {
                PrototypeSet aux = trainingDataSet.getFromClass(i);
                if (aux.size() != 0) {
                    result.add(aux.get(RandomGenerator.RandintClosed(0, aux.size() - 1)));
                }
            }
        }

        System.out.println("Accuracy % " + accuracy(result, trainingDataSet));
        System.out.println("Reduction % " + (100 - (result.size() * 100.) / trainingDataSet.size()));

        // Leave-one-out pruning: drop prototypes whose removal improves accuracy.
        if (result.size() > 1) {
            boolean marcas[] = new boolean[result.size()];
            Arrays.fill(marcas, true);

            double accuracyInic = KNN.classficationAccuracy(result, trainingDataSet);
            double accuracy;

            for (int i = 0; i < result.size(); i++) {
                marcas[i] = false; // at the beginning, assume it cannot be eliminated
                PrototypeSet leaveOneOut = result.without(result.get(i));
                accuracy = KNN.classficationAccuracy(leaveOneOut, trainingDataSet);
                if (accuracy > accuracyInic) {
                    marcas[i] = true; // removing it improves accuracy: eliminate
                }
            }

            // Then we create the result set, keeping only the unmarked prototypes.
            PrototypeSet clean = new PrototypeSet();
            for (int i = 0; i < marcas.length; i++) {
                if (!marcas[i]) {
                    clean.add(result.get(i));
                }
            }

            System.out.println("Accuracy % " + accuracy(clean, trainingDataSet));
            // Fixed: use a double literal so the reduction percentage is not truncated
            // by integer division (the earlier print already used 100.).
            System.out.println("Reduction % " + (100 - (clean.size() * 100.) / trainingDataSet.size()));

            // Fixed: the pruned set was computed and reported but the unpruned set
            // was returned, silently discarding the leave-one-out pass.
            result = clean;
        }

        return result;
    }

    /**
     * General main for all the prototype generators.
     * Arguments:
     * 0: Filename with the training data set to be condensed.
     * 1: Filename which contains the test data set.
     * 2: Seed of the random number generator.
     *
     * @param args Arguments of the main function.
     */
    public static void main(String[] args)
    {
        Parameters.setUse("POC", "<seed> <Number of neighbors>\n<Swarm size>\n<Particle Size>\n<MaxIter>\n<DistanceFunction>");
        Parameters.assertBasicArgs(args);

        PrototypeSet training = PrototypeGenerationAlgorithm.readPrototypeSet(args[0]);
        PrototypeSet test = PrototypeGenerationAlgorithm.readPrototypeSet(args[1]);

        long seed = Parameters.assertExtendedArgAsInt(args, 2, "seed", 0, Long.MAX_VALUE);
        POCGenerator.setSeed(seed);

        // Fixed: reduceSet() compares the method against "selecting"; the former
        // value "selection" silently ran the replacing variant instead.
        POCGenerator generator = new POCGenerator(training, 0.5, "selecting");

        PrototypeSet resultingSet = generator.execute();

        int accuracy1NN = KNN.classficationAccuracy(resultingSet, test);
        generator.showResultsOfAccuracy(Parameters.getFileName(), accuracy1NN, test);
    }
}