/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package keel.Algorithms.Rule_Learning.Swap1;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.Random;
import java.util.StringTokenizer;
import keel.Dataset.HeaderFormatException;
import org.core.Files;
/**
*
* @author Javier Rascón Mesa
*/
public class swap1{
// Index k of the current iteration; selects the working set C[k] used by the rule methods.
private int k_counter;
// Training cases, loaded by the constructor.
private rule S;
// Test cases, loaded by the constructor.
private rule tstSet;
// Learned rules. Index 0 holds an empty placeholder added by the constructor; real rules start at index 1.
private ArrayList<rule> R;
// C[k]: cases still pending at iteration k (train() sets C[k+1] := C[k] - E[k]).
private ArrayList<rule> C;
// P[k]: rule produced at iteration k.
private ArrayList<rule> P;
// E[k]: cases of C[k] satisfied by P[k].
private ArrayList<rule> E;
// Every nominal value of every input attribute, paired with the index of its attribute.
private ArrayList<Attr_pos> atributos_entrada;
// Every nominal value of the output attribute(s); train() consumes it from the front as the target class advances.
private ArrayList<String> atributos_salida;
// Predicted and real classes for the training and test sets, filled by classifyTrainSet()/classifyTestSet().
private String trainPrediction[];
private String testPrediction[];
private String trainReal[];
private String testReal[];
// Relation name taken from the training-set header; written to the output files.
private String relation;
// Input attributes and first output attribute, captured in train() for the output-file headers.
protected Attribute[] inputs;
protected Attribute output;
// Writer for the log file named by Parameters.logOutputFile; stays unassigned if opening it failed.
BufferedWriter bw_output;
/**
 * Pairs a nominal value with the index of the input attribute it belongs to.
 */
private class Attr_pos{
    /** Nominal value, as text. */
    public String attr;
    /** Index of the input attribute that owns the value. */
    public int pos;

    /**
     * @param value nominal value, as text
     * @param attributeIndex index of the input attribute that owns the value
     */
    public Attr_pos(String value, int attributeIndex){
        this.attr = value;
        this.pos = attributeIndex;
    }
}
private class rule extends InstanceSet{
// Class label predicted by the rule; stays null when the object is used as a plain set of cases.
private String clase=null;
// For the i-th stored condition, the index of the input attribute it tests.
private LinkedList<Integer> attr_pos;

/**
 * Copy constructor: duplicates the instances, the class label and the attribute positions.
 *
 * @param _ru rule to copy
 */
public rule(rule _ru){
    super(_ru);
    this.attr_pos = new LinkedList<Integer>(_ru.attr_pos);
    this.clase = _ru.clase;
}

/**
 * Creates an empty set with no class label.
 */
private rule() {
    this((String) null);
}

/**
 * Creates an empty rule that predicts the given class.
 *
 * @param _clase class label of the rule
 */
private rule(String _clase) {
    super();
    this.attr_pos = new LinkedList<Integer>();
    this.clase = _clase;
}
/**
 * Removes the instance stored at {@code pos} and, when attribute positions
 * are being tracked, the position entry at the same index.
 *
 * @param pos index of the instance to remove
 */
@Override
public void removeInstance(int pos){
    super.removeInstance(pos);
    if (attr_pos.isEmpty()) {
        // No positions recorded (instances were added without one): nothing to keep in sync.
        return;
    }
    // The argument is a primitive int, so this removes by index, not by value.
    attr_pos.remove(pos);
}
/**
 * Appends a condition to the rule and records which input attribute it tests.
 *
 * @param inst instance holding the condition value
 * @param pos index of the input attribute the condition refers to
 */
public void addInstance(Instance inst, int pos){
    super.addInstance(inst);
    attr_pos.add(Integer.valueOf(pos));
}
/**
 * @param inst_pos index of a condition inside this rule
 * @return index of the input attribute recorded for that condition
 */
public int posInstance(int inst_pos){
    final Integer attributeIndex = attr_pos.get(inst_pos);
    return attributeIndex.intValue();
}
/**
* Repeatedly applies error-reducing swaps to this rule (modified in place).
*
* A snapshot of the rule is taken on entry. Each outer pass performs one
* swap, then keeps swapping while the rule still has errors and swaps are
* still being found. The outer loop repeats for as long as the rule covers
* no more cases than the entry snapshot did; it also ends as soon as the
* first swap of a pass cannot be found.
*
* @return this rule, after the swaps
*/
@SuppressWarnings("empty-statement")
public rule swap_min_error(){//Decrease number of errors
rule r_old = new rule(this); // snapshot used only for the coverage comparison below
boolean swap_found,d;
do{
// Make the single best swap for any component of the rule that
// reduces the errors it makes on the current working set.
swap_found = this.single_best_swap();
if(!swap_found)// no improving swap exists
return this;// return the rule as it stands
//endif
d=true;// D: "swaps are still being found"
while(this.n_errors()!=0 && d){// while the rule still has errors and D is true
// Make the single best swap for any component of the rule that
// reduces the errors it makes on the current working set.
swap_found = this.single_best_swap();
if(!swap_found)// no improving swap exists
d=false;// leave the inner loop
}//endwhile
// NOTE(review): the original pseudo-code step "consider adding components to
// make the rule 100% predictive again" has no code here.
}while(this.num_covered_cases()<=r_old.num_covered_cases()); // repeat until coverage exceeds the entry snapshot
return this;
}
/**
 * Counts the cases of the current working set {@code C[k_counter]} that
 * satisfy every condition of this rule.
 *
 * @return number of cases covered by the rule
 */
int num_covered_cases(){
    final int caseCount = C.get(k_counter).getNumInstances();
    final int conditionCount = this.getNumInstances();
    int matched = 0;
    Instance candidate, condition;
    for (int c = 0; c < caseCount; c++) {
        candidate = C.get(k_counter).getInstance(c);
        boolean allHold = true;
        int idx = 0;
        // Stop at the first condition the case fails.
        while (idx < conditionCount && allHold) {
            condition = this.getInstance(idx);
            allHold = candidate.getInputNominalValues(this.posInstance(idx)).equals(condition.getInputNominalValues(0));
            idx++;
        }
        if (allHold) {
            matched++;
        }
    }
    return matched;
}
/**
 * Counts the covered cases of {@code C[k_counter]} whose class differs from
 * the class of this rule: starts from the number of covered cases and
 * subtracts one for every case that is both covered and correctly labelled.
 *
 * @return number of errors made by the rule
 */
private int n_errors(){
    int remaining = this.num_covered_cases();
    final int caseCount = C.get(k_counter).getNumInstances();
    final int conditionCount = this.getNumInstances();
    Instance candidate, condition;
    for (int c = 0; c < caseCount; c++) {
        candidate = C.get(k_counter).getInstance(c);
        boolean coveredAndRight = true;
        for (int idx = 0; idx < conditionCount; idx++) {
            condition = this.getInstance(idx);
            boolean holds = candidate.getInputNominalValues(this.posInstance(idx)).equals(condition.getInputNominalValues(0));
            // The class is only checked when the condition holds.
            if (!holds || !candidate.getOutputNominalValues(0).equals(this.clase)) {
                coveredAndRight = false;
            }
        }
        if (coveredAndRight) {
            remaining--;
        }
    }
    return remaining;
}
/**
 * Computes the fraction of covered cases that the rule labels correctly,
 * i.e. {@code (covered - errors) / covered}.
 *
 * A rule that covers no case has no meaningful ratio; 0.0 is returned for it
 * instead of the NaN that a 0/0 division would produce.
 *
 * @return predictive level of the rule, in [0, 1]
 */
private double predictive_level(){
    final double covered = this.num_covered_cases();
    if (covered == 0) {
        // Avoid 0/0 = NaN, which makes every later comparison false.
        return 0.0;
    }
    return (covered - this.n_errors()) / covered;
}
/**
* Searches for the single replacement of one condition by one candidate
* attribute value that yields the fewest errors, and applies it to this rule
* if it strictly improves on the current error count.
*
* A candidate value is only tried when no other condition of the rule
* (other than the one being replaced) already tests the same attribute.
* A swap is only accepted when the resulting rule covers at least one case.
* The replaced condition is removed and the new one appended, so the new
* condition ends up last in the rule.
*
* @return true if an improving swap was found and applied, false otherwise
*/
private boolean single_best_swap(){ //single best swap for any component of the rule that reduces its errors
boolean found = false; // whether an improving swap has been seen
rule curr_r = this;
rule copy;
int curr_err; // errors of the candidate being evaluated
// Baseline: current error count, or "worst possible" when the rule covers nothing.
int best_n_err=(curr_r.num_covered_cases() != 0)?curr_r.n_errors():Integer.MAX_VALUE;
int best_i=-1,best_j=-1;
for(int i=0;i<curr_r.getNumInstances();i++){ // condition to replace
for(int j=0;j<atributos_entrada.size();j++){ // candidate replacement value
boolean permitido = true;
// Reject the candidate if another condition (k != i) already uses its attribute.
for(int k=0;k<this.attr_pos.size();k++)
if(this.attr_pos.get(k)==atributos_entrada.get(j).pos && k != i)
permitido = false;
if(permitido){
copy = new rule(curr_r);
// Try the swap on a copy.
copy.removeInstance(i);
// NOTE(review): presumably this Instance constructor parses the value text into a one-attribute instance — confirm against Instance.
copy.addInstance(new Instance(atributos_entrada.get(j).attr,false,copy.getNumInstances()),atributos_entrada.get(j).pos);
// Evaluate the copy.
curr_err=copy.n_errors();
if(curr_err<best_n_err && copy.num_covered_cases() != 0){
found=true;
best_i=i;
best_j=j;
best_n_err=curr_err;
}
}
}
}
if(found){// apply the best swap to this rule
this.removeInstance(best_i);
this.addInstance(new Instance(atributos_entrada.get(best_j).attr,false,this.getNumInstances()),atributos_entrada.get(best_j).pos);
}
return found;
}
/**
 * Tries to extend the rule with one extra condition. Every candidate value
 * whose attribute is not yet tested by the rule is evaluated on a copy; the
 * winner is the one with the highest predictive level, ties being broken by
 * the larger number of covered cases. The rule is only modified when a
 * candidate beats the current rule.
 *
 * @return true if a condition was added, false if the rule is unchanged
 */
private boolean add_single_best() {
    double bestLevel = this.predictive_level();
    int bestCoverage = this.num_covered_cases();
    int chosen = -1;
    rule trial = null;
    for (int idx = 0; idx < atributos_entrada.size(); idx++) {
        if (this.attr_pos.contains(atributos_entrada.get(idx).pos)) {
            continue; // attribute already tested by the rule
        }
        trial = new rule(this);
        trial.addInstance(new Instance(atributos_entrada.get(idx).attr, false, trial.getNumInstances()), atributos_entrada.get(idx).pos);
        double level = trial.predictive_level();
        boolean better = level > bestLevel;
        if (!better && bestLevel == level) {
            better = trial.num_covered_cases() > bestCoverage;
        }
        if (better) {
            chosen = idx;
            bestLevel = level;
            bestCoverage = trial.num_covered_cases();
        }
    }
    if (chosen == -1) {
        return false; // no candidate improved on the current rule
    }
    this.addInstance(new Instance(atributos_entrada.get(chosen).attr, false, this.getNumInstances()), atributos_entrada.get(chosen).pos);
    return true;
}
/**
 * Collects the cases of the current working set {@code C[k_counter]} that
 * satisfy every condition of this rule.
 *
 * @return a new set holding the satisfied cases (no attribute positions recorded)
 */
public rule satisfied_cases(){
    final int caseCount = C.get(k_counter).getNumInstances();
    final int conditionCount = this.getNumInstances();
    Instance candidate, condition;
    rule matched = new rule();
    for (int c = 0; c < caseCount; c++) {
        candidate = C.get(k_counter).getInstance(c);
        boolean allHold = true;
        for (int idx = 0; idx < conditionCount && allHold; idx++) {
            condition = this.getInstance(idx);
            allHold = candidate.getInputNominalValues(this.posInstance(idx)).equals(condition.getInputNominalValues(0));
        }
        if (allHold) {
            matched.addInstance(candidate);
        }
    }
    return matched;
}
/**
 * Compares two instances over all their input attributes (case-insensitive).
 *
 * @param ins1 first instance to compare
 * @param ins2 second instance to compare
 * @return true if every input attribute has the same value, false otherwise
 */
private boolean equal_attr(Instance ins1,Instance ins2){
    // -1/-1 tells equal_inst to compare every input attribute.
    final int allAttributes = -1;
    return equal_inst(ins1, ins2, allAttributes, allAttributes);
}
/**
 * Compares input values of two instances, ignoring case. When both positions
 * are -1 every input attribute is compared; otherwise only the value at
 * {@code pos1} of the first instance against the value at {@code pos2} of
 * the second.
 *
 * @param ins1 first instance to compare
 * @param ins2 second instance to compare
 * @param pos1 attribute position in {@code ins1}, or -1
 * @param pos2 attribute position in {@code ins2}, or -1
 * @return true if the compared values are equal, false otherwise
 */
private boolean equal_inst(Instance ins1,Instance ins2,int pos1,int pos2){
    if (pos1 != -1 || pos2 != -1) {
        return ins1.getInputNominalValues(pos1).equalsIgnoreCase(ins2.getInputNominalValues(pos2));
    }
    for (int a = 0; a < Attributes.getInputNumAttributes(); a++) {
        if (!ins1.getInputNominalValues(a).equalsIgnoreCase(ins2.getInputNominalValues(a))) {
            return false;
        }
    }
    return true;
}
/**
 * Adds to this rule every condition of {@code _r} whose value (input
 * position 0) is not already present in this rule.
 *
 * @param _r rule whose conditions are merged into this one
 */
public void union(rule _r){
    for (int src = 0; src < _r.getNumInstances(); src++) {
        Instance incoming = _r.getInstance(src);
        boolean present = false;
        for (int dst = 0; dst < this.getNumInstances(); dst++) {
            Instance existing = this.getInstance(dst);
            if (equal_inst(incoming, existing, 0, 0)) {
                present = true;
                break;
            }
        }
        if (present) {
            continue;
        }
        this.addInstance(incoming, _r.posInstance(src));
    }
}
/**
 * For each instance of {@code _r}, removes from this set the first instance
 * that has the same values in all input attributes.
 *
 * @param _r instances to remove
 */
public void remove(rule _r){
    for (int src = 0; src < _r.getNumInstances(); src++) {
        Instance victim = _r.getInstance(src);
        for (int dst = 0; dst < this.getNumInstances(); dst++) {
            Instance existing = this.getInstance(dst);
            if (equal_attr(victim, existing)) {
                this.removeInstance(dst);
                break; // only one match is removed per instance of _r
            }
        }
    }
}
/**
 * @return true if the set holds no instances, false otherwise
 */
public boolean isEmpty(){
    final int stored = super.getNumInstances();
    return stored == 0;
}
/**
 * @return class label this rule predicts (null if none was assigned)
 */
public String get_clase(){
    return this.clase;
}
}
/**
* Prepares the learner: loads the training set, checks that every input
* attribute is nominal, collects the nominal values of the input and output
* attributes, loads the test set and opens the log file named by
* {@code Parameters.logOutputFile}.
*
* Dataset and header-format errors are caught and reported on the console;
* in that case the constructor still returns, with only the fields assigned
* before the failure initialised. A failure to open the log file is logged
* and leaves {@code bw_output} unassigned.
*
* @param trainName path of the training set
* @param testName path of the test set
* @throws ExNotNominalAttr if some input attribute is not nominal
*/
public swap1(String trainName, String testName) throws ExNotNominalAttr{//S: set of training cases
try{
k_counter=0;// first iteration index
S = new rule();
P = new ArrayList<rule>();
E = new ArrayList<rule>();
R = new ArrayList<rule>();
S.readSet(trainName,true);
this.checkNominal();
// Initialise C: C[0] is the full training set.
C = new ArrayList<rule>();
C.add(k_counter, S);
// R[0] is an empty placeholder; learned rules are stored from index 1 on.
R.add(k_counter, new rule());
// One entry per (input attribute, nominal value) pair.
atributos_entrada = new ArrayList<Attr_pos>();
for(int i=0;i<Attributes.getInputNumAttributes();i++){
Attribute a=Attributes.getInputAttribute(i);
for(int j=0;j<a.getNumNominalValues();j++)
atributos_entrada.add(new Attr_pos(a.getNominalValue(j),i));
}
// One entry per nominal value of each output attribute.
atributos_salida = new ArrayList<String>();
for(int i=0;i<Attributes.getOutputNumAttributes();i++){
Attribute a=Attributes.getOutputAttribute(i);
for(int j=0;j<a.getNumNominalValues();j++)
atributos_salida.add(a.getNominalValue(j));
}
tstSet = new rule();
tstSet.readSet(testName, false);
File archivo = new File (Parameters.logOutputFile);
FileWriter fw;
try {
fw = new FileWriter(archivo);
bw_output = new BufferedWriter(fw);
} catch (IOException ex) {
// The log could not be opened: report it and continue without a writer.
Logger.getLogger(swap1.class.getName()).log(Level.SEVERE, null, ex);
}
}catch (DatasetException ex){
System.out.println ("\n\n>>>TRAIN Errors");
ex.printAllErrors();
System.out.println("Error: "+ex.getMessage());
}catch (HeaderFormatException e2){
System.err.println ("Exception in header format: "+e2.getMessage());
}
}
/**
 * Training: builds the rule list R.
 *
 * Classes are processed in the order stored in {@code atributos_salida}. On
 * every iteration a rule for the current class is grown from a randomly
 * chosen attribute value (best swaps first, then best additions), refined
 * with {@code swap_min_error}, stored, and the cases it satisfies are
 * removed from the working set. The loop ends when the working set is
 * empty. Finally, rules whose removal does not change the performance on the
 * training set are deleted and the resulting rules are reported.
 *
 * The target class is advanced past every class that has no case left in
 * the working set, so no rule is built for a class without remaining cases.
 * If the class list runs out while cases remain, the
 * {@code IndexOutOfBoundsException} from the list is propagated.
 */
public void train(){
    String curr_class = atributos_salida.remove(0);

    // Information for the KEEL output files.
    StringTokenizer tokens = new StringTokenizer(S.getHeader(), " \n\r");
    tokens.nextToken();
    relation = tokens.nextToken();
    inputs = Attributes.getInputAttributes();
    output = Attributes.getOutputAttribute(0);

    // One generator for the whole run instead of a new one per iteration.
    Random rnd = new Random();

    do {
        // Skip every class that has no pending case (there may be several in a row).
        while (!quedan_de_la_clase(curr_class)) {
            curr_class = atributos_salida.remove(0);
        }

        // Create a rule B whose only condition is a randomly chosen attribute value.
        rule b = new rule(curr_class);
        Attr_pos a_p = atributos_entrada.get(rnd.nextInt(atributos_entrada.size()));
        b.addInstance(new Instance(a_p.attr, false, 0), a_p.pos);

        // Keep improving B: best swap if there is one, otherwise best addition.
        boolean changed = true;
        while (changed) {
            changed = b.single_best_swap();
            if (!changed) {
                changed = b.add_single_best();
            }
        }

        P.add(k_counter, b.swap_min_error());            // P[k] := swap_min_error(B, C[k])
        E.add(k_counter, b.satisfied_cases());           // E[k] := cases in C[k] that satisfy P[k]
        R.add(k_counter + 1, P.get(k_counter));          // R[k+1] := R U {P[k]}
        C.add(k_counter + 1, new rule(C.get(k_counter)));
        C.get(k_counter + 1).remove(E.get(k_counter));   // C[k+1] := C[k] - E[k]
        k_counter++;
    } while (!C.get(k_counter).isEmpty());               // until C[k] is empty

    // Delete rules that can be removed without affecting performance on S.
    int pos;
    while ((pos = rule_to_be_erased()) != -1) {
        R.remove(pos); // int argument: removes by index
    }
    statistics();
}
/**
 * Classifies every training case, storing the predicted and the real class
 * in {@code trainPrediction} and {@code trainReal}.
 */
public void classifyTrainSet(){
    final int total = S.getNumInstances();
    trainPrediction = new String[total];
    trainReal = new String[S.getNumInstances()];
    int idx = 0;
    while (idx < S.getNumInstances()) {
        trainPrediction[idx] = classify(S.getInstance(idx));
        trainReal[idx] = S.getInstance(idx).getOutputNominalValues(0);
        idx++;
    }
}
/**
 * Classifies every test case, storing the predicted and the real class in
 * {@code testPrediction} and {@code testReal}.
 */
public void classifyTestSet(){
    final int total = tstSet.getNumInstances();
    testPrediction = new String[total];
    testReal = new String[tstSet.getNumInstances()];
    int idx = 0;
    while (idx < tstSet.getNumInstances()) {
        testPrediction[idx] = classify(tstSet.getInstance(idx));
        testReal[idx] = tstSet.getInstance(idx).getOutputNominalValues(0);
        idx++;
    }
}
/**
 * Classifies one instance with the learned rules. Rules are tried in list
 * order starting at index 1; the class of the first rule whose conditions
 * all hold is returned.
 *
 * @param ins instance to classify
 * @return class of the first matching rule, or "Unclassified" if none matches
 */
public String classify(Instance ins){
    for (int r = 1; r < R.size(); r++) {
        rule candidate = R.get(r);
        boolean fires = true;
        for (int c = 0; c < candidate.getNumInstances() && fires; c++) {
            Instance condition = candidate.getInstance(c);
            fires = ins.getInputNominalValues(candidate.posInstance(c)).equals(condition.getInputNominalValues(0));
        }
        if (fires) {
            return candidate.get_clase();
        }
    }
    return "Unclassified";
}
/**
 * Writes the training and test classification results to the files named
 * by {@code Parameters.trainOutputFile} and {@code Parameters.testOutputFile}.
 */
public void writeResults(){
    this.writeOutput(Parameters.trainOutputFile, this.trainReal, this.trainPrediction);
    this.writeOutput(Parameters.testOutputFile, this.testReal, this.testPrediction);
}
/**
 * Prints a KEEL standard output file: the header (relation, input
 * attributes, output attribute, {@code @data}) followed by one line per
 * instance with the real and the predicted class.
 *
 * The text is assembled in a {@code StringBuilder} instead of repeated
 * {@code String} concatenation; the bytes written are the same. Data lines
 * are appended to the file in batches of ten.
 *
 * @param filename Name of output file
 * @param realClass Real output of instances
 * @param prediction Predicted output for instances
 */
protected void writeOutput(String filename, String [] realClass, String [] prediction) {
    final int rowsPerFlush = 10;
    StringBuilder text = new StringBuilder();

    /* Input attributes */
    text.append("@relation ").append(relation).append("\n");
    for (int i = 0; i < inputs.length; i++) {
        text.append("@attribute ").append(inputs[i].getName()).append(" ");
        if (inputs[i].getType() == Attribute.NOMINAL) {
            text.append("{");
            for (int j = 0; j < inputs[i].getNominalValuesList().size(); j++) {
                text.append((String) inputs[i].getNominalValuesList().elementAt(j));
                if (j < inputs[i].getNominalValuesList().size() - 1) {
                    text.append(", ");
                }
            }
            text.append("}\n");
        } else {
            if (inputs[i].getType() == Attribute.INTEGER) {
                text.append("integer");
            } else {
                text.append("real");
            }
            text.append(" [").append(String.valueOf(inputs[i].getMinAttribute()))
                .append(", ").append(String.valueOf(inputs[i].getMaxAttribute())).append("]\n");
        }
    }

    /* Output attribute */
    text.append("@attribute ").append(output.getName()).append(" ");
    if (output.getType() == Attribute.NOMINAL) {
        text.append("{");
        for (int j = 0; j < output.getNominalValuesList().size(); j++) {
            text.append((String) output.getNominalValuesList().elementAt(j));
            if (j < output.getNominalValuesList().size() - 1) {
                text.append(", ");
            }
        }
        text.append("}\n");
    } else {
        text.append("integer [").append(String.valueOf(output.getMinAttribute()))
            .append(", ").append(String.valueOf(output.getMaxAttribute())).append("]\n");
    }

    /* Data */
    text.append("@data\n");
    Files.writeFile(filename, text.toString());
    text.setLength(0);

    for (int i = 0; i < realClass.length; i++) {
        text.append(realClass[i]).append(" ");
        text.append(prediction[i]).append(" ");
        text.append("\n");
        if ((i % rowsPerFlush) == rowsPerFlush - 1) {
            Files.addToFile(filename, text.toString());
            text.setLength(0);
        }
    }
    // Write the last, incomplete batch (if any).
    if ((realClass.length % rowsPerFlush) != 0) {
        Files.addToFile(filename, text.toString());
    }
}
/**
* Testing: classifies the training and test sets, writes the KEEL output
* files, and then reports on the console and in the log file the prediction
* for every test case covered by some rule, followed by the totals and the
* accuracy. Test cases matched by no rule produce no line and are not
* counted as hits. The log writer is closed at the end of a successful run.
*/
public void test(){
int acertados=0; // number of correctly classified test cases
// Perform classification of training and test sets in KEEL Format
classifyTrainSet();
classifyTestSet();
writeResults();
try{
bw_output.write("\n\n");
bw_output.write("---------------------------------------------\n");
bw_output.write("Inicio de las pruebas\n");
bw_output.write("---------------------------------------------\n");
for(int i=0;i<tstSet.getNumInstances();i++){
Instance tst_i = tstSet.getInstance(i);
boolean covered = false;
// Rules start at index 1 (index 0 is a placeholder); stop at the first rule that fires.
for(int j=1;j<R.size() && !covered;j++){
boolean correct = true; // true while every condition checked so far holds
rule rR = R.get(j);
for(int k=0;k<rR.getNumInstances() && correct ;k++){
Instance rR_i = rR.getInstance(k);
if(!tst_i.getInputNominalValues(rR.posInstance(k)).equals(rR_i.getInputNominalValues(0))){
correct = false;
}
}
if(correct){
covered = true;
// Echo the input values of the case, then predicted vs. real class.
for(int k=0;k<Attributes.getInputNumAttributes();k++){
System.out.print(tst_i.getInputNominalValues(k)+" ");
bw_output.write(tst_i.getInputNominalValues(k)+" ");
}
System.out.print(" :");
bw_output.write(" :");
String swap_res = rR.get_clase(), real_res = tst_i.getOutputNominalValues(0);
System.out.println(" Segun SWAP-1 es: "+swap_res+" y en verdad es: "+real_res);
bw_output.write(" Segun SWAP-1 es: "+swap_res+" y en verdad es: "+real_res+"\n");
if(real_res.equalsIgnoreCase(swap_res)){
acertados++;
}
}
}
}
// Accuracy is computed over all test cases, covered or not.
double total_ins = tstSet.getNumInstances();
double accuracy = acertados/total_ins;
System.out.println("Casos totales: "+total_ins);
System.out.println("Casos acertados: "+acertados);
System.out.println("Porcentaje de acierto: "+accuracy*100+"%");
bw_output.write("Casos totales: "+total_ins+"\n");
bw_output.write("Casos acertados: "+acertados+"\n");
bw_output.write("Porcentaje de acierto: "+accuracy*100+"%\n");
bw_output.close();
}
catch(IOException ex){
// Any failure writing to (or closing) the log is only reported on the console.
System.out.println("Error en la escritora del ficehro de salida");
}
}
/**
 * Looks for a rule that can be deleted without changing the performance of
 * the rule list on the training cases.
 *
 * @return index in R of the first such rule, or -1 if there is none
 */
private int rule_to_be_erased(){
    final double baseline = performance(R);
    int idx = 1; // index 0 is the placeholder and is never a candidate
    while (idx < R.size()) {
        ArrayList<rule> without = new ArrayList<rule>(R);
        without.remove(idx); // int argument: removes by index
        if (baseline == performance(without)) {
            return idx;
        }
        idx++;
    }
    return -1;
}
/**
 * Reports the rules generated by the algorithm, one per line, on the console
 * and in the log file, in the form
 * {@code [attr] = value && ... --> class}.
 */
private void statistics(){
    try{
        bw_output.write("---------------------------------------------\n");
        bw_output.write("Inicio del entrenamiento\n");
        bw_output.write("---------------------------------------------\n");
        for (int r = 1; r < R.size(); r++) {
            rule current = R.get(r);
            int c;
            // Every condition but the last is followed by " && ".
            for (c = 0; c < (current.getNumInstances() - 1); c++) {
                String joined = condition_text(current, c) + " && ";
                System.out.print(joined);
                bw_output.write(joined);
            }
            // The last condition is followed by the arrow and the class.
            String tail = condition_text(current, c) + " --> ";
            System.out.print(tail);
            System.out.println(current.get_clase());
            bw_output.write(tail);
            bw_output.write(current.get_clase()+"\n");
        }
    }
    catch(IOException ex){
        System.out.println("Error en la escritora del ficehro de salida");
    }
}

/**
 * Formats one condition of a rule as {@code [attributeName] = value}.
 */
private String condition_text(rule r, int idx){
    return "["+Attributes.getInputAttribute(r.attr_pos.get(idx)).getName()+"] = "+r.getInstance(idx).getInputNominalValues(0);
}
/**
 * Checks that every input attribute is nominal.
 *
 * @throws ExNotNominalAttr if some input attribute is not nominal
 */
private void checkNominal() throws ExNotNominalAttr{
    int idx = 0;
    while (idx < Attributes.getInputNumAttributes()) {
        Attribute current = Attributes.getInputAttribute(idx);
        if (current.getType() != Attribute.NOMINAL) {
            throw new ExNotNominalAttr();
        }
        idx++;
    }
}
/**
 * Tells whether the current working set {@code C[k_counter]} still holds
 * some case of the given class (case-insensitive comparison).
 *
 * @param clase class to look for
 * @return true if at least one pending case belongs to the class
 */
private boolean quedan_de_la_clase(String clase){
    InstanceSet pending = C.get(k_counter);
    boolean found = false;
    for (int i = 0; i < pending.getNumInstances() && !found; i++) {
        found = clase.equalsIgnoreCase(pending.getInstance(i).getOutputNominalValues(0));
    }
    return found;
}
/**
 * Computes the fraction of training cases (set S) that the given rule list
 * classifies correctly. Rules are tried in list order starting at index 1;
 * only the first rule that fires on a case decides its class. Cases matched
 * by no rule count as misses.
 *
 * The hit count is divided by the size of S, the set the hits are counted
 * on, rather than by the size of the test set.
 *
 * @param is rule list whose performance is computed
 * @return fraction (0 to 1) of correctly classified training cases
 */
private double performance(ArrayList<rule> is){
    int acertados = 0;
    for (int i = 0; i < S.getNumInstances(); i++) {
        Instance caso = S.getInstance(i);
        boolean covered = false;
        for (int j = 1; j < is.size() && !covered; j++) {
            rule regla = is.get(j);
            boolean fires = true;
            for (int k = 0; k < regla.getNumInstances() && fires; k++) {
                Instance condition = regla.getInstance(k);
                if (!caso.getInputNominalValues(regla.posInstance(k)).equals(condition.getInputNominalValues(0))) {
                    fires = false;
                }
            }
            if (fires) {
                covered = true;
                String swap_res = regla.get_clase(), real_res = caso.getOutputNominalValues(0);
                if (real_res.equalsIgnoreCase(swap_res)) {
                    acertados++;
                }
            }
        }
    }
    // Same population as the loop above: the training set.
    double total_ins = S.getNumInstances();
    return acertados / total_ins;
}
}