/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * @author Written by Rosa Venzala 19/09/2008 * @author Modified by Xavi Sol� (La Salle, Ram�n Llull University - Barcelona) 16/12/2008 * @version 1.2 * @since JDK1.2 * </p> */ package keel.Algorithms.Rule_Learning.Riona; import java.io.IOException; import java.util.Arrays; import java.util.ArrayList; import java.util.Collections; import keel.Dataset.*; import java.util.*; import java.lang.*; import java.text.DecimalFormat; import org.core.*; public class Riona { /** * <p> * Main procedures of Rionasd algorithm * </p> */ // Train file private Dataset train ; // Test file private Dataset test; private String outFile; private String outTrainFile; private String outTestFile; private String []classes=null; private String []testClasses=null; private long seed; private double data[][]=null; private double dataWithoutNor[][]=null; private double testData[][]=null; private double testDataWithoutNor[][]=null; private int clasificacionFinalTr[]=null; private int clasificacionFinalTst[]=null; private int k; public Riona(){ } /** * <p> * Riona constructor * </p> */ public Riona(String trainFile,String testFile, String trainOutFile, String testOutFile, String fOut,long lSeed){ outFile = fOut;outTrainFile = trainOutFile; outTestFile = testOutFile; train=new Dataset();test=new Dataset(); this.seed=lSeed; try { train.readSet(trainFile, true); test.readSet(testFile,false); train.calculateMostCommon();//eval.calculaMasComunes(); test.calculateMostCommon(); classes=train.giveClasses(); testClasses=test.giveClasses(); dataWithoutNor=new double[train.getNData()][]; for(int i=0;i<train.getNData();i++)dataWithoutNor[i]=new double[train.getInPuts()]; data=train.getX(); for(int i=0;i<train.getNData();i++){ for(int j=0;j<train.getInPuts();j++)dataWithoutNor[i][j]=data[i][j]; } train.setNumValues(); train.normalize();//Convierte todos los valores del conjunto de datos en el intervalo [0,1] a la hora de facilitar los calculos de las distancias testDataWithoutNor=new double[test.getNData()][]; for(int i=0;i<test.getNData();i++)testDataWithoutNor[i]=new double[test.getInPuts()]; testData=test.getX(); for(int i=0;i<test.getNData();i++){ for(int j=0;j<test.getInPuts();j++)testDataWithoutNor[i][j]=testData[i][j]; } test.setNumValues(); test.normalize();//Convierte todos los valores del conjunto de datos en el intervalo [0,1] a la hora de facilitar los calculos de las distancias //AQUI EMPIEZO RIONA train.computeSVDM(); test.computeSVDM(); //Clasificamos como ejemplos de test los datos de train y de test int neighbourSet[]; int newK,kMax; Complex rule; boolean consistent; int supportSet[]=new int[train.getNClasses()]; clasificacionFinalTr=new int[train.getNData()]; clasificacionFinalTst=new int[test.getNData()]; if(train.getNData()<100)kMax=train.getNData()-1; else kMax=100; k=findOptimalK(kMax); System.out.println("tamanio optimo vecindario "+k); //k=4; for(int i=0;i<train.getNData();i++){ neighbourSet=train.getNeighbourSet(data[i],k); newK=0;//inicializar for(int j=0;j<train.getNData();j++){ if(neighbourSet[j]!=-2){newK++; } } //System.out.println("tam final vecindario "+k+" "+nuevo_k); for(int c=0;c<train.getNClasses();c++){ supportSet[c]=0; for(int v=0;v<newK;v++){ if(train.getC(neighbourSet[v])==c){ rule=createRuleTestTrain(data[i],i,data[neighbourSet[v]],neighbourSet[v],c,train.getNClasses(),true); rule=desnormalize(rule,true);// rule.print(); if(isConsistent(rule,neighbourSet,newK)) supportSet[c]++; } } //System.out.println("clase "+c+" elems "+supportSet[c]); } clasificacionFinalTr[i]=train.getMaximum(supportSet,seed); //System.out.println(clases[clasificacionFinalTr[i]]); } System.out.println("------------------------------------------------"); //supportSet=new int[test.getnclases()]; //las clases son las de train for(int i=0;i<test.getNData();i++){ neighbourSet=train.getNeighbourSet(testData[i],k);//no es test newK=0;//inicializar for(int j=0;j<train.getNData();j++)//no es test if(neighbourSet[j]!=-2){newK++;} for(int c=0;c<train.getNClasses();c++){ supportSet[c]=0; for(int v=0;v<newK;v++){ if(train.getC(neighbourSet[v])==c){ rule=createRuleTestTrain(testData[i],i,data[neighbourSet[v]],neighbourSet[v],c,train.getNClasses(),false); // rule=desnormalizar(rule,false); //rule.print(); if(isConsistent(rule,neighbourSet,newK)) supportSet[c]++; } } } clasificacionFinalTst[i]=train.getMaximum(supportSet,seed); //System.out.println(clasesTest[clasificacionFinalTst[i]]); } //GENERAMOS LA SALIDA generateOutPut(); } catch (IOException e) { System.err.println("There was a problem while trying to read the dataset files:"); System.err.println("-> " + e); } } /** * <p> * Calculate the statistical values necessary and creates the out-put KEEL files * </p> */ private void generateOutPut() { Fichero f = new Fichero(); String str = ""; String strTrain=""; String strtest=""; String outPut = new String(""); outPut = train.copyTestHeader(); double trainAcc=0.,testAcc=0.; for(int i=0;i<train.getNData();i++){ strTrain += new String(classes[train.getC(i)] + " " + classes[clasificacionFinalTr[i]] + "\n"); if(train.getC(i)==clasificacionFinalTr[i])trainAcc++; } for(int i=0;i<test.getNData();i++){ strtest += new String(testClasses[test.getC(i)] + " " + testClasses[clasificacionFinalTst[i]] + "\n"); if(test.getC(i)==clasificacionFinalTst[i])testAcc++; } f.escribeFichero(outTrainFile, outPut + strTrain); f.escribeFichero(outTestFile, outPut + strtest); double porcTrn=(trainAcc/train.getNData()); double porcTst=(testAcc/test.getNData()); str += "Accuracy Training: " + porcTrn + "\n"; str += "Accuracy Test: " + porcTst+ "\n"; str += "tamanio optimo vecindario "+k; f.escribeFichero(outFile, str); System.out.println(str); } /** * <p> * Desnormalize the data * </p> * @param c the rule * @param train true if is Train data, false otherwise */ private Complex desnormalize(Complex c,boolean train){ int []numI=new int[2]; double []limits=new double[2]; for (int j = 0; j < c.size();j++) { Selector s = c.getSelector(j); int at=s.getAttribute(); numI=s.getNumInstances(); if(train){limits[0]=dataWithoutNor[numI[0]][at]; limits[1]=dataWithoutNor[numI[1]][at];} else {limits[0]=testDataWithoutNor[numI[0]][at]; limits[1]=testDataWithoutNor[numI[1]][at];} s.setValues(limits); } return c; } /** * <p> * Creates a local rule * </p> * @param tst the test example * @param numItst the position of the example in the set * @param trn the train example * @param numItrn the position of the example in the set * @param clase the consequent of the rule * @param nClasses the total number of classes */ public Complex createRuleTestTrain(double[]tst,int numItst,double[]trn,int numItrn,int clase,int nClasses,boolean es_train){ double numeric; Selector s = null; Complex rule; rule=new Complex(nClasses); rule.setClassAttribute(clase); rule.adjuntNameAttributes(train.giveNames()); for (int j = 0; j < train.getInPuts(); j++) { double []vnum=new double[2]; if(tst[j]<trn[j]){vnum[0]=tst[j];vnum[1]=trn[j];} else {vnum[0]=trn[j];vnum[1]=tst[j];} if (Attributes.getInputAttribute(j).getType() == Attribute.NOMINAL){ double []comp; int elems=0; if(es_train)comp=train.createBall(j,tst[j],trn[j]); else { double v=train.getRealValue(j,test.findNominalValue(j,tst[j])); comp=train.createBall(j,v,trn[j]); } if(comp!=null){ for(int ii=0;ii<comp.length;ii++){ if(comp[ii]!=-1)elems++; else break; } String []vnomi=new String[elems]; for(int ii=0;ii<elems;ii++){ //if(es_train) vnomi[ii]=train.findNominalValue(j,comp[ii]); //else vnomi[ii]=test.findNominalValue(j,comp[ii]); } s=new Selector(j,0,vnomi,comp,elems,/*i*/0); } } else { s = new Selector(j, 0,vnum,2,0); } if(tst[j]<trn[j]){s.setNumUp(numItrn);s.setNumLow(numItst);} else {s.setNumUp(numItst);s.setNumLow(numItrn);} if(s!=null) rule.addSelector(s); } return rule; } /** * <p> * Inidcates if a rule is consistent with a determined set of examples * </p> * @return true if is consistent * @return false otherwise */ public boolean isConsistent(Complex R,int[]verifySet,int util){ for(int i=0;i<util;i++){ //if(train){ //no es necesario pq el vecindario siempre es de train if((R.getClassAttribute()!=train.getC(verifySet[i])/*claseTst*/) && (R.ruleCoversInstance(data[verifySet[i]])))return false; //} //else if((R.getClase()!=claseTst) && (R.reglaCubreInstancia(datosTst[verifySet[i]])))return false; } return true; } /** * <p> * Calculates the optimum size of the neighborhood for the training set * </p> * @param kmax max malue for neighborhood size */ public int findOptimalK(int kmax){ int A[][]=new int[train.getNData()][]; int max[]=new int[kmax]; for(int k=0;k<kmax;k++)max[k]=0; for(int i=0;i<train.getNData();i++){ A[i]=getClassificationVector(i,kmax); for(int k=0;k<kmax;k++){ if(train.getC(i)==A[i][k])max[k]++; } } int k=-1; for (int i = 0, c = -1; i < kmax; i++) { if (max[i] > c) { k = i; c = max[i]; } } return (k+1); } private int[]getClassificationVector(int trn,int kmax){ int []A=new int[kmax]; int []NN=train.getNN(train.getX(trn),kmax); //NN tiene tamanio kmax int decStrength[]=new int[train.getNClasses()]; for(int v=0;v<train.getNClasses();v++)decStrength[v]=0; int currentDec=train.getMostFrequentClass(); for(int k=0;k<kmax;k++){ //System.out.println("soy el vecino "+NN[k]); Complex rule=createRuleTestTrain(data[trn],trn,data[NN[k]],NN[k],train.getC(NN[k]),train.getNClasses(),true); if(isConsistent(rule,NN,kmax)){ int v=train.getC(NN[k]); //System.out.print("es consistente "+decStrength[v]+" "+decStrength[currentDec]+" "); decStrength[v]=decStrength[v]+1; if(decStrength[v]>decStrength[currentDec]){currentDec=v; } } A[k]=currentDec; //System.out.print(A[k]+" ;; "); } return A; } }