/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.UnsupervisedLearning.AssociationRules.IntervalRuleLearning.EARMGA; /** * <p> * @author Written by Alberto Fern�ndez (University of Granada) * @author Modified by Diana Mart�n (dmartin@ceis.cujae.edu.cu) * @version 1.1 * @since JDK1.6 * </p> */ import java.io.PrintWriter; import java.math.BigDecimal; import java.util.*; import org.core.Randomize; public class EARMGAProcess { private myDataset ds; private DataB dataBase; ArrayList<Chromosome> pop; ArrayList<AssociationRule> assocRules; private int nTrials, trials; private int popsize; private double ps; private double pc; private double pm; private int kItemsets; private double alpha; public EARMGAProcess(myDataset ds, DataB dataBase, int nTrials, int popsize, int kItemsets, double ps, double pc, double pm, double alpha) { this.ds = ds; this.dataBase = dataBase; this.nTrials = nTrials; this.popsize = popsize; this.kItemsets = kItemsets; this.ps = ps; this.pc = pc; this.pm = pm; this.alpha = alpha; } public void run() { int i, nGen; Chromosome chromo; ArrayList<Chromosome> pop_temp; this.trials = 0; nGen = 0; System.out.println("Initialization"); this.initialize(); do { System.out.println("Generation: " + nGen); this.select(); pop_temp = this.crossover(); for (i=pop_temp.size()-1; i>=0; i--) { chromo = pop_temp.get(i); if ((Randomize.Rand() * chromo.getFit()) < this.pm) { this.mutate(chromo); this.fitness(chromo); } if (chromo.getFit() <= 0.0) pop_temp.remove(i); } this.elitist (pop_temp); nGen++; }while (!terminate()); this.genRules(); } public boolean terminate () { Chromosome best, worst; Collections.sort(this.pop); best = this.pop.get(0); worst = this.pop.get(this.pop.size()-1); // if ((best.getFit() - worst.getFit()) < this.alpha) return (true); if (this.trials > this.nTrials) return (true); return (false); } private void initialize() { int i, nVars, attr, top; Chromosome chromo; Gene gen; this.pop = new ArrayList<Chromosome>(); nVars = this.ds.getnVars(); do { chromo = new Chromosome(Randomize.Randint(0, this.kItemsets-1), nVars); for (i=0; i < this.kItemsets; i++) { gen = new Gene(); attr = Randomize.Randint (0, nVars); while (chromo.isUsed(attr)) attr = (attr + 1) % nVars; gen.setAttr(attr); gen.setType(this.ds.getType(attr)); gen.addValue(Randomize.Randint(0, this.dataBase.numIntervals(attr))); chromo.add(gen); } this.fitness(chromo); } while (chromo.getFit() <= 0.0); this.pop.add(chromo); // System.out.println("Fitness Semilla: " + chromo.getFit()); while (this.pop.size() <= (this.popsize/2.0)) { top = this.pop.size(); for (i=0; i<top; i++) { chromo = (this.pop.get(i)).copy(); this.mutate(chromo); this.fitness(chromo); // System.out.println(" Fitness mutacion semilla: " + chromo.getFit()); if (chromo.getFit() > 0.0) this.pop.add(chromo); } } } private void select() { int i; for (i=this.pop.size()-1; i >= 0 ; i--) { if ((Randomize.Rand() * (this.pop.get(i)).getFit()) > this.ps) this.pop.remove(i); } } private ArrayList<Chromosome> crossover() { int i, j, k, posi, posj, aux, pos; ArrayList<Chromosome> pop_tmp = new ArrayList<Chromosome>(); Chromosome dad, mom, off1, off2; for (i=0; i < this.pop.size(); i++) { dad = this.pop.get(i); for (j=i+1; j < this.pop.size(); j++) { if (Randomize.Rand() < this.pc) { mom = this.pop.get(j); posi = Randomize.Randint(0, this.kItemsets); posj = Randomize.Randint(0, this.kItemsets); if (posi > posj) { aux = posi; posi = posj; posj = aux; } off1 = dad.copy(); off2 = mom.copy(); if (posi==0) { off1.setLengthAnt(mom.getLengthAnt()); off2.setLengthAnt(dad.getLengthAnt()); } for (k=posi; k <= posj; k++) { off1.offUsed((off1.getGen(k)).getAttr()); off2.offUsed((off2.getGen(k)).getAttr()); } for (k=posi; k <= posj; k++) { pos = k; if (off1.isUsed((mom.getGen(pos)).getAttr())) { pos = (posj + 1) % this.kItemsets; while (off1.isUsed((mom.getGen(pos)).getAttr())) { pos = (pos + 1) % this.kItemsets; } } (off1.getGen(k)).setAttr((mom.getGen(pos)).getAttr()); (off1.getGen(k)).setType((mom.getGen(pos)).getType()); (off1.getGen(k)).setValue((mom.getGen(pos)).getValue()); off1.onUsed((off1.getGen(k)).getAttr()); pos = k; if (off2.isUsed((dad.getGen(pos)).getAttr())) { pos = (posj + 1) % this.kItemsets; while (off2.isUsed((dad.getGen(pos)).getAttr())) { pos = (pos + 1) % this.kItemsets; } } (off2.getGen(k)).setAttr((dad.getGen(pos)).getAttr()); (off2.getGen(k)).setType((dad.getGen(pos)).getType()); (off2.getGen(k)).setValue((dad.getGen(pos)).getValue()); off2.onUsed((off2.getGen(k)).getAttr()); } this.fitness(off1); this.fitness(off2); // System.out.println(" Fitness cruce: " + off1.getFit()); // System.out.println(" Fitness cruce: " + off2.getFit()); pop_tmp.add(off1); pop_tmp.add(off2); } } } return (pop_tmp); } private void mutate(Chromosome chromo) { int i, attr, attr_ant; double prop; Gene gen; chromo.setLengthAnt(Randomize.Randint(0, this.kItemsets-1)); gen = chromo.getGen(Randomize.Randint(0, this.kItemsets)); attr_ant = gen.getAttr(); prop = gen.numIntervals() / (this.dataBase.numIntervals(attr_ant) * 1.0); attr = Randomize.Randint(0, this.ds.getnVars()); for (i=0; chromo.isUsed(attr) && i < this.ds.getnVars(); i++) attr = (attr + 1) % this.ds.getnVars(); if (chromo.isUsed(attr)) attr = attr_ant; else { chromo.offUsed(attr_ant); chromo.onUsed(attr); gen.setAttr(attr); gen.setType(this.ds.getType(attr)); } gen.clearValue(); for (i=0; i < this.dataBase.numIntervals(attr); i++) { if (Randomize.Rand() <= prop) gen.addValue(i); } // if (gen.numIntervals() == this.dataBase.numIntervals(attr)) gen.removeValue(Randomize.Randint(0, gen.numIntervals())); if (gen.numIntervals() == 0) gen.addValue(Randomize.Randint(0, this.dataBase.numIntervals(attr))); // System.out.println(" Mutracion, numero intervalos: " + gen.numIntervals()); } private void fitness(Chromosome chromo) { double nTrans = (double) this.ds.getnTrans(); double fit; this.trials++; ArrayList<Integer> ant = this.countSupport(chromo, 0, chromo.getLengthAnt()); if (ant.size() == 0) { chromo.setFit(0.0); chromo.setSupportAnt(0.0); chromo.setSupportCon(0.0); chromo.setSupportAll(0.0); } else { ArrayList<Integer> con = this.countSupport(chromo, chromo.getLengthAnt()+1, this.kItemsets-1); if ((con.size() / nTrans) >= 1.0) { chromo.setFit(1.0); chromo.setSupportAnt(ant.size() / nTrans); chromo.setSupportCon(1.0); chromo.setSupportAll(chromo.getSupportAnt()); } else { ArrayList<Integer> all = this.countSupport(chromo, 0, this.kItemsets-1); if (all.size()==0) { chromo.setFit(0.0); chromo.setSupportAnt(ant.size() / nTrans); chromo.setSupportCon(con.size() / nTrans); chromo.setSupportAll(0.0); } else { chromo.setSupportAnt(ant.size() / nTrans); chromo.setSupportCon(con.size() / nTrans); chromo.setSupportAll(all.size() / nTrans); fit = (chromo.getSupportAll() - (chromo.getSupportAnt() * chromo.getSupportCon())) / (chromo.getSupportAnt() * (1.0 - chromo.getSupportCon())); if (fit > 1.0) fit = 1.0; chromo.setFit(fit); } } } // } } /* private void elitist (ArrayList<Chromosome> pop_temp) { int i, j; boolean stop; Chromosome chromo1, chromo2; this.pop.addAll(pop_temp); Collections.sort(this.pop); for (i=0; i<this.pop.size(); i++) { chromo1 = this.pop.get(i); stop = false; for (j=i+1; j<this.pop.size() && !stop; j++) { chromo2 = this.pop.get(j); if ((chromo1.getFit() >= chromo2.getFit()-0.00001) && (chromo1.getFit() <= chromo2.getFit()+0.00001)) { if (chromo1.isEqual(chromo2)) { this.pop.remove(j); j--; } } else stop = true; } } while (this.pop.size() > this.popsize) this.pop.remove(this.pop.size()-1); System.gc(); } */ private void elitist (ArrayList<Chromosome> pop_temp) { int i, j; Chromosome chromo1, chromo2; for (i=0; i<pop_temp.size(); i++) { chromo1 = pop_temp.get(i); for (j=0; j<this.pop.size(); j++) { chromo2 = this.pop.get(j); if (chromo1.isSub(chromo2)) { if (chromo1.getFit() > chromo2.getFit()) { this.pop.remove(j); j--; } else { pop_temp.remove(i); j = this.pop.size(); i--; } } else if (chromo2.isSub(chromo1)) { if (chromo2.getFit() >= chromo1.getFit()) { pop_temp.remove(i); j = this.pop.size(); i--; } else { this.pop.remove(j); j--; } } } } this.pop.addAll(pop_temp); Collections.sort(this.pop); while (this.pop.size() > this.popsize) this.pop.remove(this.pop.size()-1); System.gc(); } private ArrayList<Integer> countSupport(Chromosome chromo, int ini, int fin) { ArrayList<Integer> tid_list = new ArrayList<Integer>(); ArrayList<Integer> value; double[][] trans = this.ds.getRealTransactions(); int i, j, t, attr, nTrans; boolean ok; nTrans = this.ds.getnTrans(); for (t=0; t < nTrans; t++) { ok = true; for (i=ini; i <= fin && ok; i++) { attr = (chromo.getGen(i)).getAttr(); value = (chromo.getGen(i)).getValue(); ok = false; for (j=0; j < value.size() && !ok; j++) { if (this.dataBase.isCovered(attr, value.get(j).intValue(), trans[t][attr])) ok = true; } } if (ok) tid_list.add(t); } return tid_list; } public static double roundDouble(double number, int decimalPlace){ double numberRound; if(!Double.isInfinite(number)&&(!Double.isNaN(number))){ BigDecimal bd = new BigDecimal(number); bd = bd.setScale(decimalPlace, BigDecimal.ROUND_UP); numberRound = bd.doubleValue(); return numberRound; }else return number; } public void printReport (double minConfidence, double minSupport) { double avg_yulesQ=0.0, avg_sup=0.0, avg_conf=0.0,avg_lift=0.0, avg_conv = 0.0, avg_CF = 0.0, avg_netConf = 0.0; int i, countRules, length; AssociationRule rule; countRules = length = 0; for (i=0; i < this.assocRules.size(); i++) { rule = this.assocRules.get(i); if ((rule.getConfidence() >= minConfidence) && (rule.getAll_support() >= minSupport)) { countRules++; length += rule.getLengthRule(); avg_sup += rule.getAll_support(); avg_conf += rule.getConfidence(); avg_lift += rule.getLift(); avg_conv += rule.getConv(); avg_CF += rule.getCF(); avg_netConf += rule.getNetConf(); avg_yulesQ += rule.getYulesQ(); } } System.out.println("Number of Frequent Itemsets generated: " + "-"); System.out.println("Number of Association Rules generated: " + countRules); if(countRules!=0){ System.out.println("Average SupportRules: " + roundDouble(( avg_sup / countRules ), 2) ); System.out.println("Average Confidence: " + roundDouble(( avg_conf / countRules ), 2) ); System.out.println("Average Lift: " + roundDouble(( avg_lift / countRules ), 2) ); System.out.println("Average Conviction: " + roundDouble(( avg_conv/ countRules ), 2)); System.out.println("Average Certain Factor: " + roundDouble(( avg_CF/ countRules ), 2)); System.out.println("Average Netconf: " + roundDouble(( avg_netConf/ countRules), 2)); System.out.println("Average YulesQ: " + roundDouble(( avg_yulesQ/ countRules), 2)); System.out.println("Average Length of the Rules generated: " + roundDouble((length / (double) countRules), 2)); System.out.println("Number of Covered Records(%): " + (100.0 * this.numCoveredRecords (minSupport)) / this.ds.getnTrans()); } else{ System.out.println("Average Support: " + (0.0)); System.out.println("Average Confidence: " + (0.0 )); System.out.println("Average Lift: " + ( 0.0 )); System.out.println("Average Conviction: " + ( 0.0 )); System.out.println("Average Certain Factor: " + ( 0.0 )); System.out.println("Average Netconf: " + (0.0)); System.out.println("Average Antecedents Length: " + ( 0.0 )); System.out.println("Number of Covered Records (%): " + (0.0) ); } } public String printRules(ArrayList<AssociationRule> rules) { int i, lenghtrule; boolean stop; String rulesList; stop = false; rulesList = ""; rulesList += ("Support\tantecedent_support\tconsequent_support\tConfidence\tLift\tConv\tCF\tNetConf\tYulesQ\tnAttributes\n"); for (i=0; i < rules.size() && !stop; i++) { lenghtrule = rules.get(i).getAntecedent().size()+ rules.get(i).getConsequent().size(); rulesList += ("" + roundDouble(rules.get(i).getAll_support(),2) + "\t" + roundDouble(rules.get(i).getSupport_Ant(),2) + "\t" + roundDouble(rules.get(i).getSupport_cons(),2) + "\t" + roundDouble(rules.get(i).getConfidence(),2) + "\t" + roundDouble(rules.get(i).getLift(),2) + "\t" + roundDouble(rules.get(i).getConv(),2) + "\t" + roundDouble(rules.get(i).getCF(),2) + "\t" + roundDouble(rules.get(i).getNetConf(),2) + "\t" + roundDouble(rules.get(i).getYulesQ(),2) + "\t" + lenghtrule + "\n"); } return rulesList; } public void saveReport (double minSupport,PrintWriter w) { int i, countRules, length; AssociationRule rule; double avg_yulesQ=0.0, avg_sup=0.0, avg_conf=0.0,avg_lift=0.0, avg_conv = 0.0, avg_CF = 0.0, avg_netConf = 0.0; countRules = length = 0; for (i=0; i < this.assocRules.size(); i++) { rule = this.assocRules.get(i); if (rule.getAll_support() >= minSupport) { countRules++; length += rule.getLengthRule(); avg_sup += rule.getAll_support(); avg_conf += rule.getConfidence(); avg_lift += rule.getLift(); avg_conv += rule.getConv(); avg_CF += rule.getCF(); avg_netConf += rule.getNetConf(); avg_yulesQ += rule.getYulesQ(); } } w.println("\nNumber of Frequent Itemsets generated: " + "-"); System.out.println("Number of Frequent Itemsets generated: " + "-"); w.println("\nNumber of Association Rules generated: " + countRules); System.out.println("Number of Association Rules generated: " + countRules); if(countRules!=0){ w.println("Average Support: " + roundDouble(( avg_sup / countRules ), 2)); System.out.println("Average SupportRules: " + roundDouble(( avg_sup / countRules ), 2) ); w.println("Average Confidence: " + roundDouble(( avg_conf / countRules ), 2)); System.out.println("Average Confidence: " + roundDouble(( avg_conf / countRules ), 2) ); w.println("Average Lift: " + roundDouble(( avg_lift / countRules ), 2)); System.out.println("Average Lift: " + roundDouble(( avg_lift / countRules ), 2) ); w.println("Average Conviction: " + roundDouble(( avg_conv/ countRules ), 2)); System.out.println("Average Conviction: " + roundDouble(( avg_conv/ countRules ), 2)); w.println("Average Certain Factor: " + roundDouble(( avg_CF/ countRules ), 2)); System.out.println("Average Certain Factor: " + roundDouble(( avg_CF/ countRules ), 2)); w.println("Average Netconf: " + roundDouble(( avg_netConf/ countRules), 2)); System.out.println("Average Netconf: " + roundDouble(( avg_netConf/ countRules), 2)); w.println("Average YulesQ: " + roundDouble(( avg_yulesQ/ countRules), 2)); System.out.println("Average YulesQ: " + roundDouble(( avg_yulesQ/ countRules), 2)); w.println("Average Antecedents Length: " + roundDouble((length / (double) countRules), 2)); System.out.println("Average Length of the Rules generated: " + roundDouble((length / (double) countRules), 2)); w.println("Number of Covered Records (%): " + roundDouble((100.0 * this.numCoveredRecords (minSupport)) / this.ds.getnTrans(),2)); System.out.println("Number of Covered Records(%): " + (100.0 * this.numCoveredRecords (minSupport)) / this.ds.getnTrans()); } else{ w.println("Average Support: " + ( 0.0 )); System.out.println("Average Support: " + (0.0)); w.println("Average Confidence: " + ( 0.0 )); System.out.println("Average Confidence: " + (0.0 )); w.println("Average Lift: " + (0.0 )); System.out.println("Average Lift: " + ( 0.0 )); w.println("Average Conviction: " + ( 0.0 )); System.out.println("Average Conviction: " + ( 0.0 )); w.println("Average Certain Factor: " + ( 0.0 )); System.out.println("Average Certain Factor: " + ( 0.0 )); w.println("Average Netconf: " + ( 0.0 )); System.out.println("Average Netconf: " + (0.0)); w.println("Average Antecedents Length: " + ( 0.0 )); System.out.println("Average Antecedents Length: " + ( 0.0 )); w.println("Number of Covered Records (%): " + (0.0)); System.out.println("Number of Covered Records (%): " + (0.0) ); } } public ArrayList<AssociationRule> getSetRules (double minSupport) { int i; ArrayList<AssociationRule> selectRules = new ArrayList<AssociationRule>(); AssociationRule rule; for (i=0; i < this.assocRules.size(); i++) { rule = this.assocRules.get(i); if (rule.getAll_support() >= minSupport) selectRules.add(rule.copy()); } return selectRules; } private void genRules() { int i, j; double numeratorYules, denominatorYules, confidance,lift,conv, CF, netConf, yulesQ; AssociationRule rule; Chromosome chromo; this.assocRules = new ArrayList<AssociationRule>(); for (i=0; i < this.pop.size(); i++) { chromo = this.pop.get(i); rule = new AssociationRule(); for (j=0; j <= chromo.getLengthAnt(); j++) rule.addAntecedent((chromo.getGen(j)).copy()); for (j=chromo.getLengthAnt()+1; j < this.kItemsets; j++) rule.addConsequent((chromo.getGen(j)).copy()); confidance = chromo.getSupportAll() / chromo.getSupportAnt(); if((chromo.getSupportAnt() == 0)||(chromo.getSupportCon() == 0)) lift = 1; else lift = chromo.getSupportAll() /(chromo.getSupportAnt()* chromo.getSupportCon()); if((chromo.getSupportCon()==1)||(chromo.getSupportAnt() == 0)) conv = 1; else conv = (chromo.getSupportAnt()*(1-chromo.getSupportCon()))/(chromo.getSupportAnt()- chromo.getSupportAll()); if ((chromo.getSupportAnt() == 0)||(chromo.getSupportAnt() == 1)||(Math.abs(chromo.getSupportAnt()*(1-chromo.getSupportAnt())) <= 0.001)) netConf = 0; else netConf = (chromo.getSupportAll()-(chromo.getSupportAnt()* chromo.getSupportCon()))/(chromo.getSupportAnt()*(1-chromo.getSupportAnt())); //compute yulesQ numeratorYules = ((chromo.getSupportAll() * (1 - chromo.getSupportCon() - chromo.getSupportAnt() + chromo.getSupportAll())) - ((chromo.getSupportAnt() - chromo.getSupportAll())* (chromo.getSupportCon() - chromo.getSupportAll()))); denominatorYules = ((chromo.getSupportAll() * (1 - chromo.getSupportCon() - chromo.getSupportAnt() + chromo.getSupportAll())) + ((chromo.getSupportAnt() - chromo.getSupportAll())* (chromo.getSupportCon() - chromo.getSupportAll()))); if((chromo.getSupportAnt() == 0)||(chromo.getSupportAnt() == 1)||(chromo.getSupportCon() == 0)||(chromo.getSupportCon() == 1)||(Math.abs(denominatorYules) <= 0.001)) yulesQ = 0; else yulesQ = numeratorYules/denominatorYules; CF = 0; if(confidance > chromo.getSupportCon()) CF = (confidance - chromo.getSupportCon())/(1-chromo.getSupportCon()); else if(confidance < chromo.getSupportCon()) CF = (confidance - chromo.getSupportCon())/(chromo.getSupportCon()); rule.setSupport_Ant (chromo.getSupportAnt()); rule.setSupport_cons(chromo.getSupportCon()); rule.setAll_support (chromo.getSupportAll()); rule.setConfidence (confidance); rule.setLift(lift); rule.setConv(conv); rule.setCF(CF); rule.setNetConf(netConf); rule.setYulesQ(yulesQ); this.assocRules.add(rule); } } private int numCoveredRecords (double minSupport) { int i, j, covered, nTrans; ArrayList<Integer> tidCovered; Chromosome chromo; nTrans = this.ds.getnTrans(); boolean [] marked = new boolean[nTrans]; for (i=0; i < nTrans; i++) marked[i] = false; // System.out.println("Tamano: " + this.pop.size()); for (i=0; i < this.pop.size(); i++) { chromo = this.pop.get(i); if ((chromo.getSupportAll() >= minSupport)) { tidCovered = this.countSupport(chromo, 0, this.kItemsets-1); for (j=0; j < tidCovered.size(); j++) marked[tidCovered.get(j)] = true; } } covered = 0; for (i=0; i < nTrans; i++) if (marked[i]) covered++; // System.out.println("Cubiertos: " + covered); return covered; } }