/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.UnsupervisedLearning.AssociationRules.IntervalRuleLearning.GENAR;

/**
 * <p>
 * @author Alberto Fernández
 * @author Modified by Diana Martín (dmartin@ceis.cujae.edu.cu)
 * @version 1.1
 * @since JDK1.6
 * </p>
 */

import java.io.PrintWriter;
import java.math.BigDecimal;
import java.util.*;

import org.core.Randomize;

/**
 * Evolutionary process that learns interval association rules one at a time.
 *
 * <p>Each outer iteration of {@link #run()} evolves a fresh population, keeps the
 * chromosome chosen by {@link #chooseTheBest(ArrayList)}, and lowers the weight of the
 * records that chromosome covers so that later iterations are rewarded for covering
 * other records. Once the loop ends the kept chromosomes are turned into
 * {@link AssociationRule}s whose last gene is the consequent and whose remaining genes
 * form the antecedent.</p>
 */
public class GENARProcess {

	private myDataset ds;

	/** Per-record weight used by {@link #fitness(Chromosome)}; 1.0 means "not yet covered". */
	private double[] weights;

	/** Per-attribute maximum interval amplitude: (max - min) / AF, truncated for non-real attributes. */
	private double allow_ampl[];

	ArrayList<Chromosome> bestRules;
	ArrayList<AssociationRule> assoc_rules;

	private int nRules, trials;
	private int nTrials;
	private int popsize;
	private double pm;
	private double pf;
	private int limit;

	/**
	 * Builds the process and precomputes the allowed amplitude of every attribute.
	 *
	 * @param ds      dataset the rules are mined from
	 * @param nRules  maximum number of rules to extract
	 * @param nTrials total evaluation budget; it is split as {@code nTrials / nRules + 1} per rule
	 * @param popsize population size
	 * @param ps      fraction of the population used to compute the selection limit
	 * @param pm      per-individual mutation probability
	 * @param pf      penalization factor; covered records get weight {@code 1 - pf}
	 * @param AF      amplitude factor dividing each attribute's range
	 */
	public GENARProcess(myDataset ds, int nRules, int nTrials, int popsize, double ps, double pm, double pf, double AF) {
		this.nRules = nRules;
		this.nTrials = (nTrials / nRules) + 1;
		this.popsize = popsize;
		this.pm = pm;
		this.pf = pf;
		this.limit = (int) Math.ceil(popsize * ps);

		this.ds = ds;
		this.weights = new double[this.ds.getnTrans()];
		this.allow_ampl = new double[this.ds.getnVars()];

		for (int i = 0; i < this.allow_ampl.length; i++) {
			double amplitude = (this.ds.getMax(i) - this.ds.getMin(i)) / AF;
			// Non-real attributes only take whole values, so their amplitude is truncated.
			this.allow_ampl[i] = this.ds.isReal(i) ? amplitude : (int) amplitude;
		}
	}

	/**
	 * Runs the whole learning process and fills {@code bestRules} and {@code assoc_rules}.
	 *
	 * <p>The outer loop stops when {@code nRules} chromosomes have been kept or when no
	 * record still has weight 1.0. The inner loop runs generations until the evaluation
	 * counter {@code trials} reaches the per-rule budget.</p>
	 */
	public void run() {
		ArrayList<Chromosome> popNew;
		Chromosome chromoBest;

		this.bestRules = new ArrayList<Chromosome>();
		this.trials = 0;

		for (int i = 0; i < this.weights.length; i++) {
			this.weights[i] = 1.0;
		}

		do {
			System.out.println("Number of Rules Selected: " + this.bestRules.size());
			int nGn = 0;
			this.trials = 0;

			// initialize() already evaluates every individual, which counts towards trials.
			ArrayList<Chromosome> popCurrent = this.initialize();

			do {
				System.out.println("Generation: " + nGn);
				popNew = this.select(popCurrent);
				this.crossover(popNew);
				this.mutate(popNew);

				popCurrent.clear();
				popCurrent = popNew;
				nGn++;
			} while (this.trials < this.nTrials);

			chromoBest = this.chooseTheBest(popCurrent);
			this.penalizeRecordsCoveredBy(chromoBest);
			this.bestRules.add(chromoBest.copy());
		} while ((this.bestRules.size() < this.nRules) && (!this.allRecordsCovered()));

		this.genRules();
	}

	/**
	 * Prints to standard output the average quality measures of the rules that reach
	 * both thresholds.
	 *
	 * @param minConfidence minimum confidence a rule needs to be counted
	 * @param minSupport    minimum support a rule needs to be counted
	 */
	public void printReport(double minConfidence, double minSupport) {
		int countRules = 0;
		int length = 0;
		double avg_yulesQ = 0.0, avg_sup = 0.0, avg_conf = 0.0, avg_lift = 0.0, avg_conv = 0.0, avg_CF = 0.0, avg_netConf = 0.0;

		for (int i = 0; i < this.assoc_rules.size(); i++) {
			AssociationRule rule = this.assoc_rules.get(i);
			if ((rule.getConfidence() >= minConfidence) && (rule.getAll_support() >= minSupport)) {
				countRules++;
				length += rule.getLength();
				avg_sup += rule.getAll_support();
				avg_conf += rule.getConfidence();
				avg_lift += rule.getLift();
				avg_conv += rule.getConv();
				avg_CF += rule.getCF();
				avg_netConf += rule.getNetConf();
				avg_yulesQ += rule.getYulesQ();
			}
		}

		System.out.println("Number of Frequent Itemsets generated: " + "-");
		System.out.println("Number of Association Rules generated: " + countRules);

		if (countRules != 0) {
			System.out.println("Average SupportRules: " + roundDouble((avg_sup / countRules), 2));
			System.out.println("Average Confidence: " + roundDouble((avg_conf / countRules), 2));
			System.out.println("Average Lift: " + roundDouble((avg_lift / countRules), 2));
			System.out.println("Average Conviction: " + roundDouble((avg_conv / countRules), 2));
			System.out.println("Average Certain Factor: " + roundDouble((avg_CF / countRules), 2));
			System.out.println("Average Netconf: " + roundDouble((avg_netConf / countRules), 2));
			System.out.println("Average YulesQ: " + roundDouble((avg_yulesQ / countRules), 2));
			System.out.println("Average Length of the Rules generated: " + roundDouble((length / (double) countRules), 2));
			System.out.println("Number of Covered Records(%): " + (100.0 * this.numCoveredRecords(minSupport)) / this.ds.getnTrans());
		} else {
			// NOTE(review): this branch prints no YulesQ line and uses slightly different
			// labels than the branch above; kept as-is because the text is program output.
			System.out.println("Average Support: " + (0.0));
			System.out.println("Average Confidence: " + (0.0));
			System.out.println("Average Lift: " + (0.0));
			System.out.println("Average Conviction: " + (0.0));
			System.out.println("Average Certain Factor: " + (0.0));
			System.out.println("Average Netconf: " + (0.0));
			System.out.println("Average Antecedents Length: " + (0.0));
			System.out.println("Number of Covered Records (%): " + (0.0));
		}
	}

	/**
	 * Rounds a number away from zero to the given number of decimal places.
	 *
	 * <p>The value is converted with {@code new BigDecimal(double)}, i.e. its exact binary
	 * expansion, so a double that prints as {@code 0.1} (stored slightly above 0.1) becomes
	 * {@code 0.11} at two decimals. Infinite and NaN inputs are returned unchanged.</p>
	 *
	 * @param number       value to round
	 * @param decimalPlace number of decimals to keep
	 * @return the rounded value, or {@code number} itself when it is infinite or NaN
	 */
	public static double roundDouble(double number, int decimalPlace) {
		if (Double.isInfinite(number) || Double.isNaN(number)) {
			return number;
		}
		// RoundingMode.UP is the enum equivalent of the deprecated BigDecimal.ROUND_UP constant.
		BigDecimal bd = new BigDecimal(number).setScale(decimalPlace, java.math.RoundingMode.UP);
		return bd.doubleValue();
	}

	/**
	 * Formats the given rules as a tab-separated table, one rule per line, preceded by a
	 * header line.
	 *
	 * @param rules rules to print
	 * @return the table as a single string
	 */
	public String printRules(ArrayList<AssociationRule> rules) {
		StringBuilder rulesList = new StringBuilder();

		rulesList.append("Support\tantecedent_support\tconsequent_support\tConfidence\tLift\tConv\tCF\tNetConf\tYulesQ\tnAttributes\n");

		for (int i = 0; i < rules.size(); i++) {
			AssociationRule rule = rules.get(i);
			int lenghtrule = rule.getAntecedent().size() + rule.getConsequent().size();

			rulesList.append(roundDouble(rule.getAll_support(), 2)).append("\t")
					.append(roundDouble(rule.getSupport(), 2)).append("\t")
					.append(roundDouble(rule.getSupport_cons(), 2)).append("\t")
					.append(roundDouble(rule.getConfidence(), 2)).append("\t")
					.append(roundDouble(rule.getLift(), 2)).append("\t")
					.append(roundDouble(rule.getConv(), 2)).append("\t")
					.append(roundDouble(rule.getCF(), 2)).append("\t")
					.append(roundDouble(rule.getNetConf(), 2)).append("\t")
					.append(roundDouble(rule.getYulesQ(), 2)).append("\t")
					.append(lenghtrule).append("\n");
		}

		return rulesList.toString();
	}

	/**
	 * Writes the average quality measures of the rules reaching {@code minSupport} both to
	 * the given writer and to standard output. The writer is not flushed or closed here.
	 *
	 * @param minSupport minimum support a rule needs to be counted
	 * @param w          destination of the report
	 */
	public void saveReport(double minSupport, PrintWriter w) {
		int countRules = 0;
		int length = 0;
		double avg_yulesQ = 0.0, avg_sup = 0.0, avg_conf = 0.0, avg_lift = 0.0, avg_conv = 0.0, avg_CF = 0.0, avg_netConf = 0.0;

		for (int i = 0; i < this.assoc_rules.size(); i++) {
			AssociationRule rule = this.assoc_rules.get(i);
			if (rule.getAll_support() >= minSupport) {
				countRules++;
				length += rule.getLength();
				avg_sup += rule.getAll_support();
				avg_conf += rule.getConfidence();
				avg_lift += rule.getLift();
				avg_conv += rule.getConv();
				avg_CF += rule.getCF();
				avg_netConf += rule.getNetConf();
				avg_yulesQ += rule.getYulesQ();
			}
		}

		w.println("\nNumber of Frequent Itemsets generated: " + "-");
		System.out.println("Number of Frequent Itemsets generated: " + "-");
		w.println("\nNumber of Association Rules generated: " + countRules);
		System.out.println("Number of Association Rules generated: " + countRules);

		if (countRules != 0) {
			double sup = roundDouble((avg_sup / countRules), 2);
			double conf = roundDouble((avg_conf / countRules), 2);
			double lift = roundDouble((avg_lift / countRules), 2);
			double conv = roundDouble((avg_conv / countRules), 2);
			double cf = roundDouble((avg_CF / countRules), 2);
			double netConf = roundDouble((avg_netConf / countRules), 2);
			double yulesQ = roundDouble((avg_yulesQ / countRules), 2);
			double avgLength = roundDouble((length / (double) countRules), 2);
			// Coverage is computed once; the file gets the rounded value, the console the raw one.
			double coveredPct = (100.0 * this.numCoveredRecords(minSupport)) / this.ds.getnTrans();

			w.println("Average Support: " + sup);
			System.out.println("Average SupportRules: " + sup);
			w.println("Average Confidence: " + conf);
			System.out.println("Average Confidence: " + conf);
			w.println("Average Lift: " + lift);
			System.out.println("Average Lift: " + lift);
			w.println("Average Conviction: " + conv);
			System.out.println("Average Conviction: " + conv);
			w.println("Average Certain Factor: " + cf);
			System.out.println("Average Certain Factor: " + cf);
			w.println("Average Netconf: " + netConf);
			System.out.println("Average Netconf: " + netConf);
			w.println("Average YulesQ: " + yulesQ);
			System.out.println("Average YulesQ: " + yulesQ);
			w.println("Average Antecedents Length: " + avgLength);
			System.out.println("Average Length of the Rules generated: " + avgLength);
			w.println("Number of Covered Records (%): " + roundDouble(coveredPct, 2));
			System.out.println("Number of Covered Records(%): " + coveredPct);
		} else {
			w.println("Average Support: " + (0.0));
			System.out.println("Average Support: " + (0.0));
			w.println("Average Confidence: " + (0.0));
			System.out.println("Average Confidence: " + (0.0));
			w.println("Average Lift: " + (0.0));
			System.out.println("Average Lift: " + (0.0));
			w.println("Average Conviction: " + (0.0));
			System.out.println("Average Conviction: " + (0.0));
			w.println("Average Certain Factor: " + (0.0));
			System.out.println("Average Certain Factor: " + (0.0));
			w.println("Average Netconf: " + (0.0));
			System.out.println("Average Netconf: " + (0.0));
			w.println("Average Antecedents Length: " + (0.0));
			System.out.println("Average Antecedents Length: " + (0.0));
			w.println("Number of Covered Records (%): " + (0.0));
			System.out.println("Number of Covered Records (%): " + (0.0));
		}
	}

	/**
	 * Returns copies of the generated rules whose support is at least {@code minSupport}.
	 *
	 * @param minSupport minimum support threshold
	 * @return a new list holding copies of the selected rules
	 */
	public ArrayList<AssociationRule> getSetRules(double minSupport) {
		ArrayList<AssociationRule> selectRules = new ArrayList<AssociationRule>();

		for (int i = 0; i < this.assoc_rules.size(); i++) {
			AssociationRule rule = this.assoc_rules.get(i);
			if (rule.getAll_support() >= minSupport) {
				selectRules.add(rule.copy());
			}
		}

		return selectRules;
	}

	/**
	 * Creates the initial population. Every individual is built around one randomly drawn
	 * transaction: nominal genes take that transaction's value, numeric genes an interval
	 * centred on it with the allowed amplitude, clipped to the attribute's range.
	 */
	private ArrayList<Chromosome> initialize() {
		ArrayList<Chromosome> popInit = new ArrayList<Chromosome>();
		int nVars = this.ds.getnVars();
		double[][] trans = this.ds.getRealTransactions();

		while (popInit.size() < this.popsize) {
			Gene[] genes = new Gene[nVars];
			int tr = Randomize.Randint(0, this.ds.getnTrans());

			for (int g = 0; g < nVars; g++) {
				int attr = g;
				double lb, ub;
				double max_attr = this.ds.getMax(attr);
				double min_attr = this.ds.getMin(attr);

				genes[g] = new Gene();
				genes[g].setAttr(attr);
				genes[g].setType(this.ds.getAttributeType(attr));

				if (this.ds.isNominal(attr)) {
					lb = ub = trans[tr][attr];
				} else if (this.ds.isReal(attr)) {
					lb = Math.max(trans[tr][attr] - (this.allow_ampl[attr] / 2.0), min_attr);
					ub = Math.min(trans[tr][attr] + (this.allow_ampl[attr] / 2.0), max_attr);
				} else {
					// Integer attribute: half-amplitude uses integer division.
					lb = Math.max(trans[tr][attr] - ((int) this.allow_ampl[attr] / 2), min_attr);
					ub = Math.min(trans[tr][attr] + ((int) this.allow_ampl[attr] / 2), max_attr);
				}

				genes[g].setL(lb);
				genes[g].setU(ub);
			}

			Chromosome c = new Chromosome(genes);
			c.setFit(this.fitness(c));
			popInit.add(c);
		}

		return popInit;
	}

	/**
	 * Sorts the population in place and returns copies of its leading individuals.
	 *
	 * <p>NOTE(review): the bound is {@code i <= limit}, so up to {@code limit + 1}
	 * individuals are kept; confirm whether that is intended.</p>
	 */
	private ArrayList<Chromosome> select(ArrayList<Chromosome> pop) {
		ArrayList<Chromosome> popTmp = new ArrayList<Chromosome>();

		Collections.sort(pop);
		for (int i = 0; i <= this.limit && i < pop.size(); i++) {
			popTmp.add((pop.get(i)).copy());
		}

		return popTmp;
	}

	/**
	 * Refills the population up to {@code popsize} with one-point crossover offspring.
	 * For each random pair both children are evaluated and only the fitter one is added,
	 * and only if its fitness is strictly positive.
	 */
	private void crossover(ArrayList<Chromosome> pop) {
		int nVars = this.ds.getnVars();

		while (pop.size() < this.popsize) {
			Chromosome dad = pop.get(Randomize.Randint(0, pop.size()));
			Chromosome mom = pop.get(Randomize.Randint(0, pop.size()));
			int pos = Randomize.Randint(1, nVars - 1);

			// Fresh arrays per pair so offspring never share a gene array, regardless of
			// whether Chromosome's constructor copies its argument.
			Gene[] genesOff1 = new Gene[nVars];
			Gene[] genesOff2 = new Gene[nVars];

			for (int i = 0; i < pos; i++) {
				genesOff1[i] = dad.getGen(i).copy();
				genesOff2[i] = mom.getGen(i).copy();
			}
			for (int i = pos; i < nVars; i++) {
				genesOff1[i] = mom.getGen(i).copy();
				genesOff2[i] = dad.getGen(i).copy();
			}

			Chromosome off1 = new Chromosome(genesOff1);
			Chromosome off2 = new Chromosome(genesOff2);
			off1.setFit(this.fitness(off1));
			off2.setFit(this.fitness(off2));

			Chromosome off_best = (off1.getFit() > off2.getFit()) ? off1 : off2;
			if (off_best.getFit() > 0) {
				pop.add(off_best);
			}
		}
	}

	/**
	 * Mutates, with probability {@code pm} each, the first {@code popsize} individuals.
	 * One random gene is changed: a nominal gene gets a new random value, a numeric gene
	 * has each bound either widened (within the allowed amplitude and the attribute
	 * range) or narrowed. The individual is re-evaluated afterwards.
	 */
	private void mutate(ArrayList<Chromosome> pop) {
		int nVars = this.ds.getnVars();
		double top;

		for (int i = 0; i < this.popsize; i++) {
			if (Randomize.Rand() < this.pm) {
				Chromosome chromo = pop.get(i);
				int index = Randomize.Randint(0, nVars);
				Gene g = chromo.getGen(index);
				int attr = g.getAttr();
				double max_attr = this.ds.getMax(attr);
				double min_attr = this.ds.getMin(attr);

				if (this.ds.isNominal(attr)) {
					top = Randomize.RandintClosed((int) min_attr, (int) max_attr);
					g.setL(top);
					g.setU(top);
				} else if (this.ds.isReal(attr)) {
					// Lower bound: move it down (widen) or up (narrow).
					if (Randomize.Rand() < 0.5) {
						top = Math.max(g.getU() - this.allow_ampl[attr], min_attr);
						g.setL(Randomize.RanddoubleClosed(top, g.getL()));
					} else {
						g.setL(Randomize.Randdouble(g.getL(), g.getU()));
					}

					// Upper bound: move it up (widen) or down (narrow), using the new lower bound.
					if (Randomize.Rand() < 0.5) {
						top = Math.min(g.getL() + this.allow_ampl[attr], max_attr);
						g.setU(Randomize.RanddoubleClosed(g.getU(), top));
					} else {
						g.setU(Randomize.RanddoubleClosed(g.getL() + 0.0001, g.getU()));
					}
				} else {
					if (Randomize.Rand() < 0.5) {
						top = Math.max(g.getU() - this.allow_ampl[attr], min_attr);
						g.setL(Randomize.RandintClosed((int) top, (int) g.getL()));
					} else {
						g.setL(Randomize.Randint((int) g.getL(), (int) g.getU()));
					}

					if (Randomize.Rand() < 0.5) {
						top = Math.min(g.getL() + this.allow_ampl[attr], max_attr);
						g.setU(Randomize.RandintClosed((int) g.getU(), (int) top));
					} else {
						g.setU(Randomize.RandintClosed((int) g.getL() + 1, (int) g.getU()));
					}
				}

				chromo.setFit(this.fitness(chromo));
			}
		}
	}

	/**
	 * Weighted coverage of a chromosome: the sum of the weights of the records it covers
	 * divided by the number of records. Every call increments the evaluation counter.
	 */
	private double fitness(Chromosome c) {
		ArrayList<Integer> tid_lst = countSupport(c.getGenes());
		double cov = 0.0;

		for (int t = 0; t < tid_lst.size(); t++) {
			cov += this.weights[tid_lst.get(t)];
		}

		this.trials++;

		return (cov / (double) this.ds.getnTrans());
	}

	/**
	 * Returns the indices of the records whose value lies inside {@code [L, U]} for every
	 * one of the given genes.
	 */
	private ArrayList<Integer> countSupport(Gene[] genes) {
		ArrayList<Integer> tid_list = new ArrayList<Integer>();
		double[][] trans = this.ds.getRealTransactions();
		int nTrans = this.ds.getnTrans();

		for (int t = 0; t < nTrans; t++) {
			boolean ok = true;

			for (int g = 0; g < genes.length && ok; g++) {
				int attr = genes[g].getAttr();
				double lb = genes[g].getL();
				double ub = genes[g].getU();

				if ((trans[t][attr] < lb) || (trans[t][attr] > ub)) {
					ok = false;
				}
			}

			if (ok) {
				tid_list.add(t);
			}
		}

		return tid_list;
	}

	/**
	 * Lowers to {@code 1 - pf} the weight of every record covered by the chromosome that
	 * still has its initial weight of 1.0.
	 */
	private void penalizeRecordsCoveredBy(Chromosome c) {
		ArrayList<Integer> tid_lst = countSupport(c.getGenes());

		for (int i = 0; i < tid_lst.size(); i++) {
			int tr = tid_lst.get(i);
			if (this.weights[tr] == 1.0) {
				this.weights[tr] = 1.0 - this.pf;
			}
		}
	}

	/**
	 * Converts every kept chromosome into an association rule (last gene = consequent,
	 * the others = antecedent) and computes its quality measures.
	 */
	private void genRules() {
		double nTrans = (double) this.ds.getnTrans();

		this.assoc_rules = new ArrayList<AssociationRule>();

		for (int i = 0; i < bestRules.size(); i++) {
			Chromosome chromo = bestRules.get(i);
			AssociationRule rule = new AssociationRule();
			int last = chromo.length() - 1;

			Gene[] genes_ant = new Gene[last];
			Gene[] genes_con = new Gene[1];

			for (int j = 0; j < last; j++) {
				rule.addAntecedent(chromo.getGen(j).copy());
				genes_ant[j] = chromo.getGen(j);
			}
			rule.addConsequent((chromo.getGen(last)).copy());
			genes_con[0] = chromo.getGen(last);

			double all_sup = this.countSupport(chromo.getGenes()).size() / nTrans;
			double ant_sup = this.countSupport(genes_ant).size() / nTrans;
			double cons_sup = this.countSupport(genes_con).size() / nTrans;

			// An antecedent covering no record would give 0/0 = NaN; report 0 instead,
			// in line with the ant_sup == 0 guards of the other measures.
			double conf = (ant_sup == 0) ? 0.0 : all_sup / ant_sup;

			//compute lift
			double lift;
			if ((cons_sup == 0) || (ant_sup == 0)) {
				lift = 1;
			} else {
				lift = all_sup / (ant_sup * cons_sup);
			}

			//compute conviction
			double conv;
			if ((cons_sup == 1) || (ant_sup == 0)) {
				conv = 1;
			} else {
				conv = (ant_sup * (1 - cons_sup)) / (ant_sup - all_sup);
			}

			//compute netconf
			double netConf;
			if ((ant_sup == 0) || (ant_sup == 1) || (Math.abs((ant_sup * (1 - ant_sup))) <= 0.001)) {
				netConf = 0;
			} else {
				netConf = (all_sup - (ant_sup * cons_sup)) / (ant_sup * (1 - ant_sup));
			}

			//compute yulesQ
			double numeratorYules = ((all_sup * (1 - cons_sup - ant_sup + all_sup)) - ((ant_sup - all_sup) * (cons_sup - all_sup)));
			double denominatorYules = ((all_sup * (1 - cons_sup - ant_sup + all_sup)) + ((ant_sup - all_sup) * (cons_sup - all_sup)));
			double yulesQ;
			if ((ant_sup == 0) || (ant_sup == 1) || (cons_sup == 0) || (cons_sup == 1) || (Math.abs(denominatorYules) <= 0.001)) {
				yulesQ = 0;
			} else {
				yulesQ = numeratorYules / denominatorYules;
			}

			//compute Certain Factor(CF); stays 0 when the antecedent covers nothing
			double CF = 0;
			if (ant_sup != 0) {
				if (conf > cons_sup) {
					CF = (conf - cons_sup) / (1 - cons_sup);
				} else if (conf < cons_sup) {
					CF = (conf - cons_sup) / (cons_sup);
				}
			}

			rule.setSupport(ant_sup);
			rule.setSupport_cons(cons_sup);
			rule.setAll_support(all_sup);
			rule.setConfidence(conf);
			rule.setLift(lift);
			rule.setConv(conv);
			rule.setCF(CF);
			rule.setNetConf(netConf);
			rule.setYulesQ(yulesQ);

			this.assoc_rules.add(rule);
		}
	}

	/**
	 * Returns the individual maximising {@code support * confidence * fitness}. If no
	 * individual scores above zero the first one is returned.
	 */
	private Chromosome chooseTheBest(ArrayList<Chromosome> pop) {
		double nTrans = (double) this.ds.getnTrans();
		double max_prod = 0.0;
		Chromosome chromo_the_best = pop.get(0);
		Gene[] ant_genes = new Gene[this.ds.getnVars() - 1];

		// Start at 0 so the first individual is scored like every other one; starting
		// at 1 made it lose to any individual with a positive score.
		for (int i = 0; i < pop.size(); i++) {
			Chromosome chromo = pop.get(i);
			Gene[] genes = chromo.getGenes();

			for (int j = 0; j < genes.length - 1; j++) {
				ant_genes[j] = genes[j];
			}

			double all_sup = this.countSupport(genes).size() / nTrans;
			double ant_sup = this.countSupport(ant_genes).size() / nTrans;
			// ant_sup == 0 implies all_sup == 0; score such individuals 0 rather than NaN.
			double conf = (ant_sup == 0) ? 0.0 : all_sup / ant_sup;

			double prod = all_sup * conf * chromo.getFit();

			if (prod > max_prod) {
				max_prod = prod;
				chromo_the_best = chromo;
			}
		}

		return chromo_the_best;
	}

	/** Returns {@code true} when no record still has its initial weight of 1.0. */
	private boolean allRecordsCovered() {
		for (int i = 0; i < this.weights.length; i++) {
			if (this.weights[i] == 1.0) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Counts the records matched by the antecedent of at least one generated rule whose
	 * support reaches {@code minSupport}.
	 */
	private int numCoveredRecords(double minSupport) {
		// A new boolean[] is already all false.
		boolean[] marked = new boolean[this.ds.getnTrans()];

		for (int i = 0; i < this.assoc_rules.size(); i++) {
			AssociationRule rule = this.assoc_rules.get(i);

			if (rule.getAll_support() >= minSupport) {
				ArrayList<Gene> ant = rule.getAntecedent();
				Gene[] genes = new Gene[ant.size()];
				for (int j = 0; j < ant.size(); j++) {
					genes[j] = ant.get(j);
				}

				ArrayList<Integer> tidCovered = countSupport(genes);
				for (int j = 0; j < tidCovered.size(); j++) {
					marked[tidCovered.get(j)] = true;
				}
			}
		}

		int covered = 0;
		for (int i = 0; i < marked.length; i++) {
			if (marked[i]) {
				covered++;
			}
		}

		return covered;
	}
}