/***********************************************************************

    This file is part of KEEL-software, the Data Mining tool for regression,
    classification, clustering, pattern mining and so on.

    Copyright (C) 2004-2010

    F. Herrera (herrera@decsai.ugr.es)
    L. Sánchez (luciano@uniovi.es)
    J. Alcalá-Fdez (jalcala@decsai.ugr.es)
    S. García (sglopez@ujaen.es)
    A. Fernández (alberto.fernandez@ujaen.es)
    J. Luengo (julianlm@decsai.ugr.es)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.UnsupervisedLearning.AssociationRules.IntervalRuleLearning.Eclat;

import java.io.PrintWriter;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.*;

/**
 * <p>
 * It provides the implementation of the algorithm to be run in a process
 * </p>
 *
 * @author Written by Nicolò Flugy Papè (Politecnico di Milano) 24/03/2009
 * @author Modified by Diana Martín (dmartin@ceis.cujae.edu.cu)
 * @version 1.0
 * @since JDK1.6
 */
public class EclatProcess {

    /** Labels of the summary lines, in the order in which they are reported. */
    private static final String[] SUMMARY_LABELS = {
        "Average Support: ",
        "Average Confidence: ",
        "Average Lift: ",
        "Average Conviction: ",
        "Average Certain Factor: ",
        "Average Netconf: ",
        "Average YulesQ: ",
        "Average Number of Antecedents: ",
        "Number of Covered Records (%): "
    };

    private final double minSupport;
    private final double minConfidence;

    private final myDataset dataset;
    private final int nAttr;
    private final int nTrans;

    /** Root of the trie of itemsets; its label is -1 and it carries no item. */
    private final Item root;
    private int nFrequentItemsets;
    private int nCoveredRecords;

    /**
     * <p>
     * It creates a new process for the algorithm by setting up its parameters
     * </p>
     * @param dataset The instance of the dataset for dealing with its records
     * @param minSupport The user-specified minimum support for the mined association rules
     * @param minConfidence The user-specified minimum confidence for the mined association rules
     */
    public EclatProcess(myDataset dataset, double minSupport, double minConfidence) {
        this.minSupport = minSupport;
        this.minConfidence = minConfidence;
        this.dataset = dataset;
        this.nAttr = dataset.getnVars();
        this.nTrans = dataset.getnTrans();
        this.root = new Item(-1);
    }

    /**
     * <p>
     * It runs the algorithm for mining association rules
     * </p>
     */
    public void run() {
        this.generateFirstCandidates();
        this.nFrequentItemsets = this.generateCandidates(this.root, new ArrayList<Item>(), 1);
    }

    /**
     * <p>
     * It constructs a rules set once the algorithm has been carried out
     * </p>
     * @return An array of association rules having both minimum confidence and support
     */
    public ArrayList<AssociationRule> generateRulesSet() {
        ArrayList<AssociationRule> rules = new ArrayList<AssociationRule>();
        HashSet<Integer> covered_records = new HashSet<Integer>();

        this.generateRules(this.root, new ArrayList<Item>(), rules, covered_records);
        this.nCoveredRecords = covered_records.size();

        return rules;
    }

    /**
     * <p>
     * It prints out on screen relevant information regarding the mined association rules
     * </p>
     * When <code>rules</code> is empty only the two counters are printed.
     *
     * @param rules The array of association rules from which gathering relevant information
     */
    public void printReport(ArrayList<AssociationRule> rules) {
        System.out.println("\nNumber of Frequent Itemsets found: " + this.nFrequentItemsets);
        System.out.println("Number of Association Rules generated: " + rules.size());

        if (!rules.isEmpty()) {
            String[] lines = this.buildSummaryLines(rules);
            for (int k = 0; k < lines.length; k++) {
                System.out.println(lines[k]);
            }
        }
    }

    /**
     * <p>
     * It rounds a number to the given amount of decimal places, always away from zero
     * </p>
     * The value is converted through its canonical decimal representation
     * (<code>BigDecimal.valueOf</code>) so that the binary expansion of the double
     * does not leak into the result: 0.1 stays 0.1 instead of becoming 0.11.
     *
     * @param number The value to round
     * @param decimalPlace The number of decimal places to keep
     * @return The rounded value, or <code>number</code> itself when it is infinite or NaN
     */
    public static double roundDouble(double number, int decimalPlace) {
        if (Double.isInfinite(number) || Double.isNaN(number)) {
            return number;
        }
        return BigDecimal.valueOf(number).setScale(decimalPlace, RoundingMode.UP).doubleValue();
    }

    /**
     * <p>
     * It builds a tab-separated listing with the quality measures of every rule
     * </p>
     * The last column (<code>nAttributes</code>) is the number of antecedent items
     * plus the number of consequent items of the rule.
     *
     * @param rules The array of association rules to list
     * @return The listing, one line per rule, preceded by a header and followed by a "Frequent" marker
     */
    public String printRules(ArrayList<AssociationRule> rules) {
        StringBuilder listing = new StringBuilder();

        listing.append("\n\nNumber of trials = " + "x" + "\n\n");
        listing.append("Support\tantecedent_support\tconsequent_support\tConfidence\tLift\tConv\tCF\tNetConf\tYulesQ\tnAttributes\n");

        for (int i = 0; i < rules.size(); i++) {
            AssociationRule rule = rules.get(i);
            int ruleLength = rule.getAntecedent().size() + rule.getConsequent().size();

            listing.append(roundDouble(rule.getRuleSupport(), 2)).append("\t")
                   .append(roundDouble(rule.getAntecedentSupport(), 2)).append("\t")
                   .append(roundDouble(rule.getConsequentSupport(), 2)).append("\t")
                   .append(roundDouble(rule.getConfidence(), 2)).append("\t")
                   .append(roundDouble(rule.getLift(), 2)).append("\t")
                   .append(roundDouble(rule.getConv(), 2)).append("\t")
                   .append(roundDouble(rule.getCF(), 2)).append("\t")
                   .append(roundDouble(rule.getNetConf(), 2)).append("\t")
                   .append(roundDouble(rule.getYulesQ(), 2)).append("\t")
                   .append(ruleLength).append("\n");
        }

        listing.append("\nFrequent\n");
        return listing.toString();
    }

    /**
     * <p>
     * It writes the report about the mined association rules to a writer and echoes it on screen
     * </p>
     * Unlike {@link #printReport(ArrayList)}, the summary lines are also emitted
     * (with value 0.0) when <code>rules</code> is empty.
     *
     * @param rules The array of association rules from which gathering relevant information
     * @param w The writer receiving the report; it is neither flushed nor closed here
     */
    public void saveReport(ArrayList<AssociationRule> rules, PrintWriter w) {
        String itemsetsLine = "\nNumber of Frequent Itemsets found: " + this.nFrequentItemsets;
        w.println(itemsetsLine);
        System.out.println(itemsetsLine);

        // Only the writer gets a leading blank line before this counter.
        w.println("\nNumber of Association Rules generated: " + rules.size());
        System.out.println("Number of Association Rules generated: " + rules.size());

        String[] lines = this.buildSummaryLines(rules);
        for (int k = 0; k < lines.length; k++) {
            w.println(lines[k]);
            System.out.println(lines[k]);
        }
    }

    /**
     * Builds the summary lines shared by the screen and the file report.
     *
     * @param rules The rules to summarise
     * @return One line per entry of {@link #SUMMARY_LABELS}; every value is 0.0 when there are no rules
     */
    private String[] buildSummaryLines(ArrayList<AssociationRule> rules) {
        double[] values = new double[SUMMARY_LABELS.length];

        if (!rules.isEmpty()) {
            for (int r = 0; r < rules.size(); r++) {
                AssociationRule ar = rules.get(r);
                values[0] += ar.getRuleSupport();
                values[1] += ar.getConfidence();
                values[2] += ar.getLift();
                values[3] += ar.getConv();
                values[4] += ar.getCF();
                values[5] += ar.getNetConf();
                values[6] += ar.getYulesQ();
                // Reported as "Number of Antecedents", but counts antecedent and consequent items.
                values[7] += ar.getAntecedent().size() + ar.getConsequent().size();
            }

            for (int k = 0; k < 8; k++) {
                values[k] = roundDouble(values[k] / rules.size(), 2);
            }
            values[8] = roundDouble((100.0 * this.nCoveredRecords) / this.nTrans, 2);
        }

        String[] lines = new String[SUMMARY_LABELS.length];
        for (int k = 0; k < lines.length; k++) {
            lines[k] = SUMMARY_LABELS[k] + values[k];
        }
        return lines;
    }

    /** Adds one child to the root for every key of the dataset TID list. */
    private void generateFirstCandidates() {
        Enumeration<Integer> keys = this.dataset.getTIDList().keys();
        while (keys.hasMoreElements()) {
            this.root.addChild(new Item(keys.nextElement()));
        }
    }

    /**
     * Evaluates the children of a trie node depth-first, keeping the frequent ones
     * (and expanding them) and removing the others from the node.
     *
     * @param item The trie node whose children are evaluated
     * @param current The itemset on the path from the root to <code>item</code>; restored on return
     * @param depth The length of the itemsets evaluated at this level
     * @return The number of frequent itemsets found in the subtree
     */
    private int generateCandidates(Item item, ArrayList<Item> current, int depth) {
        int generated = 0;
        ArrayList<Item> children = item.getChildren();
        // Iterate over a snapshot because infrequent children are removed from the live list.
        ArrayList<Item> snapshot = new ArrayList<Item>(children);

        for (int i = 0; i < snapshot.size(); i++) {
            Item child = snapshot.get(i);
            current.add(child);

            int sup = this.countSupport(current);
            if (((double) sup / (double) this.nTrans) >= this.minSupport) {
                // The node stores the support of the whole itemset ending at it.
                child.setSupport(sup);

                if (depth < this.nAttr) {
                    this.copySiblings(child, children);
                    generated += this.generateCandidates(child, current, depth + 1);
                }
                generated++;
            } else {
                children.remove(child);
            }

            // Backtrack: drop the child appended above.
            current.remove(current.size() - 1);
        }

        return generated;
    }

    /**
     * Gives <code>item</code> a fresh child for every sibling whose label, modulo the
     * number of attributes, is greater than its own.
     *
     * @param item The node receiving the children
     * @param siblings The nodes sharing the parent of <code>item</code>
     */
    private void copySiblings(Item item, ArrayList<Item> siblings) {
        int itemRank = item.getLabel() % this.nAttr;

        for (int i = 0; i < siblings.size(); i++) {
            Item sibling = siblings.get(i);
            if (sibling.getLabel() % this.nAttr > itemRank) {
                item.addChild(new Item(sibling.getLabel()));
            }
        }
    }

    /**
     * @param itemset A non-empty itemset
     * @return The number of records containing every item of the itemset
     */
    private int countSupport(ArrayList<Item> itemset) {
        return this.countCoveredRecords(itemset).size();
    }

    /**
     * Walks the trie depth-first and derives rules from every itemset with at least two items.
     *
     * @param item The trie node whose subtree is visited
     * @param itemset The itemset on the path from the root to <code>item</code>; restored on return
     * @param rules The list receiving the generated rules
     * @param cov_recs The set receiving the records covered by the generated rules
     */
    private void generateRules(Item item, ArrayList<Item> itemset, ArrayList<AssociationRule> rules, HashSet<Integer> cov_recs) {
        ArrayList<Item> children = item.getChildren();

        for (int f = 0; f < children.size(); f++) {
            Item node = children.get(f);
            itemset.add(node);

            if (itemset.size() > 1) {
                this.generateRulesFromItemset(node, itemset, rules, cov_recs);
            }
            if (node.hasChildren()) {
                this.generateRules(node, itemset, rules, cov_recs);
            }

            // Backtrack: drop the node appended above.
            itemset.remove(itemset.size() - 1);
        }
    }

    /**
     * Builds, for each item of the itemset taken as single consequent, the rule
     * "remaining items -> item" and keeps it when it reaches the minimum confidence.
     *
     * @param last The trie node of the last item; it holds the support of the whole itemset
     * @param itemset The itemset (at least two items) the rules are derived from
     * @param rules The list receiving the accepted rules
     * @param cov_recs The set receiving the records covered by the accepted rules
     */
    private void generateRulesFromItemset(Item last, ArrayList<Item> itemset, ArrayList<AssociationRule> rules, HashSet<Integer> cov_recs) {
        double rule_sup = (double) last.getSupport() / (double) this.nTrans;
        boolean coverageRecorded = false;

        for (int i = 0; i < itemset.size(); i++) {
            Item consequent = itemset.get(i);

            ArrayList<Item> ant = new ArrayList<Item>(itemset);
            ant.remove(i);

            // searchItemsetIntoTrie yields 0 when the antecedent is not in the trie.
            double ant_sup = (double) searchItemsetIntoTrie(this.root, ant, 0) / (double) this.nTrans;
            // A trie node stores the support of the prefix itemset ending at it, not
            // the support of its own item, so the consequent is looked up in the TID list.
            double cons_sup = (double) this.tidsOf(consequent).size() / (double) this.nTrans;
            double conf = rule_sup / ant_sup;

            if (!(conf >= this.minConfidence)) {
                continue;
            }

            AssociationRule ar = new AssociationRule();
            for (int j = 0; j < ant.size(); j++) {
                ar.addAntecedent(ant.get(j).getLabel());
            }
            ar.addConsequent(consequent.getLabel());

            ar.setRuleSupport(rule_sup);
            ar.setAntecedentSupport(ant_sup);
            ar.setConsequentSupport(cons_sup);
            ar.setConfidence(conf);
            ar.setLift(lift(rule_sup, ant_sup, cons_sup));
            ar.setConv(conviction(rule_sup, ant_sup, cons_sup));
            ar.setCF(certainFactor(conf, cons_sup));
            ar.setNetConf(netConf(rule_sup, ant_sup, cons_sup));
            ar.setYulesQ(yulesQ(rule_sup, ant_sup, cons_sup));

            // Every rule of this itemset covers the same records: record them once.
            if (!coverageRecorded) {
                cov_recs.addAll(this.countCoveredRecords(itemset));
                coverageRecorded = true;
            }
            rules.add(ar);
        }
    }

    /** Lift of a rule; 1 when the antecedent or the consequent has no support. */
    private static double lift(double rule_sup, double ant_sup, double cons_sup) {
        if ((cons_sup == 0) || (ant_sup == 0)) {
            return 1;
        }
        return rule_sup / (ant_sup * cons_sup);
    }

    /** Conviction of a rule; 1 when the consequent support is 1 or the antecedent has no support. */
    private static double conviction(double rule_sup, double ant_sup, double cons_sup) {
        if ((cons_sup == 1) || (ant_sup == 0)) {
            return 1;
        }
        return (ant_sup * (1 - cons_sup)) / (ant_sup - rule_sup);
    }

    /** Netconf of a rule; 0 when its denominator ant_sup * (1 - ant_sup) is at most 0.001 in magnitude. */
    private static double netConf(double rule_sup, double ant_sup, double cons_sup) {
        double denominator = ant_sup * (1 - ant_sup);
        if ((ant_sup == 0) || (ant_sup == 1) || (Math.abs(denominator) <= 0.001)) {
            return 0;
        }
        return (rule_sup - (ant_sup * cons_sup)) / denominator;
    }

    /** Yule's Q of a rule; 0 when a support is 0 or 1, or the denominator is at most 0.001 in magnitude. */
    private static double yulesQ(double rule_sup, double ant_sup, double cons_sup) {
        double concordant = rule_sup * (1 - cons_sup - ant_sup + rule_sup);
        double discordant = (ant_sup - rule_sup) * (cons_sup - rule_sup);
        double numerator = concordant - discordant;
        double denominator = concordant + discordant;

        if ((ant_sup == 0) || (ant_sup == 1) || (cons_sup == 0) || (cons_sup == 1) || (Math.abs(denominator) <= 0.001)) {
            return 0;
        }
        return numerator / denominator;
    }

    /** Certain factor of a rule; 0 when the confidence equals the consequent support. */
    private static double certainFactor(double conf, double cons_sup) {
        if (conf > cons_sup) {
            return (conf - cons_sup) / (1 - cons_sup);
        }
        if (conf < cons_sup) {
            return (conf - cons_sup) / (cons_sup);
        }
        return 0;
    }

    /**
     * Looks an itemset up in the trie, matching one item per level.
     *
     * @param item The trie node whose children are matched against <code>itemset.get(index)</code>
     * @param itemset The itemset to look up
     * @param index The position of the item matched at this level
     * @return The support stored at the node of the last item, or 0 when the itemset is not found
     */
    private int searchItemsetIntoTrie(Item item, ArrayList<Item> itemset, int index) {
        ArrayList<Item> children = item.getChildren();

        for (int i = 0; i < children.size(); i++) {
            Item child = children.get(i);
            if (!child.equals(itemset.get(index))) {
                continue;
            }

            // Only the first matching child is followed.
            if (index == (itemset.size() - 1)) {
                return child.getSupport();
            }
            if (child.hasChildren()) {
                return searchItemsetIntoTrie(child, itemset, index + 1);
            }
            return 0;
        }

        return 0;
    }

    /**
     * Intersects the TID lists of the items of an itemset.
     *
     * @param itemset A non-empty itemset
     * @return A new set with the identifiers of the records containing every item
     */
    private HashSet<Integer> countCoveredRecords(ArrayList<Item> itemset) {
        HashSet<Integer> common = new HashSet<Integer>(this.tidsOf(itemset.get(0)));

        for (int k = 1; k < itemset.size(); k++) {
            common.retainAll(this.tidsOf(itemset.get(k)));
            // Nothing can be recovered once the intersection is empty.
            if (common.isEmpty()) {
                break;
            }
        }

        return common;
    }

    /**
     * @param item An item whose label is a key of the dataset TID list
     * @return The TID list entry stored for the label of the item (not a copy)
     */
    private HashSet<Integer> tidsOf(Item item) {
        return this.dataset.getTIDList().get(item.getLabel());
    }
}