/***********************************************************************

    This file is part of KEEL-software, the Data Mining tool for regression,
    classification, clustering, pattern mining and so on.

    Copyright (C) 2004-2010

    F. Herrera (herrera@decsai.ugr.es)
    L. Sánchez (luciano@uniovi.es)
    J. Alcalá-Fdez (jalcala@decsai.ugr.es)
    S. García (sglopez@ujaen.es)
    A. Fernández (alberto.fernandez@ujaen.es)
    J. Luengo (julianlm@decsai.ugr.es)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.UnsupervisedLearning.AssociationRules.IntervalRuleLearning.Apriori;

import java.io.PrintWriter;
import java.util.*;
import java.math.*;

/**
 * <p>
 * It provides the implementation of the algorithm to be run in a process.
 * </p>
 * <p>
 * Candidate itemsets are kept in a trie rooted at {@code root}: a node reached
 * through a path of k items stores the number of transactions that match every
 * item on that path. An item label taken modulo the number of attributes is
 * used as the column index of that item inside a transaction.
 * </p>
 *
 * @author Written by Nicolò Flugy Papè (Politecnico di Milano) 24/03/2009
 * @author Modified by Diana Martín (dmartin@ceis.cujae.edu.cu)
 * @version 1.0
 * @since JDK1.6
 */
public class AprioriProcess {

    /**
     * Labels of the nine summary lines of the report, in output order. The
     * first eight are averages over the rules, the last one is a percentage.
     */
    private static final String[] METRIC_LABELS = {
        "Average Support: ",
        "Average Confidence: ",
        "Average Lift: ",
        "Average Conviction: ",
        "Average Certain Factor: ",
        "Average Netconf: ",
        "Average YulesQ: ",
        "Average Number of Antecedents: ",
        "Number of Covered Records (%): "
    };

    private final double minSupport;
    private final double minConfidence;
    private final myDataset dataset;
    private final int nAttr;
    private final int nTrans;
    private final int[][] transactions;
    private final Item root;

    private int pass;
    private int nFrequentItemsets;
    private int nCoveredRecords;

    /**
     * <p>
     * It creates a new process for the algorithm by setting up its parameters
     * </p>
     * @param dataset The instance of the dataset for dealing with its records
     * @param minSupport The user-specified minimum support for the mined association rules
     * @param minConfidence The user-specified minimum confidence for the mined association rules
     */
    public AprioriProcess(myDataset dataset, double minSupport, double minConfidence) {
        this.minSupport = minSupport;
        this.minConfidence = minConfidence;
        this.dataset = dataset;
        this.nAttr = dataset.getnVars();
        this.nTrans = dataset.getnTrans();
        this.pass = 1;
        this.transactions = dataset.getFakeTransactions();
        this.root = new Item(-1);
    }

    /**
     * <p>
     * It runs the algorithm for mining association rules
     * </p>
     * <p>
     * The first pass builds and prunes the 1-itemsets. Each further pass adds
     * one trie level, counts its support and prunes it. Passes stop when at
     * most one itemset survived the last pass or when the itemset size has
     * reached the number of attributes. A progress line is printed per pass.
     * </p>
     */
    public void run() {
        int candidates = this.generateFirstCandidates();
        int pruned = this.checkMinSupport(this.root);

        this.nFrequentItemsets = candidates - pruned;
        System.out.println("\nPass: " + this.pass + "; Candidate Itemsets: " + candidates
                + "; Pruned Itemsets: " + pruned
                + "; Total Frequent Itemsets: " + this.nFrequentItemsets);

        while (((candidates - pruned) > 1) && (this.pass < this.nAttr)) {
            this.pass++;

            candidates = this.generateCandidates(this.root, new ArrayList<Item>(), 1);
            this.countSupport();
            pruned = this.checkMinSupport(this.root);

            this.nFrequentItemsets += (candidates - pruned);
            System.out.println("Pass: " + this.pass + "; Candidate Itemsets: " + candidates
                    + "; Pruned Itemsets: " + pruned
                    + "; Total Frequent Itemsets: " + this.nFrequentItemsets);
        }
    }

    /**
     * <p>
     * It constructs a rules set once the algorithm has been carried out
     * </p>
     * <p>
     * As a side effect it stores the number of distinct records covered by the
     * generated rules, which the report methods print afterwards.
     * </p>
     * @return An array of association rules having both minimum confidence and support
     */
    public ArrayList<AssociationRule> generateRulesSet() {
        ArrayList<AssociationRule> rules = new ArrayList<AssociationRule>();
        HashSet<Integer> coveredRecords = new HashSet<Integer>();

        this.generateRules(this.root, new ArrayList<Item>(), rules, coveredRecords);
        this.nCoveredRecords = coveredRecords.size();

        return rules;
    }

    /**
     * <p>
     * It prints out on screen relevant information regarding the mined association rules
     * </p>
     * <p>
     * The two count lines are always printed; the nine summary lines are
     * printed only when at least one rule is given.
     * </p>
     * @param rules The array of association rules from which gathering relevant information
     */
    public void printReport(ArrayList<AssociationRule> rules) {
        System.out.println("\nNumber of Frequent Itemsets found: " + this.nFrequentItemsets);
        System.out.println("Number of Association Rules generated: " + rules.size());

        if (!rules.isEmpty()) {
            double[] values = this.computeReportValues(rules);
            for (int k = 0; k < METRIC_LABELS.length; k++) {
                System.out.println(METRIC_LABELS[k] + values[k]);
            }
        }
    }

    /**
     * <p>
     * It writes the report both to the given writer and on screen
     * </p>
     * <p>
     * Unlike {@link #printReport(ArrayList)}, the nine summary lines are always
     * emitted; with an empty rule set every value is reported as 0.0. The
     * writer is neither flushed nor closed here.
     * </p>
     * @param rules The array of association rules from which gathering relevant information
     * @param w The writer receiving a copy of the report
     */
    public void saveReport(ArrayList<AssociationRule> rules, PrintWriter w) {
        w.println("\nNumber of Frequent Itemsets found: " + this.nFrequentItemsets);
        System.out.println("\nNumber of Frequent Itemsets found: " + this.nFrequentItemsets);
        // The writer copy of this line carries a leading blank line, the screen copy does not.
        w.println("\nNumber of Association Rules generated: " + rules.size());
        System.out.println("Number of Association Rules generated: " + rules.size());

        // A fresh array is all zeros, which prints as "0.0" for every metric.
        double[] values = rules.isEmpty()
                ? new double[METRIC_LABELS.length]
                : this.computeReportValues(rules);

        for (int k = 0; k < METRIC_LABELS.length; k++) {
            String line = METRIC_LABELS[k] + values[k];
            w.println(line);
            System.out.println(line);
        }
    }

    /**
     * Computes the rounded values of the nine summary lines, in the order of
     * {@code METRIC_LABELS}. Must only be called with a non-empty rule list.
     *
     * @param rules the rules to summarise
     * @return one value per entry of {@code METRIC_LABELS}
     */
    private double[] computeReportValues(List<AssociationRule> rules) {
        double[] sums = new double[METRIC_LABELS.length - 1];

        for (int r = 0; r < rules.size(); r++) {
            AssociationRule ar = rules.get(r);

            sums[0] += ar.getRuleSupport();
            sums[1] += ar.getConfidence();
            sums[2] += ar.getLift();
            sums[3] += ar.getConv();
            sums[4] += ar.getCF();
            sums[5] += ar.getNetConf();
            sums[6] += ar.getYulesQ();
            // Reported as "Number of Antecedents" but counts antecedent AND consequent items.
            sums[7] += ar.getAntecedent().size() + ar.getConsequent().size();
        }

        double[] values = new double[METRIC_LABELS.length];
        for (int k = 0; k < sums.length; k++) {
            values[k] = roundDouble(sums[k] / rules.size(), 2);
        }
        values[METRIC_LABELS.length - 1] =
                roundDouble((100.0 * this.nCoveredRecords) / this.nTrans, 2);

        return values;
    }

    /**
     * Builds the first trie level: one child of the root for every distinct
     * value found in the transactions, whose support is the number of times
     * that value occurs.
     *
     * @return the number of distinct items created
     */
    private int generateFirstCandidates() {
        int generated = 0;
        // Live view of the root's children: items added below must be visible to indexOf.
        List<Item> firstLevel = this.root.getChildren();

        for (int i = 0; i < this.nTrans; i++) {
            for (int j = 0; j < this.nAttr; j++) {
                Item candidate = new Item(this.transactions[i][j]);
                int index = firstLevel.indexOf(candidate);

                if (index == -1) {
                    candidate.incSupport();
                    this.root.addChild(candidate);
                    generated++;
                } else {
                    firstLevel.get(index).incSupport();
                }
            }
        }

        return generated;
    }

    /**
     * Removes, from the whole sub-trie below {@code item}, every node whose
     * relative support is below the minimum support. Children of a removed
     * node are dropped with it and are not counted separately.
     *
     * @param item the node whose descendants are checked
     * @return the number of nodes removed
     */
    private int checkMinSupport(Item item) {
        int pruned = 0;
        List<Item> children = item.getChildren();
        // Iterate over a snapshot because the live list is modified while pruning.
        List<Item> snapshot = new ArrayList<Item>(children);

        for (int i = 0; i < snapshot.size(); i++) {
            Item child = snapshot.get(i);

            if (((double) child.getSupport() / (double) this.nTrans) < this.minSupport) {
                children.remove(child);
                pruned++;
            } else {
                pruned += this.checkMinSupport(child);
            }
        }

        return pruned;
    }

    /**
     * Walks the trie down to the level just above the current pass and extends
     * every node found there with candidate children.
     *
     * @param item the node currently visited
     * @param current the items on the path from the root to {@code item}; it is
     *        modified during the walk and restored before returning
     * @param depth the trie level of the children of {@code item} (1 for the root)
     * @return the number of candidates added
     */
    private int generateCandidates(Item item, List<Item> current, int depth) {
        int generated = 0;
        List<Item> children = item.getChildren();

        for (int i = 0; i < children.size(); i++) {
            Item child = children.get(i);

            current.add(child);
            if (depth == this.pass - 1) {
                generated += this.copySiblings(child, children, current);
            } else {
                generated += this.generateCandidates(child, current, depth + 1);
            }
            current.remove(child);
        }

        return generated;
    }

    /**
     * Adds to {@code item} a copy of every sibling that belongs to a higher
     * attribute index, provided the resulting itemset passes
     * {@link #checkSubsets}.
     *
     * @param item the node to extend
     * @param siblings the children list that contains {@code item}
     * @param current the path from the root to {@code item}, inclusive
     * @return the number of children added to {@code item}
     */
    private int copySiblings(Item item, List<Item> siblings, List<Item> current) {
        int copied = 0;
        int itemAttr = item.getLabel() % this.nAttr;

        for (int i = 0; i < siblings.size(); i++) {
            Item sibling = siblings.get(i);
            int siblingAttr = sibling.getLabel() % this.nAttr;

            // Only combine with later attributes, so each itemset is generated once.
            if (siblingAttr > itemAttr) {
                current.add(sibling);
                if (this.checkSubsets(current, this.root.getChildren(), 0, 1)) {
                    // A fresh node is used so the new level gets its own support counter.
                    item.addChild(new Item(sibling.getLabel()));
                    copied++;
                }
                current.remove(sibling);
            }
        }

        return copied;
    }

    /**
     * Recursive check used while generating candidates: verifies that the
     * required sub-paths of {@code current} are still present in the trie.
     * Statement order is kept exactly as in the original implementation.
     *
     * @param current the candidate itemset, in path order
     * @param children the trie level being searched
     * @param mark the lowest index of {@code current} to look up at this level
     * @param depth the highest index of {@code current} to look up at this level
     * @return {@code true} when every lookup succeeded
     */
    private boolean checkSubsets(List<Item> current, List<Item> children, int mark, int depth) {
        boolean ok = !children.isEmpty();
        int i = depth;

        while (ok && (mark <= i)) {
            int index = children.indexOf(current.get(i));

            if (index >= 0) {
                if (depth < this.pass - 1) {
                    Item child = children.get(index);
                    ok = this.checkSubsets(current, child.getChildren(), i + 1, depth + 1);
                }
            } else {
                ok = false;
            }
            i--;
        }

        return ok;
    }

    /**
     * Counts, over all transactions, the support of the candidates of the
     * current pass.
     */
    private void countSupport() {
        for (int i = 0; i < this.nTrans; i++) {
            this.countSupport(this.root, this.transactions[i], 1);
        }
    }

    /**
     * Follows every trie path matched by one transaction and increments the
     * support of the matched nodes located at the level of the current pass.
     *
     * @param item the node currently visited
     * @param items the values of one transaction, indexed by attribute
     * @param depth the trie level of the children of {@code item}
     */
    private void countSupport(Item item, int[] items, int depth) {
        List<Item> children = item.getChildren();

        for (int i = 0; i < children.size(); i++) {
            Item child = children.get(i);
            int label = child.getLabel();

            // label % nAttr selects the transaction column this item refers to.
            if (label == items[label % this.nAttr]) {
                if (depth == this.pass) {
                    child.incSupport();
                } else {
                    this.countSupport(child, items, depth + 1);
                }
            }
        }
    }

    /**
     * Rounds a number away from zero to the given number of decimal places.
     * Infinite and NaN inputs are returned unchanged.
     *
     * @param number the value to round
     * @param decimalPlace the number of decimals to keep
     * @return the rounded value
     */
    public static double roundDouble(double number, int decimalPlace) {
        if (Double.isInfinite(number) || Double.isNaN(number)) {
            return number;
        }
        // valueOf uses the canonical decimal text of the double. new BigDecimal(double)
        // exposes the binary expansion (0.1 -> 0.1000000000000000055...), which rounding
        // up then turns into 0.11.
        return BigDecimal.valueOf(number)
                .setScale(decimalPlace, RoundingMode.UP)
                .doubleValue();
    }

    /**
     * Walks the trie depth-first and, for every itemset with at least two
     * items, emits one rule per item used as the single consequent, keeping
     * those whose confidence reaches the minimum confidence.
     *
     * @param item the node whose children are visited
     * @param itemset the path from the root to {@code item}; it is modified
     *        during the walk and restored before returning
     * @param rules receives the accepted rules
     * @param cov_recs receives the identifiers of the records covered by the
     *        itemsets that produced at least one rule
     */
    private void generateRules(Item item, List<Item> itemset, List<AssociationRule> rules,
            Set<Integer> cov_recs) {
        List<Item> children = item.getChildren();

        for (int f = 0; f < children.size(); f++) {
            Item node = children.get(f);
            itemset.add(node);

            if (itemset.size() > 1) {
                // The deepest node of the path stores the support of the whole itemset.
                double rule_sup = (double) node.getSupport() / (double) this.nTrans;
                boolean coverageRecorded = false;

                for (int i = 0; i < itemset.size(); i++) {
                    Item consequent = itemset.get(i);

                    List<Item> ant = new ArrayList<Item>();
                    for (int j = 0; j < itemset.size(); j++) {
                        if (i != j) {
                            ant.add(itemset.get(j));
                        }
                    }

                    double ant_sup = (double) this.searchItemsetIntoTrie(this.root, ant, 0)
                            / (double) this.nTrans;
                    // The path node only holds the support of the prefix ending at it, so
                    // the consequent's own support is read from the first trie level.
                    double cons_sup = (double) this.singleItemSupport(consequent)
                            / (double) this.nTrans;
                    double conf = rule_sup / ant_sup;

                    // Written as a negated >= so that a NaN confidence is rejected too.
                    if (!(conf >= this.minConfidence)) {
                        continue;
                    }

                    // compute lift
                    double lift;
                    if ((cons_sup == 0) || (ant_sup == 0)) {
                        lift = 1;
                    } else {
                        lift = rule_sup / (ant_sup * cons_sup);
                    }

                    // compute conviction (infinite when the confidence is exactly 1)
                    double conv;
                    if ((cons_sup == 1) || (ant_sup == 0)) {
                        conv = 1;
                    } else {
                        conv = (ant_sup * (1 - cons_sup)) / (ant_sup - rule_sup);
                    }

                    // compute netconf
                    double netConf;
                    if ((ant_sup == 0) || (ant_sup == 1)
                            || (Math.abs(ant_sup * (1 - ant_sup)) <= 0.001)) {
                        netConf = 0;
                    } else {
                        netConf = (rule_sup - (ant_sup * cons_sup)) / (ant_sup * (1 - ant_sup));
                    }

                    // compute yulesQ
                    double numeratorYules = (rule_sup * (1 - cons_sup - ant_sup + rule_sup))
                            - ((ant_sup - rule_sup) * (cons_sup - rule_sup));
                    double denominatorYules = (rule_sup * (1 - cons_sup - ant_sup + rule_sup))
                            + ((ant_sup - rule_sup) * (cons_sup - rule_sup));
                    double yulesQ;
                    if ((ant_sup == 0) || (ant_sup == 1) || (cons_sup == 0) || (cons_sup == 1)
                            || (Math.abs(denominatorYules) <= 0.001)) {
                        yulesQ = 0;
                    } else {
                        yulesQ = numeratorYules / denominatorYules;
                    }

                    // compute Certain Factor (CF)
                    double CF = 0;
                    if (conf > cons_sup) {
                        CF = (conf - cons_sup) / (1 - cons_sup);
                    } else if (conf < cons_sup) {
                        CF = (conf - cons_sup) / cons_sup;
                    }

                    AssociationRule ar = new AssociationRule();
                    for (int j = 0; j < ant.size(); j++) {
                        ar.addAntecedent(ant.get(j).getLabel());
                    }
                    ar.addConsequent(consequent.getLabel());
                    ar.setRuleSupport(rule_sup);
                    ar.setAntecedentSupport(ant_sup);
                    ar.setConsequentSupport(cons_sup);
                    ar.setConfidence(conf);
                    ar.setLift(lift);
                    ar.setConv(conv);
                    ar.setCF(CF);
                    ar.setNetConf(netConf);
                    ar.setYulesQ(yulesQ);

                    // Every rule of one itemset covers the same records: intersect once.
                    if (!coverageRecorded) {
                        cov_recs.addAll(this.countCoveredRecords(itemset));
                        coverageRecorded = true;
                    }
                    rules.add(ar);
                }
            }

            if (node.hasChildren()) {
                this.generateRules(node, itemset, rules, cov_recs);
            }

            itemset.remove(node);
        }
    }

    /**
     * Returns the support count stored for a single item at the first trie
     * level, or 0 when the item is not there.
     * NOTE(review): relies on {@code Item.equals} matching items by label, as
     * the other lookups of this class already do - confirm in {@code Item}.
     *
     * @param item the item to look up
     * @return the support count of the 1-itemset made of {@code item}
     */
    private int singleItemSupport(Item item) {
        List<Item> single = new ArrayList<Item>(1);
        single.add(item);
        return this.searchItemsetIntoTrie(this.root, single, 0);
    }

    /**
     * Looks an itemset up in the trie, following its items in order.
     *
     * @param item the node whose children are searched
     * @param itemset the itemset to find
     * @param index the position of {@code itemset} matched at this level
     * @return the support count stored for the itemset, or 0 when it is absent
     */
    private int searchItemsetIntoTrie(Item item, List<Item> itemset, int index) {
        List<Item> children = item.getChildren();

        for (int i = 0; i < children.size(); i++) {
            Item child = children.get(i);

            if (child.equals(itemset.get(index))) {
                if (index == (itemset.size() - 1)) {
                    return child.getSupport();
                }
                if (child.hasChildren()) {
                    return this.searchItemsetIntoTrie(child, itemset, index + 1);
                }
                return 0;
            }
        }

        return 0;
    }

    /**
     * Intersects the transaction-identifier sets that the dataset keeps for
     * each item of the itemset.
     *
     * @param itemset a non-empty itemset
     * @return a new set with the identifiers common to every item
     */
    private HashSet<Integer> countCoveredRecords(List<Item> itemset) {
        ArrayList<HashSet<Integer>> tidLists = new ArrayList<HashSet<Integer>>();

        for (int i = 0; i < itemset.size(); i++) {
            Item item = itemset.get(i);
            tidLists.add(this.dataset.getTIDList().get(item.getLabel()));
        }

        // Work on a copy so the dataset's own sets are never modified.
        HashSet<Integer> toIntersect = new HashSet<Integer>(tidLists.get(0));

        for (int k = 1; k < tidLists.size(); k++) {
            toIntersect.retainAll(tidLists.get(k));
            if (toIntersect.isEmpty()) {
                break;
            }
        }

        return toIntersect;
    }
}