/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.UnsupervisedLearning.AssociationRules.FuzzyRuleLearning.FuzzyApriori;

import java.util.*;

/**
 * <p>
 * It provides the implementation of the algorithm to be run in a process.
 * Frequent itemsets are mined level by level: 1-itemsets first, then each
 * level is obtained by joining pairs of itemsets of the previous level.
 * Association rules are derived from every frequent itemset of two or more
 * items as soon as it is found.
 * </p>
 *
 * @author Written by Alvaro López
 * @version 1.0
 * @since JDK1.6
 */
public class FuzzyAprioriProcess {

	/** Minimum support an itemset must reach to be kept as frequent. */
	private double minSupport;
	/** Minimum confidence a rule must reach to be stored. */
	private double minConfidence;
	/** Whether only the best-supported label per attribute is kept as 1-itemset. */
	private boolean useMaxForOneFrequentItemsets;
	/** Dataset against which every support is calculated. */
	private myDataset dataset;

	/** Number of frequent 1-itemsets found by the last run. */
	private int countOneFrequentItemsets;
	/** Number of frequent itemsets (all sizes) found by the last run. */
	private int countFrequentItemsets;
	/** Rules accumulated while mining. */
	private ArrayList<AssociationRule> associationRulesSet;
	/** One flag per transaction; true once a rule-producing itemset covers it. */
	private boolean[] coveredRecords;

	/**
	 * <p>
	 * It creates a new process for the algorithm by setting up its parameters
	 * </p>
	 * @param dataset The instance of the dataset for dealing with its records
	 * @param useMaxForOneFrequentItemsets It indicates whether the max operator must be used while discovering 1-Frequent Itemsets
	 * @param minSupport The user-specified minimum support for the mined association rules
	 * @param minConfidence The user-specified minimum confidence for the mined association rules
	 */
	public FuzzyAprioriProcess(myDataset dataset, boolean useMaxForOneFrequentItemsets, double minSupport, double minConfidence) {
		this.dataset = dataset;
		this.useMaxForOneFrequentItemsets = useMaxForOneFrequentItemsets;
		this.minSupport = minSupport;
		this.minConfidence = minConfidence;

		this.countOneFrequentItemsets = 0;
		this.countFrequentItemsets = 0;
		this.associationRulesSet = new ArrayList<AssociationRule>();

		// A freshly allocated boolean array is already all-false, i.e. no record is covered yet.
		this.coveredRecords = new boolean[ dataset.getnTrans() ];
	}

	/**
	 * <p>
	 * It runs the algorithm for mining association rules
	 * </p>
	 * One progress line is printed on standard output per pass. Mining stops
	 * when the pass number reaches the number of variables of the dataset or
	 * when the last level holds fewer than two itemsets (nothing left to join).
	 */
	public void run() {
		ArrayList<Itemset> level = this.generateOneFrequentItemsets(this.useMaxForOneFrequentItemsets);
		int passNumber = 1;

		this.countOneFrequentItemsets = level.size();
		this.countFrequentItemsets = this.countOneFrequentItemsets;

		System.out.println("\nPass: " + passNumber + "; Total Frequent Itemsets: " + this.countFrequentItemsets);

		while ((passNumber < this.dataset.getnVars()) && (level.size() > 1)) {
			level = this.generateCandidateItemsetsAndRules(level);
			this.countFrequentItemsets += level.size();
			passNumber++;

			System.out.println("Pass: " + passNumber + "; Total Frequent Itemsets: " + this.countFrequentItemsets + "; Total Association Rules: " + this.associationRulesSet.size());
		}
	}

	/**
	 * <p>
	 * It returns a rules set once the algorithm has been carried out
	 * </p>
	 * @return An array of association rules having both minimum confidence and support
	 */
	public ArrayList<AssociationRule> getRulesSet() {
		return this.associationRulesSet;
	}

	/**
	 * <p>
	 * It prints out on screen relevant information regarding the mined association rules
	 * </p>
	 * The two counters are always printed; the averages and the percentage of
	 * covered records are printed only when at least one rule is given.
	 *
	 * @param rules The array of association rules from which gathering relevant information
	 */
	public void printReport(ArrayList<AssociationRule> rules) {
		double supportSum = 0.0;
		double confidenceSum = 0.0;
		double antecedentLengthSum = 0.0;
		double interestSum = 0.0;

		for (AssociationRule rule : rules) {
			supportSum += rule.getRuleSupport();
			confidenceSum += rule.getConfidence();
			antecedentLengthSum += rule.getAntecedent().size();
			interestSum += rule.getInterestingness();
		}

		System.out.println("\nNumber of Frequent Itemsets found: " + this.countFrequentItemsets);
		System.out.println("Number of Association Rules generated: " + rules.size());

		// Averages are undefined without rules.
		if (rules.isEmpty()) {
			return;
		}

		System.out.println("Average Support: " + ( supportSum / rules.size() ));
		System.out.println("Average Confidence: " + ( confidenceSum / rules.size() ));
		System.out.println("Average Antecedents Length: " + ( antecedentLengthSum / rules.size() ));
		System.out.println("Number of Covered Records (%): " + ( (100.0 * this.countCoveredRecords()) / this.dataset.getnTrans()));
		System.out.println("Average Interestingness: " + ( interestSum / rules.size() ));
	}

	/**
	 * <p>
	 * It returns the number of 1-Frequent Itemsets
	 * </p>
	 * @return A value representing the number of 1-Frequent Itemsets
	 */
	public int getNumberOfOneFrequentItemsets() {
		return this.countOneFrequentItemsets;
	}

	/**
	 * Builds an itemset holding the single item (attribute, label) and
	 * calculates its support on the dataset.
	 */
	private Itemset buildSingleItemset(int idAttr, int idLabel) {
		Itemset single = new Itemset();
		single.add( new Item(idAttr, idLabel) );
		single.calculateSupport(this.dataset);

		return single;
	}

	/**
	 * Mines the frequent 1-itemsets.
	 *
	 * @param use_max_for_one_frequent_itemsets when true, at most one itemset per
	 *        attribute is kept: the one with the highest support among its labels
	 *        (the lowest label wins ties), provided it reaches the minimum support.
	 *        When false, every (attribute, label) pair reaching the minimum
	 *        support is kept.
	 * @return the frequent 1-itemsets, ordered by attribute and then by label
	 */
	private ArrayList<Itemset> generateOneFrequentItemsets(boolean use_max_for_one_frequent_itemsets) {
		int[] labelsPerAttr = this.dataset.getNLabelsOfAttributes();
		ArrayList<Itemset> frequent = new ArrayList<Itemset>();

		for (int attr = 0; attr < this.dataset.getnVars(); attr++) {
			if (use_max_for_one_frequent_itemsets) {
				// Label 0 is the initial champion; later labels must be strictly better.
				Itemset champion = this.buildSingleItemset(attr, 0);
				double championSupport = champion.getSupport();

				for (int label = 1; label < labelsPerAttr[attr]; label++) {
					Itemset challenger = this.buildSingleItemset(attr, label);
					double challengerSupport = challenger.getSupport();

					if (challengerSupport > championSupport) {
						championSupport = challengerSupport;
						champion = challenger;
					}
				}

				if (championSupport >= this.minSupport) {
					frequent.add(champion);
				}
			}
			else {
				for (int label = 0; label < labelsPerAttr[attr]; label++) {
					Itemset single = this.buildSingleItemset(attr, label);

					if (single.getSupport() >= this.minSupport) {
						frequent.add(single);
					}
				}
			}
		}

		return frequent;
	}

	/**
	 * Produces the next level of frequent itemsets by joining every combinable
	 * pair of the current level. Each new frequent itemset is immediately used
	 * to derive rules; when it yields at least one rule, the transactions
	 * returned by its support calculation are marked as covered.
	 *
	 * @param curr_freq_itemsets the frequent itemsets of the current level
	 * @return the frequent itemsets one item longer than those received
	 */
	private ArrayList<Itemset> generateCandidateItemsetsAndRules(ArrayList<Itemset> curr_freq_itemsets) {
		final int total = curr_freq_itemsets.size();
		ArrayList<Itemset> nextLevel = new ArrayList<Itemset>();

		for (int first = 0; first < total - 1; first++) {
			Itemset left = curr_freq_itemsets.get(first);

			for (int second = first + 1; second < total; second++) {
				Itemset right = curr_freq_itemsets.get(second);

				if (! this.isCombinable(left, right, curr_freq_itemsets)) {
					continue;
				}

				// Candidate = left itemset extended with the last item of the right one.
				Itemset candidate = left.clone();
				candidate.add( ( right.get(right.size() - 1) ).clone() );

				ArrayList<Integer> coveredTids = candidate.calculateSupport(this.dataset);

				if (candidate.getSupport() < this.minSupport) {
					continue;
				}

				if (this.generateRulesFromItemset(candidate)) {
					this.markCoveredRecords(coveredTids);
				}

				nextLevel.add(candidate);
			}
		}

		return nextLevel;
	}

	/**
	 * Derives rules with a single-item consequent from a frequent itemset: each
	 * item in turn becomes the consequent and the remaining items the
	 * antecedent. A rule is stored when its confidence (itemset support divided
	 * by antecedent support) reaches the minimum confidence.
	 *
	 * @param curr_itemset the frequent itemset, with its support already calculated
	 * @return true if at least one rule has been stored
	 */
	private boolean generateRulesFromItemset(Itemset curr_itemset) {
		boolean anyRule = false;

		for (int pos = 0; pos < curr_itemset.size(); pos++) {
			Itemset antecedent = curr_itemset.clone();
			Item removed = antecedent.remove(pos);
			antecedent.calculateSupport(this.dataset);

			double ruleSupport = curr_itemset.getSupport();
			double antecedentSupport = antecedent.getSupport();
			double confidence = ruleSupport / antecedentSupport;

			if (confidence < this.minConfidence) {
				continue;
			}

			Itemset consequent = new Itemset();
			consequent.add(removed);
			consequent.calculateSupport(this.dataset);

			double consequentSupport = consequent.getSupport();
			double interest = confidence * (ruleSupport/consequentSupport) * (1 - (ruleSupport/this.dataset.getnTrans()));

			this.associationRulesSet.add( new AssociationRule(antecedent, consequent, ruleSupport, antecedentSupport, confidence,consequentSupport,interest) );
			anyRule = true;
		}

		return anyRule;
	}

	/**
	 * Tells whether two itemsets of the current level can be joined. They must
	 * have the same size, the attribute of the last item of the first one must
	 * be lower than that of the second one, all their other items must be
	 * equal, and the resulting candidate must survive pruning.
	 */
	private boolean isCombinable(Itemset i_itemset, Itemset j_itemset, ArrayList<Itemset> curr_freq_itemsets) {
		if (i_itemset.size() != j_itemset.size()) {
			return false;
		}

		final int last = i_itemset.size() - 1;

		Item tailOfI = i_itemset.get(last);
		Item tailOfJ = j_itemset.get(last);

		// The strict attribute ordering of the last items avoids generating a candidate twice.
		if (tailOfI.getIDAttribute() >= tailOfJ.getIDAttribute()) {
			return false;
		}

		for (int pos = 0; pos < last; pos++) {
			Item fromI = i_itemset.get(pos);
			Item fromJ = j_itemset.get(pos);

			if (! fromI.equals(fromJ)) {
				return false;
			}
		}

		Itemset candidate = i_itemset.clone();
		candidate.add( ( j_itemset.get(last) ).clone() );

		return ! this.pruning(candidate, curr_freq_itemsets);
	}

	/**
	 * Tells whether a candidate must be discarded because one of its subsets,
	 * obtained by removing a single item, is not among the current frequent
	 * itemsets. The subsets missing either of the two last items are not
	 * checked here: they are the two itemsets that were joined.
	 *
	 * @return true if the candidate must be pruned
	 */
	private boolean pruning(Itemset itemset, ArrayList<Itemset> curr_freq_itemsets) {
		final int checked = itemset.size() - 2;

		for (int pos = 0; pos < checked; pos++) {
			Itemset subset = itemset.clone();
			subset.remove(pos);

			if (! this.existingIntoFrequentItemsets(subset, curr_freq_itemsets)) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Linear search of an itemset among the current frequent itemsets.
	 *
	 * @return true if some frequent itemset equals the given one
	 */
	private boolean existingIntoFrequentItemsets(Itemset itemset, ArrayList<Itemset> curr_freq_itemsets) {
		for (Itemset known : curr_freq_itemsets) {
			if ( known.equals(itemset) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Flags as covered every transaction whose identifier is in the given list.
	 */
	private void markCoveredRecords(ArrayList<Integer> covered_tids) {
		for (int tid : covered_tids) {
			this.coveredRecords[tid] = true;
		}
	}

	/**
	 * Counts the transactions flagged as covered so far.
	 */
	private int countCoveredRecords() {
		int covered = 0;

		for (boolean flag : this.coveredRecords) {
			if (flag) {
				covered++;
			}
		}

		return covered;
	}

}