package ca.pfv.spmf.algorithms.associationrules.IGB; /* This file is copyright (c) 2008-2012 Philippe Fournier-Viger * * This file is part of the SPMF DATA MINING SOFTWARE * (http://www.philippe-fournier-viger.com/spmf). * * SPMF is free software: you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * SPMF is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU General Public License for more details. * You should have received a copy of the GNU General Public License along with * SPMF. If not, see <http://www.gnu.org/licenses/>. */ import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import ca.pfv.spmf.algorithms.frequentpatterns.zart.TZTableClosed; import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemset; import ca.pfv.spmf.patterns.rule_itemset_array_integer_with_count.Rule; import ca.pfv.spmf.patterns.rule_itemset_array_integer_with_count.Rules; /** * This is an implementation of the GEN-IGB-FERMES algorithm for mining * the IGB basis of association rules. * <br/><br/> * * This algorithm is described in the article (in French) : * "IGB : une nouvelle base g�n�rique informative des r�gles d�association" * dans Information-Interaction-Intelligence (Revue I3), vol. 6, n� 1, C�padu�s-�ditions, pp. 31-67, octobre 2006 * <br/><br/> * * This algorithm generates the IGB basis of association rules from the set of frequent closed itemsets, * their support and their associated minimal generators. * <br/><br/> * * This algorithm can save the result to a file or keep it into memory * if the user provides a null output file path to the runAlgorithm() method. * * @author Philippe Fournier-Viger */ public class AlgoIGB { // closed itemsets and their corresponding generators private TZTableClosed closedPatternsAndGenerators; private double minconf; // minimum confidence private int databaseSize; // number of transactions in the original database // the rules found by IGB private Rules rules; // for statistics long startTimestamp = 0; // last execution start time long endTimeStamp = 0; // last execution end time private int ruleCount; // the number of rules found // object to write the output file if the user wish to write to a file BufferedWriter writer = null; public AlgoIGB(){ } /** * Run the algorithm. * @param closedPatternsAndGenerators Closed itemsets and their associated generators. * @param databaseSize the number of transactions in the transaction database. * @param outputFilePath the output file path, if the results should be saved to a file. * if null, the result are saved in memory and returned by this method. * * @return if the user chose to save to memory, this methods return the set of IGB association rules, * otherwise, the result is saved to the output file chosen by the user. * @throws IOException */ public Rules runAlgorithm(TZTableClosed closedPatternsAndGenerators, int databaseSize, double minconf, String outputFilePath) throws IOException { // if the user want to keep the result into memory if(outputFilePath == null){ writer = null; rules = new Rules("IGB ASSOCIATION RULES"); }else{ // if the user want to save the result to a file rules = null; writer = new BufferedWriter(new FileWriter(outputFilePath)); } startTimestamp = System.currentTimeMillis(); // parameters this.minconf = minconf; this.closedPatternsAndGenerators = closedPatternsAndGenerators; this.databaseSize = databaseSize; // initialize variable to count the number of rules found ruleCount = 0; // line 3 of the pseudo code in the IGB paper: // For each closed frequent itemset t. for(List<Itemset> level : closedPatternsAndGenerators.levels){ // for each itemset for(Itemset itemset : level){ // if it is not the empty set if(itemset.size() != 0){ // we will process this itemset processItemset(itemset); } } } // record the end time endTimeStamp = System.currentTimeMillis(); // if the user chose to save to a file, we close the file. if(writer != null){ writer.close(); } return rules; } /** * Process an itemset to generate rules. * @param i an itemset. * @throws IOException exception if error while writing output file */ private void processItemset(Itemset i) throws IOException { // If the itemset has enough confidence if(i.getRelativeSupport(databaseSize) >= minconf){ // line 3 // we generate a rule with an empty antecedent // Rule rule = new Rule(, i, i.getAbsoluteSupport(), i.getRelativeSupport(databaseSize)); // 4,5,6 save(new Itemset(), i, i.getAbsoluteSupport(), i.getRelativeSupport(databaseSize)); return; } // Line 9 of the paper // Create an empty set to store the smallest premises Set<Itemset> lSmallestPremise = new HashSet<Itemset>(); // line 10 of the pseudo code in the IGB paper // For each closed itemsets, starting from size j=0 to the maximum size for(int j=0; j < i.size(); j++){ for(Itemset i1 : closedPatternsAndGenerators.levels.get(j)){ // if the confidence of I1 ==> I / I1 is higher than minconf // and that I1 \included_in I then: if(((double)i.getAbsoluteSupport() / (double)i1.getAbsoluteSupport()) >= minconf && i.containsAll(i1)){ // line 11 of the pseudo code in the IGB paper // For each generator genI1 of I1: for(Itemset genI1 : closedPatternsAndGenerators.mapGenerators.get(i1)){ // line 12 of the pseudo code in the IGB paper // check if there is a premise smaller than gen1 already found boolean thereIsSmaller = false; // for each premise: for(Itemset l : lSmallestPremise){ // If strictly genI1 contains L. if(genI1.containsAll(l) && genI1.size() != l.size()){ // remember that genI1 is not the smallest thereIsSmaller = true; break; } } // if genI1 has no smaller premise if(thereIsSmaller == false){ // Add genI1 to the set of smallest premises lSmallestPremise.add(genI1);//13 } } } } } // line 14 of the pseudo code // For each smallest premise found in the previous step for(Itemset gs : lSmallestPremise){ // lines 15, 16, 17 of the pseudo code in the IGB paper // Finds all items from I that are not in GS List<Integer> list_i_gs = new ArrayList<Integer>(); for(Integer item : i.itemset){ if(!gs.contains(item)){ list_i_gs.add(item); } } // Ugly conversion (we have to do that because Java cannot convert // from List<Integer> to int[]... int[] temp = new int[list_i_gs.size()]; for(int k=0; k< list_i_gs.size(); k++){ temp[k] = list_i_gs.get(k); } // We create the corresponding rule gs ==> I / gs Itemset i_gs = new Itemset(temp); // We save the rule save(gs, i_gs, i.getAbsoluteSupport(), (double)i.getAbsoluteSupport() / (double)gs.getAbsoluteSupport()); } } /** * Save a rule to memory or file depending on what the user chose * @param itemset1 the left side of the rule * @param itemset2 the right side of the rule * @param absoluteSupport the rule support * @param confidence the confidence of the rule * @throws IOException if error occurs while writing output to file. */ private void save(Itemset itemset1, Itemset itemset2, int absoluteSupport, double confidence) throws IOException { // increase the number of rule found ruleCount++; // if the result should be saved to a file if(writer != null){ StringBuilder buffer = new StringBuilder(); // write itemset 1 if(itemset1.size() == 0){ buffer.append("__"); } else{ for (int i = 0; i < itemset1.size(); i++) { buffer.append(itemset1.get(i)); if (i != itemset1.size() - 1) { buffer.append(" "); } } } // write separator buffer.append(" ==> "); // write itemset 2 for (int i = 0; i < itemset2.size(); i++) { buffer.append(itemset2.get(i)); if (i != itemset2.size() - 1) { buffer.append(" "); } } // write separator buffer.append(" #SUP: "); // write support buffer.append(absoluteSupport); // write separator buffer.append(" #CONF: "); // write confidence buffer.append(doubleToString(confidence)); writer.write(buffer.toString()); writer.newLine(); }// otherwise the result is kept into memory else{ Rule rule = new Rule(itemset1.getItems(), itemset2.getItems(), itemset1.support, absoluteSupport, confidence); rules.addRule(rule); } } /** * Convert a double value to a string with only five decimals * @param value a double value * @return a string */ private String doubleToString(double value) { // convert it to a string with two decimals DecimalFormat format = new DecimalFormat(); format.setMinimumFractionDigits(0); format.setMaximumFractionDigits(5); return format.format(value); } /** * Print statistics about the algorithm execution to System.out. */ public void printStatistics() { System.out .println("============= IGB ASSOCIATION RULE GENERATION - STATS ============="); System.out.println(" Number of association rules generated : " + ruleCount); System.out.println(" Total time ~ " + (endTimeStamp - startTimestamp) + " ms"); System.out .println("==================================================="); } }