package ca.pfv.spmf.algorithms.associationrules.MNRRules;
/* This file is copyright (c) 2008-2012 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import ca.pfv.spmf.algorithms.frequentpatterns.zart.TZTableClosed;
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemset;
import ca.pfv.spmf.patterns.rule_itemset_array_integer_with_count.Rule;
import ca.pfv.spmf.patterns.rule_itemset_array_integer_with_count.Rules;
/**
* This is an implementation of an algorithm for finding the set of Minimum Non Redundant rules (MNR)
* from a transaction database.
* <br/><br/>
*
* This set is defined as the set of association rules
* of the form P1 ==> P2 / P1, where P1 is a generator of P2,
* P2 is a closed itemset, and the rule has a support and confidence
* respectively no less than minsup and minconf.
* <br/><br/>
*
* See the following publication for more details:
* <br/><br/>
*
* M. Kryszkiewicz. Representative Association Rules. In PAKDD '98: Proceedings
* of the Second Pacic-Asia Conference on Research and Development in Knowledge
* Discovery and Data Mining, pages 198209, London, UK, 1998. Springer-Verlag.
* <br/><br/>
*
* Here, the implementation is based on the description in Szathmary's thesis (2006).
* The algorithm proceed by exploiting the generators and closed itemset found
* by the Zart algorithm.
*<br/><br/>
*
* This algorithm can save the result to a file or keep it into memory
* if the user gives a null output path to the runAlgorithm() method.
* <br/><br/>
*
* @author Philippe Fournier-Viger
*
*/
public class AlgoMNRRules {
// parameters
private TZTableClosed closedPatternsAndGenerators; //closed itemsets and their generators
private double minconf; // minimum confidence threshold
private int databaseSize; //
// for statistics
long startTimestamp = 0; // last execution start time
long endTimeStamp = 0; // last execution end time
private int ruleCount; // number of rule found
// the rules found (if we save to memory)
private Rules rules;
// object to write the output file if the user wish to write to a file
BufferedWriter writer = null;
/**
* Default constructor
*/
public AlgoMNRRules(){
}
/**
* Run the algorithm.
* @param closedPatternsAndGenerators Closed itemsets and their associated generators.
* @param databaseSize the number of transactions in the transaction database.
* @param minconf minimum confidence threshold
* @param outputFilePath the output file path, if the results should be saved to a file.
* if null, the result are saved in memory and returned by this method.
*
* @return if the user chose to save to memory, this methods return the set of IGB association rules,
* otherwise, the result is saved to the output file chosen by the user.
* @throws IOException
*/
public Rules runAlgorithm(String outputFilePath, double minconf, TZTableClosed closedPatternsAndGenerators, int databaseSize) throws IOException {
// if the user want to keep the result into memory
if(outputFilePath == null){
writer = null;
rules = new Rules("MNR association rules");
}else{
// if the user want to save the result to a file
rules = null;
writer = new BufferedWriter(new FileWriter(outputFilePath));
}
// save the parameters received by the user
this.closedPatternsAndGenerators = closedPatternsAndGenerators;
this.minconf = minconf;
this.databaseSize = databaseSize;
// reset the number of rule found
ruleCount = 0;
// record start time
startTimestamp = System.currentTimeMillis();
// 1 - for each equivalence class
for(Map.Entry<Itemset,List<Itemset>> entryEquivalenceClass : closedPatternsAndGenerators.mapGenerators.entrySet()){
// get the list of generators
List<Itemset> listGenerators = entryEquivalenceClass.getValue();
// if the equivalence class has no generator, then its closed itemset is a generator...
if(listGenerators.size() == 0 && entryEquivalenceClass.getKey().size() !=0){
listGenerators.add(entryEquivalenceClass.getKey());
}
// loop over the generators g of the equivalence class
for(Itemset generatorG : listGenerators){
// 3 - find proper supersets of G among the frequent closed itemsets
Set<Itemset> supersets = new HashSet<Itemset>();
for(Itemset closedItemset : closedPatternsAndGenerators.mapGenerators.keySet()){
if(generatorG.size() < closedItemset.size()
&& closedItemset.containsAll(generatorG)){
supersets.add(closedItemset);
}
}
// 6 - loop over the supersets found
for(Itemset closedItemset : supersets){
Itemset leftSide = generatorG;
Itemset rightSide = closedItemset.cloneItemSetMinusAnItemset(generatorG);
calculateSupport(rightSide);
// left.support = g.support;
double conf = ((double)closedItemset.getAbsoluteSupport()) / ((double)generatorG.getAbsoluteSupport());
//
if(conf >= minconf){
saveRule(leftSide, rightSide, closedItemset.getAbsoluteSupport(), conf);
}
}
}
}
endTimeStamp = System.currentTimeMillis();
// if we saved to a file, we need to close it.
if(writer != null){
writer.close();
}
return rules;
}
/**
* Save a rule to file or to memory (if no output file path is provided)
* @param itemset1 the left side of the rule
* @param itemset2 the right side of the rule
* @param absoluteSupport the support of the rule
* @param confidence the confidence of the rule
* @throws IOException exception if there is an error writing the output file.
*/
private void saveRule(Itemset itemset1, Itemset itemset2,
int absoluteSupport, double confidence) throws IOException {
// increase the number of rules found
ruleCount++;
// if the result should be saved to a file
if(writer != null){
StringBuilder buffer = new StringBuilder();
// WRITE LEFT SIDE OF THE RULE
// If the left side is empty we write ____
if(itemset1.size() == 0){
buffer.append("__");
}
else{
// if the left side is not empty, a loop will save
// each item one by one, separated by spaces
for (int i = 0; i < itemset1.size(); i++) {
buffer.append(itemset1.get(i));
if (i != itemset1.size() - 1) {
buffer.append(" ");
}
}
}
// write separator
buffer.append(" ==> ");
// write the right side of the rule
for (int i = 0; i < itemset2.size(); i++) {
buffer.append(itemset2.get(i));
if (i != itemset2.size() - 1) {
buffer.append(" ");
}
}
// write the support
buffer.append(" #SUP: ");
// write the support as an integer
buffer.append(absoluteSupport);
// write the confidence
buffer.append(" #CONF: ");
buffer.append(doubleToString(confidence));
writer.write(buffer.toString());
writer.newLine(); // write new line
}
else{ // otherwise the result is kept into memory
rules.addRule(new Rule(itemset1.getItems(), itemset2.getItems(), itemset1.getAbsoluteSupport(),
absoluteSupport, confidence));
}
}
/**
* Calculate the support of a given itemset.
* @param itemsetToTest a given itemset (Itemset).
*/
private void calculateSupport(Itemset itemsetToTest) { // THIS WAS CHANGED
// check if closed
for(List<Itemset> list : closedPatternsAndGenerators.levels){
if(list.size() == 0 || list.get(0).size() < itemsetToTest.size()){
continue; // it is not useful to consider itemsets that are smaller
// than itemsetToTest.size
}
for(Itemset itemset : list){
if(itemset.containsAll(itemsetToTest)){
itemsetToTest.setAbsoluteSupport(itemset.getAbsoluteSupport());
return;
}
}
}
}
/**
* Convert a double value to a string with only five decimal
* @param value a double value
* @return a string
*/
private String doubleToString(double value) {
// convert it to a string with two decimals
DecimalFormat format = new DecimalFormat();
format.setMinimumFractionDigits(0);
format.setMaximumFractionDigits(5);
return format.format(value);
}
/**
* Print statistics about the algorithm execution to System.out.
*/
public void printStatistics() {
System.out
.println("============= MNR ASSOCIATION RULE GENERATION - STATS =============");
System.out.println(" Number of association rules generated : "
+ ruleCount);
System.out.println(" Total time ~ " + (endTimeStamp - startTimestamp)
+ " ms");
System.out
.println("===================================================");
}
}