package ca.pfv.spmf.algorithms.associationrules.TopKRules_and_TNR;
/* This file is copyright (c) 2008-2012 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import ca.pfv.spmf.algorithms.ArraysAlgos;
import ca.pfv.spmf.datastructures.redblacktree.RedBlackTree;
import ca.pfv.spmf.tools.MemoryLogger;
/**
 * TNR is an algorithm for mining the top-k non-redundant association rules
 * using a pattern-growth approach and several optimizations. This is the original
 * implementation, as proposed in the following paper:
 * <br/><br/>
 *
 * Fournier-Viger, P., Tseng, V.S. (2012). Mining Top-K Non-Redundant Association Rules. Proc. 20th International Symposium on Methodologies for Intelligent Systems (ISMIS 2012), Springer, LNCS 7661, pp. 31-40.
 *
 * @author Philippe Fournier-Viger, 2012
 */
public class AlgoTNR {

    // ------------------------------------------------------------------
    // Statistics about the last execution
    // ------------------------------------------------------------------

    /** Start time of the last execution (value of System.currentTimeMillis()). */
    long timeStart = 0;
    /** End time of the last execution (value of System.currentTimeMillis()). */
    long timeEnd = 0;
    /** The largest size reached by the candidate set during the last execution. */
    int maxCandidateCount = 0;
    /** Number of rules that were not added because of strategy 1 (see {@link #save}). */
    int notAdded = 0;
    /** Number of stored rules that were removed because of strategy 2 (see {@link #save}). */
    int totalremovedCount = 0;
    /** Total number of candidates popped from the candidate set and expanded. */
    long totalCandidatesConsideredFromR = 0;
    /** Total number of item pairs (rules with one item on each side) considered. */
    long totalRules11considered = 0;

    // ------------------------------------------------------------------
    // Parameters
    // ------------------------------------------------------------------

    /** Minimum confidence threshold. */
    double minConfidence;
    /** The value of k requested by the user. */
    int initialK = 0;
    /** The transaction database. */
    Database database;
    /** The delta parameter (number of extra rules kept internally). */
    int delta = 0;

    // ------------------------------------------------------------------
    // Internal variables
    // ------------------------------------------------------------------

    /** The top-k rules found until now. */
    RedBlackTree<RuleG> kRules;
    /** The candidate rules that may still be expanded. */
    RedBlackTree<RuleG> candidates;
    /** Internal rule budget: initialK + delta. */
    int k = 0;
    /** Minimum support threshold (absolute count); raised dynamically while mining. */
    int minsuppRelative;
    /** Vertical database: [item] -> IDs of the transactions containing the item. */
    BitSet[] tableItemTids;
    /** [item] -> support (number of transactions containing the item). */
    int[] tableItemCount;

    /**
     * Default constructor
     */
    public AlgoTNR() {
    }

    /**
     * Run the algorithm.
     *
     * @param k the number of rules wanted by the user.
     * @param minConfidence the minimum confidence threshold.
     * @param database the database.
     * @param delta the delta parameter; k + delta rules are kept internally while mining.
     * @return the RedBlackTree holding the rules found, trimmed to at most k rules.
     */
    public RedBlackTree<RuleG> runAlgorithm(int k, double minConfidence, Database database, int delta) {
        // reset statistics
        totalremovedCount = 0;
        notAdded = 0;
        MemoryLogger.getInstance().reset(); // reset utility to check memory usage
        maxCandidateCount = 0;
        totalCandidatesConsideredFromR = 0;
        totalRules11considered = 0;

        // save parameters
        this.delta = delta;
        this.minConfidence = minConfidence;
        this.database = database;
        this.initialK = k;

        // the internal budget is k + delta (IMPORTANT: not just k)
        this.k = k + delta;

        // the minimum support starts at 1 and is raised dynamically by save()
        this.minsuppRelative = 1;

        // initialize data structures (both tables are indexed by item id)
        tableItemTids = new BitSet[database.maxItem + 1];
        tableItemCount = new int[database.maxItem + 1];
        kRules = new RedBlackTree<RuleG>();
        candidates = new RedBlackTree<RuleG>();

        // record the start time
        timeStart = System.currentTimeMillis();

        // first database scan: build the vertical representation
        scanDatabase(database);

        // generate the rules
        start();

        // record the end time
        timeEnd = System.currentTimeMillis();

        // more than k rules may have been kept (delta, or ties on support):
        // only return k of them to the user
        cleanResult();

        return kRules;
    }

    /**
     * Start the rule generation: first consider every pair of frequent items,
     * then repeatedly expand the candidate having the highest support.
     */
    private void start() {
        // for each item I in the database
        for (int itemI = 0; itemI <= database.maxItem; itemI++) {
            // skip items that are not frequent according to the current threshold
            if (tableItemCount[itemI] < minsuppRelative) {
                continue;
            }
            BitSet tidsI = tableItemTids[itemI];

            // for each item J > I in the database
            for (int itemJ = itemI + 1; itemJ <= database.maxItem; itemJ++) {
                // skip items that are not frequent according to the current threshold
                if (tableItemCount[itemJ] < minsuppRelative) {
                    continue;
                }
                BitSet tidsJ = tableItemTids[itemJ];

                // The transactions shared by I and J are obtained with a logical AND.
                BitSet commonTids = (BitSet) tidsI.clone();
                commonTids.and(tidsJ);
                // cardinality() is expensive and the value is needed again later,
                // so it is computed once here.
                int support = commonTids.cardinality();

                totalRules11considered++; // for stats

                // If rules I ==> J and J ==> I have enough support, generate them
                // and remember them for future possible expansions
                if (support >= minsuppRelative) {
                    generateRuleSize11(itemI, tidsI, itemJ, tidsJ, commonTids, support);
                }
            }
        }

        // All rules with one item on each side have been checked; now
        // recursively expand the rules stored in "candidates".
        while (candidates.size() > 0) {
            // take the rule that has the highest support first
            RuleG rule = candidates.popMaximum();
            // if no remaining candidate can have enough support, stop
            if (rule.getAbsoluteSupport() < minsuppRelative) {
                break;
            }
            totalCandidatesConsideredFromR++;

            if (rule.expandLR) {
                // the rule may be expanded on both its left and right sides
                expandLR(rule);
            } else {
                // the rule may only be expanded on its right side
                // (to avoid generating the same rule twice)
                expandR(rule);
            }
        }
    }

    /**
     * Test the rules I ==> J and J ==> I for their confidence, save those that
     * are confident enough, and register both for future expansions.
     *
     * @param itemI an item I
     * @param tidI the set of IDs of transactions containing item I (BitSet)
     * @param itemJ an item J
     * @param tidJ the set of IDs of transactions containing item J (BitSet)
     * @param commonTids the set of IDs of transactions containing I and J (BitSet)
     * @param cardinality the cardinality of "commonTids"
     */
    private void generateRuleSize11(Integer itemI, BitSet tidI, Integer itemJ, BitSet tidJ, BitSet commonTids, int cardinality) {
        Integer[] itemsetI = new Integer[1];
        itemsetI[0] = itemI;
        Integer[] itemsetJ = new Integer[1];
        itemsetJ[0] = itemJ;

        // Rule I ==> J
        RuleG ruleLR = new RuleG(itemsetI, itemsetJ, cardinality, tidI, commonTids, itemI, itemJ);
        double confidenceIJ = ((double) cardinality) / (tableItemCount[itemI]);
        if (confidenceIJ >= minConfidence) {
            // save the rule in current top-k rules
            save(ruleLR, cardinality);
        }
        // registered even when not confident: an expansion may still be confident
        registerAsCandidate(true, ruleLR);

        // Rule J ==> I
        double confidenceJI = ((double) cardinality) / (tableItemCount[itemJ]);
        RuleG ruleRL = new RuleG(itemsetJ, itemsetI, cardinality, tidJ, commonTids, itemJ, itemI);
        if (confidenceJI >= minConfidence) {
            // save the rule in current top-k rules
            save(ruleRL, cardinality);
        }
        registerAsCandidate(true, ruleRL);
    }

    /**
     * Register a given rule in the set of candidates for future expansions.
     *
     * @param expandLR if true the rule will be considered for left/right
     *                 expansions, otherwise only right.
     * @param rule the given rule
     */
    private void registerAsCandidate(boolean expandLR, RuleG rule) {
        rule.expandLR = expandLR;
        candidates.add(rule);

        // record the maximum number of candidates for statistics
        int candidateCount = candidates.size();
        if (candidateCount > maxCandidateCount) {
            maxCandidateCount = candidateCount;
        }
        // check the memory usage
        MemoryLogger.getInstance().checkMemory();
    }

    /**
     * Record that transaction "tid" contains "item" in the given map,
     * creating the tidset of the item if it does not exist yet.
     *
     * @param map   key: item, value: tidset of the item
     * @param item  the item
     * @param tid   the transaction ID
     */
    private static void addTid(Map<Integer, BitSet> map, Integer item, int tid) {
        BitSet tidsItem = map.get(item);
        if (tidsItem == null) {
            tidsItem = new BitSet();
            map.put(item, tidsItem);
        }
        tidsItem.set(tid);
    }

    /**
     * For each item c of the map, create the rule I ==> J U {c} if its support
     * is enough, save it if it is confident enough, and register it as a
     * candidate for right expansions only.
     *
     * @param ruleG the rule I ==> J being expanded
     * @param mapCountRight key: item c, value: IDs of the transactions containing
     *                      both the rule and c
     */
    private void generateRightExpansions(RuleG ruleG, Map<Integer, BitSet> mapCountRight) {
        Integer[] rightItemset = ruleG.getItemset2();
        // The left side does not change, so its support is the same for every new rule.
        int leftSupport = ruleG.tids1.cardinality();

        for (Entry<Integer, BitSet> entry : mapCountRight.entrySet()) {
            BitSet tidsRule = entry.getValue();
            int ruleSupport = tidsRule.cardinality();
            // minsuppRelative is re-read on every iteration because save() may raise it
            if (ruleSupport >= minsuppRelative) {
                Integer itemC = entry.getKey();

                // create new right part of rule
                Integer[] newRightItemset = new Integer[rightItemset.length + 1];
                System.arraycopy(rightItemset, 0, newRightItemset, 0, rightItemset.length);
                newRightItemset[rightItemset.length] = itemC;

                // recompute maxRight
                int maxRight = (itemC >= ruleG.maxRight) ? itemC : ruleG.maxRight;

                // calculate the confidence of the rule
                double confidence = ((double) ruleSupport) / leftSupport;

                // create the rule
                RuleG candidate = new RuleG(ruleG.getItemset1(), newRightItemset, ruleSupport, ruleG.tids1, tidsRule, ruleG.maxLeft, maxRight);

                if (confidence >= minConfidence) {
                    // save the rule in current top-k rules
                    save(candidate, ruleSupport);
                }
                // register the rule as a candidate for future (right only) expansion
                registerAsCandidate(false, candidate);
            }
        }
    }

    /**
     * Try to expand a rule by left and right expansions.
     *
     * @param ruleG the rule
     */
    private void expandLR(RuleG ruleG) {
        // Maps to record the potential items to expand the left/right sides of the rule.
        // Key: item. Value: IDs of the transactions containing the item, among the
        // transactions containing the rule.
        Map<Integer, BitSet> mapCountLeft = new HashMap<Integer, BitSet>();
        Map<Integer, BitSet> mapCountRight = new HashMap<Integer, BitSet>();

        // for each transaction containing the rule
        for (int tid = ruleG.common.nextSetBit(0); tid >= 0; tid = ruleG.common.nextSetBit(tid + 1)) {
            Iterator<Integer> iter = database.getTransactions().get(tid).getItems().iterator();
            while (iter.hasNext()) {
                Integer item = iter.next();
                // Items smaller than both maxLeft and maxRight cannot be used; we can
                // stop here BECAUSE TRANSACTIONS ARE SORTED BY DESCENDING ITEM IDS
                // (see Database.java).
                if (item < ruleG.maxLeft && item < ruleG.maxRight) {
                    break;
                }
                // if that item is not frequent anymore, remove it from the transaction
                // (this permanently shrinks the transaction stored in the database)
                if (tableItemCount[item] < minsuppRelative) {
                    iter.remove();
                    continue;
                }
                // candidate for the left side: larger than maxLeft and not in the right side
                if (item > ruleG.maxLeft && !ArraysAlgos.containsLEX(ruleG.getItemset2(), item, ruleG.maxRight)) {
                    addTid(mapCountLeft, item, tid);
                }
                // candidate for the right side: larger than maxRight and not in the left side
                if (item > ruleG.maxRight && !ArraysAlgos.containsLEX(ruleG.getItemset1(), item, ruleG.maxLeft)) {
                    addTid(mapCountRight, item, tid);
                }
            }
        }

        // for each item c found for the right side, create the rule
        // I ==> J U {c} if the support is enough
        generateRightExpansions(ruleG, mapCountRight);

        // for each item c found for the left side, create the rule
        // I U {c} ==> J if the support is enough
        Integer[] leftItemset = ruleG.getItemset1();
        for (Entry<Integer, BitSet> entry : mapCountLeft.entrySet()) {
            BitSet tidsRule = entry.getValue();
            int ruleSupport = tidsRule.cardinality();
            // if the support is enough
            if (ruleSupport >= minsuppRelative) {
                Integer itemC = entry.getKey();

                // The tidset of the new left itemset is calculated
                BitSet tidsLeft = (BitSet) ruleG.tids1.clone();
                tidsLeft.and(tableItemTids[itemC]);

                // create new left part of rule
                Integer[] newLeftItemset = new Integer[leftItemset.length + 1];
                System.arraycopy(leftItemset, 0, newLeftItemset, 0, leftItemset.length);
                newLeftItemset[leftItemset.length] = itemC;

                // recompute maxLeft for the new rule
                int maxLeft = itemC >= ruleG.maxLeft ? itemC : ruleG.maxLeft;

                // calculate the confidence
                double confidence = ((double) ruleSupport) / tidsLeft.cardinality();

                // create the rule
                RuleG candidate = new RuleG(newLeftItemset, ruleG.getItemset2(), ruleSupport, tidsLeft, tidsRule, maxLeft, ruleG.maxRight);

                if (confidence >= minConfidence) {
                    // save the rule in current top-k rules
                    save(candidate, ruleSupport);
                }
                // register the rule as a candidate for future (left and right) expansion
                registerAsCandidate(true, candidate);
            }
        }
    }

    /**
     * Try to expand a rule by right expansion only.
     *
     * @param ruleG the rule
     */
    private void expandR(RuleG ruleG) {
        // Map to record the potential items to expand the right side of the rule.
        // Key: item. Value: IDs of the transactions containing the item, among the
        // transactions containing the rule.
        Map<Integer, BitSet> mapCountRight = new HashMap<Integer, BitSet>();

        // for each transaction containing the rule
        for (int tid = ruleG.common.nextSetBit(0); tid >= 0; tid = ruleG.common.nextSetBit(tid + 1)) {
            // iterate over the items in this transaction
            Iterator<Integer> iter = database.getTransactions().get(tid).getItems().iterator();
            while (iter.hasNext()) {
                Integer item = iter.next();
                // if that item is not frequent anymore, remove it from the transaction
                // (this permanently shrinks the transaction stored in the database)
                if (tableItemCount[item] < minsuppRelative) {
                    iter.remove();
                    continue;
                }
                // If the item is smaller than the largest item in the right side of the
                // rule, we can stop this loop; this relies on the items of a transaction
                // being sorted (by descending item IDs according to the note in expandLR).
                if (item < ruleG.maxRight) {
                    break;
                }
                // if the item is larger than the maximum item in the right side
                // and is not contained in the left side of the rule
                if (item > ruleG.maxRight && !ArraysAlgos.containsLEX(ruleG.getItemset1(), item, ruleG.maxLeft)) {
                    // update the tidset of the item
                    addTid(mapCountRight, item, tid);
                }
            }
        }

        // for each item c found in the previous step, create the rule
        // I ==> J U {c} if the support is enough, and register it as a candidate
        // (IMPORTANT: the registration was missing in a previous version)
        generateRightExpansions(ruleG, mapCountRight);
    }

    /**
     * Save a rule to the current set of top-k rules, unless it is redundant with
     * respect to a rule already stored, and raise the minimum support when more
     * than k rules are stored.
     *
     * @param rule the rule to be saved
     * @param support the support of the rule
     */
    private void save(RuleG rule, int support) {
        // We get a pointer to the node in the red-black tree for the
        // rule having a support just lower than support+1.
        RedBlackTree<RuleG>.Node lowerRuleNode = kRules.lowerNode(new RuleG(null, null, support + 1, null, null, 0, 0));

        // Stored rules made redundant by the new rule (strategy 2)
        Set<RuleG> rulesToDelete = new HashSet<RuleG>();

        // for each stored rule having the same support as the rule received as parameter
        while (lowerRuleNode != null
                && lowerRuleNode.key != null
                && lowerRuleNode.key.getAbsoluteSupport() == support) {
            RuleG stored = lowerRuleNode.key;
            boolean sameConfidence = rule.getConfidence() == stored.getConfidence();

            // Strategy 1:
            // if the confidence is the same and the stored rule subsumes the new rule,
            // then we don't add the new rule
            if (sameConfidence && subsume(stored, rule)) {
                notAdded++; // for stats
                return;
            }
            // Strategy 2:
            // if the confidence is the same and the new rule subsumes the stored rule,
            // then the stored rule is marked for deletion
            if (sameConfidence && subsume(rule, stored)) {
                rulesToDelete.add(stored);
            }
            // check the next rule
            lowerRuleNode = kRules.lowerNode(stored);
        }

        // Delete the rules made redundant by the new rule. The statistic is updated
        // here so that it only counts rules that are really removed (strategy 1 may
        // return above after some rules were marked).
        for (RuleG ruleX : rulesToDelete) {
            kRules.remove(ruleX);
            totalremovedCount++;
        }

        // The rule has passed the test of strategy 1, so we add it to the top-k rules
        kRules.add(rule);

        // if there are more than k rules
        if (kRules.size() > k) {
            // and if the support of the rule is higher than minsup
            if (support > this.minsuppRelative) {
                // repeatedly remove a rule having the lowest support
                // until there are just k rules left
                RuleG lower;
                do {
                    lower = kRules.lower(new RuleG(null, null, this.minsuppRelative + 1, null, null, 0, 0));
                    if (lower == null) {
                        break; /// IMPORTANT
                    }
                    kRules.remove(lower);
                } while (kRules.size() > k);
            }
            // set the minimum support to the support of the rule having
            // the lowest support.
            this.minsuppRelative = kRules.minimum().getAbsoluteSupport();
        }
    }

    /**
     * Check if a rule subsumes another: the left side of rule1 must be included in
     * (or equal to) the left side of rule2 and the right side of rule1 must include
     * (or be equal to) the right side of rule2. Support and confidence are NOT
     * compared here; callers do it.
     *
     * @param rule1 a rule
     * @param rule2 a second rule
     * @return true if rule1 subsumes rule2, otherwise false.
     */
    private boolean subsume(RuleG rule1, RuleG rule2) {
        // Cheap test first: the sizes of the itemsets must be compatible
        if (rule1.getItemset1().length > rule2.getItemset1().length
                || rule1.getItemset2().length < rule2.getItemset2().length) {
            return false;
        }
        // Then check the inclusion relationships between the itemsets
        return ArraysAlgos.containsOrEquals(rule2.getItemset1(), rule1.getItemset1())
                && ArraysAlgos.containsOrEquals(rule1.getItemset2(), rule2.getItemset2());
    }

    /**
     * Remove exceeding rules so that at most k (the value requested by the user)
     * are presented to the user.
     */
    private void cleanResult() {
        // take out the minimum until the size is k
        while (kRules.size() > initialK) {
            kRules.popMinimum();
        }
        // Set the minimum support to the minimum of the remaining rules. When no
        // rule remains there is no minimum to read, so the threshold is left as is.
        if (kRules.size() > 0) {
            minsuppRelative = kRules.minimum().getAbsoluteSupport();
        }
    }

    /**
     * Scan the database to create the vertical database (tidset and support
     * of each item).
     *
     * @param database a database of type Database.
     */
    private void scanDatabase(Database database) {
        // for each transaction (its position is used as transaction ID)
        for (int j = 0; j < database.getTransactions().size(); j++) {
            Transaction transaction = database.getTransactions().get(j);
            // for each item in the current transaction
            for (Integer item : transaction.getItems()) {
                // update the tidset of this item (represented by a bitset)
                if (tableItemTids[item] == null) {
                    tableItemTids[item] = new BitSet(database.tidsCount);
                }
                tableItemTids[item].set(j);
                // update the support of this item
                tableItemCount[item] = tableItemCount[item] + 1;
            }
        }
    }

    /**
     * Write the rules found to an output file, one rule per line.
     *
     * @param path the path to the output file
     * @throws IOException exception if an error while writing the file
     */
    public void writeResultTofile(String path) throws IOException {
        // Prepare the file
        BufferedWriter writer = new BufferedWriter(new FileWriter(path));
        try {
            // for each rule
            Iterator<RuleG> iter = kRules.iterator();
            while (iter.hasNext()) {
                RuleG rule = iter.next();
                StringBuilder buffer = new StringBuilder();
                buffer.append(rule.toString());
                // write support
                buffer.append(" #SUP: ");
                buffer.append(rule.getAbsoluteSupport());
                // write confidence
                buffer.append(" #CONF: ");
                buffer.append(rule.getConfidence());
                writer.write(buffer.toString());
                writer.newLine();
            }
        } finally {
            // always close the file, even if a write failed
            writer.close();
        }
    }

    /**
     * Print statistics about the last algorithm execution.
     */
    public void printStats() {
        System.out.println("============= NR-TOP-K RULES - STATS =============");
        System.out.println("Minsup : " + minsuppRelative);
        System.out.println("Rules count: " + kRules.size());
        System.out.println("Total time : " + ((timeEnd - timeStart) / 1000) + " s");
        System.out.println("Memory : " + MemoryLogger.getInstance().getMaxMemory() + " mb");
        System.out.println("Rules eliminated by strategy 1: " + notAdded);
        System.out.println("Rules eliminated by strategy 2: " + totalremovedCount);
        System.out.println("--------------------------------");
        System.out.println("===================================================");
    }
}