package ca.pfv.spmf.algorithms.associationrules.TopKRules_and_TNR;
/* This file is copyright (c) 2008-2012 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.PriorityQueue;
import ca.pfv.spmf.algorithms.ArraysAlgos;
import ca.pfv.spmf.datastructures.redblacktree.RedBlackTree;
import ca.pfv.spmf.tools.MemoryLogger;
/**
* TopKRules is an algorithm for mining the TOP-K association rules from a
* transaction database using
* a pattern growth approach and several optimizations. This is the original
* implementation as proposed in the following paper:
* <br/><br/>
*
* Fournier-Viger, P., Wu, C.-W., Tseng, V. S. (2012). Mining Top-K Association Rules. Proceedings of the 25th Canadian Conf. on Artificial Intelligence (AI 2012), Springer, LNAI 7310, pp. 61-73.
*
* @author Philippe Fournier-Viger, 2012
*/
public class AlgoTopKRules {

    // ------------------------------------------------------------------
    // Statistics about the last execution
    // ------------------------------------------------------------------

    /** Start time of the last execution, as returned by System.currentTimeMillis(). */
    long timeStart = 0;
    /** End time of the last execution, as returned by System.currentTimeMillis(). */
    long timeEnd = 0;

    // ------------------------------------------------------------------
    // Parameters
    // ------------------------------------------------------------------

    /** The minimum confidence threshold (minconf). */
    double minConfidence;
    /** The number of rules to be found (k). */
    int k = 0;
    /** The transaction database that is mined. */
    Database database;

    // ------------------------------------------------------------------
    // Internal state
    // ------------------------------------------------------------------

    /**
     * Internal minimum support (an absolute transaction count). It starts at 1
     * and is raised during the search each time the set of top-k rules is full.
     */
    int minsuppRelative;

    /** Vertical representation of the database: [item] -> IDs of the transactions containing the item. */
    BitSet[] tableItemTids;

    /** [item] -> support (number of transactions containing the item). */
    int[] tableItemCount;

    /** The top-k rules found until now. */
    PriorityQueue<RuleG> kRules;

    /** The rules that are candidates for expansion. */
    RedBlackTree<RuleG> candidates;

    /** The maximum number of candidates held at the same time during the last execution. */
    int maxCandidateCount = 0;

    /**
     * Default constructor
     */
    public AlgoTopKRules() {
    }

    /**
     * Run the algorithm.
     * <p>
     * Note: while expanding rules, items that have become infrequent are
     * removed from the transactions of the given database (see
     * {@link #expandLR(RuleG)} and {@link #expandR(RuleG)}), so the database
     * is modified by this call.
     *
     * @param k the value of k (number of rules to find), must be at least 1.
     * @param minConfidence the minimum confidence threshold.
     * @param database the database.
     * @throws IllegalArgumentException if k is smaller than 1
     */
    public void runAlgorithm(int k, double minConfidence, Database database) {
        // With k <= 0 the set of top-k rules would be emptied by save() and
        // then dereferenced, so we reject such values up front.
        if (k < 1) {
            throw new IllegalArgumentException("k must be at least 1 but was " + k);
        }

        // reset statistics
        MemoryLogger.getInstance().reset(); // reset utility to check memory usage
        maxCandidateCount = 0;

        // save parameters
        this.minConfidence = minConfidence;
        this.database = database;
        this.k = k;

        // prepare internal variables and structures
        this.minsuppRelative = 1;
        tableItemTids = new BitSet[database.maxItem + 1]; // indexed by item id
        tableItemCount = new int[database.maxItem + 1];   // indexed by item id
        kRules = new PriorityQueue<RuleG>();
        candidates = new RedBlackTree<RuleG>();

        // record the start time
        timeStart = System.currentTimeMillis();

        // perform the first database scan to generate the vertical database representation
        scanDatabase(database);

        // start the generation of rules
        start();

        // record the end time
        timeEnd = System.currentTimeMillis();
    }

    /**
     * Start the rule generation: first generate all rules with a single item
     * in the antecedent and a single item in the consequent, then repeatedly
     * expand the candidate rules until no candidate has enough support.
     */
    private void start() {
        // for each item I in the database
        for (int itemI = 0; itemI <= database.maxItem; itemI++) {
            // if the item is not frequent according to the current
            // minsup threshold, then skip it
            if (tableItemCount[itemI] < minsuppRelative) {
                continue;
            }
            // Get the bitset corresponding to item I
            BitSet tidsI = tableItemTids[itemI];

            // for each item J larger than I in the database
            for (int itemJ = itemI + 1; itemJ <= database.maxItem; itemJ++) {
                // minsuppRelative may have been raised by a previous save(),
                // so the frequency of J is checked against the current value
                if (tableItemCount[itemJ] < minsuppRelative) {
                    continue;
                }
                // Get the bitset corresponding to item J
                BitSet tidsJ = tableItemTids[itemJ];

                // Calculate the set of transaction IDs shared by I and J
                // with a logical AND on a copy of the tidset of I.
                BitSet commonTids = (BitSet) tidsI.clone();
                commonTids.and(tidsJ);

                // We keep the cardinality because it is needed again later
                // and cardinality() is expensive.
                int support = commonTids.cardinality();

                // If the rules I ==> J and J ==> I have enough support
                if (support >= minsuppRelative) {
                    // generate rules I ==> J and J ==> I and remember these
                    // rules for future possible expansions
                    generateRuleSize11(itemI, tidsI, itemJ, tidsJ, commonTids,
                            support);
                }
            }
        }

        // Now that all the rules with one item on each side have been checked,
        // the next step is to expand the rules in "candidates" to find more rules.
        while (candidates.size() > 0) {
            // We take the maximum candidate first (the rule having the
            // highest support according to the original implementation notes)
            RuleG rule = candidates.popMaximum();

            // if there is no more candidates with enough support, then we stop
            if (rule.getAbsoluteSupport() < minsuppRelative) {
                break;
            }

            if (rule.expandLR) {
                // the rule may be expanded on both sides
                expandLR(rule);
            } else {
                // To avoid generating redundant rules, this rule must
                // only be expanded on its right side.
                expandR(rule);
            }
        }
    }

    /**
     * This method tests the rules item1 ==> item2 and item2 ==> item1 for
     * their confidence and records them for future expansions.
     *
     * @param item1 a first item
     * @param tid1 the set of IDs of transactions containing item1 (BitSet)
     * @param item2 a second item
     * @param tid2 the set of IDs of transactions containing item2 (BitSet)
     * @param commonTids the set of IDs of transactions containing both items (BitSet)
     * @param cardinality the cardinality of "commonTids", i.e. the support of both rules
     */
    private void generateRuleSize11(Integer item1, BitSet tid1, Integer item2,
            BitSet tid2, BitSet commonTids, int cardinality) {
        Integer[] itemset1 = new Integer[1];
        itemset1[0] = item1;
        Integer[] itemset2 = new Integer[1];
        itemset2[0] = item2;

        // Create the rule item1 ==> item2
        RuleG ruleLR = new RuleG(itemset1, itemset2, cardinality, tid1,
                commonTids, item1, item2);
        // calculate its confidence
        double confidenceIJ = ((double) cardinality) / (tableItemCount[item1]);
        // if the rule has the minimum confidence
        if (confidenceIJ >= minConfidence) {
            // save the rule in the current top-k rules
            save(ruleLR, cardinality);
        }
        // register the rule as a candidate for future expansion
        registerAsCandidate(true, ruleLR);

        // Create the rule item2 ==> item1
        double confidenceJI = ((double) cardinality) / (tableItemCount[item2]);
        RuleG ruleRL = new RuleG(itemset2, itemset1, cardinality, tid2,
                commonTids, item2, item1);
        // if the rule has the minimum confidence
        if (confidenceJI >= minConfidence) {
            // save the rule in the current top-k rules
            save(ruleRL, cardinality);
        }
        // register the rule as a candidate for future expansion
        registerAsCandidate(true, ruleRL);
    }

    /**
     * Register a given rule in the set of candidates for future expansions.
     *
     * @param expandLR if true the rule will be considered for left/right
     *                 expansions, otherwise only for right expansions.
     * @param rule the given rule
     */
    private void registerAsCandidate(boolean expandLR, RuleG rule) {
        // add the rule to candidates
        rule.expandLR = expandLR;
        candidates.add(rule);

        // record the maximum number of candidates for statistics
        int candidateCount = candidates.size();
        if (candidateCount > maxCandidateCount) {
            maxCandidateCount = candidateCount;
        }

        // check the memory usage
        MemoryLogger.getInstance().checkMemory();
    }

    /**
     * Try to expand a rule by left and right expansions.
     * <p>
     * Side effect: items that are infrequent with respect to the current
     * minimum support are removed from the scanned transactions.
     *
     * @param ruleG the rule
     */
    private void expandLR(RuleG ruleG) {
        // Maps recording the items that can expand the left/right side of the rule.
        // Key: item. Value: IDs of the transactions containing the item, among
        // the transactions containing the rule.
        Map<Integer, BitSet> mapCountLeft = new HashMap<Integer, BitSet>();
        Map<Integer, BitSet> mapCountRight = new HashMap<Integer, BitSet>();

        // for each transaction containing the rule
        for (int tid = ruleG.common.nextSetBit(0); tid >= 0; tid = ruleG.common
                .nextSetBit(tid + 1)) {
            // iterate over the items of this transaction
            Iterator<Integer> iter = database.getTransactions().get(tid)
                    .getItems().iterator();
            while (iter.hasNext()) {
                Integer item = iter.next();

                // The original implementation notes state that transactions
                // are sorted by descending item IDs (see Database.java), so
                // once an item is smaller than both maxLeft and maxRight no
                // remaining item can be used for an expansion.
                if (item < ruleG.maxLeft && item < ruleG.maxRight) {
                    break;
                }
                // if that item is not frequent, remove it from the transaction
                if (tableItemCount[item] < minsuppRelative) {
                    iter.remove();
                    continue;
                }
                // the item can expand the left side if it is larger than the
                // largest left item and does not appear in the right side
                if (item > ruleG.maxLeft
                        && !ArraysAlgos.containsLEX(ruleG.getItemset2(), item,
                                ruleG.maxRight)) {
                    recordTid(mapCountLeft, item, tid);
                }
                // the item can expand the right side if it is larger than the
                // largest right item and does not appear in the left side
                if (item > ruleG.maxRight
                        && !ArraysAlgos.containsLEX(ruleG.getItemset1(), item,
                                ruleG.maxLeft)) {
                    recordTid(mapCountRight, item, tid);
                }
            }
        }

        // for each item c found for the right side, we create a rule
        // I ==> J U {c} if the support is enough
        generateRightExpansions(ruleG, mapCountRight);

        // for each item c found for the left side, we create a rule
        // I U {c} ==> J if the support is enough
        for (Entry<Integer, BitSet> entry : mapCountLeft.entrySet()) {
            BitSet tidsRule = entry.getValue();
            int ruleSupport = tidsRule.cardinality();

            // if the support is enough
            if (ruleSupport >= minsuppRelative) {
                Integer itemC = entry.getKey();

                // The tidset of the new left itemset is calculated
                BitSet tidsLeft = (BitSet) ruleG.tids1.clone();
                tidsLeft.and(tableItemTids[itemC]);

                // create the new left part of the rule
                Integer[] newLeftItemset = new Integer[ruleG.getItemset1().length + 1];
                System.arraycopy(ruleG.getItemset1(), 0, newLeftItemset, 0,
                        ruleG.getItemset1().length);
                newLeftItemset[ruleG.getItemset1().length] = itemC;

                // recompute maxLeft
                int maxLeft = itemC >= ruleG.maxLeft ? itemC : ruleG.maxLeft;

                // calculate the confidence of the rule
                double confidence = ((double) ruleSupport)
                        / tidsLeft.cardinality();

                // create the rule
                RuleG candidate = new RuleG(newLeftItemset,
                        ruleG.getItemset2(), ruleSupport, tidsLeft, tidsRule,
                        maxLeft, ruleG.maxRight);

                // if the confidence is high enough
                if (confidence >= minConfidence) {
                    // save the rule to the top-k rules
                    save(candidate, ruleSupport);
                }
                // register the rule as a candidate for further expansions
                registerAsCandidate(true, candidate);
            }
        }
    }

    /**
     * Try to expand a rule by right expansion only.
     * <p>
     * Side effect: items that are infrequent with respect to the current
     * minimum support are removed from the scanned transactions.
     *
     * @param ruleG the rule
     */
    private void expandR(RuleG ruleG) {
        // Map recording the items that can expand the right side of the rule.
        // Key: item. Value: IDs of the transactions containing the item, among
        // the transactions containing the rule.
        Map<Integer, BitSet> mapCountRight = new HashMap<Integer, BitSet>();

        // for each transaction containing the rule
        for (int tid = ruleG.common.nextSetBit(0); tid >= 0; tid = ruleG.common
                .nextSetBit(tid + 1)) {
            // iterate over the items in this transaction
            Iterator<Integer> iter = database.getTransactions().get(tid)
                    .getItems().iterator();
            while (iter.hasNext()) {
                Integer item = iter.next();

                // if that item is not frequent, then remove it from the transaction
                if (tableItemCount[item] < minsuppRelative) {
                    iter.remove();
                    continue;
                }
                // If the item is smaller than the largest item in the right
                // side of the rule, we can stop this loop; this relies on the
                // ordering of the items inside a transaction (see Database.java).
                if (item < ruleG.maxRight) {
                    break;
                }
                // if the item is larger than the maximum item in the right side
                // and is not contained in the left side of the rule
                if (item > ruleG.maxRight
                        && !ArraysAlgos.containsLEX(ruleG.getItemset1(), item,
                                ruleG.maxLeft)) {
                    // update the tidset of the item
                    recordTid(mapCountRight, item, tid);
                }
            }
        }

        // for each item c found in the previous step, we create a rule
        // I ==> J U {c} if the support is enough
        generateRightExpansions(ruleG, mapCountRight);
    }

    /**
     * Add a transaction ID to the tidset associated with an item, creating
     * the tidset if the item has not been seen yet.
     *
     * @param tidsByItem map from item to the tidset collected so far
     * @param item the item
     * @param tid the ID of a transaction containing the item
     */
    private static void recordTid(Map<Integer, BitSet> tidsByItem, Integer item, int tid) {
        BitSet tidsItem = tidsByItem.get(item);
        if (tidsItem == null) {
            tidsItem = new BitSet();
            tidsByItem.put(item, tidsItem);
        }
        tidsItem.set(tid);
    }

    /**
     * For each item c of the given map, create the rule I ==> J U {c} from
     * the rule I ==> J if its support is enough, save it to the top-k rules
     * if its confidence is enough, and register it as a candidate for further
     * right expansions.
     *
     * @param ruleG the rule I ==> J to be expanded
     * @param mapCountRight for each item c, the IDs of the transactions
     *                      containing both the rule and c
     */
    private void generateRightExpansions(RuleG ruleG, Map<Integer, BitSet> mapCountRight) {
        // The antecedent is unchanged by a right expansion, so its support
        // is the same for every rule generated here.
        int antecedentSupport = ruleG.tids1.cardinality();

        for (Entry<Integer, BitSet> entry : mapCountRight.entrySet()) {
            BitSet tidsRule = entry.getValue();
            int ruleSupport = tidsRule.cardinality();

            // if the support is enough (minsuppRelative may have been raised
            // by a save() performed for a previous entry)
            if (ruleSupport >= minsuppRelative) {
                Integer itemC = entry.getKey();

                // create the new right part of the rule
                Integer[] newRightItemset = new Integer[ruleG.getItemset2().length + 1];
                System.arraycopy(ruleG.getItemset2(), 0, newRightItemset, 0,
                        ruleG.getItemset2().length);
                newRightItemset[ruleG.getItemset2().length] = itemC;

                // recompute maxRight
                int maxRight = (itemC >= ruleG.maxRight) ? itemC
                        : ruleG.maxRight;

                // calculate the confidence of the rule
                double confidence = ((double) ruleSupport) / antecedentSupport;

                // create the rule
                RuleG candidate = new RuleG(ruleG.getItemset1(),
                        newRightItemset, ruleSupport, ruleG.tids1, tidsRule,
                        ruleG.maxLeft, maxRight);

                // if the confidence is enough
                if (confidence >= minConfidence) {
                    // save the rule in the current top-k rules
                    save(candidate, ruleSupport);
                }
                // register the rule as a candidate for future expansion
                registerAsCandidate(false, candidate);
            }
        }
    }

    /**
     * Save a rule to the current set of top-k rules and, when the set holds
     * more than k rules, raise the internal minimum support.
     *
     * @param rule the rule to be saved
     * @param support the support of the rule
     */
    private void save(RuleG rule, int support) {
        // We add the rule to the set of top-k rules
        kRules.add(rule);

        // if the size becomes larger than k
        if (kRules.size() > k) {
            // if the support of the rule that we have added is higher than
            // the minimum support, we need to take out at least one rule
            if (support > this.minsuppRelative) {
                // we repeatedly remove the head of the queue (the rule having
                // the lowest support) until only k rules are left
                do {
                    kRules.poll();
                } while (kRules.size() > k);
            }
            // we raise the minimum support to the lowest support in the
            // set of top-k rules (k >= 1 guarantees the queue is not empty)
            this.minsuppRelative = kRules.peek().getAbsoluteSupport();
        }
    }

    /**
     * Method to scan the database to create the vertical database: the tidset
     * and the support of each item.
     *
     * @param database a database of type Database.
     */
    private void scanDatabase(Database database) {
        // for each transaction (its position is used as transaction ID)
        for (int tid = 0; tid < database.getTransactions().size(); tid++) {
            Transaction transaction = database.getTransactions().get(tid);

            // for each item in the current transaction
            for (Integer item : transaction.getItems()) {
                // update the tidset of this item (represented by a bitset),
                // creating it the first time the item is met
                BitSet tids = tableItemTids[item];
                if (tids == null) {
                    tids = new BitSet(database.tidsCount);
                    tableItemTids[item] = tids;
                }
                tids.set(tid);

                // update the support of this item
                tableItemCount[item] = tableItemCount[item] + 1;
            }
        }
    }

    /**
     * Print statistics about the last algorithm execution.
     */
    public void printStats() {
        // kRules is only created by runAlgorithm(); report 0 rules before that
        int ruleCount = (kRules == null) ? 0 : kRules.size();

        System.out.println("============= TOP-K RULES - STATS =============");
        System.out.println("Minsup : " + minsuppRelative);
        System.out.println("Rules count: " + ruleCount);
        System.out.println("Memory : " + MemoryLogger.getInstance().getMaxMemory() + " mb");
        System.out.println("Total time : " + (timeEnd - timeStart) + " ms");
        System.out
                .println("===================================================");
    }

    /**
     * Write the rules found to an output file, one rule per line, followed by
     * its support and its confidence.
     *
     * @param path the path to the output file
     * @throws IOException exception if an error while writing the file
     * @throws IllegalStateException if the algorithm has not been run yet
     */
    public void writeResultTofile(String path) throws IOException {
        if (kRules == null) {
            throw new IllegalStateException(
                    "runAlgorithm() must be called before writing the result");
        }

        // Sort the rules before printing them, because the iteration order of
        // a Java priority queue is not the priority order.
        Object[] rules = kRules.toArray();
        Arrays.sort(rules);

        // Prepare the file
        BufferedWriter writer = new BufferedWriter(new FileWriter(path));
        try {
            // for each rule
            for (Object ruleObj : rules) {
                RuleG rule = (RuleG) ruleObj;

                StringBuilder buffer = new StringBuilder();
                // write the rule
                buffer.append(rule.toString());
                // write the support
                buffer.append(" #SUP: ");
                buffer.append(rule.getAbsoluteSupport());
                // write the confidence
                buffer.append(" #CONF: ");
                buffer.append(rule.getConfidence());

                writer.write(buffer.toString());
                writer.newLine();
            }
        } finally {
            // always close the file, even if writing a rule failed
            writer.close();
        }
    }
}