package ca.pfv.spmf.algorithms.frequentpatterns.upgrowth_ihup;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * This is an implementation of the UPGrowth algorithm.<br><br>
 *
 * Copyright (c) 2014 Prashant Barhate <br><br>
 *
 * The UP-Growth algorithm was proposed in this paper: <br><br>
 *
 * V. S. Tseng, C.-W. Wu, B.E. Shie, P. S. Yu:
 * UP-Growth: an efficient algorithm for high utility itemset mining.
 * KDD 2010: 253-262 <br><br>
 *
 * This file is part of the SPMF DATA MINING SOFTWARE
 * (http://www.philippe-fournier-viger.com/spmf). <br><br>
 *
 * SPMF is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version. SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details. <br><br>
 *
 * You should have received a copy of the GNU General Public License along with SPMF.
 * If not, see {@literal <http://www.gnu.org/licenses/>}. <br><br>
 *
 * @author Prashant Barhate
 */
public class AlgoUPGrowth {

    // ---------- statistics about the latest execution ----------
    private double maxMemory = 0; // the maximum memory usage (MB)
    private long startTimestamp = 0; // the time the algorithm started
    private long endTimestamp = 0; // the time the algorithm terminated
    private int huiCount = 0; // the number of HUIs generated
    private int phuisCount; // the number of PHUIs generated

    // map of the minimum utility observed for each item, used by the
    // DLU (Decreasing Local Unpromising items) strategy
    private Map<Integer, Integer> mapMinimumItemUtility;

    // writer to write the output file
    private BufferedWriter writer = null;

    // Structure to store the potential HUIs
    private final List<Itemset> phuis = new ArrayList<Itemset>();

    // To activate debug mode
    private final boolean DEBUG = false;

    /**
     * Runs the algorithm: three passes over the input file followed by the
     * output of every itemset whose exact utility reaches the threshold.
     * <p>
     * Each transaction line is expected to look like
     * {@code items:transactionUtility:itemUtilities}, where items and item
     * utilities are integers separated by single spaces. Empty lines and
     * lines starting with '#', '%' or '@' are ignored.
     *
     * @param input path to an input file
     * @param output path for writing the output file
     * @param minUtility the minimum utility threshold
     * @throws IOException if the input cannot be read, a transaction line is
     *         malformed, or the output cannot be written
     */
    public void runAlgorithm(String input, String output, int minUtility)
            throws IOException {
        // Reset everything that belongs to a single run so that the same
        // instance can be used several times with correct statistics.
        maxMemory = 0;
        huiCount = 0;
        phuisCount = 0;
        phuis.clear();
        startTimestamp = System.currentTimeMillis();

        writer = new BufferedWriter(new FileWriter(output));
        boolean succeeded = false;
        try {
            // First database scan: TWU of each item.
            final Map<Integer, Integer> mapItemToTWU = computeItemTWU(input);

            // Second database scan: revised transactions, global UP-Tree and
            // minimum item utilities.
            UPTree tree = buildGlobalTree(input, minUtility, mapItemToTWU);

            // check the memory usage
            checkMemory();

            if (DEBUG) {
                System.out.println("GLOBAL TREE" + "\nmapITEM-TWU : " + mapItemToTWU
                        + "\nmapITEM-MINUTIL : " + mapMinimumItemUtility + "\n" + tree.toString());
            }

            // Mine tree with UPGrowth with 2 strategies DLU and DLN
            upgrowth(tree, minUtility, new int[0]);

            // check the memory usage again
            checkMemory();

            // save the number of candidates found
            phuisCount = phuis.size();

            // Sort the PHUIs by increasing size; the third scan relies on
            // this order to stop early for short transactions.
            Collections.sort(phuis, new Comparator<Itemset>() {
                @Override
                public int compare(Itemset arg0, Itemset arg1) {
                    return compareInts(arg0.size(), arg1.size());
                }
            });

            // Third database scan: exact utility of each PHUI.
            computeExactUtilities(input, minUtility, mapItemToTWU);

            // OUTPUT ALL HUIs
            for (Itemset itemset : phuis) {
                if (itemset.getExactUtility() >= minUtility) {
                    writeOut(itemset);
                }
            }

            // check the memory usage again
            checkMemory();
            // record end time
            endTimestamp = System.currentTimeMillis();
            succeeded = true;
        } finally {
            // Release some memory
            phuis.clear();
            mapMinimumItemUtility = null;
            // CLOSE OUTPUT FILE
            if (succeeded) {
                // let a failing flush/close reach the caller
                writer.close();
            } else {
                try {
                    writer.close();
                } catch (IOException ignored) {
                    // an earlier exception is already propagating; keep it
                }
            }
        }
    }

    /**
     * First database scan: computes the TWU (transaction-weighted utility)
     * of each item, i.e. the sum of the utilities of the transactions in
     * which the item appears.
     *
     * @param input path to the input file
     * @return a map from item to its TWU
     * @throws IOException if the file cannot be read or a line is malformed
     */
    private Map<Integer, Integer> computeItemTWU(String input) throws IOException {
        Map<Integer, Integer> mapItemToTWU = new HashMap<Integer, Integer>();
        BufferedReader reader = openInput(input);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (isIgnoredLine(line)) {
                    continue;
                }
                try {
                    // split the transaction according to the : separator
                    String[] split = line.split(":");
                    // the first part is the list of items
                    String[] items = split[0].split(" ");
                    // the second part is the transaction utility
                    int transactionUtility = Integer.parseInt(split[1]);
                    // add the transaction utility to the TWU of each item
                    for (int i = 0; i < items.length; i++) {
                        Integer item = Integer.valueOf(items[i]);
                        Integer twu = mapItemToTWU.get(item);
                        mapItemToTWU.put(item,
                                (twu == null) ? transactionUtility : twu + transactionUtility);
                    }
                } catch (NumberFormatException e) {
                    throw malformedTransaction(line, e);
                } catch (ArrayIndexOutOfBoundsException e) {
                    throw malformedTransaction(line, e);
                }
            }
        } finally {
            reader.close();
        }
        return mapItemToTWU;
    }

    /**
     * Second database scan: removes the items whose TWU is below the
     * threshold from each transaction, inserts the revised transactions
     * (sorted by descending TWU) in a new global UP-Tree and records the
     * minimum utility of each kept item in {@link #mapMinimumItemUtility}.
     *
     * @param input path to the input file
     * @param minUtility the minimum utility threshold
     * @param mapItemToTWU the TWU of each item, from the first scan
     * @return the global UP-Tree with its header list created
     * @throws IOException if the file cannot be read or a line is malformed
     */
    private UPTree buildGlobalTree(String input, int minUtility,
            final Map<Integer, Integer> mapItemToTWU) throws IOException {
        mapMinimumItemUtility = new HashMap<Integer, Integer>();
        UPTree tree = new UPTree();

        // descending order of TWU, created once for all transactions
        Comparator<Item> byDescendingTWU = new Comparator<Item>() {
            @Override
            public int compare(Item o1, Item o2) {
                return compareItemsDesc(o1.name, o2.name, mapItemToTWU);
            }
        };

        BufferedReader reader = openInput(input);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (isIgnoredLine(line)) {
                    continue;
                }
                List<Item> revisedTransaction = new ArrayList<Item>();
                // sum of the utilities of the items kept in the transaction
                int remainingUtility = 0;
                try {
                    String[] split = line.split(":");
                    // the list of items
                    String[] items = split[0].split(" ");
                    // the utility of each item in that transaction
                    String[] utilityValues = split[2].split(" ");
                    for (int i = 0; i < items.length; i++) {
                        int itm = Integer.parseInt(items[i]);
                        int utility = Integer.parseInt(utilityValues[i]);
                        if (mapItemToTWU.get(itm) >= minUtility) {
                            revisedTransaction.add(new Item(itm, utility));
                            remainingUtility += utility;
                            // keep the smallest utility seen so far for the item
                            Integer minItemUtil = mapMinimumItemUtility.get(itm);
                            if ((minItemUtil == null) || (minItemUtil >= utility)) {
                                mapMinimumItemUtility.put(itm, utility);
                            }
                        }
                    }
                } catch (NumberFormatException e) {
                    throw malformedTransaction(line, e);
                } catch (ArrayIndexOutOfBoundsException e) {
                    throw malformedTransaction(line, e);
                }
                // revised transaction in descending order of TWU
                Collections.sort(revisedTransaction, byDescendingTWU);
                // add transaction to the global UP-Tree
                tree.addTransaction(revisedTransaction, remainingUtility);
            }
        } finally {
            reader.close();
        }
        // We create the header table for the global UP-Tree
        tree.createHeaderList(mapItemToTWU);
        return tree;
    }

    /**
     * Third database scan: adds to each PHUI the utility it has in every
     * transaction that contains it. {@link #phuis} must already be sorted
     * by increasing size.
     *
     * @param input path to the input file
     * @param minUtility the minimum utility threshold
     * @param mapItemToTWU the TWU of each item, from the first scan
     * @throws IOException if the file cannot be read or a line is malformed
     */
    private void computeExactUtilities(String input, int minUtility,
            Map<Integer, Integer> mapItemToTWU) throws IOException {
        // lexical (ascending item) order, the order the PHUIs are stored in
        Comparator<Item> byName = new Comparator<Item>() {
            @Override
            public int compare(Item o1, Item o2) {
                return compareInts(o1.name, o2.name);
            }
        };

        BufferedReader reader = openInput(input);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (isIgnoredLine(line)) {
                    continue;
                }
                List<Item> revisedTransaction = new ArrayList<Item>();
                try {
                    String[] split = line.split(":");
                    String[] items = split[0].split(" ");
                    String[] utilityValues = split[2].split(" ");
                    for (int i = 0; i < items.length; i++) {
                        int item = Integer.parseInt(items[i]);
                        int utility = Integer.parseInt(utilityValues[i]);
                        if (mapItemToTWU.get(item) >= minUtility) {
                            revisedTransaction.add(new Item(item, utility));
                        }
                    }
                } catch (NumberFormatException e) {
                    throw malformedTransaction(line, e);
                } catch (ArrayIndexOutOfBoundsException e) {
                    throw malformedTransaction(line, e);
                }
                // sort the transaction by lexical order, like the PHUIs
                Collections.sort(revisedTransaction, byName);

                for (Itemset itemset : phuis) {
                    // OPTIMIZATION: an itemset larger than the transaction
                    // cannot be included in it, and neither can the following
                    // ones since they are ordered by increasing size.
                    if (itemset.size() > revisedTransaction.size()) {
                        break;
                    }
                    updateExactUtility(revisedTransaction, itemset);
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Opens the input file for line-by-line reading.
     */
    private static BufferedReader openInput(String input) throws IOException {
        return new BufferedReader(new InputStreamReader(
                new FileInputStream(new File(input))));
    }

    /**
     * Tells whether a line is empty, a comment or a kind of metadata
     * (first character '#', '%' or '@') and must therefore be skipped.
     */
    private static boolean isIgnoredLine(String line) {
        if (line.isEmpty()) {
            return true;
        }
        char first = line.charAt(0);
        return first == '#' || first == '%' || first == '@';
    }

    /**
     * Builds the exception reported for a transaction line that cannot be parsed.
     */
    private static IOException malformedTransaction(String line, RuntimeException cause) {
        return new IOException("Malformed transaction line: \"" + line + "\"", cause);
    }

    /**
     * Three-way comparison of two ints that, unlike a subtraction, cannot overflow.
     */
    private static int compareInts(int a, int b) {
        return (a < b) ? -1 : ((a == b) ? 0 : 1);
    }

    /**
     * Compares two items by descending value in the given map, ties being
     * broken by ascending item.
     *
     * @param item1 the first item
     * @param item2 the second item
     * @param mapItemEstimatedUtility the value (e.g. TWU) of each item
     * @return a negative, zero or positive number as item1 comes before,
     *         with or after item2
     */
    private int compareItemsDesc(int item1, int item2, Map<Integer, Integer> mapItemEstimatedUtility) {
        int compare = compareInts(mapItemEstimatedUtility.get(item2),
                mapItemEstimatedUtility.get(item1));
        // if the same, use the lexical order otherwise use the map value
        return (compare == 0) ? compareInts(item1, item2) : compare;
    }

    /**
     * Mine UP Tree recursively
     *
     * @param tree UPTree to mine
     * @param minUtility minimum utility threshold
     * @param prefix the prefix itemset
     */
    private void upgrowth(UPTree tree, int minUtility, int[] prefix) throws IOException {
        // For each item in the header table list of the tree in reverse order.
        for (int i = tree.headerList.size() - 1; i >= 0; i--) {
            Integer item = tree.headerList.get(i);

            // ===== CALCULATE SUM OF ITEM NODE UTILITY =====
            // follow the node links of the item and add up the node utilities
            int pathCPBUtility = 0;
            for (UPNode node = tree.mapItemNodes.get(item); node != null; node = node.nodeLink) {
                pathCPBUtility += node.nodeUtility;
            }

            // the item extends the prefix into a PHUI (potential high utility
            // itemset) only if that sum reaches minUtility
            if (pathCPBUtility < minUtility) {
                continue;
            }

            // Create the itemset by appending the item to the current prefix
            int[] newPrefix = new int[prefix.length + 1];
            System.arraycopy(prefix, 0, newPrefix, 0, prefix.length);
            newPrefix[prefix.length] = item;
            // Save the PHUI (this sorts newPrefix in place)
            savePHUI(newPrefix);

            // ===== CREATE THE LOCAL TREE =====
            // Built only now: it is not needed for items that are not PHUIs.
            UPTree localTree = createLocalTree(minUtility, tree, item);
            // NEXT LINE IS FOR DEBUGGING:
            if (DEBUG) {
                System.out.println("LOCAL TREE for projection by:" + ((prefix== null)?"": Arrays.toString(prefix)+ ",") + item +
                        "\n" + localTree.toString());
            }

            // Make a recursive call to the UPGrowth procedure to explore
            // other itemsets that are extensions of the current PHUI
            if (localTree.headerList.size() > 0) {
                upgrowth(localTree, minUtility, newPrefix);
            }
        }
    }

    /**
     * Builds the local (conditional) UP-Tree of an item from its prefix
     * paths in the given tree, applying the DLU strategy to the utility of
     * each path.
     *
     * @param minUtility the minimum utility threshold
     * @param tree the tree containing the item
     * @param item the item to project on
     * @return the local tree, with its header list created
     */
    private UPTree createLocalTree(int minUtility, UPTree tree, Integer item) {
        // === Construct conditional pattern base ===
        // It is a subdatabase which consists of the set of prefix paths
        List<List<UPNode>> prefixPaths = new ArrayList<List<UPNode>>();
        // map to store path utility of local items in CPB
        final Map<Integer, Integer> itemPathUtility = new HashMap<Integer, Integer>();

        for (UPNode path = tree.mapItemNodes.get(item); path != null; path = path.nodeLink) {
            // skip the node if its path is just the root node
            if (path.parent.itemID == -1) {
                continue;
            }
            // get the node utility of the item
            int nodeutility = path.nodeUtility;
            List<UPNode> prefixPath = new ArrayList<UPNode>();
            // NOTE: the node itself is added only to keep its utility and
            // count; it is not really part of the prefix path
            prefixPath.add(path);
            // add all the ancestors of this node, up to the root
            for (UPNode ancestor = path.parent; ancestor.itemID != -1; ancestor = ancestor.parent) {
                prefixPath.add(ancestor);
                // pu - path utility
                Integer pu = itemPathUtility.get(ancestor.itemID);
                itemPathUtility.put(ancestor.itemID,
                        (pu == null) ? nodeutility : pu + nodeutility);
            }
            prefixPaths.add(prefixPath);
        }

        if (DEBUG) {
            System.out.println("\n\n\nPREFIXPATHS:");
            for (List<UPNode> prefixPath : prefixPaths) {
                for (UPNode node : prefixPath) {
                    System.out.println(" " +node);
                }
                System.out.println(" --");
            }
        }

        // descending order of path utility, created once for all paths
        Comparator<Integer> byDescendingPathUtility = new Comparator<Integer>() {
            @Override
            public int compare(Integer o1, Integer o2) {
                return compareItemsDesc(o1, o2, itemPathUtility);
            }
        };

        UPTree localTree = new UPTree();
        for (List<UPNode> prefixPath : prefixPaths) {
            // the count and utility of the prefix path are those of its
            // first node
            UPNode first = prefixPath.get(0);
            int pathCount = first.count;
            int pathUtility = first.nodeUtility;

            List<Integer> localPath = new ArrayList<Integer>();
            // for each node in the prefix path, except the first one
            for (int j = 1; j < prefixPath.size(); j++) {
                UPNode node = prefixPath.get(j);
                // Here is DLU Strategy #################
                if (itemPathUtility.get(node.itemID) >= minUtility) {
                    // the local item is promising: keep it
                    localPath.add(node.itemID);
                } else {
                    // the local item is unpromising: remove its minimum
                    // utility, once per occurrence of the path
                    Integer minItemUtility = mapMinimumItemUtility.get(node.itemID);
                    pathUtility -= minItemUtility * pathCount;
                }
            }
            if (DEBUG) {
                System.out.println(" path utility after DGU,DGN,DLU: " + pathUtility);
            }

            // we reorganize local path in descending order of path utility
            Collections.sort(localPath, byDescendingPathUtility);

            // create tree for conditional pattern base
            localTree.addLocalTransaction(localPath, pathUtility,
                    mapMinimumItemUtility, pathCount);
        }

        // We create the local header table for the tree item - CPB
        localTree.createHeaderList(itemPathUtility);
        return localTree;
    }

    /**
     * Save a PHUI in the list of PHUIs.
     * The given array is sorted in place by lexical order.
     *
     * @param itemset the itemset
     */
    private void savePHUI(int[] itemset) {
        // Sort first, so that the stored itemset is in lexical order whether
        // or not Itemset keeps a reference to this very array. The order is
        // needed to calculate the exact utility later on.
        Arrays.sort(itemset);
        phuis.add(new Itemset(itemset));
    }

    /**
     * Update the exact utility of an itemset given a transaction: if the
     * transaction contains every item of the itemset, the sum of their
     * utilities in the transaction is added to the itemset; otherwise the
     * itemset is left unchanged.
     * It assumes that both the itemset and the transaction are sorted
     * according to the lexical order.
     *
     * @param transaction the transaction
     * @param itemset the itemset
     */
    public void updateExactUtility(List<Item> transaction, Itemset itemset) {
        int utility = 0;
        final int transactionSize = transaction.size();
        // position in the transaction; it never moves backwards because
        // both sequences are sorted
        int j = 0;
        for (int i = 0; i < itemset.size(); i++) {
            int wanted = itemset.get(i);
            // skip the transaction items smaller than the wanted one
            while (j < transactionSize && transaction.get(j).name < wanted) {
                j++;
            }
            // the wanted item is not in the transaction
            if (j == transactionSize || transaction.get(j).name != wanted) {
                return;
            }
            utility += transaction.get(j).utility;
            j++;
        }
        // all items were found: increase utility.
        itemset.increaseUtility(utility);
    }

    /**
     * Write a HUI to the output file, as its items separated by spaces
     * followed by "#UTIL: " and its exact utility.
     *
     * @param HUI the high utility itemset
     * @throws IOException if the output file cannot be written
     */
    private void writeOut(Itemset HUI) throws IOException {
        huiCount++; // increase the number of high utility itemsets found

        StringBuilder buffer = new StringBuilder();
        // Append each item
        for (int i = 0; i < HUI.size(); i++) {
            buffer.append(HUI.get(i));
            buffer.append(' ');
        }
        buffer.append("#UTIL: ");
        buffer.append(HUI.getExactUtility());

        // write to file
        writer.write(buffer.toString());
        writer.newLine();
    }

    /**
     * Method to check the memory usage and keep the maximum memory usage.
     */
    private void checkMemory() {
        // get the current memory usage, in megabytes
        Runtime runtime = Runtime.getRuntime();
        double currentMemory = (runtime.totalMemory() - runtime.freeMemory()) / 1024d / 1024d;
        // keep the highest value seen until now
        if (currentMemory > maxMemory) {
            maxMemory = currentMemory;
        }
    }

    /**
     * Print statistics about the latest execution to System.out.
     */
    public void printStats() {
        System.out.println("============= UP-GROWTH ALGORITHM - STATS =============");
        System.out.println(" PHUIs (candidates) count: " + phuisCount);
        System.out.println(" Total time ~ " + (endTimestamp - startTimestamp) + " ms");
        System.out.println(" Memory ~ " + maxMemory + " MB");
        System.out.println(" HUIs count : " + huiCount);
        System.out.println("===================================================");
    }
}