package ca.pfv.spmf.algorithms.frequentpatterns.estDec;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
/**
* This is an implementation of an estTree, the tree structure used by the estDec algorithm. <br/>
* <br/>
*
* This implementation was made by Azadeh Soltani <br/>
* <br/>
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf). <br/>
* <br/>
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version. <br/>
* <br/>
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details. <br/>
* <br/>
*
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*
* @see Algo_estDec
* @see estNode
* @author Azadeh Soltani
*/
public class estTree {

    /** Initial capacity of the scratch buffer used to assemble itemsets. */
    private static final int INITIAL_BUFFER_SIZE = 500;

    double N; // |Dk|: decayed number of transactions seen so far
    double d; // decay rate
    int k; // current tid

    // number of patterns found by the last pattern mining call
    int patternCount = 0;

    // Hashtable for storing frequent patterns into memory.
    // NOTE: the keys are arrays, so they are hashed/compared by identity;
    // the table is meant to be iterated, not queried by content.
    Hashtable<int[], Double> patterns;

    // writer used if result is saved to file
    BufferedWriter writer;

    double minsup; // threshold used when reporting patterns
    double minsig; // threshold used for keeping/inserting nodes in the tree

    estNode root; // tree root

    // scratch buffer in which the itemset of the current branch is assembled;
    // it is grown on demand by insertItemset()
    int[] itemsetBuffer = new int[INITIAL_BUFFER_SIZE];

    /**
     * Constructor. The decay rate is initialized with a decay base of 2 and a
     * decay-base life of 10000; use {@link #setDecayRate(double, double)} to
     * change it.
     *
     * @param mins        minsup parameter
     * @param minSigValue minsig parameter
     */
    estTree(double mins, double minSigValue) {
        // default decay rate
        setDecayRate(2, 10000);
        N = 0;
        k = 0;
        minsup = mins;
        minsig = minSigValue;
        System.out.println(" " + minsup + " " + minsig);
        root = new estNode(); // null node
    }

    /**
     * Set the decay rate: d = b^(-1/h).
     *
     * @param b decay base
     * @param h decay-base life
     */
    void setDecayRate(double b, double h) {
        d = Math.pow(b, -1.0 / h);
    }

    /**
     * Method for updating parameters (Phase 1: parameter updating phase).
     * Decays and increments |Dk|, advances the current tid, and then updates
     * the counters of the tree nodes matched by the transaction.
     *
     * @param transaction the new transaction
     */
    void updateParams(int[] transaction) {
        // |Dk| = |Dk| x d + 1
        N = N * d + 1;
        k++;
        updateNodes(root, transaction, 0);
    }

    /**
     * Recursive method for updating the counters of itemsets that belong to a
     * given transaction (Phase 2: count updating phase).
     *
     * @param currentNode a tree node
     * @param transaction the transaction for updating
     * @param ind         position in the transaction of the next item to consider
     */
    void updateNodes(estNode currentNode, int[] transaction, int ind) {
        // stop recursion
        if (ind >= transaction.length) {
            return;
        }
        // get item at position "ind" in the transaction
        int item = transaction[ind];
        // look if there is a node for this item in the est-Tree
        estNode child = currentNode.getChildWithID(item);
        if (child != null) {
            // update count of the node
            child.update(k, 1, d);
            // only descend into the subtree if the support is enough;
            // a node below minsig is left in place here (it is not removed
            // in this phase)
            if (child.computeSupport(N) >= minsig) {
                updateNodes(child, transaction, ind + 1);
            }
        }
        // itemsets of this branch that do not contain "item"
        updateNodes(currentNode, transaction, ind + 1);
    }

    /**
     * Method for inserting a new item to the tree (itemset of size 1).
     *
     * @param it the item
     */
    void insertItem(Integer it) {
        // create the node with a count of 0
        double c = 0;
        root.children.add(new estNode(it, c, k));
    }

    /**
     * Method for inserting new possible frequent itemsets to the tree based on
     * the new transaction (Phase 3: delayed insertion phase).
     * Items that have no node under the root yet get one; the items whose
     * support is at least minsig are then used to insert larger itemsets.
     *
     * @param transaction the new transaction
     */
    void insertItemset(int[] transaction) {
        // filtered transaction: only the items that have enough support
        List<Integer> transaction2 = new ArrayList<Integer>();
        for (int it : transaction) {
            estNode child = root.getChildWithID(it);
            if (child == null)
                insertItem(it);
            else if (child.computeSupport(N) >= minsig)
                transaction2.add(it);
        }

        // a branch can be as long as the filtered transaction, so make sure
        // the scratch buffer can hold it
        ensureBufferCapacity(transaction2.size());

        // insert the itemsets of size >= 2, one 1-itemset prefix at a time
        for (int i = 0; i < transaction2.size(); ++i) {
            itemsetBuffer[0] = transaction2.get(i);
            insert_n_itemsets3(root.getChildWithID(transaction2.get(i)), transaction2, i + 1, itemsetBuffer, 1);
        }
    }

    /**
     * Make sure that the scratch buffer has room for at least the given
     * number of items. The buffer content is not preserved when it grows.
     *
     * @param required the required capacity
     */
    private void ensureBufferCapacity(int required) {
        if (itemsetBuffer.length < required) {
            itemsetBuffer = new int[Math.max(required, itemsetBuffer.length * 2)];
        }
    }

    /**
     * Sum of the geometric series 1 + d + ... + d^(terms-1).
     *
     * @param terms the number of terms
     * @return (1 - d^terms) / (1 - d), or terms when d is 1 (no decay)
     */
    private double decayedSum(int terms) {
        if (d == 1.0) {
            // the closed form would be 0/0; the sum of "terms" ones is "terms"
            return terms;
        }
        return (1 - Math.pow(d, terms)) / (1 - d);
    }

    /**
     * Method for calculating |D|k, the decayed number of transactions after k
     * transactions.
     *
     * @param k the number of transactions
     * @return (1 - d^k) / (1 - d), or k when the decay rate is 1
     */
    double getN(int k) {
        return decayedSum(k);
    }

    /**
     * Method for obtaining the count of an itemset while ignoring an item at a
     * given position. The tree is walked from the root following the remaining
     * items in order.
     *
     * @param itemset the itemset
     * @param length  the number of items of "itemset" that are used
     * @param pos     the index of the item to be ignored in the itemset
     * @return the counter of the node reached, or 0 if the path does not exist
     */
    double getCountOfItemsetWithoutItemAtPosition(int[] itemset, int length, int pos) {
        estNode currentNode = root;
        for (int i = 0; i < length; i++) {
            if (i != pos) {
                int item = itemset[i];
                estNode child = currentNode.getChildWithID(item);
                if (child == null)
                    return 0;
                currentNode = child;
            }
        }
        return currentNode.counter;
    }

    /**
     * Method for estimating the count of an n-itemset from its (n-1)-subsets.
     * The estimate is the smallest count among the subsets, capped by an
     * upper bound computed from minsig and the decay rate.
     *
     * @param itemset the itemset
     * @param length  the number of items of "itemset" that are used
     * @return the estimated count
     */
    double estimateCount(int[] itemset, int length) {
        double min = Double.MAX_VALUE;
        for (int i = 0; i < length; ++i) {
            double c = getCountOfItemsetWithoutItemAtPosition(itemset, length, i);
            if (c < min)
                min = c;
        }
        double C_upper = minsig * getN(k - (length - 1)) * Math.pow(d, length - 1)
                + decayedSum(length - 1);
        if (min > C_upper)
            min = C_upper;
        return min;
    }

    /**
     * Recursive method for inserting all itemsets corresponding to a
     * transaction (first variant: a new array is allocated for each extended
     * itemset).
     *
     * @param currentNode a tree node
     * @param transaction the transaction
     * @param ind         position in the transaction of the next item to consider
     * @param itemset     the itemset of the branch ending at the current node
     */
    public void insert_n_itemsets(estNode currentNode,
            List<Integer> transaction, int ind, int[] itemset) {
        // stop recursion
        if (ind >= transaction.size())
            return;
        Integer item = transaction.get(ind);
        // look if there is a node already in the est-Tree
        estNode child = currentNode.getChildWithID(item);
        // Itemset2 := itemset U item
        int[] itemset2 = new int[itemset.length + 1];
        System.arraycopy(itemset, 0, itemset2, 0, itemset.length);
        itemset2[itemset2.length - 1] = item;
        if (child == null) {
            double c = estimateCount(itemset2, itemset2.length);
            // if its estimated support is at least minsig insert a new node
            // with itemId=item counter=c, tid=k
            if (c / N >= minsig) {
                child = new estNode(item, c, k);
                currentNode.children.add(child);
            }
        } else {
            if (child.counter / N < minsig) {
                // if its support is less than minsig delete the node
                // (children of a node whose itemID is -1 are never deleted)
                if (currentNode.itemID != -1)
                    currentNode.children.remove(currentNode
                            .getChildIndexWithID(item));
            } else {
                // if its support is at least minsig continue the recursion
                // with this subtree
                insert_n_itemsets(child, transaction, ind + 1, itemset2);
            }
        }
        // itemsets of this branch that do not contain "item"
        insert_n_itemsets(currentNode, transaction, ind + 1, itemset);
    }

    /**
     * Recursive method for inserting all itemsets corresponding to a
     * transaction (second variant: one array is allocated per call and reused
     * for all the extensions of the current itemset).
     *
     * @param currentNode a tree node
     * @param transaction the transaction
     * @param ind         position in the transaction of the first item to consider
     * @param itemset     the itemset of the branch ending at the current node
     */
    public void insert_n_itemsets2(estNode currentNode,
            List<Integer> transaction, int ind, int[] itemset) {
        // stop recursion
        if (ind >= transaction.size())
            return;
        int[] itemset2 = new int[itemset.length + 1];
        System.arraycopy(itemset, 0, itemset2, 0, itemset.length);
        for (int i = ind; i < transaction.size(); ++i) {
            int item = transaction.get(i);
            // Itemset2 := itemset U item
            itemset2[itemset2.length - 1] = item;
            // look if there is a node already in the est-Tree
            estNode child = currentNode.getChildWithID(item);
            if (child == null) {
                double c = estimateCount(itemset2, itemset2.length);
                // if its estimated support is at least minsig insert a new
                // node with itemId=item counter=c, tid=k
                if (c / N >= minsig) {
                    child = new estNode(item, c, k);
                    currentNode.children.add(child);
                }
            } else if (child.counter / N < minsig) {
                // if its support is less than minsig delete the node
                // (children of a node whose itemID is -1 are never deleted)
                if (currentNode.itemID != -1)
                    currentNode.children.remove(currentNode
                            .getChildIndexWithID(item));
            } else {
                // if its support is at least minsig continue the recursion
                // with this subtree
                insert_n_itemsets2(child, transaction, i + 1, itemset2);
            }
        }
    }

    /**
     * Recursive method for inserting all itemsets corresponding to a
     * transaction (third variant: the itemset is assembled in place in a
     * shared buffer, so no array is allocated).
     *
     * @param currentNode a tree node
     * @param transaction the transaction
     * @param ind         position in the transaction of the first item to consider
     * @param itemset     buffer holding the itemset of the branch ending at the
     *                    current node; it must have room for index "length"
     * @param length      the number of items currently stored in "itemset"
     */
    public void insert_n_itemsets3(estNode currentNode,
            List<Integer> transaction, int ind, int[] itemset, int length) {
        // stop recursion
        if (ind >= transaction.size())
            return;
        for (int i = ind; i < transaction.size(); ++i) {
            int item = transaction.get(i);
            // itemset := itemset U item
            itemset[length] = item;
            // look if there is a node already in the est-Tree
            estNode child = currentNode.getChildWithID(item);
            if (child == null) {
                double c = estimateCount(itemset, length + 1);
                // if its estimated support is at least minsig insert a new
                // node with itemId=item counter=c, tid=k
                if (c / N >= minsig) {
                    child = new estNode(item, c, k);
                    currentNode.children.add(child);
                }
            } else if (child.counter / N < minsig) {
                // if its support is less than minsig delete the node
                // (children of a node whose itemID is -1 are never deleted)
                if (currentNode.itemID != -1)
                    currentNode.children.remove(currentNode
                            .getChildIndexWithID(item));
            } else {
                // if its support is at least minsig continue the recursion
                // with this subtree
                insert_n_itemsets3(child, transaction, i + 1, itemset, length + 1);
            }
        }
    }

    /**
     * Method for force pruning: every node of the subtree is updated to the
     * current tid and removed (with its descendants) if its support is below
     * minsig. Children of a node whose itemID is -1 are never removed.
     *
     * @param root the root of the subtree to prune
     */
    void forcePruning(estNode root) {
        for (int i = 0; i < root.children.size(); ++i) {
            estNode node = root.children.get(i);
            node.update(k, 0, d);
            if (node.computeSupport(N) < minsig && root.itemID != -1)
                // stay on the same index, which now holds the next child
                root.children.remove(i--);
            else
                forcePruning(node);
        }
    }

    /**
     * Recursive method for finding frequent patterns. A pattern is output if
     * its support is strictly greater than minsup; the subtree of a node that
     * is not output is not explored.
     *
     * @param root          root of the current subtree
     * @param pattern       buffer holding the current pattern
     * @param patternLength the number of items currently stored in "pattern"
     * @throws IOException if an error occurs while writing to the output file
     */
    void patternMining(estNode root, int[] pattern, int patternLength) throws IOException {
        // We increase the pattern length for extensions of "pattern"
        // by adding a single item
        int newPatternLength = patternLength + 1;
        // For each child
        for (estNode node : root.children) {
            node.update(k, 0, d);
            // if the estimated support is enough
            double s = node.computeSupport(N);
            if (s > minsup) {
                // Create a new pattern by concatenating
                // itemset "pattern" with the item of the node
                pattern[patternLength] = node.itemID;
                patternCount++;
                if (patterns == null) {
                    // store into file
                    writeItemset(pattern, newPatternLength, s);
                } else {
                    // else, if store into memory, we make a copy of the pattern
                    // because until now, it was stored in a temporary array
                    int[] patt2 = new int[patternLength + 1];
                    System.arraycopy(pattern, 0, patt2, 0, newPatternLength);
                    // then we add the pattern to the result set
                    patterns.put(patt2, s);
                }
                // recursive call to find larger patterns
                patternMining(node, pattern, newPatternLength);
            }
        }
    }

    /**
     * Method for finding frequent patterns and saving them into memory.
     *
     * @return the patterns found, each associated with its support
     * @throws IOException declared because the mining procedure is shared with
     *                     the save-to-file mode
     */
    Hashtable<int[], Double> patternMining_saveToMemory() throws IOException {
        // Initialize hashtable for storing frequent patterns into memory
        patterns = new Hashtable<int[], Double>();
        patternCount = 0;
        // recursive method for pattern mining
        patternMining(root, itemsetBuffer, 0);
        return patterns; // return patterns found
    }

    /**
     * Method for finding frequent patterns and saving them into a file.
     *
     * @param outputPath the output file path
     * @throws IOException if the file cannot be opened or written
     */
    void patternMining_saveToFile(String outputPath) throws IOException {
        patterns = null; // because we will not save into memory
        writer = new BufferedWriter(new FileWriter(outputPath));
        try {
            patternCount = 0;
            // recursive method for pattern mining
            patternMining(root, itemsetBuffer, 0);
        } finally {
            // always release the file, even if writing a pattern failed
            writer.close();
        }
    }

    /**
     * Method for writing a frequent pattern in the output file, as the items
     * separated by single spaces followed by "#SUP: " and the support.
     *
     * @param itemset       buffer holding the pattern to be saved
     * @param patternLength the number of items of "itemset" that are used
     * @param support       the support of the pattern
     * @throws IOException if an error occurs while writing to the output file
     */
    void writeItemset(int[] itemset, int patternLength, double support) throws IOException {
        StringBuilder buffer = new StringBuilder();
        // for each item
        for (int i = 0; i < patternLength; i++) {
            // write the item
            buffer.append(itemset[i]);
            buffer.append(" ");
        }
        // write the support
        buffer.append("#SUP: ");
        buffer.append(support);
        writer.write(buffer.toString());
        writer.newLine();
    }

    /**
     * Get the last transaction id
     *
     * @return the transaction id (integer)
     */
    int getK() {
        return k;
    }

    /**
     * Count the nodes of a subtree.
     *
     * @param currentNode the root of the subtree
     * @return the number of nodes in the subtree, including "currentNode"
     */
    int nodeCount(estNode currentNode) {
        int s = 1;
        for (estNode child : currentNode.children)
            s += nodeCount(child);
        return s;
    }
}// class