package ca.pfv.spmf.algorithms.frequentpatterns.estDec;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Hashtable;

import ca.pfv.spmf.tools.MemoryLogger;

/**
 * This is an implementation of the estDecPlus algorithm.
 * <br/>
 *
 * Se Jung Shin, Dae Su Lee, Won Suk Lee, "CP-tree: An adaptive synopsis structure
 * for compressing frequent itemsets over online data streams",
 * Information Sciences, Volume 278, 10 September 2014, Pages 559-576<br/>
 *
 * This implementation was made by Azadeh Soltani <br/>
 * <br/>
 *
 * Copyright (c) 2008-2012 Azadeh Soltani, Philippe Fournier-Viger <br/>
 * <br/>
 *
 * This file is part of the SPMF DATA MINING SOFTWARE
 * (http://www.philippe-fournier-viger.com/spmf). <br/>
 * <br/>
 *
 * SPMF is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version. <br/>
 * <br/>
 *
 * SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details. <br/>
 * <br/>
 *
 * You should have received a copy of the GNU General Public License along with
 * SPMF. If not, see <http://www.gnu.org/licenses/>.
 *
 * @see CPTreeNode
 * @see CPTree
 * @author Azadeh Soltani
 */
public class Algo_estDecPlus {

	// the Compressible Prefix tree (CP-tree)
	CPTree tree;

	// the number of transactions processed until now
	int transactionCount = 0;

	// the total time for mining (for stats)
	private long miningTime = 0;

	// the total time for transaction insertion (for stats)
	double sumTransactionInsertionTime = 0;

	/**
	 * Constructor
	 *
	 * @param mins the minimum support threshold as a value in [0,1] representing a percentage
	 * @param d the decay parameter
	 * @param minSigValue the minSig parameter
	 * @param deltaValue the delta parameter
	 * @param minMergeValue the minMerge parameter
	 */
	public Algo_estDecPlus(double mins, double d, double minSigValue,
			double deltaValue, double minMergeValue) {
		// reset the memory logger
		MemoryLogger.getInstance().reset();

		// create the CP-tree
		tree = new CPTree(d, mins, minSigValue, deltaValue, minMergeValue);

		System.out.println(" Parameters : minsup: " + mins + " minSig: " + minSigValue
				+ " minmerge: " + minMergeValue + " delta: " + deltaValue);
	}

	/**
	 * Set the decay rate "d" using the "b" and "h" parameters (see the
	 * estDec article).
	 *
	 * @param b the decay base
	 * @param h the decay-base life
	 */
	public void setDecayRate(double b, double h) {
		tree.setDecayRate(b, h);
	}
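
	// Note on setDecayRate: in the estDec model on which estDecPlus is based,
	// the decay rate d is derived from a decay base b and a decay-base life h
	// as d = b^(-1/h), with b > 1 and h >= 1. For example, b = 2 and h = 10000
	// give d = Math.pow(2, -1.0 / 10000), approximately 0.99993 (a very slow
	// decay). This note is illustrative only; the actual computation is
	// delegated to CPTree.setDecayRate().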

	/**
	 * Run the algorithm by loading the transactions from an input file.
	 *
	 * @param input the input file path
	 * @throws FileNotFoundException if error opening the input file
	 * @throws IOException if error reading the input file
	 */
	public void processTransactionFromFile(String input) throws FileNotFoundException, IOException {
		// read the input file
		BufferedReader reader = new BufferedReader(new FileReader(input));
		String line;
		// for each line (transaction)
		while ((line = reader.readLine()) != null) {
			// split the line into an array of strings (items)
			String[] lineSplited = line.split(" ");
			// convert the array of strings to an array of integers (items)
			int[] transaction = new int[lineSplited.length];
			for (int i = 0; i < lineSplited.length; i++) {
				transaction[i] = Integer.parseInt(lineSplited[i]);
			}
			// process the transaction
			processTransaction(transaction);
		}
		reader.close();
	}

	/**
	 * Run the algorithm by loading a given number of transactions from an input file.
	 *
	 * @param input the input file path
	 * @param numberOfLines the maximum number of lines (transactions) to be read
	 * @throws FileNotFoundException if error opening the input file
	 * @throws IOException if error reading the input file
	 */
	public void processTransactionFromFile(String input, int numberOfLines)
			throws FileNotFoundException, IOException {
		// read the input file
		BufferedReader reader = new BufferedReader(new FileReader(input));
		String line;
		int n = 0;
		// for each line (transaction), until the requested number of lines is reached
		while (((line = reader.readLine()) != null) && n < numberOfLines) {
			// split the line into an array of strings (items)
			String[] lineSplited = line.split(" ");
			// convert the array of strings to an array of integers (items)
			int[] transaction = new int[lineSplited.length];
			for (int i = 0; i < lineSplited.length; i++) {
				transaction[i] = Integer.parseInt(lineSplited[i]);
			}
			// process the transaction
			processTransaction(transaction);
			n++;
		}
		System.out.println(" Transactions read from file: " + n);
		reader.close();
	}
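
	// Input format expected by the two processTransactionFromFile() methods
	// above: a plain text file with one transaction per line, where each line
	// is a list of item identifiers (integers) separated by single spaces,
	// for example:
	//   1 3 4
	//   2 3 5
	//   1 2 3 5
	// Reading such a file is equivalent to calling, for each line,
	// processTransaction(new int[] { 1, 3, 4 }), and so on.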

	/**
	 * Mine recent frequent itemsets from the current tree and save the result
	 * to a file.
	 *
	 * @param outputPath the output file path
	 * @throws IOException if error writing to the output file
	 */
	public void performMining_saveResultToFile(String outputPath) throws IOException {
		// record the start time
		long startMiningTimeStamp = System.currentTimeMillis();

		// perform the mining
		tree.patternMining_saveToFile(outputPath);

		// record the memory usage and the end time
		System.gc();
		MemoryLogger.getInstance().checkMemory();
		miningTime = System.currentTimeMillis() - startMiningTimeStamp;
	}

	/**
	 * Mine recent frequent itemsets from the current tree and save the result
	 * to memory.
	 *
	 * @return a Hashtable mapping each frequent itemset to its support value
	 * @throws IOException if an error occurs while mining
	 */
	public Hashtable<int[], Double> performMining_saveResultToMemory() throws IOException {
		// check the memory usage
		System.gc();
		MemoryLogger.getInstance().checkMemory();

		// record the start time
		long startMiningTimeStamp = System.currentTimeMillis();

		// perform the mining
		Hashtable<int[], Double> patterns = tree.patternMining_saveToMemory();

		// record the end time
		miningTime = System.currentTimeMillis() - startMiningTimeStamp;

		// record the memory usage
		System.gc();
		MemoryLogger.getInstance().checkMemory();

		return patterns;
	}

	/**
	 * Process a transaction (add it to the tree and update the itemsets).
	 *
	 * @param transaction a transaction as an array of integers (items)
	 */
	public void processTransaction(int[] transaction) {
		// record the start time
		double startCTimestamp = System.currentTimeMillis();

		// phase 1) parameter updating
		tree.updateParams();

		// phase 2) node restructuring
		for (int i = 0; i < tree.root.children.size(); ++i) {
			tree.traverse(tree.root.children.get(i), tree.root, (short) -1, transaction);
		}

		// phase 3) itemset insertion
		tree.insertItemset(transaction);

		transactionCount++;

		// force the pruning of insignificant nodes; this is done after each
		// transaction here (it could alternatively be done only periodically,
		// e.g. every 1000 transactions)
		tree.forcePruning(tree.root);

		sumTransactionInsertionTime += (System.currentTimeMillis() - startCTimestamp);
	}

	/**
	 * Print the CP-tree to the console for debugging purposes.
	 */
	public void showTreeForDebugging() {
		System.out.println(tree.toString());
	}

	/**
	 * Print statistics about the algorithm execution to the console.
	 */
	public void printStats() {
		System.out.println("=========== estDecPlus - STATS ===========");
		System.out.println(" Number of nodes : " + tree.nodeCount(tree.root));
		System.out.println(" Frequent itemsets count : " + tree.patternCount);
		System.out.println(" Maximum memory usage : "
				+ MemoryLogger.getInstance().getMaxMemory() + " mb");
		System.out.println(" Number of transactions: " + transactionCount);
		System.out.println(" Total insertion time ~ " + sumTransactionInsertionTime + " ms");
		System.out.println(" Insertion time per transaction ~ "
				+ sumTransactionInsertionTime / (double) transactionCount + " ms");
		System.out.println(" Mining time ~ " + miningTime + " ms");
		System.out.println("============================================");
	}
}
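
/*
 * A minimal usage sketch for the class above (not part of the original SPMF
 * code): the file names "transactions.txt" and "output.txt" and all parameter
 * values are illustrative assumptions only, not values recommended by the
 * authors of the algorithm.
 */
class ExampleRunEstDecPlus {

	public static void main(String[] args) throws IOException {
		// create the algorithm with minsup = 0.1, decay d = 0.99,
		// minSig = 0.0004, delta = 0.001 and minMerge = 0.01 (illustrative values)
		Algo_estDecPlus algo = new Algo_estDecPlus(0.1, 0.99, 0.0004, 0.001, 0.01);

		// optionally, derive the decay rate from a decay base b = 2
		// and a decay-base life h = 10000 instead of using d directly
		algo.setDecayRate(2, 10000);

		// process the transactions of the input file, one transaction per line
		algo.processTransactionFromFile("transactions.txt");

		// mine the recent frequent itemsets and write them to the output file
		algo.performMining_saveResultToFile("output.txt");

		// print statistics about the execution
		algo.printStats();
	}
}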