package ca.pfv.spmf.algorithms.associationrules.fhsar;
/* This file is copyright (c) 2008-2012 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import ca.pfv.spmf.datastructures.redblacktree.RedBlackTree;
/**
* An implementation of the FHSAR algorithm for hiding sensitive association rules in a
* transaction database. This algorithm is described in the paper:
* <br/><br/>
* Weng, C. C., Chen, S. T., & Lo, H. C. (2008, November). A Novel Algorithm for Completely Hiding Sensitive Association Rules. In Intelligent Systems Design and Applications, 2008. ISDA'08. Eighth International Conference on (Vol. 3, pp. 202-208). IEEE.
*
* @author Philippe Fournier-Viger
*/
public class AlgoFHSAR {

    // variables for statistics
    /** the number of transactions in the last database read */
    int tidcount = 0;
    /** the start time of the last execution */
    long startTimestamp = 0;
    /** the end time of the last execution */
    long endTimeStamp = 0;

    /** the minimum support expressed as a number of transactions (ceil(minsup * tidcount)) */
    private int minSuppRelative;

    /**
     * Run the FHSAR algorithm.
     * <p>
     * Stage 1 reads the database, counts the support of each sensitive rule and of its
     * antecedent, and stores every transaction that contains at least one sensitive rule
     * in a tree ordered by its weight wi. Stage 2 repeatedly takes the transaction with
     * the highest wi and removes one item from it until every sensitive rule is below
     * minsup or minconf (or no candidate transaction is left). The modified database is
     * then written to the output file, one transaction per line, items sorted in
     * ascending order and separated by single spaces.
     *
     * @param input the file path to a transaction database
     * @param inputSAR the file path to a set of sensitive association rules to be hidden
     * @param output the output file path for writing the modified transaction database
     * @param minsup the minimum support threshold (a ratio of the number of transactions)
     * @param minconf the minimum confidence threshold
     * @throws IOException if an error occurs while reading or writing a file
     * @throws IllegalArgumentException if a rule line is malformed or an item is not an integer
     */
    public void runAlgorithm(String input, String inputSAR, String output,
            double minsup, double minconf) throws IOException {
        // record the start time
        startTimestamp = System.currentTimeMillis();

        // the sensitive rules
        List<Rule> sensitiveRules = new ArrayList<Rule>();
        // the transactions from the database
        List<Set<Integer>> transactions = new ArrayList<Set<Integer>>();
        // a red-black tree to store the transactions ordered by their wi value
        RedBlackTree<Transaction> PWT = new RedBlackTree<Transaction>();

        // STEP 1 : read the sensitive association rules from the file into memory
        readSensitiveRulesIntoMemory(inputSAR, sensitiveRules);

        // STAGE 1 of the FHSAR algorithm:
        // read the database into memory and compute wi for each transaction
        // that contains at least one sensitive rule.
        tidcount = 0;
        BufferedReader reader = new BufferedReader(new FileReader(input));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                // skip empty lines, comments and other kinds of metadata
                if (isSkippableLine(line)) {
                    continue;
                }

                // Parse the WHOLE transaction first. The items must not be collected
                // while matching rules, otherwise an early match would drop the
                // remaining items of the line.
                Set<Integer> transaction = parseTransaction(line);

                // the sensitive rules fully contained in this transaction
                List<Rule> rulesContained = new ArrayList<Rule>();
                for (Rule rule : sensitiveRules) {
                    if (transaction.containsAll(rule.leftSide)) {
                        // the antecedent appears in this transaction
                        rule.leftSideCount++;
                        if (transaction.containsAll(rule.rightSide)) {
                            // the whole rule appears in this transaction
                            rule.count++;
                            rulesContained.add(rule);
                        }
                    }
                }

                // if at least one rule is supported by this transaction, calculate
                // its wi and insert it into PWT
                if (!rulesContained.isEmpty()) {
                    // Key = item, Value = number of contained rules using this item
                    Map<Integer, Integer> mapItemCount = new HashMap<Integer, Integer>();
                    for (Rule rule : rulesContained) {
                        countRuleItems(rule, mapItemCount);
                    }
                    // the item with the max count (max(|Rk|) in the paper)
                    int[] best = findMaxItem(mapItemCount);
                    double wi = computeWeight(best[1], transaction.size());
                    PWT.add(new Transaction(transaction, wi, best[0]));
                }

                tidcount++;
                transactions.add(transaction);
            }
        } finally {
            // always release the input file, even if parsing failed
            reader.close();
        }

        // transform the minsup ratio into a number of transactions
        minSuppRelative = (int) Math.ceil(minsup * tidcount);

        // Rules that are already below the thresholds need no modification at all,
        // so discard them before touching any transaction.
        removeHiddenRules(sensitiveRules, minconf);

        // STAGE 2 of the FHSAR algorithm
        // This part is not well-explained in the paper so it might not be exactly like
        // what the authors did. But the main idea is the same: delete items until the
        // sensitive association rules fall below the thresholds.
        while (!sensitiveRules.isEmpty()) {
            // take the transaction that has the highest wi from PWT
            Transaction td = PWT.popMaximum();
            if (td == null) {
                // NOTE(review): assumes popMaximum() returns null when the tree is
                // empty - confirm against RedBlackTree. No transaction supporting a
                // sensitive rule is left, so nothing more can be removed.
                break;
            }

            // the item with the maximum |Rk| for this transaction
            // (it is not chosen randomly in this implementation)
            int maxItem = td.maxItem;

            // item counts over the rules that will still be contained in td
            // after maxItem has been removed
            Map<Integer, Integer> mapItemCount = new HashMap<Integer, Integer>();
            boolean atLeastOneRule = false;

            for (Rule rule : sensitiveRules) {
                if (!td.items.containsAll(rule.leftSide)) {
                    // td supports neither the antecedent nor the rule: nothing changes
                    continue;
                }
                boolean removedFromLeft = rule.leftSide.contains(maxItem);
                if (!td.items.containsAll(rule.rightSide)) {
                    // td supports only the antecedent; removing one of its items
                    // still lowers the antecedent support, which must be tracked to
                    // keep the confidence estimate exact
                    if (removedFromLeft) {
                        rule.leftSideCount--;
                    }
                    continue;
                }
                // td contains the whole rule
                if (removedFromLeft) {
                    rule.count--;
                    rule.leftSideCount--;
                } else if (rule.rightSide.contains(maxItem)) {
                    rule.count--;
                } else {
                    // the rule is untouched by the removal and stays in td
                    atLeastOneRule = true;
                    countRuleItems(rule, mapItemCount);
                }
            }

            // remove the item "maxItem" from the transaction
            td.items.remove(maxItem);

            // remove all rules that have fallen below the thresholds
            removeHiddenRules(sensitiveRules, minconf);

            // if at least one sensitive rule is STILL contained in this transaction,
            // update its wi / maxItem and put it back into PWT
            if (atLeastOneRule) {
                int[] best = findMaxItem(mapItemCount);
                td.wi = computeWeight(best[1], td.items.size());
                td.maxItem = best[0];
                PWT.add(td);
            }
        }

        // Now, write the transformed transaction database to disk
        BufferedWriter writer = new BufferedWriter(new FileWriter(output));
        try {
            for (Set<Integer> transaction : transactions) {
                // sort the items in ascending numeric order because the set is unordered
                List<Integer> sorted = new ArrayList<Integer>(transaction);
                Collections.sort(sorted);
                for (int i = 0; i < sorted.size(); i++) {
                    // items are separated by a single space
                    if (i > 0) {
                        writer.write(" " + sorted.get(i));
                    } else {
                        writer.write("" + sorted.get(i));
                    }
                }
                writer.newLine();
            }
        } finally {
            // always release the output file, even if writing failed
            writer.close();
        }

        // save the end time
        endTimeStamp = System.currentTimeMillis();
    }

    /**
     * Check if a line of an input file should be ignored.
     * @param line a line read from an input file
     * @return true if the line is empty or starts with '#', '%' or '@'
     */
    private static boolean isSkippableLine(String line) {
        if (line.isEmpty()) {
            return true;
        }
        char first = line.charAt(0);
        return first == '#' || first == '%' || first == '@';
    }

    /**
     * Convert a line of the database into a set of items.
     * @param line a line containing integer items separated by spaces
     * @return the set of items of the transaction
     */
    private static Set<Integer> parseTransaction(String line) {
        String[] tokens = line.split(" ");
        Set<Integer> items = new HashSet<Integer>(tokens.length);
        for (String token : tokens) {
            // consecutive or leading spaces produce empty tokens: ignore them
            if (token.isEmpty()) {
                continue;
            }
            items.add(Integer.parseInt(token));
        }
        return items;
    }

    /**
     * Increase by one the count of each item of a rule (antecedent and consequent).
     * @param rule the rule
     * @param mapItemCount a map where Key = item, Value = count, updated in place
     */
    private static void countRuleItems(Rule rule, Map<Integer, Integer> mapItemCount) {
        for (Integer item : rule.leftSide) {
            Integer count = mapItemCount.get(item);
            mapItemCount.put(item, count == null ? 1 : count + 1);
        }
        for (Integer item : rule.rightSide) {
            Integer count = mapItemCount.get(item);
            mapItemCount.put(item, count == null ? 1 : count + 1);
        }
    }

    /**
     * Find the item having the highest count.
     * @param mapItemCount a map where Key = item, Value = count
     * @return an array {item, count}, or {-1, -1} if the map is empty
     */
    private static int[] findMaxItem(Map<Integer, Integer> mapItemCount) {
        int maxCount = -1;
        int maxItem = -1;
        for (Entry<Integer, Integer> entry : mapItemCount.entrySet()) {
            if (entry.getValue() > maxCount) {
                maxItem = entry.getKey();
                maxCount = entry.getValue();
            }
        }
        return new int[] { maxItem, maxCount };
    }

    /**
     * Calculate the weight wi of a transaction.
     * @param maxCount the maximum item count (MIC) of the transaction
     * @param transactionSize the number of items in the transaction
     * @return maxCount divided by 2^(transactionSize - 1)
     */
    private static double computeWeight(int maxCount, int transactionSize) {
        return maxCount / Math.pow(2, transactionSize - 1);
    }

    /**
     * Remove from a list the rules that do not satisfy the thresholds anymore, that is
     * rules having a support lower than minSuppRelative or a confidence lower than minconf.
     * @param rules the list of sensitive rules, updated in place
     * @param minconf the minimum confidence threshold
     */
    private void removeHiddenRules(List<Rule> rules, double minconf) {
        Iterator<Rule> iter = rules.iterator();
        while (iter.hasNext()) {
            Rule rule = iter.next();
            if (rule.count < minSuppRelative
                    || (rule.count / (double) rule.leftSideCount) < minconf) {
                iter.remove();
            }
        }
    }

    /**
     * This method reads the sensitive rules into memory.
     * Each rule should have the format "4 ==> 5" in the file (items separated by spaces
     * on each side of the arrow). Empty lines and lines starting with '#', '%' or '@'
     * are ignored.
     * @param inputSAR the file path to a set of sensitive association rules
     * @param rules a structure for storing the sensitive association rules
     * @throws IOException if error reading the file
     * @throws IllegalArgumentException if a line is not a valid rule
     */
    private void readSensitiveRulesIntoMemory(String inputSAR, List<Rule> rules)
            throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(inputSAR));
        try {
            String line;
            // for each line (rule) until the end of the file
            while ((line = reader.readLine()) != null) {
                if (isSkippableLine(line)) {
                    continue;
                }
                // split the line according to the arrow
                String[] sides = line.split("==>");
                if (sides.length != 2) {
                    throw new IllegalArgumentException(
                            "Invalid sensitive rule (expected \"a b ==> c\"): " + line);
                }
                Rule rule = new Rule();
                // add each item from the left side after converting from string to int
                for (String token : sides[0].trim().split(" ")) {
                    if (!token.isEmpty()) {
                        rule.leftSide.add(Integer.parseInt(token));
                    }
                }
                // add each item from the right side after converting from string to int
                for (String token : sides[1].trim().split(" ")) {
                    if (!token.isEmpty()) {
                        rule.rightSide.add(Integer.parseInt(token));
                    }
                }
                rules.add(rule);
            }
        } finally {
            // always release the input file, even if parsing failed
            reader.close();
        }
    }

    /**
     * Print statistics about the latest execution.
     */
    public void printStats() {
        System.out.println("============= FSHAR - STATS =============");
        System.out.println(" Transactions count from original database : " + tidcount);
        System.out.println(" minsup : " + minSuppRelative + " transactions");
        System.out.println(" Total time ~ " + (endTimeStamp - startTimestamp) + " ms");
        System.out.println("============================================");
    }
}