package ca.pfv.spmf.algorithms.frequentpatterns.charm;
/* This file is copyright (c) 2008-2014 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.Set;
import ca.pfv.spmf.datastructures.triangularmatrix.TriangularMatrix;
import ca.pfv.spmf.input.transaction_database_list_integers.TransactionDatabase;
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemset;
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemsets;
/**
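* This class represents a hash table for storing itemsets found by the Charm
* algorithm. It is used to perform the closedness check, i.e. to test whether a
* superset with the same support as a given itemset has already been stored.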
*
* @see AlgoCharm_Bitset
* @see TriangularMatrix
* @see TransactionDatabase
* @see Itemset
* @see Itemsets
* @author Philippe Fournier-Viger
*/
class HashTable {
    // The internal array for the hash table. Each slot is either null (nothing
    // stored at that position yet) or the list of itemsets whose hashcode maps
    // to that position.
    private final List<Itemset>[] table;

    /**
     * Constructor.
     * @param size size of the internal array for the hash table (must be positive).
     * @throws IllegalArgumentException if size is zero or negative, because the
     *         hashcode is computed modulo this size.
     */
    public HashTable(int size) {
        if (size <= 0) {
            throw new IllegalArgumentException(
                    "The size of the hash table must be positive, but was " + size);
        }
        // Java does not allow creating an array of a parameterized type, so a raw
        // array is created. This is safe because only put() stores lists in it.
        @SuppressWarnings({"unchecked", "rawtypes"})
        List<Itemset>[] buckets = new ArrayList[size];
        table = buckets;
    }

    /**
     * Check if the hash table contains a superset of a given itemset
     * having the same support.
     * @param itemset the given itemset
     * @param hashcode the hashcode of the itemset (needs to be calculated beforehand
     *        by using one of the provided hashCode() methods).
     * @return true if the hash table contains at least one superset with the
     *         same support, otherwise false.
     */
    public boolean containsSupersetOf(Itemset itemset, int hashcode) {
        List<Itemset> bucket = table[hashcode];
        // If the position in the array that is given by the hashcode is empty,
        // then no superset can be stored there.
        if (bucket == null) {
            return false;
        }
        // For each itemset X at that hashcode position
        for (Itemset itemsetX : bucket) {
            // if the support of X is the same as the given itemset and X contains
            // the given itemset, then a superset has been found
            if (itemsetX.getAbsoluteSupport() == itemset.getAbsoluteSupport()
                    && itemsetX.containsAll(itemset)) {
                return true;
            }
        }
        // Otherwise no superset is in the hash table
        return false;
    }

    /**
     * Add an itemset to the hash table.
     * @param itemset the itemset to be added to the hash table
     * @param hashcode the hashcode of the itemset (needs to be calculated beforehand
     *        by using one of the provided hashCode() methods).
     */
    public void put(Itemset itemset, int hashcode) {
        // if the position in the array is empty, create a new array list
        // for that position
        if (table[hashcode] == null) {
            table[hashcode] = new ArrayList<Itemset>();
        }
        // store the itemset in the array list of that position
        table[hashcode].add(itemset);
    }

    /**
     * Calculate the hashcode of an itemset as the sum of the tids of its tidset,
     * modulo the internal array length.
     * @param tidset the tidset of the itemset
     * @return the hashcode (an integer between 0 and the internal array length - 1)
     */
    public int hashCode(BitSet tidset) {
        int sum = 0;
        // for each tid in the tidset, make the sum
        // (the sum is allowed to wrap around on integer overflow)
        for (int tid = tidset.nextSetBit(0); tid >= 0; tid = tidset.nextSetBit(tid + 1)) {
            sum += tid;
        }
        return toIndex(sum);
    }

    /**
     * Calculate the hashcode of an itemset as the sum of the tids of its tidset,
     * modulo the internal array length.
     * @param tidset the tidset of the itemset
     * @return the hashcode (an integer between 0 and the internal array length - 1)
     */
    public int hashCode(Set<Integer> tidset) {
        int sum = 0;
        // for each tid in the tidset, make the sum
        // (the sum is allowed to wrap around on integer overflow)
        for (int tid : tidset) {
            sum += tid;
        }
        return toIndex(sum);
    }

    /**
     * Convert a (possibly negative, because of integer overflow) sum of tids
     * into a valid position of the internal array.
     * @param sum the sum of the tids
     * @return the absolute value of the sum modulo the internal array length
     */
    private int toIndex(int sum) {
        // The absolute value is computed on a long because negating
        // Integer.MIN_VALUE as an int yields Integer.MIN_VALUE again, which
        // would produce a negative (invalid) array position.
        return (int) (Math.abs((long) sum) % table.length);
    }
}