package ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.idLists;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.dataStructures.patterns.Pattern;
/**
* Inspired by SPMF. Implementation of an IdList for SPADE and SPAM. This IdList
* is based on a big bitmap and encodes all the sequences by means of one bitset
* and a list of bitsets. The single bitset keeps the sequence identifiers of
* all the sequences where the pattern appears. For each sequence there is one
* bitset in the list, which marks the itemsets of that sequence where the
* pattern appears. Therefore, if the sequence bitset has 512 bits, we also
* have a list of 512 bitsets, one per sequence.
*
* Copyright Antonio Gomariz Peñalver 2013
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*
* @author agomariz
*/
public class IDListFatBitmap implements IDList {

    /**
     * Initial size, in bits, of the sequence bitset and initial capacity of
     * the per-sequence itemset list. Both structures grow on demand.
     */
    static final int BIT_PER_SEQUENCE = 512;
    /**
     * Initial size, in bits, of the itemset bitset created for a sequence.
     * The bitset grows on demand.
     */
    static final int BIT_PER_ITEMSET = 64;
    /**
     * Bitset that codes all the sequences where the pattern associated with
     * this bitmap appears. Bit {@code sid} is set when the pattern appears in
     * the sequence {@code sid}.
     */
    private BitSet sequences;
    /**
     * Itemset bitsets indexed by sequence identifier. Position {@code sid}
     * holds the bitset whose set bits are the itemsets (tids) of that sequence
     * where the pattern appears, or {@code null} if nothing was registered.
     */
    private final List<BitSet> itemsetsOfSequences;
    /**
     * Support corresponding to the pattern associated to this IdList, i.e.
     * the number of sequences where the pattern appears.
     */
    private int support;

    /**
     * Standard constructor for an empty bitmap.
     */
    public IDListFatBitmap() {
        this.sequences = new BitSet(BIT_PER_SEQUENCE);
        this.itemsetsOfSequences = new ArrayList<BitSet>(BIT_PER_SEQUENCE);
    }

    /**
     * Constructor from a set of sequences and a list of itemsets. The given
     * objects are stored as they are, without copying them.
     *
     * @param sequences bitset of sequence identifiers
     * @param itemsets itemset bitsets, indexed by sequence identifier
     */
    private IDListFatBitmap(BitSet sequences, List<BitSet> itemsets) {
        this.sequences = sequences;
        // Any List implementation is accepted; no downcast is needed.
        this.itemsetsOfSequences = itemsets;
        this.support = sequences.cardinality();
    }

    /**
     * It adds an appearance of the pattern in the sequence denoted by sid, at
     * the itemset denoted by tid.
     *
     * @param sid The sequence identifier where the pattern appears
     * @param tid The itemset where the pattern appears
     */
    public void registerBit(int sid, int tid) {
        itemsetsForRegistration(sid).set(tid);
        // The support is the number of distinct sequences registered so far
        this.support = sequences.cardinality();
    }

    /**
     * It adds all the appearances of the pattern in the sequence denoted by
     * sid, at all the itemsets denoted by tids.
     *
     * @param sid The sequence identifier where the pattern appears
     * @param tids The itemsets where the pattern appears
     */
    public void registerNBits(int sid, List<Integer> tids) {
        BitSet itemsetsFromSequence = itemsetsForRegistration(sid);
        // The support is the number of distinct sequences registered so far
        this.support = sequences.cardinality();
        for (int tid : tids) {
            itemsetsFromSequence.set(tid);
        }
    }

    /**
     * It gets the number of sequences where the IdList is active, i.e. where
     * the pattern appears.
     *
     * @return the number of sequences
     */
    @Override
    public int getSupport() {
        return support;
    }

    /**
     * Get the string representation of this IdList: one {@code [sid=S tid=T]}
     * entry for every registered appearance, in increasing order of sid and
     * then tid.
     *
     * @return the string, empty if there is no appearance
     */
    @Override
    public String toString() {
        StringBuilder buffer = new StringBuilder();
        for (int sid = sequences.nextSetBit(0); sid >= 0; sid = sequences.nextSetBit(sid + 1)) {
            BitSet bitmap = sid < itemsetsOfSequences.size() ? itemsetsOfSequences.get(sid) : null;
            if (bitmap == null) {
                continue;
            }
            for (int tid = bitmap.nextSetBit(0); tid >= 0; tid = bitmap.nextSetBit(tid + 1)) {
                buffer.append("[sid=").append(sid).append(" tid=").append(tid).append(']');
            }
        }
        return buffer.toString();
    }

    /**
     * It returns the intersection IdList that results from the current object
     * and the IdList given as an argument. Neither of the two joined IdLists
     * is modified.
     *
     * @param idList IdList with which we join the current IdList. It must be
     * an {@code IDListFatBitmap}.
     * @param equals Flag indicating if we want an intersection for the equal
     * relation or, if it is false, for the after relation.
     * @param minSupport Minimum support, expressed as a number of sequences.
     * If the sequences shared by both IdLists are fewer than this value, the
     * itemsets are not joined and the result only carries that count as its
     * support.
     * @return the intersection
     * @throws ClassCastException if idList is not an {@code IDListFatBitmap}
     */
    @Override
    public IDList join(IDList idList, boolean equals, int minSupport) {
        IDListFatBitmap other = (IDListFatBitmap) idList;
        IDListFatBitmap result = new IDListFatBitmap();
        if (equals) {
            equalLoop(result, other.sequences, other.itemsetsOfSequences, minSupport);
        } else {
            laterLoop(result, other.sequences, other.itemsetsOfSequences, minSupport);
        }
        return result;
    }

    /**
     * It passes to the given pattern the bitset of sequence identifiers where
     * this IdList appears. The bitset itself is handed over, not a copy.
     *
     * @param pattern Pattern where we insert the sid list
     */
    @Override
    public void setAppearingSequences(Pattern pattern) {
        pattern.setAppearingIn(sequences);
    }

    /**
     * This implementation does nothing: the stored bitsets are left
     * untouched. Note that the sequence bitset is the same object that
     * {@link #setAppearingSequences(Pattern)} hands to the pattern.
     */
    @Override
    public void clear() {
    }

    /**
     * It adds, for a particular sequence, all the appearances given by the
     * list of itemsets. It is equivalent to
     * {@link #registerNBits(int, List)}.
     *
     * @param sequence Sequence id where the itemsets will be inserted
     * @param itemsets Set of itemsets to insert in a sequence
     */
    public void addAppearancesInSequence(Integer sequence, List<Integer> itemsets) {
        registerNBits(sequence, itemsets);
    }

    /**
     * It marks the sequence sid as active and returns its itemset bitset,
     * creating the bitset if the sequence did not have one yet.
     *
     * @param sid The sequence identifier
     * @return the itemset bitset stored for sid, never null
     */
    private BitSet itemsetsForRegistration(int sid) {
        insertInSequence(sid);
        BitSet itemsetsFromSequence = itemsetsOfSequences.get(sid);
        if (itemsetsFromSequence == null) {
            itemsetsFromSequence = new BitSet(BIT_PER_ITEMSET);
            itemsetsOfSequences.set(sid, itemsetsFromSequence);
        }
        return itemsetsFromSequence;
    }

    /**
     * It executes a join operation under the equal relation for two sets of
     * appearances that correspond to the same sequence in two different
     * patterns. The arguments are not modified.
     *
     * @param thisBitmap Set of appearances of the current IdList
     * @param otherBitmap Set of appearances of the given IdList
     * @return a new bitset with the itemsets present in both arguments, or
     * null if an argument is null or they share no itemset
     */
    private BitSet equalOperation(BitSet thisBitmap, BitSet otherBitmap) {
        if (thisBitmap == null || otherBitmap == null) {
            return null;
        }
        // Check first so that no copy is made when the result would be empty
        if (!thisBitmap.intersects(otherBitmap)) {
            return null;
        }
        BitSet result = (BitSet) thisBitmap.clone();
        result.and(otherBitmap);
        return result;
    }

    /**
     * It executes a join operation under the after relation for two sets of
     * appearances that correspond to the same sequence in two different
     * patterns. The arguments are not modified.
     *
     * @param thisBitmap Set of appearances of the current IdList
     * @param otherBitmap Set of appearances of the given IdList
     * @return a new bitset with the appearances of otherBitmap placed
     * strictly after the first appearance of thisBitmap, or null if an
     * argument is null or there is no such appearance
     */
    private BitSet greaterThanOperation(BitSet thisBitmap, BitSet otherBitmap) {
        if (thisBitmap == null || otherBitmap == null) {
            return null;
        }
        int firstAppearance = thisBitmap.nextSetBit(0);
        // length() - 1 is the last appearance of otherBitmap (-1 if empty)
        int lastOtherAppearance = otherBitmap.length() - 1;
        if (firstAppearance < 0 || firstAppearance >= lastOtherAppearance) {
            return null;
        }
        /*
         * The result is otherBitmap with every position before, or at, the
         * first appearance of thisBitmap set to 0. The last appearance of
         * otherBitmap lies after that position, so the result is never empty.
         */
        BitSet result = (BitSet) otherBitmap.clone();
        result.clear(0, firstAppearance + 1);
        return result;
    }

    /**
     * Method to do the join operation under the equal relation.
     *
     * @param newIdList IdList where we put the elements resulting from the
     * join
     * @param sequencesFromIdList Sequence bitset with which we are going to
     * join the current IdList.
     * @param itemsetsFromIdList Itemset bitsets with which we are going to
     * join the current IdList.
     * @param minSupport Minimum support, as a number of sequences
     */
    private void equalLoop(IDListFatBitmap newIdList, BitSet sequencesFromIdList, List<BitSet> itemsetsFromIdList, int minSupport) {
        joinLoop(newIdList, sequencesFromIdList, itemsetsFromIdList, minSupport, true);
    }

    /**
     * Method to do the join operation under the after relation.
     *
     * @param newIdList IdList where we put the elements resulting from the
     * join
     * @param sequencesFromIdList Sequence bitset with which we are going to
     * join the current IdList.
     * @param itemsetsFromIdList Itemset bitsets with which we are going to
     * join the current IdList.
     * @param minSupport Minimum support, as a number of sequences
     */
    private void laterLoop(IDListFatBitmap newIdList, BitSet sequencesFromIdList, List<BitSet> itemsetsFromIdList, int minSupport) {
        joinLoop(newIdList, sequencesFromIdList, itemsetsFromIdList, minSupport, false);
    }

    /**
     * Common body of the equal and after joins. It intersects the sequence
     * bitsets of both IdLists and, if the shared sequences reach minSupport,
     * joins the itemsets sequence by sequence. A sequence whose itemset join
     * yields nothing is removed from the result and the support is decreased.
     *
     * @param newIdList IdList where we put the elements resulting from the
     * join
     * @param sequencesFromIdList Sequence bitset of the other IdList
     * @param itemsetsFromIdList Itemset bitsets of the other IdList
     * @param minSupport Minimum support, as a number of sequences
     * @param equals true for the equal relation, false for the after relation
     */
    private void joinLoop(IDListFatBitmap newIdList, BitSet sequencesFromIdList, List<BitSet> itemsetsFromIdList, int minSupport, boolean equals) {
        List<BitSet> itemsetIntersection = newIdList.getItemsets();
        // Sequences shared by both IdLists: the potential support
        BitSet sequencesIntersection = (BitSet) sequences.clone();
        sequencesIntersection.and(sequencesFromIdList);
        // We fit the number of itemsets to the number of sequences that we have
        setSize(itemsetIntersection, sequencesIntersection.length());
        newIdList.setSupport(sequencesIntersection.cardinality());
        if (newIdList.getSupport() < minSupport) {
            // Not enough shared sequences: the itemsets are not joined
            return;
        }
        newIdList.setSequences(sequencesIntersection);
        for (int sid = sequencesIntersection.nextSetBit(0); sid >= 0; sid = sequencesIntersection.nextSetBit(sid + 1)) {
            BitSet otherItemset = itemsetsFromIdList.get(sid);
            BitSet thisItemset = itemsetsOfSequences.get(sid);
            // Both operations return null for a missing itemset
            BitSet joined = equals
                    ? equalOperation(thisItemset, otherItemset)
                    : greaterThanOperation(thisItemset, otherItemset);
            if (joined != null) {
                itemsetIntersection.set(sid, joined);
            } else {
                // The pattern does not appear in this sequence after all
                sequencesIntersection.clear(sid);
                newIdList.decreaseSupport();
            }
        }
    }

    /**
     * It returns the list of itemset bitsets, one per sequence, of the
     * current IdList.
     *
     * @return the internal list, not a copy
     */
    private List<BitSet> getItemsets() {
        return this.itemsetsOfSequences;
    }

    /**
     * It sets the support associated with the IdList.
     *
     * @param support the new support
     */
    private void setSupport(int support) {
        this.support = support;
    }

    /**
     * It decreases by one the support associated with the IdList.
     */
    private void decreaseSupport() {
        this.support--;
    }

    /**
     * It increases by one the support associated with the IdList.
     */
    private void increaseSupport() {
        this.support++;
    }

    /**
     * It inserts the sequence identifier given as parameter in the set of
     * sequences, and pads the itemset list with null entries so that the
     * position sid exists.
     *
     * @param sid the sequence identifier
     */
    private void insertInSequence(int sid) {
        sequences.set(sid);
        while (itemsetsOfSequences.size() <= sid) {
            itemsetsOfSequences.add(null);
        }
    }

    /**
     * It replaces the sequence bitset of the IdList.
     *
     * @param sequences the new sequence bitset, stored without copying
     */
    private void setSequences(BitSet sequences) {
        this.sequences = sequences;
    }

    /**
     * It adjusts the list of itemset bitsets to the size given by the length
     * parameter, removing elements from the end or appending null buckets.
     *
     * @param list list of itemset bitsets to adjust
     * @param length size that the list should have
     */
    private void setSize(List<BitSet> list, int length) {
        // If we have more elements than necessary, we remove them from the end
        while (list.size() > length) {
            list.remove(list.size() - 1);
        }
        // If, conversely, we have fewer elements, we add null buckets
        while (list.size() < length) {
            list.add(null);
        }
    }
}