package ca.pfv.spmf.algorithms.sequentialpatterns.spam; import java.util.BitSet; import java.util.Collections; import java.util.List; /** * Implementation of a bitmap for SPAM. * <br/><br/> * * Copyright (c) 2008-2012 Philippe Fournier-Viger * <br/><br/> * * This file is part of the SPMF DATA MINING SOFTWARE * (http://www.philippe-fournier-viger.com/spmf). * <br/><br/> * * SPMF is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * <br/><br/> * * SPMF is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * <br/><br/> * * You should have received a copy of the GNU General Public License * along with SPMF. If not, see <http://www.gnu.org/licenses/>. * * @see AlgoCMSPAM * @see AlgoSPAM * @see AlgoTKS * @see AlgoVMSP */ public class Bitmap { public static long INTERSECTION_COUNT = 0; // A bitmap is implemented using the BitSet class of Java. BitSet bitmap = new BitSet(); // For calculating the support more efficiently // we keep some information: int lastSID = -1; // the sid of the last sequence inserted in that bitmap that contains a bit set to 1 int firstItemsetID = -1; // the id of the first itemset containing a bit set to 1 (in any sequence) private int support = 0; // the number of bits that are currently set to 1 int sidsum = 0; /** * Constructor * @param lastBitIndex the desired size of the bitset minus 1 */ Bitmap(int lastBitIndex){ this.bitmap = new BitSet(lastBitIndex+1); } /** * Constructor * @param bitmap a bitset to initialize this Bitmap. */ private Bitmap(BitSet bitmap){ this.bitmap = bitmap; } /** * Set a bit to 1 in this bitmap * @param sid the sid corresponding to that bit * @param tid the tid corresponding to that bit * @param sequencesSize the list of sequence length to know how many bits are allocated to each sequence */ public void registerBit(int sid, int tid, List<Integer> sequencesSize) { // calculate the position of the bit that we need to set to 1 int pos = sequencesSize.get(sid) + tid; // set the bit to 1 bitmap.set(pos, true); // Update the count of bit set to 1 if(sid != lastSID){ support++; sidsum += sid; // FOR THE VGEN ALGORITHM } // if(firstItemsetID == -1 || tid < firstItemsetID){ firstItemsetID = tid; } // remember the last SID with a bit set to 1 lastSID = sid; } /** * Given the position of a bit, return the corresponding sequence ID. * @param bit the position of the bit in the bitmap * @param sequencesSize the list of lengths of sequence by sequence ID. * @return the corresponding sequence ID */ private int bitToSID(int bit, List<Integer> sequencesSize) { // Do a binary search int result = Collections.binarySearch(sequencesSize, bit); if(result >= 0){ return result; } return 0 - result -2; } /** * Get the support of this bitmap (the number of bits set to 1) * @return the support. */ public int getSupport() { return support; } /** * Create a new bitmap for the s-step by doing a AND between this * bitmap and the bitmap of an item. * @param bitmapItem the bitmap of the item used for the S-Step * @param sequencesSize the sequence lengths * @param lastBitIndex the last bit index * @param maxGap * @return return the new bitmap */ Bitmap createNewBitmapSStep(Bitmap bitmapItem, List<Integer> sequencesSize, int lastBitIndex, int maxGap) { //INTERSECTION_COUNT++; // create a new bitset that will be use for the new bitmap BitSet newBitset = new BitSet(lastBitIndex); // create the new bitmap Bitmap newBitmap = new Bitmap(newBitset); // We do an AND with the bitmap of the item and this bitmap for (int bitK = bitmap.nextSetBit(0); bitK >= 0; bitK = bitmap.nextSetBit(bitK+1)) { // find the sid of this bit int sid = bitToSID(bitK, sequencesSize); // get the last bit for this sid int lastBitOfSID = lastBitOfSID(sid, sequencesSize, lastBitIndex); boolean match = false; for (int bit = bitmapItem.bitmap.nextSetBit(bitK+1); bit >= 0 && bit <= lastBitOfSID && (bit - bitK <=maxGap); bit = bitmapItem.bitmap.nextSetBit(bit+1)) { // new int tid = bit - sequencesSize.get(sid); newBitmap.bitmap.set(bit); match = true; // System.out.println(); // System.out.println("bit " + bit); // System.out.println("sid " + sid); // System.out.println("seqSize " + sequencesSize.get(sid)); // System.out.println("tid " + tid); if(firstItemsetID == -1 || tid < firstItemsetID){ firstItemsetID = tid; } } if(match){ // update the support if(sid != newBitmap.lastSID){ newBitmap.support++; newBitmap.sidsum += sid; } newBitmap.lastSID = sid; } bitK = lastBitOfSID; // to skip the bit from the same sequence } // We return the resulting bitmap return newBitmap; } private int lastBitOfSID(int sid, List<Integer> sequencesSize, int lastBitIndex) { if(sid+1 >= sequencesSize.size()){ return lastBitIndex; }else{ return sequencesSize.get(sid+1) -1; } } /** * Create a new bitmap by performing the I-STEP with this * bitmap and the bitmap of an item. * @param bitmapItem the bitmap of the item * @param sequencesSize the sequence lengths * @param lastBitIndex the last bit index * @return the new bitmap */ Bitmap createNewBitmapIStep(Bitmap bitmapItem, List<Integer> sequencesSize, int lastBitIndex) { //INTERSECTION_COUNT++; // We create the new bitmap BitSet newBitset = new BitSet(lastBitIndex); // TODO: USE LAST SET BIT Bitmap newBitmap = new Bitmap(newBitset); // We do an AND with the bitmap of the item for (int bit = bitmap.nextSetBit(0); bit >= 0; bit = bitmap.nextSetBit(bit+1)) { if(bitmapItem.bitmap.get(bit)){ // if both bits are TRUE // set the bit newBitmap.bitmap.set(bit); // update the support int sid = bitToSID(bit, sequencesSize); if(sid != newBitmap.lastSID){ newBitmap.sidsum += sid; newBitmap.support++; } newBitmap.lastSID = sid; // remember the last SID // new int tid = bit - sequencesSize.get(sid); if(firstItemsetID == -1 || tid < firstItemsetID){ firstItemsetID = tid; } // end new } } // Then do the AND newBitset.and(bitmapItem.bitmap); // We return the resulting bitmap return newBitmap; } /** * Set the support of this bitmap without using the internal BitSet object. * This method is used by VGEN * @param support the support as an integer value. */ public void setSupport(int support) { this.support = support; bitmap = null; } /** * Get the list of sids corresponding to this bitmap as a string * @param sequencesSize the list of sequence length to know how many bits are allocated to each sequence * @return a string */ public String getSIDs(List<Integer> sequencesSize) { StringBuilder builder = new StringBuilder(); // We do an AND with the bitmap of the item and this bitmap for (int bitK = bitmap.nextSetBit(0); bitK >= 0; bitK = bitmap.nextSetBit(bitK+1)) { // find the sid of this bit int sid = bitToSID(bitK, sequencesSize); builder.append(" " + sid); } return builder.toString(); } }