package ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.idLists; import java.util.BitSet; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.dataStructures.patterns.Pattern; /** * Inspired in SPMF. * Implementation of a Idlist for SPADE and SPAM. This IdList is based on a hash * map of entries <Integer, Bitset>, and it makes a correspondence between a sid, * denoted by the Integer, with a the apperances of the pattern in that sequence, * denoted by the bitset. In that bitset we will have one bit set to 1 if in that * itemset, a appearance of the pattern can be found. * In order to make the join operation, we will do it entry by entry, for those * entries shared by two sequences. * * Copyright Antonio Gomariz PeƱalver 2013 * * This file is part of the SPMF DATA MINING SOFTWARE * (http://www.philippe-fournier-viger.com/spmf). * * SPMF is free software: you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * SPMF is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * SPMF. If not, see <http://www.gnu.org/licenses/>. * * @author agomariz */ public class IDListBitmap implements IDList { /** * the default number of bit that we use for each sequence */ final int BIT_PER_SECTION = 8; /** * the map where we keep the appearances of a pattern in a sequence. * With an integer we stand for a sequence id, whereas a bitset is a * representation of an itemset. */ Map<Integer, BitSet> sequence_ItemsetEntries; /** * A bitset to keep just the sequences where a pattern appears. Is the bitset * representation of the keyset of the map sequence_ItemsetEntries */ BitSet sequences; /** * Standard Constructor. It creates an empty IdList */ public IDListBitmap() { super(); sequence_ItemsetEntries = new HashMap<Integer, BitSet>(); sequences = new BitSet(); } /** * It creates a IdList from a map of entries <Integer, Bitset> * @param sequenceItemsetEntries */ private IDListBitmap(Map<Integer, BitSet> sequenceItemsetEntries) { sequence_ItemsetEntries = sequenceItemsetEntries; sequences = new BitSet(sequenceItemsetEntries.size()); } /** * It adds the appearance of the pattern in the itemset "tid" and sequence "sid" * @param sid The sequence identifier where the pattern appears * @param tid The itemset timestamp where the pattern appears */ public void registerBit(int sid, int tid) { int bitIndex = tid; BitSet bitmap = sequence_ItemsetEntries.get(sid); if (bitmap == null) { bitmap = new BitSet(BIT_PER_SECTION); sequence_ItemsetEntries.put(sid, bitmap); sequences.set(sid); } bitmap.set(bitIndex); } /** * It adds the appearances of the pattern in the itemsets contained in "tids" and sequence "sid" * @param sid The sequence identifier wher the pattern appears * @param tids The set of itemset timestamps where the pattern appears */ public void registerNBits(int sid, List<Integer> tids) { BitSet bitmap = sequence_ItemsetEntries.get(sid); if (bitmap == null) { bitmap = new BitSet(BIT_PER_SECTION); sequence_ItemsetEntries.put(sid, bitmap); sequences.set(sid); } for (Integer tid : tids) { int bitIndex = tid; bitmap.set(bitIndex, true); } } /** * It return the number of sequences where the IdList is active. * @return the number of sequences */ @Override public int getSupport() { return sequences.cardinality(); } /** * Get the string representation of this kind of IdList * @return the string representation */ @Override public String toString() { StringBuilder buffer = new StringBuilder(); for (Integer sid : sequence_ItemsetEntries.keySet()) { BitSet bitmap = sequence_ItemsetEntries.get(sid); for (int bit = bitmap.nextSetBit(0); bit >= 0; bit = bitmap.nextSetBit(bit + 1)) { buffer.append("[sid="); buffer.append(sid); buffer.append(" tid="); buffer.append(bit); buffer.append("]"); } } return buffer.toString(); } /** * It return the intersection IdList that results from the current object and * the IdList given as an argument. * @param idList IdList with which we join the current IdList. * @param equals Flag indicating if we want a intersection for equal relation, * or, if it is false, an after relation. * @param minSupport Minimum relative support. * @return the resulting idlist */ @Override public IDList join(IDList idList, boolean equals, int minSupport) { //We create the result map of entries of bitsets Map<Integer, BitSet> intersection = new HashMap<Integer, BitSet>(((IDListBitmap) idList).getSecuenceItemsetEntries().size()); //We create an empty bitset where we will keep the pattern appearances BitSet newSequences = new BitSet(getSecuenceItemsetEntries().size()); //Cast in the argument IdList IDListBitmap idStandard = (IDListBitmap) idList; //And we get the map of entries of bitsets Map<Integer, BitSet> idListMap = idStandard.getSecuenceItemsetEntries(); Set<Map.Entry<Integer, BitSet>> entries = idListMap.entrySet(); //If flag equals is activated if (equals) { //We execute a join for equal relation equalLoop(intersection, entries,newSequences); } else { //Otherwise we execute a join for an after relation laterLoop(intersection, entries,newSequences); } //We create the new IdList from the resulting map and sequences bitset IDListBitmap output = new IDListBitmap(intersection); output.sequences=newSequences; return output; } /** * Method to do the join operation under equal relation. * @param sequenceItemsetEntries Map where we put the new elements resulting * from the join method * @param entries Map with which we are going to join the current IdList. * @param sequences New bitset where we keep the sequences where the new * IdList is active */ private void equalLoop(Map<Integer, BitSet> sequenceItemsetEntries, Set<Map.Entry<Integer, BitSet>> entries,BitSet sequences) { //For each entry for (Map.Entry<Integer, BitSet> entry : entries) { //we get the bitset of the entry of the Idlist argument BitSet otherIdList = entry.getValue(); //We get the bitset for the same entry (sid) in the current IdList BitSet thisIdList = sequence_ItemsetEntries.get(entry.getKey()); //If contains any value for that sid if (thisIdList != null) { BitSet equalResult; /* We make a join equal operation for that pair of bitsets that * represent the different appearances in sequence sid of the * pattern with which the IdList will be associated */ equalResult = equalOperation(thisIdList, otherIdList); //If there is any result if (equalResult != null) { int sid = entry.getKey(); //We keep that result in the new map sequenceItemsetEntries.put(sid, equalResult); sequences.set(sid); } } } } /** * Method to do the join operation under after relation. * @param sequenceItemsetEntries Map where we put the new elements resulting * from the join method * @param entries Map with which we are going to join the current IdList. * @param sequences New bitset where we keep the sequences where the new * IdList is active */ private void laterLoop(Map<Integer, BitSet> sequenceItemsetEntries, Set<Map.Entry<Integer, BitSet>> entries, BitSet sequences) { //For each entry for (Map.Entry<Integer, BitSet> entry : entries) { //we get the bitset of the entry of the Idlist argument BitSet otherIdList = entry.getValue(); //We get the bitset for the same entry (sid) in the current IdList BitSet thisIdList = sequence_ItemsetEntries.get(entry.getKey()); //If contains any value for that sid if (thisIdList != null) { BitSet greaterThanResult; /* We make a join after operation for that pair of bitsets that * represent the different appearances in sequence sid of the * pattern with which the IdList will be associated */ greaterThanResult = greaterThanOperation(thisIdList, otherIdList); //If there is any result if (greaterThanResult != null) { int sid = entry.getKey(); //We keep that result in the new map sequenceItemsetEntries.put(sid, greaterThanResult); sequences.set(sid); } } } } /** * Setter method to insert in the pattern given as parameter the set of * sequence identifiers where the IdList appears, so the pattern does * @param pattern Pattern where we insert the sid list */ @Override public void setAppearingSequences(Pattern pattern) { pattern.setAppearingIn(sequences); } @Override public void clear() { } /** * It adds, for a particular sequence, all the apperarances given by the list * of itemsets * @param sid Sequence id where the itemsets will be inserted * @param itemsets Set of itemsets to insert in a sequence */ public void addAppearancesInSequence(Integer sid, List<Integer> itemsets) { registerNBits(sid, itemsets); } /** * Getter method for the map of entries * @return the map of entries <integer, bitset> */ public Map<Integer, BitSet> getSecuenceItemsetEntries() { return sequence_ItemsetEntries; } /** * Set the map of entries * @param sequenceItemsetEntries the map of entries */ public void setSequenceItemsetEntries(Map<Integer, BitSet> sequenceItemsetEntries) { this.sequence_ItemsetEntries = sequenceItemsetEntries; } /** * * @param thisBitmap * @param otherBitmap * @param temporalDistance * @return */ private BitSet equalOperation(BitSet thisBitmap, BitSet otherBitmap, int temporalDistance) { if (thisBitmap != null) { BitSet result = (BitSet) thisBitmap.clone(); result.and(shiftToLeft(otherBitmap, temporalDistance)); if (result.cardinality() > 0) { return shiftToRight(result, temporalDistance); } } return null; } /** * It executes a join operation under the equal relation for a two sets of * appearances that correspond to a same sequence in two different patterns * @param thisBitmap Set of appearances of the the current IdList * @param otherBitmap Set of appearances of the given IdList * @return The resulting bitmap */ private BitSet equalOperation(BitSet thisBitmap, BitSet otherBitmap) { //If the bitmap exist for the associated sequence if (thisBitmap != null) { BitSet result = (BitSet) thisBitmap.clone(); //We make an and operation result.and(otherBitmap); //And if there is a result, we return it if (result.cardinality() > 0) { return result; } } return null; } /** * It executes a join operation under the after relation for a two sets of * appearances that correspond to a same sequence in two different patterns * @param thisBitmap Set of appearances of the the current IdList * @param otherBitmap Set of appearances of the given IdList * @return The resulting bitmap */ private BitSet greaterThanOperation(BitSet thisBitmap, BitSet otherBitmap) { BitSet result = (BitSet) otherBitmap.clone(); //If the bitmap exist for the associated sequence if (thisBitmap != null) { /* We get the first index where there is a bit set to 1 value, i.e., * the index where the first appearance of the first pattern is */ int index = thisBitmap.nextSetBit(0); /* * If the index is 0 or positive and is less than the index of * the last item of the other bitmap */ if (index >= 0 && index < (otherBitmap.length() - 1)) { /* * The new resulting value is equal to the bitmap associated to * the second Idlist (otherBitmap), having set to 0 all the values * that appear before or at the same position of the first activated * index in thisBitmap */ int newIndex = index + 1; result.clear(0, newIndex); //If there still are some appearances if (result.nextSetBit(newIndex) > 0) { //We return the new bitmap return result; } } } return null; } private BitSet shiftToLeft(BitSet bitsetArg, int temporalDistance) { BitSet result = new BitSet(bitsetArg.length()); for (int bitIndex = bitsetArg.nextSetBit(0); bitIndex >= 0; bitIndex = bitsetArg.nextSetBit(bitIndex + 1)) { int dif = bitIndex - temporalDistance; if (dif >= 0) { result.set(dif); } } return result; } private BitSet shiftToRight(BitSet bitsetArg, int temporalDistance) { BitSet result = new BitSet(bitsetArg.length()); for (int bitIndex = bitsetArg.nextSetBit(0); bitIndex >= 0; bitIndex = bitsetArg.nextSetBit(bitIndex + 1)) { int dif = bitIndex + temporalDistance; result.set(dif); } return result; } }