package ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.idLists;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.dataStructures.patterns.Pattern;
/**
* Inspired by SPMF. Implementation of an IdList for SPADE and SPAM. This IdList
* is based on a hash map of entries <Integer, List<Integer>>, and it maps a
* sid, denoted by the Integer key, to the appearances of the pattern in that
* sequence, denoted by the list of Integer. That list holds one itemset
* timestamp per appearance of the pattern, sorted in increasing order of the
* itemset timestamps.
*
* The join operation is done entry by entry, for those entries (sequences)
* shared by the two IdLists.
*
* Copyright Antonio Gomariz Peñalver 2013
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*
* @author agomariz
*/
public class IDListStandard_Map implements IDList {

    /**
     * The map where we keep the appearances of a pattern. The key is a
     * sequence id (sid) and the value is the list of itemset timestamps of
     * that sequence where the pattern occurs. The join operations of this
     * class rely on every list being sorted in increasing order.
     */
    Map<Integer, List<Integer>> itemsetSequenceEntries;

    /**
     * A bitset with one bit set per sequence id where the pattern appears,
     * i.e. the bitset representation of the key set of
     * {@link #itemsetSequenceEntries}. Its cardinality is the support.
     */
    BitSet sequences;

    /**
     * The standard constructor. It creates an empty IdList.
     */
    public IDListStandard_Map() {
        this.itemsetSequenceEntries = new HashMap<Integer, List<Integer>>();
        this.sequences = new BitSet();
    }

    /**
     * It creates an IdList backed by the given map of
     * &lt;Integer, List&lt;Integer&gt;&gt;. The map is stored as is (not
     * copied) and the sequence bitset is initialised with every key of the
     * map, so that {@link #getSupport()} agrees with the map content.
     *
     * @param itemsetSequenceEntries map from sequence id to the itemset
     * timestamps where the pattern occurs; must not be null
     */
    public IDListStandard_Map(Map<Integer, List<Integer>> itemsetSequenceEntries) {
        this.itemsetSequenceEntries = itemsetSequenceEntries;
        this.sequences = new BitSet();
        // Keep the bitset in sync with the key set of the map.
        for (Integer sid : itemsetSequenceEntries.keySet()) {
            this.sequences.set(sid);
        }
    }

    /**
     * Internal constructor used when the sequence bitset has already been
     * computed together with the map (see {@link #join}).
     *
     * @param itemsetSequenceEntries map from sequence id to timestamps
     * @param sequences bitset with the sequence ids present in the map
     */
    private IDListStandard_Map(Map<Integer, List<Integer>> itemsetSequenceEntries, BitSet sequences) {
        this.itemsetSequenceEntries = itemsetSequenceEntries;
        this.sequences = sequences;
    }

    /**
     * It returns the intersection IdList that results from the current object
     * and the IdList given as an argument. The join is made entry by entry:
     * every sequence of the argument is joined with the entry of the same
     * sequence in the current IdList, and only the sequences that produce a
     * non-empty result are kept.
     *
     * @param idList IdList with which we join the current IdList. It must be
     * an {@code IDListStandard_Map}, otherwise a ClassCastException is thrown.
     * @param equals Flag indicating if we want an intersection for the equal
     * relation, or, if it is false, for the after relation.
     * @param minSupport Minimum support. It is not used by this
     * implementation.
     * @return the intersection, as a new IdList
     */
    @Override
    public IDList join(IDList idList, boolean equals, int minSupport) {
        IDListStandard_Map other = (IDListStandard_Map) idList;
        Map<Integer, List<Integer>> otherEntries = other.getSequenceItemsetEntries();

        // Result map, and the bitset of the sequences that survive the join
        Map<Integer, List<Integer>> intersection = new HashMap<Integer, List<Integer>>(otherEntries.size());
        BitSet newSequences = new BitSet();

        for (Map.Entry<Integer, List<Integer>> entry : otherEntries.entrySet()) {
            Integer sid = entry.getKey();
            List<Integer> otherAppearances = entry.getValue();
            List<Integer> joined;
            if (equals) {
                joined = equalOperation(sid, otherAppearances);
            } else {
                joined = laterOperation(sid, otherAppearances);
            }
            // Both operations return null when there is no result for sid
            if (joined != null) {
                intersection.put(sid, joined);
                newSequences.set(sid);
            }
        }
        return new IDListStandard_Map(intersection, newSequences);
    }

    /**
     * It gets the map that codes the appearances of the pattern in this
     * IdList. The internal map itself is returned, not a copy.
     *
     * @return the map
     */
    public Map<Integer, List<Integer>> getSequenceItemsetEntries() {
        return itemsetSequenceEntries;
    }

    /**
     * It executes a join operation under the after relation for two sets of
     * appearances that correspond to the same sequence in two different
     * patterns. The result contains the timestamps of the parameter list from
     * the first one that is strictly greater than the first timestamp of the
     * current IdList for that sequence, up to the end of the list.
     *
     * @param sid Sequence identifier of the sequence where we want to check
     * if the pattern exists
     * @param transactionAppearancesInSequence Itemset timestamps of the
     * parameter IdList
     * @return The new entry for the new IdList, or null if it would be empty
     */
    private List<Integer> laterOperation(Integer sid, List<Integer> transactionAppearancesInSequence) {
        List<Integer> ownAppearances = itemsetSequenceEntries.get(sid);
        // Without any occurrence in the current IdList there is nothing to join
        if (ownAppearances == null || ownAppearances.isEmpty()) {
            return null;
        }
        int firstOwnTimestamp = ownAppearances.get(0);
        int size = transactionAppearancesInSequence.size();
        for (int i = 0; i < size; i++) {
            if (firstOwnTimestamp < transactionAppearancesInSequence.get(i)) {
                // Everything from this position onwards is kept (copied)
                return new ArrayList<Integer>(transactionAppearancesInSequence.subList(i, size));
            }
        }
        return null;
    }

    /**
     * It executes a join operation under the equal relation for two sets of
     * appearances that correspond to the same sequence in two different
     * patterns. The result contains the timestamps present in both lists.
     * Both lists are expected to be sorted in increasing order.
     *
     * @param sid Sequence identifier of the sequence where we want to check
     * if the pattern exists
     * @param transactionAppearancesInSequence Itemset timestamps of the
     * parameter IdList
     * @return The new entry for the new IdList, or null if it would be empty
     */
    private List<Integer> equalOperation(Integer sid, List<Integer> transactionAppearancesInSequence) {
        List<Integer> ownAppearances = itemsetSequenceEntries.get(sid);
        // Without any occurrence in the current IdList there is nothing to join
        if (ownAppearances == null || ownAppearances.isEmpty()) {
            return null;
        }
        // We explore the smaller list and we search in the greater one
        List<Integer> listToExplore;
        List<Integer> listToSearch;
        if (ownAppearances.size() <= transactionAppearancesInSequence.size()) {
            listToExplore = ownAppearances;
            listToSearch = transactionAppearancesInSequence;
        } else {
            listToExplore = transactionAppearancesInSequence;
            listToSearch = ownAppearances;
        }

        List<Integer> result = new ArrayList<Integer>();
        int searchSize = listToSearch.size();
        /*
         * Single forward cursor over the list to search: since both lists are
         * sorted, an element smaller than the current eid can never match a
         * later eid, so it is skipped once and never revisited.
         */
        int searchIndex = 0;
        for (Integer eid : listToExplore) {
            while (searchIndex < searchSize && listToSearch.get(searchIndex).compareTo(eid) < 0) {
                searchIndex++;
            }
            if (searchIndex == searchSize) {
                // Nothing left to match against
                break;
            }
            if (listToSearch.get(searchIndex).equals(eid)) {
                result.add(eid);
                searchIndex++;
            }
        }
        if (result.isEmpty()) {
            return null;
        }
        return result;
    }

    /**
     * It gets the support of this IdList, i.e. the number of sequences
     * registered in the sequence bitset.
     *
     * @return the number of bits set in the sequence bitset
     */
    @Override
    public int getSupport() {
        return sequences.cardinality();
    }

    /**
     * It adds an appearance for the sequence and timestamp given as parameter
     * in the current IdList. A timestamp already registered for the sequence
     * is not added again. New timestamps are appended at the end of the list,
     * so callers are expected to add them in increasing order.
     *
     * @param sequence Sequence identifier where the appearance occurs
     * @param timestamp Itemset timestamp where the appearance occurs
     */
    public void addAppearance(Integer sequence, Integer timestamp) {
        List<Integer> appearances = itemsetSequenceEntries.get(sequence);
        if (appearances == null) {
            appearances = new ArrayList<Integer>();
            itemsetSequenceEntries.put(sequence, appearances);
        }
        if (!appearances.contains(timestamp)) {
            appearances.add(timestamp);
            sequences.set(sequence);
        }
    }

    /**
     * It adds the appearances for the sequence and the timestamp list given
     * as parameter in the current IdList. If the sequence has no entry yet,
     * the given list itself is stored (not copied). If the sequence already
     * has an entry, the given timestamps are merged with the existing ones
     * into a new sorted list without repeating timestamps common to both.
     *
     * @param sid sequence identifier where the appearances occur
     * @param itemsets Itemset timestamps where the appearances occur,
     * expected to be non-null and sorted in increasing order
     */
    public void addAppearancesInSequence(Integer sid, List<Integer> itemsets) {
        List<Integer> existing = itemsetSequenceEntries.get(sid);
        if (existing == null) {
            itemsetSequenceEntries.put(sid, itemsets);
        } else if (itemsets != null && itemsets != existing && !itemsets.isEmpty()) {
            // Do not silently drop the new timestamps when an entry exists
            itemsetSequenceEntries.put(sid, mergeSorted(existing, itemsets));
        }
        sequences.set(sid);
    }

    /**
     * It merges two increasingly sorted timestamp lists into a new sorted
     * list. A timestamp present in both lists is kept only once. Neither
     * input list is modified.
     *
     * @param first first sorted list
     * @param second second sorted list
     * @return a new list with the union of both lists
     */
    private static List<Integer> mergeSorted(List<Integer> first, List<Integer> second) {
        List<Integer> merged = new ArrayList<Integer>(first.size() + second.size());
        int i = 0;
        int j = 0;
        while (i < first.size() && j < second.size()) {
            Integer a = first.get(i);
            Integer b = second.get(j);
            int comparison = a.compareTo(b);
            if (comparison < 0) {
                merged.add(a);
                i++;
            } else if (comparison > 0) {
                merged.add(b);
                j++;
            } else {
                merged.add(a);
                i++;
                j++;
            }
        }
        while (i < first.size()) {
            merged.add(first.get(i++));
        }
        while (j < second.size()) {
            merged.add(second.get(j++));
        }
        return merged;
    }

    /**
     * Get the string representation of this IdList: one line per sequence,
     * with the format {@code <tab>sid {t1,t2,...}}. A sequence with an empty
     * timestamp list is printed as {@code <tab>sid {}}.
     *
     * @return the string representation
     */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder();
        for (Map.Entry<Integer, List<Integer>> entry : itemsetSequenceEntries.entrySet()) {
            result.append("\t").append(entry.getKey()).append(" {");
            // The separator is written before every element but the first, so
            // there is never a trailing comma to remove (nor a brace to lose).
            String separator = "";
            for (Integer eid : entry.getValue()) {
                result.append(separator).append(eid);
                separator = ",";
            }
            result.append("}\n");
        }
        return result.toString();
    }

    /**
     * Setter method to insert in the pattern given as parameter the set of
     * sequence identifiers where the IdList appears. The internal bitset
     * instance is handed to the pattern as is.
     *
     * @param pattern Pattern where we insert the sid list
     */
    @Override
    public void setAppearingSequences(Pattern pattern) {
        pattern.setAppearingIn(sequences);
    }

    /**
     * It clears the attributes of this IdList: both the map of appearances
     * and the sequence bitset are emptied in place.
     */
    @Override
    public void clear() {
        itemsetSequenceEntries.clear();
        // NOTE(review): setAppearingSequences passes this same BitSet instance
        // to the pattern; if Pattern keeps the reference instead of copying
        // it, clearing here also empties the pattern's set - confirm.
        sequences.clear();
    }
}