package ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.idlists;
import java.util.*;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.dataStructures.patterns.Pattern;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.tries.Trie;
/**
* Inspired by SPMF. Implementation of an IdList for ClaSP. This IdList is based
* on a hash map of entries <Integer, List<Position>>. It maps a sequence id
* (sid), denoted by the Integer, to the appearances of the pattern in that
* sequence, denoted by the list of positions. That list holds the positions
* where an appearance of the pattern can be found, and it is sorted in
* increasing order by itemset timestamp first and by item position second.
*
* The join operation is performed entry by entry, for those entries (sequence
* ids) that are shared by the two IdLists being joined.
*
* Copyright Antonio Gomariz Peñalver 2013
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*
* @author agomariz
*/
public class IDListStandard_Map implements IDList {

    /**
     * The map where we keep the appearances of a pattern. The key is a
     * sequence id and the value is the list of positions where the pattern
     * occurs in that sequence.
     */
    private final Map<Integer, List<Position>> sequencePositionsEntries;

    /**
     * A bitset that keeps just the sequences where the pattern appears. It is
     * the bitset representation of the key set of
     * {@link #sequencePositionsEntries}.
     */
    BitSet sequences;

    /**
     * Map with the original size of all sequences, keyed by sequence id. It is
     * static, hence shared by every instance of this class.
     */
    private static Map<Integer, Integer> originalSizeOfSequences = new HashMap<Integer, Integer>();

    /**
     * Value that counts the total of elements that appear after the last item
     * appearance of the pattern that this IdList is referring to.
     */
    private int totalElementsAfterPrefixes = 0;

    /**
     * The standard constructor. It creates an empty IdList.
     */
    public IDListStandard_Map() {
        this.sequencePositionsEntries = new HashMap<Integer, List<Position>>();
        // The bitset has to exist from the very beginning: addAppearance,
        // addAppearancesInSequence, getSupport, clear and setAppearingIn all
        // dereference it.
        this.sequences = new BitSet();
    }

    /**
     * It creates an IdList from a map of &lt;Integer, List&lt;Position&gt;&gt;.
     * The map is used directly (it is not copied), and every key of the map is
     * marked in the sequence bitset so that {@link #getSupport()} agrees with
     * the content of the map.
     *
     * @param sequencePositionsEntries map from sequence id to the positions
     * of the pattern in that sequence
     */
    public IDListStandard_Map(Map<Integer, List<Position>> sequencePositionsEntries) {
        this.sequencePositionsEntries = sequencePositionsEntries;
        this.sequences = new BitSet(sequencePositionsEntries.size());
        for (Integer sid : sequencePositionsEntries.keySet()) {
            this.sequences.set(sid);
        }
    }

    /**
     * Internal constructor used when the caller has already built the bitset
     * that matches the key set of the given map.
     *
     * @param sequencePositionsEntries map from sequence id to positions
     * @param sequences bitset with one bit set per key of the map
     */
    private IDListStandard_Map(Map<Integer, List<Position>> sequencePositionsEntries, BitSet sequences) {
        this.sequencePositionsEntries = sequencePositionsEntries;
        this.sequences = sequences;
    }

    /**
     * It returns the intersection IdList that results from the current object
     * and the IdList given as an argument.
     *
     * @param idList IdList with which we join the current IdList. It must be
     * an instance of IDListStandard_Map.
     * @param equals Flag indicating if we want an intersection for the equal
     * relation or, if it is false, for the after relation.
     * @param minSupport Minimum support. It is not used by this
     * implementation.
     * @return the intersection
     */
    @Override
    public IDList join(IDList idList, boolean equals, int minSupport) {
        // Cast the argument IdList and get its map of entries
        IDListStandard_Map other = (IDListStandard_Map) idList;
        Map<Integer, List<Position>> otherEntries = other.getSequencePositionsEntries();

        // Result map, result bitset and accumulator filled by the operations
        Map<Integer, List<Position>> intersection = new HashMap<Integer, List<Position>>(otherEntries.size());
        BitSet newSequences = new BitSet(idList.getSupport());
        int[] newTotalElementsAfterPrefixes = new int[1];

        // For each entry (sequence) of the given IdList
        for (Map.Entry<Integer, List<Position>> entry : otherEntries.entrySet()) {
            Integer sid = entry.getKey();
            List<Position> otherPositions = entry.getValue();
            List<Position> joined;
            if (equals) {
                joined = equalOperation(sid, otherPositions, newTotalElementsAfterPrefixes);
            } else {
                joined = laterOperation(sid, otherPositions, newTotalElementsAfterPrefixes);
            }
            // If there is any result for this sequence, we keep it
            if (joined != null) {
                intersection.put(sid, joined);
                newSequences.set(sid);
            }
        }

        // The bitset was built alongside the map, so hand both over directly
        IDListStandard_Map output = new IDListStandard_Map(intersection, newSequences);
        output.setTotalElementsAfterPrefixes(newTotalElementsAfterPrefixes[0]);
        return output;
    }

    /**
     * It gets the map that codes the appearances of the pattern in this
     * IdList. The internal map itself is returned, not a copy.
     *
     * @return the map
     */
    public Map<Integer, List<Position>> getSequencePositionsEntries() {
        return sequencePositionsEntries;
    }

    /**
     * It executes a join operation under the after relation for two sets of
     * appearances that correspond to the same sequence in two different
     * patterns. The result contains every position of the parameter list
     * starting from the first one whose itemset index is greater than the
     * itemset index of the first position of the current IdList.
     *
     * @param sid Sequence identifier of the sequence being joined
     * @param positionAppearancesInSequence Positions of the parameter IdList
     * in that sequence
     * @param dif Single-cell accumulator. The original size of the sequence
     * minus the item index of the first kept position is added to dif[0].
     * @return The new entry for the new IdList, or null if there is none
     */
    private List<Position> laterOperation(Integer sid, List<Position> positionAppearancesInSequence, int[] dif) {
        // We get the positions for the same sequence in the current IdList
        List<Position> myPositions = sequencePositionsEntries.get(sid);
        // If there is not any occurrence we end the join operation
        if (myPositions == null || myPositions.isEmpty()) {
            return null;
        }

        // Itemset index of our first appearance; it does not change in the loop
        int firstOwnItemset = myPositions.get(0).getItemsetIndex();
        int total = positionAppearancesInSequence.size();

        // Find the first position of the parameter list that occurs after it
        int index = -1;
        for (int i = 0; i < total; i++) {
            int eid = positionAppearancesInSequence.get(i).getItemsetIndex();
            if (firstOwnItemset < eid) {
                index = i;
                break;
            }
        }
        if (index < 0) {
            return null;
        }

        // Only the first kept position contributes to the accumulator
        Position first = positionAppearancesInSequence.get(index);
        dif[0] += (originalSizeOfSequences.get(sid) - first.getItemIndex());

        // Keep that position and all the following ones
        return new ArrayList<Position>(positionAppearancesInSequence.subList(index, total));
    }

    /**
     * It executes a join operation under the equal relation for two sets of
     * appearances that correspond to the same sequence in two different
     * patterns. For every pair of positions with the same itemset index, the
     * one with the greater item index is kept (the one from the searched list
     * when both item indexes are equal).
     *
     * @param sid Sequence identifier of the sequence being joined
     * @param positionItemsAppearancesInSequence Positions of the parameter
     * IdList in that sequence
     * @param dif Single-cell accumulator. For every kept position, the
     * original size of the sequence minus the item index of that position is
     * added to dif[0].
     * @return The new entry for the new IdList, or null if there is none
     */
    private List<Position> equalOperation(Integer sid, List<Position> positionItemsAppearancesInSequence, int[] dif) {
        // We get the positions for the same sequence in the current IdList
        List<Position> myPositions = sequencePositionsEntries.get(sid);
        // If there is not any occurrence we end the join operation
        if (myPositions == null || myPositions.isEmpty()) {
            return null;
        }

        // We explore the smaller list and we search in the greater one
        List<Position> listToExplore;
        List<Position> listToSearch;
        if (myPositions.size() <= positionItemsAppearancesInSequence.size()) {
            listToExplore = myPositions;
            listToSearch = positionItemsAppearancesInSequence;
        } else {
            listToExplore = positionItemsAppearancesInSequence;
            listToSearch = myPositions;
        }

        List<Position> result = new ArrayList<Position>();
        int beginningIndex = 0;
        // For each position in the list to explore
        for (Position explored : listToExplore) {
            // Scan the list to search from the position after the last match
            for (int i = beginningIndex; i < listToSearch.size(); i++) {
                Position candidate = listToSearch.get(i);
                int comparison = candidate.getItemsetIndex().compareTo(explored.getItemsetIndex());
                if (comparison < 0) {
                    // The candidate occurs earlier; keep scanning
                    continue;
                }
                if (comparison == 0) {
                    // Same itemset: keep the position with the later item
                    Position kept;
                    if (explored.getItemIndex() > candidate.getItemIndex()) {
                        kept = explored;
                    } else {
                        kept = candidate;
                    }
                    result.add(kept);
                    dif[0] += (originalSizeOfSequences.get(sid) - kept.getItemIndex());
                    beginningIndex = i + 1;
                }
                // The candidate is equal or later. The scan for this explored
                // position stops here, relying on the lists being sorted.
                break;
            }
        }

        if (result.isEmpty()) {
            return null;
        }
        return result;
    }

    /**
     * It returns the number of sequences where the pattern appears, i.e. the
     * number of bits set in the sequence bitset.
     *
     * @return the support
     */
    @Override
    public int getSupport() {
        return sequences.cardinality();
    }

    /**
     * It adds an appearance for the sequence and the position given as
     * parameters in the current IdList. Nothing changes if the position is
     * already registered for that sequence.
     *
     * @param sequence Sequence identifier where the appearance occurs
     * @param positionItem Position where the appearance occurs
     */
    public void addAppearance(Integer sequence, Position positionItem) {
        List<Position> eids = sequencePositionsEntries.get(sequence);
        if (eids == null) {
            // First appearance in this sequence: register a new list
            eids = new ArrayList<Position>();
            sequencePositionsEntries.put(sequence, eids);
        } else if (eids.contains(positionItem)) {
            return;
        }
        eids.add(positionItem);
        sequences.set(sequence);
    }

    /**
     * It registers the list of positions given as parameter for the given
     * sequence. If the sequence already has a list of positions, that
     * existing list is kept and the given one is ignored. In both cases the
     * sequence is marked in the sequence bitset.
     *
     * @param sid Sequence identifier where the appearances occur
     * @param itemsets Positions where the appearances occur
     */
    public void addAppearancesInSequence(Integer sid, List<Position> itemsets) {
        if (sequencePositionsEntries.get(sid) == null) {
            sequencePositionsEntries.put(sid, itemsets);
        }
        sequences.set(sid);
    }

    /**
     * Get a string representation of this IdList: one line per sequence, with
     * the sequence id followed by the comma-separated itemset indexes of its
     * positions between braces.
     *
     * @return the string representation
     */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder();
        for (Map.Entry<Integer, List<Position>> entry : sequencePositionsEntries.entrySet()) {
            result.append("\t").append(entry.getKey()).append(" {");
            List<Position> eids = entry.getValue();
            for (Position position : eids) {
                result.append(position.getItemsetIndex()).append(",");
            }
            // Drop the trailing comma only if one was written; otherwise the
            // opening brace would be removed for an empty list.
            if (!eids.isEmpty()) {
                result.deleteCharAt(result.length() - 1);
            }
            result.append("}\n");
        }
        return result.toString();
    }

    /**
     * It sets in the Trie object, given as parameter, a copy of the bitset of
     * sequence identifiers where the pattern associated with the IdList
     * appears.
     *
     * @param trie the trie to update
     */
    @Override
    public void setAppearingIn(Trie trie) {
        trie.setAppearingIn((BitSet) sequences.clone());
    }

    /**
     * It clears the attributes of this IdList: the map of appearances and the
     * sequence bitset.
     */
    @Override
    public void clear() {
        sequencePositionsEntries.clear();
        sequences.clear();
    }

    /**
     * It clears the shared map of original sequence sizes and then drops the
     * reference to it. The map that is cleared is the very object last given
     * to {@link #SetOriginalSequenceLengths(Map)}. After this call, that
     * method has to be called again before any join is executed.
     */
    public static void sclear() {
        if (originalSizeOfSequences != null) {
            originalSizeOfSequences.clear();
            originalSizeOfSequences = null;
        }
    }

    /**
     * It returns the map of appearances of this IdList. The internal map
     * itself is returned, not a copy.
     *
     * @return the map from sequence id to positions
     */
    @Override
    public Map<Integer, List<Position>> appearingInMap() {
        return sequencePositionsEntries;
    }

    /**
     * It returns the number of elements that appear after each appearance of
     * the pattern associated with the IdList.
     *
     * @return the number of elements
     */
    @Override
    public int getTotalElementsAfterPrefixes() {
        return totalElementsAfterPrefixes;
    }

    /**
     * It sets the number of elements that appear after each appearance of the
     * pattern associated with the IdList.
     *
     * @param i the number of elements
     */
    @Override
    public void setTotalElementsAfterPrefixes(int i) {
        this.totalElementsAfterPrefixes = i;
    }

    /**
     * It sets the original lengths of the database sequences. The map is
     * stored in a static field, so it is shared by every instance of this
     * class, and it is stored by reference (not copied).
     *
     * @param map map from sequence id to the original size of that sequence
     */
    @Override
    public void SetOriginalSequenceLengths(Map<Integer, Integer> map) {
        originalSizeOfSequences = map;
    }

    /**
     * It gives to a pattern a copy of the bitset of sequences where the IdList
     * is active.
     *
     * @param pattern the pattern
     */
    @Override
    public void setAppearingIn(Pattern pattern) {
        pattern.setAppearingIn((BitSet) sequences.clone());
    }
}