package ca.pfv.spmf.algorithms.sequentialpatterns.fournier2008_seqdim; /* This file is copyright (c) 2008-2013 Philippe Fournier-Viger * * This file is part of the SPMF DATA MINING SOFTWARE * (http://www.philippe-fournier-viger.com/spmf). * * SPMF is free software: you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * SPMF is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU General Public License for more details. * You should have received a copy of the GNU General Public License along with * SPMF. If not, see <http://www.gnu.org/licenses/>. */ import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Set; import ca.pfv.spmf.algorithms.sequentialpatterns.fournier2008_seqdim.multidimensionalsequentialpatterns.AlgoSeqDim; /** * Implementation of a sequence (a list of itemsets), which * represents a sequential pattern or a sequence of a sequence database, as used by SeqDim and * Fournier-Viger (2008) algorithms. * * @see AlgoSeqDim * @see AlgoFournierViger08 * @author Philippe Fournier-Viger */ public class Sequence{ /** the list of itemsets in that sequence */ private final List<Itemset> itemsets = new ArrayList<Itemset>(); /** the sequence ID */ private int id; /** List of IDS of all patterns that contains this one. */ private Set<Integer> sequencesID = null; /** * Constructor. * @param id a sequence ID. */ public Sequence(int id){ this.id = id; } /** * Get the support of this itemset as a percentage (double value). * @param databaseSize the number of sequences in the database. * @return the support as a string with five decimals */ public String getRelativeSupportFormated(int databaseSize) { // calculate the support double support = ((double)sequencesID.size()) / ((double) databaseSize); // format as a String with two decimals DecimalFormat format = new DecimalFormat(); format.setMinimumFractionDigits(0); format.setMaximumFractionDigits(5); // return the string return format.format(support); } /*** * Get the support of this sequential pattern as an integer value. * @return an integer. */ public int getAbsoluteSupport(){ return sequencesID.size(); } /** * Add an itemset to this sequence. * @param itemset An Itemset */ public void addItemset(Itemset itemset) { itemsets.add(itemset); } /** * Make a copy of this sequence * @return a new Sequence */ public Sequence cloneSequence(){ // create new sequence with same ID Sequence sequence = new Sequence(getId()); // for each itemset, make a copy and add it to the new sequence for(Itemset itemset : itemsets){ sequence.addItemset(itemset.cloneItemSet()); } // return the new sequence. return sequence; } /** * Print this sequence to System.out. */ public void print() { System.out.print(toString()); } /** * Return a string representation of this sequence. */ public String toString() { // create string buffer StringBuilder r = new StringBuilder(""); // for each itemset for(Itemset itemset : itemsets){ // append timestamp r.append("{t="); r.append(itemset.getTimestamp()); r.append(", "); // append each item from this itemset for(ItemSimple item : itemset.getItems()){ String string = item.toString(); r.append(string); r.append(' '); } r.append('}'); } // print the list of IDs of sequences that contains this pattern. if(getSequencesID() != null){ r.append(" Sequence ID: "); for(Integer id : getSequencesID()){ r.append(id); r.append(' '); } } // return the string return r.append(" ").toString(); } /** * Return an abbreviated string representation of this sequence. */ public String toStringShort() { // create string buffer StringBuilder r = new StringBuilder(""); // for each itemset for(Itemset itemset : itemsets){ // appennd its timestamp r.append("{t="); r.append(itemset.getTimestamp()); r.append(", "); // append all items in that itemset for(ItemSimple item : itemset.getItems()){ String string = item.toString(); r.append(string); r.append(' '); } r.append('}'); } // return the string return r.append(" ").toString(); } /** * Get a String representation of the itemsets in this Sequence. * @return a string */ public String itemsetsToString() { // create a StringBuilder StringBuilder r = new StringBuilder(""); // for each itemset in that sequence for(Itemset itemset : itemsets){ // append timestamp r.append("{t="); r.append(itemset.getTimestamp()); r.append(", "); // append each item for(ItemSimple item : itemset.getItems()){ String string = item.toString(); r.append(string); r.append(' '); } r.append('}'); } // return the string return r.append(" ").toString(); } /** * Get the ID of this sequence. * @return an integer. */ public int getId() { return id; } /** * Get the list of itemsets in this sequence. * @return the list of Itemset objects. */ public List<Itemset> getItemsets() { return itemsets; } /** * Get the itemset at a given position in this sequence. * @param index the position * @return the Itemset */ public Itemset get(int index) { return itemsets.get(index); } /** * Get the ith item in this sequence. * @param i the position i * @return an Item. */ public ItemSimple getIthItem(int i) { // for each itemset for(int j=0; j< itemsets.size(); j++){ // if i is smaller than the current itemset size if(i < itemsets.get(j).size()){ // return the item at position i return itemsets.get(j).get(i); } // otherwise subtract the current itemset size from i i = i- itemsets.get(j).size(); } // if there is no i-th item, then return null. return null; } /** * Get the size of this sequence (number of itemsets). * @return the size (an integer). */ public int size(){ return itemsets.size(); } /** * Get the sequence IDs containing this seq. pattern * @return a Set of Integer */ public Set<Integer> getSequencesID() { return sequencesID; } /** * Set the sequence IDs containing this seq. pattern. * @param sequencesID a Set of Integer. */ public void setSequencesID(Set<Integer> sequencesID) { this.sequencesID = sequencesID; } /** * Return the sum of the size of all itemsets of this sequence. */ public int getItemOccurencesTotalCount(){ // initialize counter int count =0; // for each itemset in that sequence for(Itemset itemset : itemsets){ // add the size of the itemset count += itemset.size(); } // return thte count return count; } /** * Get the time length in terms of time units of this sequential pattern * (is useful if timestamps are used, otherwise it is 0) * @return the time length as a long. */ public long getTimeLength() { return itemsets.get(itemsets.size()-1).getTimestamp() - itemsets.get(0).getTimestamp(); } /** * Check if this sequence contains another given sequence. * @param sequence2 a given sequence * @return Return 1 if this sequence STRICTLY contains sequence * Return 2 if this sequence is exactly the same as sequence2 * Return 0 if this sequence does not contains sequence2. */ public int strictlyContains(Sequence sequence2) { // call another recursive method to check if this sequence is contained int retour = strictlyContainsHelper(sequence2, 0, 0, 0, 0); // if it is contained if(retour ==2){ // if the size is the same, they are equal, otherwise it is strictly contained return (size() == sequence2.size()) ? 2 : 1; } // return the value found by the other method return retour; } /** * Helper method for checking if this sequence is contained in another given sequence * @param sequence2 the given sequence * @param index itemset position indicating where the comparison should * start for this sequence * @param index2 itemset position indicating where the comparison should * start for the given sequence * @param previousTimeStamp previous timestamp in this sequence * @param previousTimeStamp2 previous timestamp in the given sequence * @return Return 1 if this sequence STRICTLY contains sequence * Return 2 if this sequence is exactly the same as sequence2 * Return 0 if this sequence does not contains sequence2. */ private int strictlyContainsHelper(Sequence sequence2, int index, int index2, long previousTimeStamp, long previousTimeStamp2) { if(index == size()){ // then this sequence does not contain the given sequence return 0; } // if the size of this sequence minus the current position is smaller // than what is remaining to be compared in the given sequence, // then it cannot be conained so return 0. if(size() - index < sequence2.size() - index2){ return 0; } // int returnValue = 0; // for each itemset in this sequence starting from the index position for(int i=index; i <size(); i++){ // calculate timestamp interval between itemset i and previous itemset // for this sequence long interval1 = get(i).getTimestamp() - previousTimeStamp; // do the same thing for the given sequence long interval2 = sequence2.get(index2).getTimestamp() - previousTimeStamp2; // if itemset at position i contains all items from the itemset at position // index in the given sequence and that they have the same time intervals if(get(i).getItems().containsAll(sequence2.get(index2).getItems()) && interval1 == interval2){ // check if the two itemsets have the same size boolean sameSize = get(i).getItems().size() == sequence2.get(index2).size(); // If we have found here the last itemset of the given sequence if(sequence2.size()-1 == index2){ // if this last itemset has the same size if(sameSize){ // then it is strictly contained and we return 2 return 2; } // otherwise strictly contains (1) returnValue = 1; } else{ // if it was not the last itemset, // then the method is called recursively to try to // find the next itemset of the given sequence int resultat = strictlyContainsHelper(sequence2, i+1, index2+1, get(i).getTimestamp(), sequence2.get(index2).getTimestamp()); // if the result from this recursive call is 2 and // they have the same size, they are equal so return 2 if(resultat == 2 && sameSize){ return 2; }else if (resultat != 0){ // otherwise, if !-0, then strictly contains and return -1. returnValue = 1; } } } } // return the value calculated. return returnValue; } /** * Make a copy of a sequence minus a given item * @param mapSequenceID a map indicating for each item (key) the IDs of sequences * containing the item (value). * @param relativeMinSup a minimum support value (double) * @return a new Sequence */ public Sequence cloneSequenceMinusItems(Map<ItemSimple, Set<Integer>> mapSequenceID, double relativeMinSup) { // create sequence Sequence sequence = new Sequence(getId()); // for each itemset for(Itemset itemset : itemsets){ // make a copy without the new item Itemset newItemset = itemset.cloneItemSetMinusItems(mapSequenceID, relativeMinSup); // if the resulting itemset size is not 0 if(newItemset.size() !=0){ // add it to the sequence sequence.addItemset(newItemset); } } // return the new sequence return sequence; } /** * Set the ID of this squence. * @param id2 a new ID. */ public void setID(int id2) { id = id2; } }