package ca.pfv.spmf.algorithms.sequentialpatterns.BIDE_and_prefixspan;
import java.util.List;
import ca.pfv.spmf.input.sequence_database_list_integers.Sequence;
/**
* This class represents a sequence from a projected database (as used in PrefixSpan).
* Since it is a projected sequence, it makes reference to the original sequence.
*
* This class is used by the PrefixSpan and BIDE+ algorithms.
*
* Copyright (c) 2008-2012 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SPMF is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
public class PseudoSequence {

    /** The corresponding sequence in the original database. */
    protected Sequence sequence;

    /** Index, in the original sequence, of the first itemset of this pseudo-sequence. */
    protected int firstItemset;

    /**
     * Index, inside the first itemset of this pseudo-sequence, of its first item
     * (non-zero when that itemset is cut at left).
     */
    protected int firstItem;

    /**
     * Default constructor. Leaves all fields at their default values.
     */
    protected PseudoSequence() {
    }

    /**
     * Get the original sequence corresponding to this projected sequence.
     * @return the original sequence
     */
    public Sequence getOriginalSequence() {
        return sequence;
    }

    /**
     * Create a pseudo-sequence from a sequence that is itself a pseudo-sequence.
     * @param sequence the pseudo-sequence to project further.
     * @param indexItemset the itemset where the new pseudo-sequence should start,
     *        relative to the given pseudo-sequence.
     * @param indexItem the item where the new pseudo-sequence should start,
     *        relative to the itemset at {@code indexItemset} of the given pseudo-sequence.
     */
    protected PseudoSequence(PseudoSequence sequence, int indexItemset, int indexItem) {
        // share the same underlying original sequence
        this.sequence = sequence.sequence;
        // translate the start position into coordinates of the original sequence
        this.firstItemset = indexItemset + sequence.firstItemset;
        if (this.firstItemset == sequence.firstItemset) {
            // still inside the (possibly cut) first itemset of the parent:
            // the parent's item offset must be added
            this.firstItem = indexItem + sequence.firstItem;
        } else {
            // any later itemset is complete in the parent, so the offset is absolute
            this.firstItem = indexItem;
        }
    }

    /**
     * Create a pseudo-sequence from a sequence that is an original sequence.
     * @param sequence the original sequence.
     * @param indexItemset the itemset where the pseudo-sequence should start in terms of the original sequence.
     * @param indexItem the item where the pseudo-sequence should start in terms of the original sequence.
     */
    protected PseudoSequence(Sequence sequence, int indexItemset, int indexItem) {
        this.sequence = sequence;
        this.firstItemset = indexItemset;
        this.firstItem = indexItem;
    }

    /**
     * Return the size of this pseudo-sequence in terms of itemsets.
     * @return the number of itemsets, or 0 if the only remaining itemset
     *         of the original sequence is empty.
     */
    protected int size() {
        // the size is the size of the original sequence minus
        // the position of the itemset where this pseudo-sequence starts
        int size = sequence.size() - firstItemset;
        // if the size is 1 and the only itemset is empty, return 0
        if (size == 1 && sequence.getItemsets().get(firstItemset).size() == 0) {
            return 0;
        }
        return size;
    }

    /**
     * Return the size in terms of items of an itemset at a given position.
     * @param index the position of the itemset in this pseudo-sequence
     * @return the number of items in that itemset
     */
    public int getSizeOfItemsetAt(int index) {
        // The itemset is looked up in the original sequence at position
        // index + firstItemset.
        int size = sequence.getItemsets().get(index + firstItemset).size();
        if (isFirstItemset(index)) {
            // the first itemset may be cut at left: skip the removed items
            size -= firstItem;
        }
        return size;
    }

    /**
     * Return true if this itemset is cut at left (a postfix).
     * @param indexItemset the position of the given itemset.
     * @return true if it is the first itemset and it is cut at left.
     */
    protected boolean isPostfix(int indexItemset) {
        return indexItemset == 0 && firstItem != 0;
    }

    /**
     * Check if an itemset is the first one of this pseudo-sequence.
     * @param index the position of an itemset
     * @return true if it is the first one.
     */
    protected boolean isFirstItemset(int index) {
        return index == 0;
    }

    /**
     * Check if an itemset is the last one of this pseudo-sequence.
     * @param index the position of an itemset
     * @return true if it is the last one.
     */
    protected boolean isLastItemset(int index) {
        return (index + firstItemset) == sequence.getItemsets().size() - 1;
    }

    /**
     * Get an item at a given position inside a given itemset.
     * @param indexItem the position of the item
     * @param indexItemset the position of the itemset
     * @return the item.
     */
    public Integer getItemAtInItemsetAt(int indexItem, int indexItemset) {
        if (isFirstItemset(indexItemset)) {
            // the first itemset may be cut at left, so shift by "firstItem"
            return getItemset(indexItemset).get(indexItem + firstItem);
        }
        return getItemset(indexItemset).get(indexItem);
    }

    /**
     * Get the itemset at a given position. The returned list is the itemset
     * as obtained from the original sequence (it is not cut at left).
     * @param index the position of the itemset in this pseudo-sequence
     * @return the itemset
     */
    public List<Integer> getItemset(int index) {
        return sequence.get(index + firstItemset);
    }

    /**
     * Get the sequence ID of this sequence.
     * @return the ID of the original sequence (integer)
     */
    protected int getId() {
        return sequence.getId();
    }

    /**
     * Print this pseudo-sequence to System.out.
     */
    public void print() {
        System.out.print(toString());
    }

    /**
     * Get a string representation of this sequence. Items are separated by
     * spaces, each itemset is terminated by " -1 " and the sequence by " -2 ".
     * Items of an itemset that is cut at left are followed by "*".
     * @return the string representation
     */
    @Override
    public String toString() {
        StringBuilder r = new StringBuilder();
        // sizes do not change while printing, so they are read once per loop
        final int itemsetCount = size();
        for (int i = 0; i < itemsetCount; i++) {
            final int itemCount = getSizeOfItemsetAt(i);
            final boolean postfix = isPostfix(i);
            for (int j = 0; j < itemCount; j++) {
                r.append(getItemAtInItemsetAt(j, i).toString());
                // if it is in a postfix, we add a "*" symbol beside the item
                if (postfix) {
                    r.append('*');
                }
                r.append(' ');
            }
            r.append(" -1 "); // end of an itemset
        }
        r.append(" -2 "); // end of the sequence
        return r.toString();
    }

    /**
     * Get the position of an item inside an itemset.
     * @param indexItemset the given itemset position
     * @param idItem the item that we want to search.
     * @return the position of the item or -1 if it is not found
     */
    protected int indexOfBis(int indexItemset, int idItem) {
        return findItem(getSizeOfItemsetAt(indexItemset), indexItemset, idItem);
    }

    /**
     * Get the position of an item inside an itemset, when the size of that
     * itemset has already been computed by the caller.
     * @param sizeOfItemsetAti the number of items to scan in the itemset
     *        (as returned by {@link #getSizeOfItemsetAt(int)} for {@code indexItemset})
     * @param indexItemset the given itemset position
     * @param idItem the item that we want to search.
     * @return the position of the item or -1 if it is not found
     */
    protected int indexOf(int sizeOfItemsetAti, int indexItemset, int idItem) {
        return findItem(sizeOfItemsetAti, indexItemset, idItem);
    }

    /**
     * Linear search shared by {@link #indexOfBis(int, int)} and
     * {@link #indexOf(int, int, int)}.
     */
    private int findItem(int itemCount, int indexItemset, int idItem) {
        // Every item is scanned: no ordering of the items inside an itemset
        // is assumed here, so there is no early exit on larger items.
        for (int i = 0; i < itemCount; i++) {
            // Integer is unboxed, so this compares values, not references
            if (getItemAtInItemsetAt(i, indexItemset) == idItem) {
                return i;
            }
        }
        return -1; // not found
    }

    /**
     * Two pseudo-sequences are equal if they refer to a sequence with the same
     * ID and start at the same itemset and item positions.
     * @param obj the object to compare with
     * @return true if {@code obj} is a pseudo-sequence equal to this one;
     *         false otherwise, including when {@code obj} is null or of another type.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        // instanceof is false for null, so this also guards against null
        if (!(obj instanceof PseudoSequence)) {
            return false;
        }
        PseudoSequence other = (PseudoSequence) obj;
        return other.getId() == getId()
                && firstItemset == other.firstItemset
                && firstItem == other.firstItem;
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: it combines the
     * sequence ID, the first itemset position and the first item position.
     * @return the hash code
     */
    @Override
    public int hashCode() {
        int result = getId();
        result = 31 * result + firstItemset;
        result = 31 * result + firstItem;
        return result;
    }
}