package ca.pfv.spmf.algorithms.sequentialpatterns.lapin; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Set; /*** * This is an implementation of an SE position list used by the LAPIN-SPAM algorithm, to represent the * positions where some items appear in a sequence. * * The LAPIN-SPAM algorithm was originally described in this paper: * * Zhenglu Yang and Masaru Kitsuregawa. LAPIN-SPAM: An improved algorithm for mining sequential pattern * In Proc. of Int'l Special Workshop on Databases For Next Generation Researchers (SWOD'05) * in conjunction with ICDE'05, pp. 8-11, Tokyo, Japan, Apr. 2005. * * Copyright (c) 2008-2013 Philippe Fournier-Viger * * This file is part of the SPMF DATA MINING SOFTWARE * (http://www.philippe-fournier-viger.com/spmf). * * SPMF is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * SPMF is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with SPMF. If not, see <http://www.gnu.org/licenses/>. 
/**
 * SE position list used by the LAPIN-SPAM algorithm. For a fixed set of items
 * it stores, per item, the positions that were registered for that item.
 * The items are kept in a sorted array so that the position list of an item
 * can be located with a binary search.
 */
public class SEPositionList {

	/**
	 * The items, sorted in ascending order by the constructor.
	 * NOTE(review): kept package-private and non-final because other classes of
	 * this package may access it directly - confirm before narrowing.
	 */
	int[] listItems = null;

	/**
	 * The position lists; listPositions[i] belongs to the item listItems[i].
	 * NOTE(review): kept package-private and non-final for the same reason as
	 * listItems.
	 */
	List<Short>[] listPositions = null;

	/**
	 * Constructor. Creates one empty position list per item of the given set.
	 *
	 * @param itemsAlreadySeen the set of items for which positions will be
	 *        registered in this list
	 */
	public SEPositionList(Set<Integer> itemsAlreadySeen) {
		int size = itemsAlreadySeen.size();

		// Generic array creation is unchecked; this is safe because the array
		// is only ever filled with ArrayList<Short> instances below.
		@SuppressWarnings("unchecked")
		List<Short>[] positions = new List[size];

		int[] items = new int[size];
		int i = 0;
		for (int item : itemsAlreadySeen) {
			items[i] = item;
			positions[i] = new ArrayList<Short>();
			i++;
		}

		// Sort the items in ascending order so that a binary search can be
		// used later (as described in the LAPIN paper). All position lists are
		// still empty at this point, so they do not need to be reordered.
		Arrays.sort(items);

		listItems = items;
		listPositions = positions;
	}

	/**
	 * Add a position of an item to this position list.
	 *
	 * @param item the item id; must be one of the items given to the constructor
	 * @param position the position (a short indicating in which itemset the
	 *        item appears, e.g. 0 for the first itemset)
	 * @throws NullPointerException if item is null
	 * @throws IllegalArgumentException if the item is not part of this list
	 */
	public void register(Integer item, short position) {
		if (item == null) {
			throw new NullPointerException("item must not be null");
		}
		int index = Arrays.binarySearch(listItems, item.intValue());
		// binarySearch returns a negative value when the key is absent; using
		// it as an array index would fail with an unhelpful exception.
		if (index < 0) {
			throw new IllegalArgumentException(
					"Item " + item + " is not part of this position list");
		}
		listPositions[index].add(position);
	}

	/**
	 * Get a string representation of this SE position list: one line per item
	 * (in ascending item order) followed by its positions.
	 *
	 * @return the string representation
	 */
	@Override
	public String toString() {
		StringBuilder buffer = new StringBuilder();
		for (int i = 0; i < listItems.length; i++) {
			buffer.append(" position list of item: ");
			buffer.append(listItems[i]);
			buffer.append("  is: ");
			for (Short pos : listPositions[i]) {
				buffer.append(pos);
				buffer.append(" ");
			}
			buffer.append("\n");
		}
		return buffer.toString();
	}

	/**
	 * Get the position list of an item.
	 *
	 * @param item the item
	 * @return the position list as a List of Shorts, or null if the item is
	 *         not part of this position list
	 */
	public List<Short> getListForItem(int item) {
		int index = Arrays.binarySearch(listItems, item);
		// A negative index means that the item does not appear in the list.
		if (index < 0) {
			return null;
		}
		return listPositions[index];
	}
}