package ca.pfv.spmf.algorithms;
import java.util.Arrays;
import java.util.Comparator;
/* This file is copyright (c) 2008-2012 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* This class provides a set of basic methods that can be used with itemsets
* represented as arrays of integers.
* All the methods are static methods so that they can be used in any classes.
* @author Philippe Fournier-Viger
*
*/
public class ArraysAlgos {

    /**
     * Make a copy of an itemset but exclude a given item.
     * Every occurrence of the item is left out. If the item does not appear
     * in the itemset, a plain copy of the itemset is returned.
     *
     * @param itemset the itemset to copy
     * @param itemToRemove the item to exclude (must not be null)
     * @return a new array containing the items of "itemset", in the same order,
     *         without "itemToRemove"
     */
    public static int[] cloneItemSetMinusOneItem(int[] itemset, Integer itemToRemove) {
        // unbox once so that the loop compares primitive values
        final int excluded = itemToRemove;
        // worst case: nothing is removed, so the buffer has the size of the itemset
        int[] buffer = new int[itemset.length];
        int kept = 0;
        for (int current : itemset) {
            // copy the item except if it is the item that should be excluded
            if (current != excluded) {
                buffer[kept++] = current;
            }
        }
        // trim the buffer to the number of items that were actually kept
        return Arrays.copyOf(buffer, kept);
    }

    /**
     * Make a copy of an itemset but exclude a set of items.
     * Items of "itemsetToNotKeep" that do not appear in "itemset" are simply ignored.
     *
     * @param itemset the itemset to copy
     * @param itemsetToNotKeep the set of items to be excluded. It is searched with
     *        {@link Arrays#binarySearch(int[], int)}, so it must be sorted in ascending order.
     * @return a new array containing the items of "itemset", in the same order,
     *         that are not in "itemsetToNotKeep"
     */
    public static int[] cloneItemSetMinusAnItemset(int[] itemset, int[] itemsetToNotKeep) {
        // worst case: nothing is removed, so the buffer has the size of the itemset
        int[] buffer = new int[itemset.length];
        int kept = 0;
        for (int current : itemset) {
            // a negative result means that the item is not one of the excluded items
            if (Arrays.binarySearch(itemsetToNotKeep, current) < 0) {
                buffer[kept++] = current;
            }
        }
        // trim the buffer to the number of items that were actually kept
        return Arrays.copyOf(buffer, kept);
    }

    /**
     * This method checks if an itemset is the same as another itemset
     * except for the last item. It assumes that both itemsets have the same length
     * (only the first "itemset1.length - 1" positions are compared).
     *
     * @param itemset1 the first itemset
     * @param itemset2 the second itemset
     * @return true if they are the same except for the last item
     */
    public static boolean allTheSameExceptLastItem(int[] itemset1, int[] itemset2) {
        final int lastPosition = itemset1.length - 1;
        // compare item by item, ignoring the last position
        for (int i = 0; i < lastPosition; i++) {
            if (itemset1[i] != itemset2[i]) {
                return false;
            }
        }
        // all items before the last one are the same
        return true;
    }

    /**
     * Method to concatenate two arrays in a new array.
     *
     * @param prefix the first array
     * @param suffix the second array
     * @return a new array containing the items of "prefix" followed by the items of "suffix"
     */
    public static int[] concatenate(int[] prefix, int[] suffix) {
        int[] concatenation = new int[prefix.length + suffix.length];
        System.arraycopy(prefix, 0, concatenation, 0, prefix.length);
        System.arraycopy(suffix, 0, concatenation, prefix.length, suffix.length);
        return concatenation;
    }

    /**
     * This method performs the intersection of two sorted arrays of integers
     * and returns a new sorted array.
     *
     * @param array1 the first array (sorted in ascending order)
     * @param array2 the second array (sorted in ascending order)
     * @return a new sorted array containing the items that appear in both arrays
     */
    public static int[] intersectTwoSortedArrays(int[] array1, int[] array2) {
        // the intersection cannot be larger than the smallest of the two arrays
        int[] buffer = new int[Math.min(array1.length, array2.length)];
        int pos1 = 0;
        int pos2 = 0;
        int common = 0;
        // advance in the array having the smallest current item until one array is exhausted
        while (pos1 < array1.length && pos2 < array2.length) {
            if (array1[pos1] < array2[pos2]) {
                pos1++;
            } else if (array2[pos2] < array1[pos1]) {
                pos2++;
            } else {
                // same item in both arrays: keep it and advance in both arrays
                buffer[common++] = array1[pos1];
                pos1++;
                pos2++;
            }
        }
        // return only the part of the buffer that was filled
        return Arrays.copyOf(buffer, common);
    }

    /**
     * Check if an itemset contains another itemset.
     * It assumes that itemsets are sorted according to the lexical order.
     * Items are compared by value (not by object identity), so the result
     * is correct for any integer value.
     *
     * @param itemset1 the first itemset
     * @param itemset2 the second itemset
     * @return true if the first itemset contains the second itemset
     */
    public static boolean containsOrEquals(Integer itemset1[], Integer itemset2[]) {
        // for each item of the second itemset
        for (int i = 0; i < itemset2.length; i++) {
            // unbox so that "==" compares values rather than Integer references
            final int wanted = itemset2[i];
            boolean found = false;
            // search the item in the first itemset
            for (int j = 0; j < itemset1.length; j++) {
                final int candidate = itemset1[j];
                if (candidate == wanted) {
                    found = true;
                    break;
                }
                // because of the lexical order, the item cannot appear after a larger item
                if (candidate > wanted) {
                    return false;
                }
            }
            // an item of itemset2 does not appear in itemset1
            if (!found) {
                return false;
            }
        }
        // all items were found
        return true;
    }

    /**
     * This method checks if an item "item" is in the itemset "itemset".
     * It assumes that items in the itemset are sorted in lexical order and
     * that the largest item in the itemset is known.
     *
     * @param itemset an itemset
     * @param item the item
     * @param maxItemInArray the largest item in the itemset
     * @return true if the item appears in the itemset
     */
    public static boolean containsLEX(Integer itemset[], Integer item, int maxItemInArray) {
        // unbox once so that all comparisons are done on primitive values
        final int target = item;
        // an item larger than the largest item of the itemset cannot be in the itemset
        if (target > maxItemInArray) {
            return false;
        }
        for (Integer itemI : itemset) {
            final int current = itemI;
            if (current == target) {
                return true;
            }
            // because of the lexical order, the item cannot appear after a larger item
            if (current > target) {
                return false;
            }
        }
        // the searched item was not found
        return false;
    }

    /**
     * Method to compare two sorted lists of integers and see if they are the same,
     * while ignoring an item from the second list of integers.
     * This method is used by some Apriori algorithms.
     *
     * @param itemset1 the first itemset
     * @param itemsets2 the second itemset
     * @param posRemoved the position of an item that should be ignored in "itemsets2"
     *        to perform the comparison.
     * @return 0 if they are the same, 1 if "itemset1" is larger according to the
     *         lexical order, -1 if it is smaller.
     */
    public static int sameAs(int[] itemset1, int[] itemsets2, int posRemoved) {
        // position of the item of "itemsets2" that is currently compared
        int j = 0;
        for (int i = 0; i < itemset1.length; i++) {
            // skip the position that should be ignored
            if (j == posRemoved) {
                j++;
            }
            if (itemset1[i] == itemsets2[j]) {
                // same item: move on to the next item of "itemsets2"
                j++;
            } else if (itemset1[i] > itemsets2[j]) {
                // "itemset1" is larger according to the lexical order
                return 1;
            } else {
                // "itemset1" is smaller according to the lexical order
                return -1;
            }
        }
        return 0;
    }

    /**
     * Check if a sorted itemset is contained in another sorted itemset.
     * An empty itemset is considered to be included in any itemset.
     *
     * @param itemset1 the first itemset
     * @param itemset2 the second itemset
     * @return true if all the items of "itemset1" appear in "itemset2", otherwise false
     */
    public static boolean includedIn(int[] itemset1, int[] itemset2) {
        // the empty set is included in every set (and there is no item to look for)
        if (itemset1.length == 0) {
            return true;
        }
        // the current position of itemset1 that we want to find in itemset2
        int count = 0;
        for (int i = 0; i < itemset2.length; i++) {
            if (itemset2[i] == itemset1[count]) {
                // found: look for the next item of itemset1
                count++;
                // if all items have been found, itemset1 is included
                if (count == itemset1.length) {
                    return true;
                }
            }
        }
        // at least one item of itemset1 was not found
        return false;
    }

    /**
     * This method scans a lexically sorted itemset for the item "item".
     * It returns true if the item is found, and ALSO returns true as soon as an
     * item larger than "item" is met. It thus returns false only when every item
     * of the itemset is smaller than "item", that is when "item" is absent and
     * would be the largest item according to the lexical order if it was added.
     *
     * @param itemset an itemset sorted in lexical order
     * @param item the item
     * @return true if the itemset contains "item" or at least one item larger
     *         than "item", otherwise false
     */
    public static boolean containsLEXPlus(int[] itemset, int item) {
        for (int i = 0; i < itemset.length; i++) {
            // the item itself or a larger item: "item" would not be a new largest item
            if (itemset[i] >= item) {
                return true;
            }
        }
        // all items are smaller than "item"
        return false;
    }

    /**
     * This method checks if the item "item" is in the itemset.
     * It assumes that items in the itemset are sorted in lexical order.
     *
     * @param itemset an itemset
     * @param item the item
     * @return true if the item appears in the itemset, otherwise false
     */
    public static boolean containsLEX(int[] itemset, int item) {
        for (int i = 0; i < itemset.length; i++) {
            if (itemset[i] == item) {
                return true;
            }
            // because of the lexical order, the item cannot appear after a larger item
            if (itemset[i] > item) {
                return false;
            }
        }
        // the searched item was not found
        return false;
    }

    /**
     * Check if a sorted list of integers contains an integer.
     * This is the same search as {@link #containsLEX(int[], int)}.
     *
     * @param itemset the sorted list of integers
     * @param item the integer
     * @return true if the item appears in the list, false otherwise
     */
    public static boolean contains(int[] itemset, int item) {
        return containsLEX(itemset, item);
    }

    /** A Comparator for comparing two itemsets having the same size using the lexical order. */
    public static Comparator<int[]> comparatorItemsetSameSize = new Comparator<int[]>() {
        /**
         * Compare two itemsets and return -1, 0 or 1 if the first itemset
         * is respectively smaller, equal or larger than the second itemset
         * according to the lexical order.
         */
        @Override
        public int compare(int[] itemset1, int[] itemset2) {
            for (int i = 0; i < itemset1.length; i++) {
                if (itemset1[i] < itemset2[i]) {
                    // the first itemset is smaller
                    return -1;
                } else if (itemset2[i] < itemset1[i]) {
                    // the first itemset is larger
                    return 1;
                }
                // otherwise the items are equal, so the next position is compared
            }
            // both itemsets are equal
            return 0;
        }
    };
}