package ca.pfv.spmf.algorithms.sequentialpatterns.clospan_AGP.items.creators; import java.util.ArrayList; import java.util.BitSet; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import ca.pfv.spmf.algorithms.sequentialpatterns.clospan_AGP.items.Item; import ca.pfv.spmf.algorithms.sequentialpatterns.clospan_AGP.items.Itemset; import ca.pfv.spmf.algorithms.sequentialpatterns.clospan_AGP.items.Pair; import ca.pfv.spmf.algorithms.sequentialpatterns.clospan_AGP.items.PseudoSequence; import ca.pfv.spmf.algorithms.sequentialpatterns.clospan_AGP.items.Sequence; import ca.pfv.spmf.algorithms.sequentialpatterns.clospan_AGP.items.abstractions.Abstraction_Generic; import ca.pfv.spmf.algorithms.sequentialpatterns.clospan_AGP.items.abstractions.Abstraction_Qualitative; import ca.pfv.spmf.algorithms.sequentialpatterns.clospan_AGP.items.abstractions.ItemAbstractionPair; import ca.pfv.spmf.algorithms.sequentialpatterns.clospan_AGP.items.patterns.Pattern; /** * This class is the implementation of a creator of a qualitative abstraction. * * Copyright Antonio Gomariz PeƱalver 2013 * * This file is part of the SPMF DATA MINING SOFTWARE * (http://www.philippe-fournier-viger.com/spmf). * * SPMF is free software: you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * SPMF is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * SPMF. If not, see <http://www.gnu.org/licenses/>. * * @author agomariz */ public class AbstractionCreator_Qualitative extends AbstractionCreator { /** * Static reference to make this class singleton */ private static AbstractionCreator_Qualitative instance = null; private AbstractionCreator_Qualitative() { } /** * Get the static reference of this singleton class * * @return the static instance */ public static AbstractionCreator_Qualitative getInstance() { if (instance == null) { instance = new AbstractionCreator_Qualitative(); } return instance; } /** * It creates a default abstraction. The abstraction is established to false * @return the created abstraction */ @Override public Abstraction_Generic CreateDefaultAbstraction() { return Abstraction_Qualitative.crear(false); } /** * It creates a relation with the given parameter. * @param equalRelation The boolean indicatin if the item has an equal * relation with the previous item in the pattern * @return the created relation */ public Abstraction_Generic createAbstraction(boolean equalRelation) { return Abstraction_Qualitative.crear(equalRelation); } /** * It adds a Pair object to one list when we keep the sequences counted for * that pair. If the pair has not been previously kept, we keep the sequenceID * @param pairMap a map of Pair object * @param alreadyCountedForSequenceID the set of sequence IDs that have been already counted * @param id an ID * @param item the item * @param postfix indicates if it is the case of a postfix (an itemset cut at left) */ private void addPair(Map<Pair, Pair> pairMap, Set<Pair> alreadyCountedForSequenceID, int id, Item item, boolean postfix) { /* * We create a new Pair object from the given item and the postfix flag */ Pair pair = new Pair(postfix, ItemAbstractionPairCreator.getInstance().getItemAbstractionPair(item, createAbstraction(postfix))); //We obtain the pair that was previously managed Pair oldPair = pairMap.get(pair); //And if this sequence was not already used for this pair if (alreadyCountedForSequenceID.add(pair)) { //we keep the new pair if if did not appear in the map if (oldPair == null) { pairMap.put(pair, pair); } else { pair = oldPair; } // we keep the sequence id pair.getSequencesID().set(id); } } /** * Method to find all frequent items in a context (database). * This is for k> 1. * @param sequences the list of pseudosequences from the database * @return a set of pair containing the items and their support */ @Override public Set<Pair> findAllFrequentPairs(List<PseudoSequence> sequences) { // we will scan the database and store the cumulative support of each pair in a map. Map<Pair, Pair> pairMap = new HashMap<Pair, Pair>(); Set<Pair> alreadyCountedForSequenceID = new HashSet<Pair>(); for (PseudoSequence sequence : sequences) { // if the sequence does not have the same id, we clear the map. alreadyCountedForSequenceID.clear(); loop1: for (int k = 0; k < sequence.numberOfProjectionsIncluded(); k++) { for (int i = 0; i < sequence.size(k); i++) { //If we are after the first projection and after the first itemset of the pseudosequence if (k > 0 && i > 0) { //we continue to the next projection continue loop1; } //we get the original itemset Itemset itemset = sequence.getItemset(i, k); //We obtain the beginning of that itemset for our projection int beginning = sequence.getBeginningOfItemset(k, i); /*And for each item from the beginning we add a new Pair in *order to find the frequent items in the projection* */ for (int j = beginning; j < itemset.size(); j++) { Item item = itemset.get(j); boolean postfix = sequence.isPostfix(k, i); addPair(pairMap, alreadyCountedForSequenceID, sequence.getId(), item, postfix); } } } } //We sort the set of keys ArrayList<Pair> sortedSet = new ArrayList<Pair>(pairMap.keySet()); Collections.sort(sortedSet); Set s=new HashSet<Pair>(); s.addAll(sortedSet); return s; } /** * Convert a Map<Item, BitSet> to Map<Item, Set<Abstraction_Generic>>. * @param sequence the sequence * @param frequentItems a map of frequent item and their corresponding bitsets. * @return the resulting map */ @Override public Map<Item, Set<Abstraction_Generic>> createAbstractions(Sequence sequence, Map<Item, BitSet> frequentItems) { return new HashMap<Item, Set<Abstraction_Generic>>(); } /** * It creates an abstraction from a prefix and it is the same abstraction that is received as parameter * @param prefix a prefix * @param abstraccion the abstraction * @return the abstraction */ @Override public Abstraction_Generic createAbstractionFromAPrefix(Pattern prefix, Abstraction_Generic abstraccion) { return abstraccion; } /** * Method that check if for the two patterns given as parameters, the * shortest one is a subpattern of the longest one * @param shorter The pattern which we check if is a subpattern of another * longer than it * @param larger Pattern which we want to check if another pattern is * subpattern of itself * @param index index that indicates which position we have to take into account * @param positions List of positions of the appearances of the elements of * the shorter pattern in the longer one * @return true if the condition is met */ @Override public boolean isSubpattern(Pattern shorter, Pattern larger, int index, List<Integer> positions) { //We get the pair indicated by index ItemAbstractionPair pair = shorter.getIthElement(index); Item itemPair = pair.getItem(); Abstraction_Generic absPair = pair.getAbstraction(); //And we also get the abstraction that was in the index-1 position Abstraction_Generic previousAbs = index > 0 ? shorter.getIthElement(index - 1).getAbstraction() : null; //Flag in order to cancel the search boolean cancelled = false; Integer pos = null; /* While the item index pointed out by the position is less than the * size of the largest pattern */ while (positions.get(index) < larger.size()) { /* * We search for the item of the shorter pattern pointed by index in * the longer one */ if (index == 0) { pos = searchForFirstAppearance(larger, positions.get(index), itemPair); } else { pos = findItemPositionInPattern(larger, itemPair, absPair, previousAbs, positions.get(index), positions.get(index - 1)); } //If we found any position if (pos != null) { //We set it in the array of positions positions.set(index, pos); //if we are not in the last element of the shorter pattern if (index + 1 < shorter.size()) { //We create a new position that is just one position after Integer newPos = increasePosition(positions.get(index)); //And we initialize the next index position to that new position positions.set(index + 1, newPos); /* And we make a recursive call to go on checking if shorter * is a subpattern of longer */ boolean output = isSubpattern(shorter, larger, index + 1, positions); //If we have found a matching between both patterns if (output) { //we return a true answer return true; } } else {//If, conversely, we are in the last element of the shorter pattern /* * We have already found a matching between shorter and * longer and we conclude that one is a subpattern of the * other one */ return true; } } else {//If conversely, we did not find any position for the current index //If we are not in the first element of the pattern if (index > 0) { /* We increase the itemset position of the previous index in * order to find other matching elements */ int newPos = increaseItemset(larger, positions.get(index - 1)); //And we update that position positions.set(index - 1, newPos); } //We set to to true the flag that indicates the end of the method cancelled = true; /* * And break the loop in order to go back and try to find other * matching elements that makes the subsequence possible */ break; } } /* If we are finish the loop and not by breaking it, and we are not looking * for the first element of the shorter pattern */ if (index > 0 && !cancelled) { /* We increase the itemset position of the previous index in order * to find other matching elements */ int newPos = increaseItemset(larger, positions.get(index - 1)); //And we update that position positions.set(index - 1, newPos); } /* * We return a false value, indicating that we cannot reach a matching * with the current choices of elements in the longer pattern */ return false; } /** * Method that search the first appearance of an item (given as parameter) * in a pattern, starting from a beginning index * @param p Pattern where we search for an item * @param beginning Index from which we start to search from the item * @param itemPair Item to search for * @return The item position where we found the item, or null if this * does not appear */ public Integer searchForFirstAppearance(Pattern p, Integer beginning, Item itemPair) { for (int i = beginning; i < p.size(); i++) { Item currentItem = p.getIthElement(i).getItem(); if (currentItem.equals(itemPair)) { return i; } } return null; } /** * It searches for a position in the pattern given as parameter where * an item, also given as a parameter, appears * @param p Pattern where we are going to search for * @param itemPair Item to search for * @param currentAbs Abstraction of the current element of the pattern * where the item appeared * @param previousAbs Astraction of the previous element of the pattern * where the item appeared * @param currentPosition Position for the current element * @param previousPosition Position of the previous element * @return the position */ public Integer findItemPositionInPattern(Pattern p, Item itemPair, Abstraction_Generic currentAbs, Abstraction_Generic previousAbs, Integer currentPosition, Integer previousPosition) { Abstraction_Qualitative abs = (Abstraction_Qualitative) currentAbs; Integer pos = null; //If the current Abstraction has an equal relation with the previous pair if (abs.hasEqualRelation()) { //We search for the item in the same itemset where the previous item appeared pos = searchForInTheSameItemset(p, itemPair, currentPosition); } else {//Otherwise //We start keeping the currentPosition int positionToSearchFor = currentPosition; /* * If the positions of both the current item and the previous one * are not in different itemsets */ if (!areInDifferentItemsets(p, previousPosition, currentPosition)) { /* * We increase the position until we get the first element that * appear in another itemset */ positionToSearchFor = increaseItemset(p, currentPosition); } pos = searchForFirstAppearance(p, positionToSearchFor, itemPair); } return pos; } /** * It increase the position of a given position * @param beginning the position to be increased * @return the position + 1 */ public Integer increasePosition(Integer beginning) { return beginning + 1; } /** * Increase a position to the first element position where it starts * another itemset * @param p Pattern in which we search for the beginning of another itemset * @param beginning Index from which we start to search for * @return The item index where a new Itemset starts */ public int increaseItemset(Pattern p, Integer beginning) { //For all the elements appearing after beginning index for (int i = beginning + 1; i < p.size(); i++) { ItemAbstractionPair currentPair = p.getIthElement(i); Abstraction_Qualitative qualitativeAbs = (Abstraction_Qualitative) currentPair.getAbstraction(); //If the relation is not an equal relation, then we have changed of itemset if (!qualitativeAbs.hasEqualRelation()) { //And return the index return i; } } /* * If we have got this point that means that we were in the last itemset * of the pattern and, therefore, we return the size of the pattern, * since there can not be any index bigger than this value */ return p.size(); } /** * Search for an item in the same itemset that the previous one appeared * @param pattern Pattern where we are goin to search for the item * @param itemPair Item to search for * @param beginning Index from which we are going to start to search for * @return the index where the item appears, or null if this index does not * exist */ private Integer searchForInTheSameItemset(Pattern pattern, Item itemPair, Integer beginning) { //From the beginning index and on for (int i = beginning; i < pattern.size(); i++) { ItemAbstractionPair currentPair = pattern.getIthElement(i); Abstraction_Qualitative qualitativeAbstraction = (Abstraction_Qualitative) currentPair.getAbstraction(); //If the item has not an equal relation if (!qualitativeAbstraction.hasEqualRelation()) { /* * We have finished without finding the item, since we have * already change of itemset */ return null; } else { /* * If, conversely, there is an equal relation, we check if this * item is equal to which we are searching for */ if (currentPair.getItem().equals(itemPair)) { //In that case we return the index position return i; } } } return null; } /** * Method that informs if for a pattern, two positions correspond * to a same itemset or not * @param pattern Pattern in which we check the two positions * @param p1 First position * @param p2 Second position * @return True if they are in different itemsets, False otherwise */ private boolean areInDifferentItemsets(Pattern pattern, Integer p1, Integer p2) { //For all the elements between positions p1 and p2 for (int i = p1+1; i <= p2 && i < pattern.size(); i++) { ItemAbstractionPair currentPair = pattern.getIthElement(i); Abstraction_Qualitative qualitativeAbs = (Abstraction_Qualitative) currentPair.getAbstraction(); /* * If the ith element does not have an equal relation, we conclude * that p1 and p2 are not in the same itemset and we can finish */ if(!qualitativeAbs.hasEqualRelation()) return true; } //If we get this point that means p1 and p2 are in the same itemset return false; } }