package ca.pfv.spmf.algorithms.sequentialpatterns.gsp_AGP;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import ca.pfv.spmf.algorithms.sequentialpatterns.gsp_AGP.items.CandidateInSequenceFinder;
import ca.pfv.spmf.algorithms.sequentialpatterns.gsp_AGP.items.Item;
import ca.pfv.spmf.algorithms.sequentialpatterns.gsp_AGP.items.Sequence;
import ca.pfv.spmf.algorithms.sequentialpatterns.gsp_AGP.items.SequenceDatabase;
import ca.pfv.spmf.algorithms.sequentialpatterns.gsp_AGP.items.abstractions.ItemAbstractionPair;
import ca.pfv.spmf.algorithms.sequentialpatterns.gsp_AGP.items.creators.AbstractionCreator;
import ca.pfv.spmf.algorithms.sequentialpatterns.gsp_AGP.items.patterns.Pattern;
/**
* This is an implementation of the support counting phase of the GSP algorithm.
* This class implements one of the two steps that are continuously repeated by the GSP main loop.
* Here, from a set of (k+1)-sequence candidates, we check which of those sequences are actually frequent and which can be ruled out.
*
* Copyright Antonio Gomariz Peñalver 2013
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SPMF is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SPMF. If not, see <http://www.gnu.org/licenses/>.
*
* @author agomariz
*/
class SupportCounting {

    /**
     * Sequence database in which every candidate is searched.
     */
    private final SequenceDatabase database;

    /**
     * Creator handed to each finder and used to run the candidate-in-sequence test.
     */
    private final AbstractionCreator abstractionCreator;

    /**
     * Frequent patterns of the most recently counted level, grouped by the item
     * of their first element. It is a tool for the next candidate generation step.
     */
    private final Map<Item, Set<Pattern>> indexationMap = new HashMap<Item, Set<Pattern>>();

    /**
     * Creates a support counter bound to a database. This is the only constructor.
     *
     * @param database the original sequence database
     * @param creador  the abstraction creator used when looking for candidates
     *                 in the sequences
     */
    public SupportCounting(SequenceDatabase database, AbstractionCreator creador) {
        this.database = database;
        this.abstractionCreator = creador;
    }

    /**
     * Main method. Looks for every candidate in every sequence of the database
     * and keeps the candidates whose support reaches the threshold. The
     * indexation map is emptied first and then refilled with the kept candidates.
     *
     * @param candidateSet       the candidates to evaluate
     * @param k                  the level being checked
     * @param minSupportAbsolute the absolute minimum support a candidate must
     *                           reach to be kept
     * @return the frequent candidates, in the iteration order of
     *         {@code candidateSet}
     */
    public Set<Pattern> countSupport(List<Pattern> candidateSet, int k, double minSupportAbsolute) {
        // Forget the index built by any previous call.
        indexationMap.clear();

        // Pass 1: record, sequence by sequence, which candidates appear.
        for (Sequence sequence : database.getSequences()) {
            checkCandidateInSequence(sequence, k, candidateSet);
        }

        // Pass 2: keep and index the candidates that reach the threshold.
        Set<Pattern> frequentPatterns = new LinkedHashSet<Pattern>();
        for (Pattern candidate : candidateSet) {
            boolean frequent = candidate.getSupport() >= minSupportAbsolute;
            if (!frequent) {
                continue;
            }
            frequentPatterns.add(candidate);
            putInIndexationMap(candidate);
        }
        return frequentPatterns;
    }

    /**
     * Tests every candidate against one sequence and, for each candidate found,
     * registers the id of the sequence as an appearance of that candidate.
     *
     * @param sequence     the sequence to search in
     * @param k            the level being checked
     * @param candidateSet the candidates to look for
     */
    private void checkCandidateInSequence(Sequence sequence, int k, List<Pattern> candidateSet) {
        for (Pattern candidate : candidateSet) {
            // Each search gets its own fresh position list and its own finder.
            List<int[]> positions = initialPositions(k);
            CandidateInSequenceFinder finder = new CandidateInSequenceFinder(abstractionCreator);

            abstractionCreator.isCandidateInSequence(finder, candidate, sequence, k, 0, positions);

            if (!finder.isPresent()) {
                continue;
            }
            candidate.addAppearance(sequence.getId());
        }
    }

    /**
     * Builds the list of {@code k} starting positions, each one a two-element
     * array set to {@code {0, 0}}, i.e. first itemset, first item.
     *
     * @param k the number of positions to create
     * @return a new list holding {@code k} independent {@code {0, 0}} arrays
     */
    private static List<int[]> initialPositions(int k) {
        List<int[]> positions = new ArrayList<int[]>(k);
        int remaining = k;
        while (remaining > 0) {
            positions.add(new int[]{0, 0});
            remaining--;
        }
        return positions;
    }

    /**
     * Files a pattern in the indexation map under the item of its first
     * element, creating the bucket for that item when it does not exist yet.
     *
     * @param entry the pattern to index
     */
    private void putInIndexationMap(Pattern entry) {
        Item leadingItem = entry.getIthElement(0).getItem();
        Set<Pattern> bucket = indexationMap.get(leadingItem);
        if (bucket == null) {
            bucket = new LinkedHashSet<Pattern>();
            indexationMap.put(leadingItem, bucket);
        }
        bucket.add(entry);
    }

    /**
     * Gets the indexation map filled by the last call to
     * {@link #countSupport(List, int, double)}.
     *
     * @return the indexation map itself (not a copy)
     */
    public Map<Item, Set<Pattern>> getIndexationMap() {
        return indexationMap;
    }
}