/***********************************************************************

    This file is part of KEEL-software, the Data Mining tool for regression,
    classification, clustering, pattern mining and so on.

    Copyright (C) 2004-2010

    F. Herrera (herrera@decsai.ugr.es)
    L. Sánchez (luciano@uniovi.es)
    J. Alcalá-Fdez (jalcala@decsai.ugr.es)
    S. García (sglopez@ujaen.es)
    A. Fernández (alberto.fernandez@ujaen.es)
    J. Luengo (julianlm@decsai.ugr.es)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.Discretizers.UCPD;

import java.util.Vector;
import keel.Algorithms.Genetic_Rule_Learning.Globals.Parameters;


/**
 * <p>
 * This class implements the algorithm to find the large (frequent) itemsets of a dataset.
 * </p>
 *
 * <p>
 * The search is level-wise: the large 1-itemsets are computed first, and then, while the
 * previous level produced at least one accepted itemset, the accepted itemsets are combined
 * pairwise into new candidates which are kept only if enough instances contain them.
 * </p>
 *
 * <p>
 * All working data is held in static fields that are overwritten on every call to
 * {@link #getFrequentItemsets(int[][], int[])}, so concurrent calls share (and clobber)
 * the same state.
 * </p>
 *
 * @author Written by Jose A. Saez (University of Granada), 21/12/2009
 * @version 1.0
 * @since JDK1.6
 */
public class FrequentItemsets {

    private static int minSupport;                      // minimal support of itemset
    private static int instances[][];                   // all the instances
    private static int numAtt;                          // number of attributes
    private static int numInstances;                    // number of instances

    private static Vector<Itemset> lastCandidates;      // itemsets accepted in the last pass
    private static Vector<Itemset> candidates;          // candidates generated in the current pass
    private static Vector<Itemset> frequentItemsets;    // every frequent itemset found so far


//******************************************************************************************************

    /**
     * <p>
     * It computes the frequent itemsets and returns them
     * </p>
     * The minimal support is read from {@code Parameters.minSupport}.
     *
     * @param examples matrix of instances (one row per instance, one column per attribute);
     *                 the number of attributes is taken from the first row
     * @param numValues number of different values of each attribute
     * @return the frequent itemsets of every size, 1-itemsets first; an empty vector
     *         if {@code examples} contains no instances
     */
    public static Vector<Itemset> getFrequentItemsets(int[][] examples, int[] numValues){

        frequentItemsets = new Vector<Itemset>();

        // initialize parameters
        instances = examples;
        numInstances = instances.length;
        minSupport = Parameters.minSupport;

        // An empty dataset has no first row to read the attribute count from
        // (instances[0] would throw) and nothing to count, so reset the working
        // state and report that no itemset was found.
        if (numInstances == 0) {
            numAtt = 0;
            lastCandidates = new Vector<Itemset>();
            candidates = new Vector<Itemset>();
            return frequentItemsets;
        }

        numAtt = instances[0].length;

        // compute the large 1-itemsets
        computeLarge1Itemsets(numValues);
        frequentItemsets.addAll(lastCandidates);

        // compute the large k-itemsets: stop as soon as a pass accepts nothing
        while (!lastCandidates.isEmpty()) {
            computeCandidates();    // compute the candidate itemsets in this pass
            computeFrecuence();     // count each candidate and keep only the frequent ones
        }

        return frequentItemsets;
    }


//******************************************************************************************************

    /**
     * <p>
     * It computes the large 1-itemsets from the instances array and it saves them into the array lastCandidates
     * </p>
     * It relies on the static state (instances, attribute count, minimal support) set up by
     * {@link #getFrequentItemsets(int[][], int[])}.
     *
     * @param numValues number of different values of each attribute
     */
    public static void computeLarge1Itemsets(int[] numValues){

        // computes the number of occurrences of each value of each attribute
        // (freshly allocated int arrays are already zero-filled)
        int[][] numOccurrences = new int[numAtt][];
        for (int att = 0; att < numAtt; ++att) {
            numOccurrences[att] = new int[numValues[att]];
        }

        // NOTE(review): assumes every instances[inst][att] lies in [0, numValues[att]);
        // any other value makes the index below throw. Confirm against the caller.
        for (int inst = 0; inst < numInstances; ++inst) {
            for (int att = 0; att < numAtt; ++att) {
                numOccurrences[att][instances[inst][att]]++;
            }
        }

        // computes the large 1-itemsets
        lastCandidates = new Vector<Itemset>();

        for (int att = 0; att < numAtt; ++att) {
            for (int value = 0; value < numValues[att]; ++value) {
                if (numOccurrences[att][value] >= minSupport) {
                    // -1 in every position except the one attribute this itemset fixes
                    int[] aux = new int[numAtt];
                    for (int p = 0; p < numAtt; ++p) {
                        aux[p] = -1;
                    }
                    aux[att] = value;

                    lastCandidates.add(new Itemset(aux, numAtt, att));
                }
            }
        }
    }


//******************************************************************************************************

    /**
     * <p>
     * It computes the candidate itemsets from lastCandidates and saves them into array candidates
     * </p>
     * Every unordered pair of itemsets in lastCandidates is passed to {@code Itemset.combine};
     * pairs for which it returns {@code null} produce no candidate.
     */
    public static void computeCandidates(){

        candidates = new Vector<Itemset>();

        final int total = lastCandidates.size();

        for (int i = 0; i < total - 1; ++i) {
            Itemset first = lastCandidates.get(i);

            for (int j = i + 1; j < total; ++j) {
                Itemset merged = first.combine(lastCandidates.get(j));

                if (merged != null) {
                    candidates.add(merged);
                }
            }
        }
    }


//******************************************************************************************************

    /**
     * <p>
     * It computes the frequency of each candidate itemset and accepts it if it reaches the minSupport value.
     * It stores the accepted itemsets into the array lastCandidates and appends them to the frequent itemsets
     * </p>
     * The frequency of a candidate is the number of instances for which
     * {@code Itemset.into} returns {@code true}.
     */
    private static void computeFrecuence(){

        lastCandidates = new Vector<Itemset>();

        for (Itemset candidate : candidates) {

            // compute the occurrence of this itemset
            int occurrences = 0;
            for (int j = 0; j < numInstances; ++j) {
                if (candidate.into(instances[j])) {
                    occurrences++;
                }
            }

            // accept the itemset if it reaches minSupport
            if (occurrences >= minSupport) {
                lastCandidates.add(candidate);
            }
        }

        // save the itemsets (no-op when nothing was accepted)
        frequentItemsets.addAll(lastCandidates);
    }

}