/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Associative_Classification.ClassifierCBA2;
import java.util.*;
/**
*
* This class mines the frequent non-fuzzy itemsets and the non-fuzzy classification association rules
*
*
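* <p>
* Usage sketch (the concrete construction of the {@code DataBase} and {@code myDataset}
* objects depends on the rest of the KEEL framework; the threshold values below are
* only illustrative):
* </p>
* <pre>
* Apriori apriori = new Apriori(dataBase, train, 0.01, 0.5, 1, 80000);
* RuleBase rules = apriori.generateCAR();
* </pre>
*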
* @author Written by Jesus Alcala (University of Granada) 09/02/2010
* @version 1.0
* @since JDK1.5
*/
public class Apriori {
ArrayList<Itemset> L2;
double minpsup, minpconf;
double minpSupPerClas[];
int nClasses, nVariables, prune;
long time;
RuleBase ruleBase;
myDataset train;
DataBase dataBase;
int limitRules;
int numberRules;
/**
* <p>
* Default Constructor
* </p>
*/
public Apriori() {
}
/**
* <p>
* Parameters Constructor: Generates an Apriori object from a list of parameters
* </p>
* @param dataBase Database with the labels (discretization) of the input variables used by the algorithm
* @param train Contains the train data set with the whole information to execute the algorithm
* @param minpsup The minimum support for the problem
* @param minpconf The minimum confidence for the problem
* @param prune Value which decides whether or not to prune the rule set (a value greater than 0 enables pruning)
* @param limitRules Maximum number of rules to generate (0 = no limit)
*/
public Apriori(DataBase dataBase, myDataset train, double minpsup, double minpconf, int prune, int limitRules) {
this.train = train;
this.dataBase = dataBase;
this.minpsup = minpsup;
this.minpconf = minpconf;
this.prune = prune;
this.nClasses = this.train.getnClasses();
this.nVariables = this.train.getnInputs();
this.limitRules = limitRules;
this.minpSupPerClas = new double[this.nClasses];
for (int i=0; i < this.nClasses; i++) this.minpSupPerClas[i] = train.frecuentClass(i) * this.minpsup;
this.L2 = new ArrayList<Itemset> ();
}
/**
* <p>
* Sets the minimum support and confidence thresholds and updates the per-class minimum supports accordingly
* </p>
* @param minpsup The minimum support for the problem
* @param minpconf The minimum confidence for the problem
*/
public void setSupportConfidence (double minpsup, double minpconf) {
this.minpsup = minpsup;
this.minpconf = minpconf;
for (int i=0; i < this.nClasses; i++) this.minpSupPerClas[i] = train.frecuentClass(i) * this.minpsup;
}
/**
* <p>
* Generates the Rule Base containing the whole set of Classification Association Rules
* obtained by the Apriori method
* </p>
* @return RuleBase The whole CAR rule set
*/
public RuleBase generateCAR () {
this.ruleBase = new RuleBase(this.dataBase, this.train);
this.numberRules = 0;
this.time = 0;
this.generateL2();
this.generateLarge (this.L2);
System.gc();
return (this.ruleBase);
}
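/**
 * Generates the initial candidate set L2: itemsets formed by a single
 * (variable, label) item plus a class label. Only those whose class support
 * exceeds the per-class minimum support are kept, up to the rule limit, and
 * candidate rules are then generated from them.
 */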
private void generateL2() {
int i, j, k;
Item item;
Itemset itemset;
this.L2.clear();
itemset = new Itemset(0);
for (i=0; i < this.nVariables && this.numberRules < this.limitRules; i++) {
if (this.dataBase.numLabels(i) > 1) {
for (j=0; j < this.dataBase.numLabels(i) && this.numberRules < this.limitRules; j++) {
item = new Item(i, j);
itemset.add(item);
for (k=0; k < this.nClasses && this.numberRules < this.limitRules; k++) {
itemset.setClas(k);
itemset.calculateSupports(this.train);
if (itemset.getSupportClass() > this.minpSupPerClas[k]) {
this.L2.add(itemset.clone());
this.numberRules++;
}
}
itemset.remove(0);
}
}
}
this.generateRules(this.L2, this.L2);
}
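/**
 * Apriori join step: recursively extends the itemsets in Lk by one item,
 * keeps the candidates whose class support reaches the per-class minimum
 * support, generates rules from the new level and recurses until no more
 * items can be added or the rule limit is reached.
 */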
private void generateLarge (ArrayList<Itemset> Lk) {
int i, j, size;
ArrayList<Itemset> Lnew;
Itemset newItemset, itemseti, itemsetj;
size = Lk.size();
if (size > 1 && this.numberRules < this.limitRules) {
if ((Lk.get(0)).size() < this.dataBase.numVariablesUsed()) {
Lnew = new ArrayList<Itemset> ();
for (i = 0; i < size-1 && this.numberRules < this.limitRules; i++) {
itemseti = Lk.get(i);
for (j = i+1; j < size && this.numberRules < this.limitRules; j++) {
itemsetj = Lk.get(j);
if (this.isCombinable(itemseti, itemsetj)) {
newItemset = itemseti.clone();
newItemset.add((itemsetj.get(itemsetj.size()-1)).clone());
newItemset.calculateSupports(this.train);
if (newItemset.getSupportClass() >= this.minpSupPerClas[newItemset.getClas()]) {
Lnew.add(newItemset);
this.numberRules++;
}
}
}
}
this.generateRules(Lnew, Lk);
if (this.numberRules < this.limitRules) {
this.numberRules -= Lk.size();
Lk.clear();
System.gc();
this.generateLarge(Lnew);
}
}
}
}
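/**
 * Two itemsets can be joined when they predict the same class, have the same
 * size, share every item except the last one, and their last items refer to
 * different variables in increasing order.
 */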
private boolean isCombinable(Itemset itemseti, Itemset itemsetj) {
int i;
Item itemi, itemj;
if (itemseti.getClas() != itemsetj.getClas()) return (false);
if (itemseti.size() != itemsetj.size()) return (false);
for (i=0; i < itemseti.size()-1; i++) {
itemi = itemseti.get(i);
itemj = itemsetj.get(i);
if ((itemi.getVariable() != itemj.getVariable()) || (itemi.getValue() != itemj.getValue())) return (false);
}
itemi = itemseti.get(itemseti.size()-1);
itemj = itemsetj.get(itemsetj.size()-1);
if (itemi.getVariable() >= itemj.getVariable()) return (false);
return (true);
}
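/**
 * Pruning test: the itemset is discarded when some sub-itemset already present
 * in Lf has a strictly lower pessimistic error estimate (getPer()).
 */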
private boolean isPrune(Itemset itemset, ArrayList<Itemset> Lf) {
int i;
Itemset itemseti;
for (i=0; i < Lf.size() && Lf.size() > 1; i++) {
itemseti = Lf.get(i);
if ((itemseti.isSubItemset(itemset)) && (itemseti.getPer() < itemset.getPer())) return (true);
}
return (false);
}
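/**
 * Builds the classification rules from the candidate itemsets in Lk: after
 * sorting, itemsets sharing the same antecedent are grouped and only the most
 * confident consequent is kept. A rule is added to the rule base when its
 * confidence reaches the minimum confidence and, if pruning is enabled, it is
 * not pruned against Lf.
 */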
private void generateRules(ArrayList<Itemset> Lk, ArrayList<Itemset> Lf) {
int i, j;
Itemset itemseti, itemsetj;
double confidence, bestConfidence;
boolean stop;
Collections.sort(Lk);
for (i=0; i < Lk.size() && this.numberRules < this.limitRules;) {
itemseti = Lk.get(i);
if (itemseti.getSupport() > 0.0) bestConfidence = itemseti.getSupportClass() / itemseti.getSupport();
else bestConfidence = 0.0;
stop = false;
for (j=i+1; j < Lk.size() && !stop;) {
itemsetj = Lk.get(j);
if (itemseti.isEqualAnt(itemsetj)) {
if (itemsetj.getSupport() > 0.0) confidence = itemsetj.getSupportClass() / itemsetj.getSupport();
else confidence = 0.0;
if (confidence > bestConfidence) {
bestConfidence = confidence;
itemseti = itemsetj;
}
j++;
}
else stop = true;
}
if (bestConfidence >= this.minpconf) {
if (this.prune > 0 && itemseti.size() > 1) {
if (!this.isPrune(itemseti, Lf)) {
this.ruleBase.add(itemseti, this.time);
this.time++;
this.numberRules++;
}
}
else {
this.ruleBase.add(itemseti, this.time);
this.time++;
this.numberRules++;
}
}
i = j;
}
}
}