/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/* -------------------------------------------------------------------------- */
/* */
/* APRIORI-TFP CMAR */
/* (CLASSIFICATION BASED ON MULTIPLE ASSOCIATION RULES) */
/* */
/* Frans Coenen */
/* */
/* Tuesday 2 March 2004 */
/* (Bug fixes: 7/2/2005) */
/* */
/* Department of Computer Science */
/* The University of Liverpool */
/* */
/* -------------------------------------------------------------------------- */
/* Class structure
AssocRuleMining
|
+-- TotalSupportTree
|
+-- PartialSupportTree
|
+--AprioriTFPclass
|
+-- AprioriTFP_CMAR */
// Java packages
package keel.Algorithms.Associative_Classification.ClassifierCMAR;
import java.util.*;
import java.io.*;
/**
* Methods to produce classification rules using Wenmin Li, Jiawei Han and
Jian Pei's CMAR (Classification based on Multiple Association Rules) algorithm,
but founded on Apriori-TFP. Assumes that the input dataset is organised such that
classifiers are at the end of each record. Note: the number of classifiers is
stored in the <TT>numClasses</TT> field.
*
*
*
* @author Frans Coenen 2 March 2004
* @author Modified by Jesus Alcala (University of Granada) 09/02/2010
* @author Modified by Sergio Almecija (University of Granada) 23/05/2010
* @version 1.0
* @since JDK1.5
*/
public class AprioriTFP_CMAR extends AprioriTFPclass {
/* ------ FIELDS ------ */
// CONSTANTS
/**
 * The maximum number of CARs. Declared {@code static} because the value is a
 * constant that does not vary per instance. The only reference to it in this
 * class is the (currently disabled) cap check in the recursive
 * {@code generateCARs} method.
 */
private static final int MAX_NUM_CARS = 1000000;
// OTHER FIELDS
/**
 * The number of CARs generated so far. Reset to zero at the start of each
 * CAR generation run and incremented for every CAR handed to the rule list.
 */
private int numCarsSoFar = 0;
/* ------ CONSTRUCTORS ------ */
/**
 * Creates a CMAR classifier generator. The three thresholds are forwarded,
 * unchanged, to the {@code AprioriTFPclass} constructor; this class adds no
 * construction logic of its own.
 * @param minConf Minimum confidence threshold
 * @param minSup Minimum support threshold
 * @param delta Minimum coverage threshold
 */
public AprioriTFP_CMAR(double minConf, double minSup, int delta) {
super(minConf, minSup, delta);
}
/* ------ METHODS ------ */
/* START CMAR CLASSIFICATION */
/**
 * Builds the CMAR classifier (rule generation and pruning only).
 * <P> Proceeds as follows:<OL>
 * <LI>Generates all CARs using Apriori-TFP via {@code startCARgeneration()},
 * which places the selected CARs into the current rule list.
 * <LI>Asks the rule list to output the number of CMAR rules.
 * <LI>Prunes the rule list according to the cover strategy, passing it
 * {@code copyItemSet(dataArray)}.</OL>
 * No test-set evaluation is performed here; use
 * {@link #startCMARclassificationWithOutput()} to obtain an accuracy figure.
 */
public void startCMARclassification() {
    final String banner = "START APRIORI-TFP CMAR\n" + "--------------------------";
    System.out.println(banner);

    // Phase 1: mine the CARs; selected rules end up in currentRlist.
    startCARgeneration();

    // Phase 2: report how many rules were collected, then prune them using
    // the "cover" principle.
    currentRlist.outputNumCMARrules();
    System.out.println("prune CARS");
    currentRlist.pruneUsingCover(copyItemSet(dataArray));
}
/**
 * Builds the CMAR classifier and then evaluates it on the test set
 * (version with full output).
 * <P> Proceeds as follows:<OL>
 * <LI>Runs {@link #startCMARclassification()}, which generates all CARs using
 * Apriori-TFP, outputs the rule count and prunes the rule list according to
 * the cover strategy.
 * <LI>Tests classification on the test set, printing the individual
 * counters as well as the accuracy.</OL>
 * @return The classification accuracy (%); 0 when there is no test data or
 * no classification rules were generated.
 */
public double startCMARclassificationWithOutput() {
    // Rule generation and pruning are exactly the steps of the plain entry
    // point, so delegate instead of keeping a second copy of the pipeline.
    startCMARclassification();

    // Test classification using the test set.
    return testClassificationWithOutput();
}
/**
 * Commences the process of generating CARs using Apriori-TFP.
 * <P> In order: outputs the support and confidence settings, converts the
 * percentage support threshold into a record count, configures the current
 * rule list (row count, class count, number of one-item sets), clears the
 * rule list and the CAR counter, and finally builds the P-tree followed by
 * the T-tree. CARs are produced while the T-tree is being built (see
 * {@code createTtreeLevelN}); how each CAR is ranked or filtered is decided
 * by the rule list's insert method, not here.
 */
private void startCARgeneration() {
    outputSuppAndConf();

    // Express the minimum support as a number of records rather than a
    // percentage.
    // NOTE(review): the threshold is derived from numRowsInTrainingSet while
    // the message below and the rule list use numRows -- confirm this is
    // intended.
    minSupport = numRowsInTrainingSet * support / 100.0;
    String summary = "Num rows in training set = " + numRows + ", reduced minimum support = " + minSupport;
    System.out.println(summary);

    // Give the rule list the dataset dimensions.
    currentRlist.setNumRows(numRows);
    currentRlist.setNumClasses(numClasses);
    currentRlist.setNumOneItemSets(numOneItemSets);

    // Start from a clean slate: no CARs counted and an empty rule list.
    // NOTE(review): only startRulelist is cleared here; the test methods read
    // startCMARrulelist -- verify that list is reset elsewhere when needed.
    numCarsSoFar = 0;
    currentRlist.startRulelist = null;

    // Create the P-tree, then generate the T-tree (and with it the CARs).
    createPtree();
    createTotalSupportTree();
}
/*----------------------------------------------------------------------- */
/* */
/* APRIORI-TFP CMAR WITH TEN CROSS VALIDATION (TCV) */
/* */
/*----------------------------------------------------------------------- */
/* COMMENCE TEN CROSS VALIDATION WITH OUTPUT */
/** Start Ten Cross Validation (TCV) process with output of individual
accuracies. */
/*
public void commenceTCVwithFullOutput() {
double[][] parameters = new double[10][4];
System.out.println("START TCV APRIORI-TFP CMAR CLASSIFICATION\n" +
"------------------------------------");
// Loop through tenths data sets
for (int index=0;index<10;index++) {
System.out.println("[--- " + index + " ---]");
// Create training and test sets
createTrainingAndTestDataSets(index);
// Set global rule list reference to null
currentRlist.setStartCMARrulelistToNull();
// Mine data, produce T-tree and generate CRs
parameters[index][0] = startCMARclassificationWithOutput();
// Output and storage
currentRlist.outputNumCMARrules();
currentRlist.outputCMARrules();
parameters[index][1] = countNumFreqSets();
parameters[index][2] = numUpdates;
parameters[index][3] = currentRlist.getNumCMAR_CRs();
}
// Determine totals
double totalAccu = 0;
double totalNumFreqSets = 0;
double totalNumUpdates = 0;
double totalNumCRs = 0;
System.out.println("---------------------------------------");
for (int index=0;index<parameters.length;index++) {
System.out.println("(" + (index+1) + ") Accuracy = " +
twoDecPlaces(parameters[index][0]) + ", Num. Freq. Sets = " +
twoDecPlaces(parameters[index][1]) + ", Num Updates = " +
twoDecPlaces(parameters[index][2]) + ", Num CRs = " +
twoDecPlaces(parameters[index][3]));
// Totals
totalAccu = totalAccu+parameters[index][0];
totalNumFreqSets = totalNumFreqSets+parameters[index][1];
totalNumUpdates = totalNumUpdates+parameters[index][2];
totalNumCRs = totalNumCRs+parameters[index][3];
}
// Calculate averages
averageAccuracy = totalAccu/10;
averageNumFreqSets = totalNumFreqSets/10;
averageNumUpdates = totalNumUpdates/10;
averageNumCRs = totalNumCRs/10;
// Output avergaes
System.out.println("---------------------------------------");
System.out.println("Average Accuracy = " +
twoDecPlaces(averageAccuracy) + ", Num. Freq. Sets = " +
twoDecPlaces(averageNumFreqSets) + ", Average Num Updates = " +
twoDecPlaces(averageNumUpdates) + ", Average Num CRs = " +
twoDecPlaces(averageNumCRs));
System.out.println("========================================");
}
*/ /* COMMENCE TEN CROSS VALIDATION WITH OUTPUT */
/** Start Ten Cross Validation (TCV) process with output of individual
accuracies. */
/*
public void commenceTCVwithOutput() {
double[][] parameters = new double[10][4];
System.out.println("START TCV APRIORI-TFP CMAR CLASSIFICATION\n" +
"------------------------------------");
// Loop through tenths data sets
for (int index=0;index<10;index++) {
System.out.println("[--- " + index + " ---]");
// Create training and test sets
createTrainingAndTestDataSets(index);
// Set global rule list reference to null
currentRlist.setStartCMARrulelistToNull();
// Mine data, produce T-tree and generate CRs
parameters[index][0] = startCMARclassification();
// Output and storage
currentRlist.outputNumCMARrules();
parameters[index][1] = countNumFreqSets();
parameters[index][2] = numUpdates;
parameters[index][3] = currentRlist.getNumCMAR_CRs();
}
// Determine totals
double totalAccu = 0;
double totalNumFreqSets = 0;
double totalNumUpdates = 0;
double totalNumCRs = 0;
for (int index=0;index<parameters.length;index++) {
System.out.println("(" + (index+1) + ") Accuracy = " +
twoDecPlaces(parameters[index][0]) + ", Num. Freq. Sets = " +
twoDecPlaces(parameters[index][1]) + ", Num Updates = " +
twoDecPlaces(parameters[index][2]) + ", Num CRs = " +
twoDecPlaces(parameters[index][3]));
// Totals
totalAccu = totalAccu+parameters[index][0];
totalNumFreqSets = totalNumFreqSets+parameters[index][1];
totalNumUpdates = totalNumUpdates+parameters[index][2];
totalNumCRs = totalNumCRs+parameters[index][3];
}
// Calculate averages
averageAccuracy = totalAccu/10;
averageNumFreqSets = totalNumFreqSets/10;
averageNumUpdates = totalNumUpdates/10;
averageNumCRs = totalNumCRs/10;
// Output avergaes
System.out.println("---------------------------------------");
System.out.println("Average Accuracy = " +
twoDecPlaces(averageAccuracy) + ", Num. Freq. Sets = " +
twoDecPlaces(averageNumFreqSets) + ", Average Num Updates = " +
twoDecPlaces(averageNumUpdates) + ", Average Num CRs = " +
twoDecPlaces(averageNumCRs));
}
*/
/*----------------------------------------------------------------------- */
/* */
/* CLASSIFICATION ASSOCIATION RULE (CAR) GENERATION */
/* */
/*----------------------------------------------------------------------- */
/* GENERATE CLASSIFICATION ASSOCIATION RULES */
/**
 * Initiates the generation of Classification Association Rules (CARs) by
 * looping through the class entries at the top level of the T-tree.
 * <P>CARs differ from ARs in that they have only a single consequent and
 * the number of admissible consequents is limited. Classifiers are assumed
 * to be listed at the end of the attribute list, so only the indices
 * {@code numOneItemSets-numClasses+1 .. numOneItemSets} are visited. A class
 * node is expanded only if it exists, has a child array and its support
 * reaches {@code minSupport}.
 * @param level the T-tree level for which CARs are to be generated; the
 * recursive overload is started one level lower, on the children of the
 * class node.
 */
private void generateCARs(int level) {
    final int firstClassIndex = numOneItemSets - numClasses + 1;
    for (int classIndex = firstClassIndex; classIndex <= numOneItemSets; classIndex++) {
        if (startTtreeRef[classIndex] != null
                && startTtreeRef[classIndex].childRef != null
                && startTtreeRef[classIndex].support >= minSupport) {
            // The class attribute itself is the single-item consequent.
            short[] consequent = { (short) classIndex };
            generateCARs(null, classIndex, level - 1, consequent,
                    startTtreeRef[classIndex].childRef);
        }
    }
}
/* GENERATE CLASSIFICATION ASSOCIATION RULES */
/**
 * Continues the process of generating classification association rules from
 * a T-tree by recursively walking the tree level by level.
 * <P>While {@code level} is greater than 1 the method only descends into
 * nodes that have children. At {@code level == 1} every existing node yields
 * a candidate CAR whose antecedent is {@code itemSetSofar} extended with the
 * node index; the candidate is passed to the rule list when its confidence
 * is at least the {@code confidence} threshold.
 * @param itemSetSofar the label for a T-tree node as generated so far
 * (may be {@code null} at the start of the recursion).
 * @param size the exclusive upper bound of the indices examined in
 * {@code linkRef} (indices 1 to size-1 are visited).
 * @param level the number of levels still to descend; 1 means the rules are
 * generated from {@code linkRef} itself.
 * @param consequent the current consequent (classifier) for the CAR.
 * @param linkRef the reference to the current array level in the T-tree.
 */
protected void generateCARs(short[] itemSetSofar, int size, int level, short[] consequent, TtreeNode[] linkRef) {
    // Nothing to do when there is no array at this level.
    if (linkRef == null) {
        return;
    }

    // NOTE: a cap on the number of CARs (numCarsSoFar versus MAX_NUM_CARS)
    // used to be checked at this point; it is currently disabled.

    // Not yet at the target depth: extend the item set and descend into
    // every node that has children.
    if (level != 1) {
        for (int child = 1; child < size; child++) {
            if (linkRef[child] == null || linkRef[child].childRef == null) {
                continue;
            }
            short[] extended = realloc2(itemSetSofar, (short) child);
            generateCARs(extended, child, level - 1, consequent, linkRef[child].childRef);
        }
        return;
    }

    // Target depth reached: each existing node is a candidate CAR.
    for (int child = 1; child < size; child++) {
        if (linkRef[child] == null) {
            continue;
        }
        // Antecedent = item set so far plus this node's index.
        short[] antecedent = realloc2(itemSetSofar, (short) child);
        double antecedentSupport = (double) getSupportForItemSetInTtree(antecedent);
        double carConfidence = getConfidence(antecedentSupport, linkRef[child].support);
        // Only rules meeting the minimum confidence are added to the list.
        if (carConfidence >= confidence) {
            numCarsSoFar++;
            double consequentSupport = (double) getSupportForItemSetInTtree(consequent);
            currentRlist.insertRinRlistCMARranking(antecedent, consequent, antecedentSupport,
                    consequentSupport, linkRef[child].support, carConfidence);
        }
    }
}
/*------------------------------------- */
/* */
/* T-TREE METHODS */
/* */
/*------------------------------------- */
/* CREATE T-TREE LEVEL N */
/**
 * Determines the remaining levels of the T-tree (everything below the top
 * level), level by level in an "Apriori" manner, generating CARs as it goes.
 * <P>Each pass adds support to the current level, prunes it, generates the
 * CARs for it and then attempts to generate the next level. The loop ends
 * when no further level exists or when the number of frequent sets exceeds
 * {@code MAX_NUM_FREQUENT_SETS}. The level counter reached is printed at
 * the end.
 */
protected void createTtreeLevelN() {
    int currentLevel = 2;

    // The increment lives in the loop header, so an early break leaves the
    // counter at the level that was being processed.
    for (; nextLevelExists; currentLevel++) {
        // Add support, then prune unsupported candidate sets.
        addSupportToTtreeLevelN(currentLevel);
        pruneLevelN(startTtreeRef, currentLevel);

        // Generate the CARs available at this level.
        generateCARs(currentLevel);

        // Stop if too many frequent sets have been generated so far.
        if (numFrequentsets > MAX_NUM_FREQUENT_SETS) {
            String warning = "Number of frequent sets (" + numFrequentsets + ") generted so far " + "exceeds limit of " + MAX_NUM_FREQUENT_SETS + ", generation process stopped!";
            System.out.println(warning);
            break;
        }

        // Attempt to generate the next level. The flag is cleared first;
        // presumably generateLevelN sets it again when it creates
        // candidates -- verify against the parent class.
        nextLevelExists = false;
        generateLevelN(startTtreeRef, currentLevel, null);
    }

    System.out.println("Levels in T-tree = " + currentLevel);
}
/* ---------------------------------------------------------------- */
/* */
/* TEST CLASSIFICATION */
/* */
/* ---------------------------------------------------------------- */
/* TEST CLASSIFICATION */
/**
 * Tests the generated classification rules against the test set and returns
 * the percentage accuracy.
 * <P>Each test record is classified with the rule list's
 * {@code classifyRecordWCS} method. A result of 0 is treated as
 * "unclassified"; any other result is compared with the last element of the
 * record. Accuracy is the number of correct classifications divided by the
 * total number of test records, so unclassified records count against it.
 * @return the percentage accuracy, or 0 when the test set is missing or
 * empty or when no classification rules have been generated.
 */
private double testClassification() {
    // Without test records there is nothing to measure. An empty array is
    // handled here as well, which avoids a 0/0 (NaN) accuracy below.
    if (testDataArray == null || testDataArray.length == 0) {
        System.out.println("WARNING: No test data");
        return 0;
    }

    // Without rules no record can be classified.
    if (currentRlist.startCMARrulelist == null) {
        System.out.println("No classification rules generated!");
        return 0;
    }

    // Only correct classifications matter for the accuracy, so wrong and
    // unclassified records are not tallied separately.
    int correctClassCounter = 0;
    for (int index = 0; index < testDataArray.length; index++) {
        short classResult = currentRlist.classifyRecordWCS(testDataArray[index]);
        // The actual class is looked up only for classified records.
        if (classResult != 0 && classResult == getLastElement(testDataArray[index])) {
            correctClassCounter++;
        }
    }

    // Calculate and return the classification accuracy.
    double accuracy = correctClassCounter * 100.0 / testDataArray.length;
    System.out.println("Accuracy = " + twoDecPlaces(accuracy) + "%");
    return accuracy;
}
/**
 * Tests the generated classification rules against the test set and returns
 * the percentage accuracy (version with full output).
 * <P>Each test record is classified with the rule list's
 * {@code classifyRecordWCS} method. A result of 0 is counted as
 * "unclassified"; any other result is compared with the last element of the
 * record and counted as correct or wrong. The three counters, the number of
 * test cases and the accuracy are printed.
 * @return the percentage accuracy, or 0 when the test set is missing or
 * empty or when no classification rules have been generated.
 */
private double testClassificationWithOutput() {
    // Without test records there is nothing to measure. An empty array is
    // handled here as well, which avoids a 0/0 (NaN) accuracy below.
    if (testDataArray == null || testDataArray.length == 0) {
        System.out.println("WARNING: No test data");
        return 0;
    }

    // Without rules no record can be classified.
    if (currentRlist.startCMARrulelist == null) {
        System.out.println("No classification rules generated!");
        return 0;
    }

    int correctClassCounter = 0;
    int wrongClassCounter = 0;
    int unclassifiedCounter = 0;

    // Loop through the test set.
    for (int index = 0; index < testDataArray.length; index++) {
        short classResult = currentRlist.classifyRecordWCS(testDataArray[index]);
        if (classResult == 0) {
            unclassifiedCounter++;
        } else if (classResult == getLastElement(testDataArray[index])) {
            correctClassCounter++;
        } else {
            wrongClassCounter++;
        }
    }

    // Calculate, report and return the classification accuracy.
    double accuracy = correctClassCounter * 100.0 / testDataArray.length;
    System.out.println("Correct classifications = " + correctClassCounter);
    System.out.println("unclassified = " + unclassifiedCounter);
    System.out.println("Wrong classifications = " + wrongClassCounter);
    System.out.println("Number of test cases = " + testDataArray.length);
    System.out.println("Accuracy = " + twoDecPlaces(accuracy) + "%");
    return accuracy;
}
}