/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
S. Garc�a (sglopez@ujaen.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.UnsupervisedLearning.AssociationRules.FuzzyRuleLearning.Fingrams;
/**
* <p>
* @author Written by Alvaro Lopez
* @version 1.0
* @since JDK1.6
* </p>
*/
import java.io.*;
import java.util.*;
import org.core.*;
public class FingramsProcess {
/**
* <p>
* It provides the implementation of the algorithm to be run in a process
* </p>
*/
private myDataset dataset;
private DataBase database;
private ArrayList<Itemset> outputVariables;
/**
* <p>
* It creates a new process for the algorithm by setting up its parameters
* </p>
*
* @param dataset
* The instance of the dataset for dealing with its records
* @param nEvaluations
* The maximum number of evaluations to reach before stopping the
* genetic learning
* @param popSize
* The maximum size of population to handle after each generation
* @param nBitsGene
* The number of bit digits for encoding a displacement within a
* gene
* @param phi
* It represents the value used for decreasing the "L" threshold
* (CURRENTLY NOT USED)
* @param d
* It indicates the value for controlling the Parent Centric BLX
* crossover
* @param nFuzzyRegionsForNumericAttributes
* The number of fuzzy regions with which numeric attributes are
* evaluated
* @param useMaxForOneFrequentItemsets
* It indicates whether the max operator must be used while
* discovering 1-Frequent Itemsets
* @param minSupport
* The user-specified minimum support for the mined association
* rules
* @param minConfidence
* The user-specified minimum confidence for the mined
* association rules
*/
public FingramsProcess(myDataset dataset, DataBase database) {
this.dataset = dataset;
this.database = database;
this.outputVariables = new ArrayList<Itemset>();
}
/**
* <p>
* It runs the algorithm for mining association rules
* </p>
*/
public int generateFile(String ruleBaseFile, double blankThreshold,
String fingramsFile, double minimumLift) {
int variable, value, nRules;
boolean first;
Itemset antecedent, consequent, rule;
Item aux;
String stringRules = new String("");
String dataAnt, dataCons, dataRule, text, line;
StringTokenizer data;
File file = null;
FileReader fr = null;
BufferedReader br = null;
this.outputVariables.clear();
PrintWriter fingrams_writer;
try {
fingrams_writer = new PrintWriter(fingramsFile);
} catch (FileNotFoundException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
return -1;
}
for (int i = 0; i < dataset.getnVars(); i++)
this.outputVariables.add(new Itemset());
nRules = 0;
int numberMinimumLiftRules=0;
try {
// Apertura del fichero y creacion de BufferedReader para poder
// hacer una lectura comoda (disponer del metodo readLine()).
file = new File(ruleBaseFile);
fr = new FileReader(file);
br = new BufferedReader(fr);
// Lectura del fichero
br.readLine();
br.readLine();
// ///////////////////
while ((line = br.readLine()) != null) {
data = new StringTokenizer(line, " = \" ");
if (data.nextToken().equalsIgnoreCase("<rule")) {
nRules++;
antecedent = new Itemset();
br.readLine();
data = new StringTokenizer(br.readLine(), " = \" ");
first = true;
while (data.nextToken().equalsIgnoreCase("<attribute")) {
data.nextToken();
text = data.nextToken();
variable = this.dataset.posVariable(text);
data.nextToken();
text = data.nextToken();
value = this.database.posValue(variable, text);
antecedent.add(new Item(variable, value));
data = new StringTokenizer(br.readLine(), " = \" ");
}
br.readLine();
data = new StringTokenizer(br.readLine(), " = \" ");
consequent = new Itemset();
variable=0;
while (data.nextToken().equalsIgnoreCase("<attribute")) {
data.nextToken();
text = data.nextToken();
variable = this.dataset.posVariable(text);
data.nextToken();
text = data.nextToken();
value = this.database.posValue(variable, text);
consequent.add(new Item(variable, value));
//this.outputVariables.get(variable).addNew(new Item(variable, value));
data = new StringTokenizer(br.readLine(), " = \" ");
}
rule = new Itemset();
rule.addItemset(antecedent);
rule.addItemset(consequent);
dataRule = rule.calculateSupport(dataset, database,
blankThreshold);
dataAnt = antecedent.calculateSupport(dataset, database,
blankThreshold);
dataCons = consequent.calculateSupport(dataset, database,
blankThreshold);
double actualLift = (rule.getSupport() / antecedent
.getSupport()) / consequent.getSupport();
if (actualLift >= minimumLift) {
numberMinimumLiftRules++;
this.outputVariables.get(variable).addNew(consequent.get(0));
}
br.readLine();
}
}
System.err.println("Number of initial rules: " + (nRules));
String outputFile = new String("Association\n" + this.outputLine()
+ "\nBlank threshold: " + blankThreshold + "\n\nRules: "
+ numberMinimumLiftRules + "\nExamples: "+dataset.getnTrans() + "\n\n");
fingrams_writer.print("" + outputFile);
fingrams_writer.flush();
// //////////////////////*/
file = new File(ruleBaseFile);
fr = new FileReader(file);
br = new BufferedReader(fr);
// Lectura del fichero
br.readLine();
br.readLine();
nRules = 0;
while ((line = br.readLine()) != null) {
stringRules = "";
data = new StringTokenizer(line, " = \" ");
if (data.nextToken().equalsIgnoreCase("<rule")) {
System.err.print("Rule: " + (nRules));
stringRules = stringRules + "Rule" + nRules + ": IF";
nRules++;
antecedent = new Itemset();
br.readLine();
data = new StringTokenizer(br.readLine(), " = \" ");
first = true;
while (data.nextToken().equalsIgnoreCase("<attribute")) {
data.nextToken();
text = data.nextToken();
if (first) {
stringRules = stringRules + " " + text + " is ";
first = false;
} else
stringRules = stringRules + " AND " + text + " is ";
variable = this.dataset.posVariable(text);
data.nextToken();
text = data.nextToken();
stringRules = stringRules + text;
value = this.database.posValue(variable, text);
antecedent.add(new Item(variable, value));
data = new StringTokenizer(br.readLine(), " = \" ");
}
stringRules = stringRules + " THEN";
consequent = new Itemset();
br.readLine();
data = new StringTokenizer(br.readLine(), " = \" ");
first = true;
while (data.nextToken().equalsIgnoreCase("<attribute")) {
data.nextToken();
text = data.nextToken();
if (first) {
stringRules = stringRules + " " + text + " is ";
first = false;
} else
stringRules = stringRules + " AND " + text + " is ";
variable = this.dataset.posVariable(text);
data.nextToken();
text = data.nextToken();
stringRules = stringRules + text;
value = this.database.posValue(variable, text);
consequent.add(new Item(variable, value));
this.outputVariables.get(variable).addNew(
new Item(variable, value));
data = new StringTokenizer(br.readLine(), " = \" ");
}
stringRules = stringRules + "\n";
rule = new Itemset();
rule.addItemset(antecedent);
rule.addItemset(consequent);
dataRule = rule.calculateSupport(dataset, database,
blankThreshold);
dataAnt = antecedent.calculateSupport(dataset, database,
blankThreshold);
dataCons = consequent.calculateSupport(dataset, database,
blankThreshold);
stringRules += "Rule Support => ("
+ (rule.getSupport() * dataset.getnTrans()) + ") ("
+ rule.getSupport() + ") => ";
if (!dataRule.equalsIgnoreCase(""))
stringRules += dataRule + "\n";
else
stringRules += "There are no items \n";
stringRules += "Antecedent Support => ("
+ (antecedent.getSupport() * dataset.getnTrans())
+ ") (" + antecedent.getSupport() + ") => ";
if (!dataAnt.equalsIgnoreCase(""))
stringRules += dataAnt + "\n";
else
stringRules += "There are no items \n";
stringRules += "Consequent Support => ("
+ (consequent.getSupport() * dataset.getnTrans())
+ ") (" + consequent.getSupport() + ") => ";
if (!dataCons.equalsIgnoreCase(""))
stringRules += dataCons + "\n";
else
stringRules += "There are no items \n";
double actualLift = (rule.getSupport() / antecedent
.getSupport()) / consequent.getSupport();
System.err.println("; Lift: "+actualLift);
if (actualLift >= minimumLift) {
// fingrams_writer = new PrintWriter(fingramsFile);
fingrams_writer.print(stringRules + "\n");
fingrams_writer.flush();
}
br.readLine();
}
}
} catch (Exception e) {
e.printStackTrace();
return -1;
} finally {
fingrams_writer.flush();
fingrams_writer.close();
System.err.println("Number of final rules with lift higher than "+minimumLift+": "+numberMinimumLiftRules);
System.err.println("File succesfully writed");
System.err.flush();
// En el finally cerramos el fichero, para asegurarnos
// que se cierra tanto si todo va bien como si salta
// una excepcion.
try {
if (null != fr) {
fr.close();
}
} catch (Exception e2) {
e2.printStackTrace();
return -1;
}
}
return 0;
}
public String outputLine() {
Itemset itemset;
Item item;
String[] nameVariables = this.dataset.names();
String line = "";
for (int i = 0; i < dataset.getnVars(); i++) {
itemset = this.outputVariables.get(i);
if (itemset.size() > 0) {
line = line + nameVariables[i] + "(";
for (int j = 0; j < itemset.size(); j++) {
item = itemset.get(j);
line = line
+ this.database.print(item.getVariable(),
item.getValue());
if (j < (itemset.size() - 1))
line = line + ",";
}
line = line + ");";
}
}
if (line==""){
line="---";
}
return (line+"\n");
}
}