/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Rule_Learning.ART;
import java.io.*;
import java.util.*;
import keel.Dataset.Attributes;
/**
A Java implementation of the ART algorithm
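(ART builds a classification tree by repeatedly mining association rules
with TBAR and branching each node on the best rule set found; itemsets
covered by no rule go down an "else" branch.)
Usage (see main): java -jar ART.jar parameterfile.txt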
@author Ines de la Torre Quesada (UJA)
@version 1.0 (28-02-2010)
*/
public class ART extends Algorithm
{
/** Root of the decomposition tree. */
Node root = new Node();
/** Total number of nodes in the tree */
int NumberOfNodes;
/** Number of leaves in the tree */
int NumberOfLeafs;
/** Maximum LHS (antecedent) itemset size */
int MaxSize;
/** Minimum support threshold */
double MinSupp = 0.1;
/** Minimum confidence threshold */
double MinConf = 1;
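// Note: with these defaults, the confidence cut-off computed in art(),
// corte = MinConf - MinSupp, evaluates to 1.0 - 0.1 = 0.9.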
/** Constructor.
*
* @param paramFile The parameters file.
*
*/
public ART( String paramFile )
{
boolean salir = false; // set to true when the dataset cannot be handled
try {
// start the timer
startTime = System.currentTimeMillis(); // assign the inherited field; a local declaration here would shadow it and break printResult
// Sets the options of the execution.
StreamTokenizer tokenizer = new StreamTokenizer( new BufferedReader( new FileReader( paramFile ) ) );
initTokenizer( tokenizer );
setOptions( tokenizer );
// Initializes the dataset.
modelDataset = new Dataset( modelFileName, true );
// Check whether there are continuous attributes.
if(Attributes.hasRealAttributes() || Attributes.hasIntegerAttributes())
{
System.err.println("ART can only handle nominal attributes." );
//System.exit(-1);
salir = true;
}
if (!salir){
trainDataset = new Dataset( trainFileName, false );
testDataset = new Dataset( testFileName, false );
NumberOfNodes = 1; // the root is the first node of the tree
NumberOfLeafs = 0;
// Executes the algorithm.
generateTree();
// Prints the results generated by the algorithm.
printTrain();
printTest();
printResult();
}
} catch ( Exception e ){
e.printStackTrace();
// System.exit(-1);
}
}
/** Function to read the options from the execution file and assign the values to the parameters.
*
* @param options The StreamTokenizer that reads the parameters file.
*
* @throws Exception If the format of the file is not correct.
*/
protected void setOptions( StreamTokenizer options ) throws Exception
{
options.nextToken();
// Checks that the file starts with the token algorithm.
if ( options.sval.equalsIgnoreCase( "algorithm" ) )
{
options.nextToken();
options.nextToken();
options.nextToken();
options.nextToken();
// Reads the names of the input files.
if ( options.sval.equalsIgnoreCase( "inputData" ) )
{
options.nextToken();
options.nextToken();
modelFileName = options.sval;
if ( options.nextToken() != StreamTokenizer.TT_EOL )
{
trainFileName = options.sval;
options.nextToken();
testFileName = options.sval;
if( options.nextToken() != StreamTokenizer.TT_EOL )
{
trainFileName = modelFileName;
options.nextToken();
}
}
}
else
throw new Exception( "The file must start with the word inputData." );
while ( true )
{
if( options.nextToken() == StreamTokenizer.TT_EOF )
throw new Exception( "No output file provided." );
if ( options.sval == null )
continue;
else if ( options.sval.equalsIgnoreCase( "outputData" ) )
break;
}
/* Reads the names of the output files*/
options.nextToken();
options.nextToken();
trainOutputFileName = options.sval;
options.nextToken();
testOutputFileName = options.sval;
options.nextToken();
resultFileName = options.sval;
}
else
throw new Exception( "The file must start with the word algorithm followed of the name of the algorithm." );
}
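/* A minimal parameter-file sketch consistent with the parsing above (the
file names below are illustrative placeholders, not real KEEL paths):
algorithm = ART
inputData = "model.dat" "train.dat" "test.dat"
outputData = "output.tra" "output.tst" "result.txt"
*/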
/**
* Runs the algorithm: loads all the itemsets into the root node and builds the tree.
*
*/
public void generateTree(){
Vector data = new Vector(getItemsets());
MaxSize = trainDataset.numAttributes()-1; // maximum antecedent size: every attribute except the class
root.setData(data);
art(data,root);
}
/**
* ART function that builds the tree: it mines association rules with TBAR
* and expands the given node by branching on the best rule set found.
* @param data Vector containing the itemsets to classify
* @param nodo Node of the tree to expand
*
*/
private void art(Vector data, Node nodo){
int k = 1; // current antecedent (LHS) size
double corte = MinConf - MinSupp; // confidence cut-off for accepting a rule set
TBAR tbar = new TBAR(MaxSize,MinSupp,data,trainDataset.attributes); // TBAR association rule miner
Vector<Vector<Rule>> conjuntos;
Vector<Rule> conjunto = new Vector(), c;
Vector<Node> children;
double confidence;
int maxSupp, supp = 0;
Vector datos;
//While the antecedent size is at most the number of attributes
//and the tree is still empty
while(k <= MaxSize && nodo.getAttributes().size()==0){
//rule extraction with TBAR
conjuntos = tbar.ruleExtraction(k);
//If there are rules to branch on
if(conjuntos.size()>0){
confidence = conjuntos.get(0).get(0).getConfidence();
if(confidence >= corte){ //The rules are above the cut-off value
//Rule selection: the candidate sets share the same confidence, so
//keep the set that covers the most training examples
maxSupp = 0;
for(int i=0; i<conjuntos.size(); i++){
supp = 0;
c = conjuntos.get(i);
for(int j = 0; j<c.size(); j++)
supp+=c.get(j).getSupport();
if(supp > maxSupp){
maxSupp = supp;
conjunto = c;
}
} //'conjunto' now holds the rules used to branch the tree
//Branch the tree
nodo.setAttributes(conjunto.get(0).getAttributes());
nodo.setValues(null);
nodo.setSupport(data.size());
children = new Vector();
//for each rule, create a new leaf node
for(int i=0; i<conjunto.size(); i++){
Node n = new Node();
n.setAttributes(null);
n.setValues(conjunto.get(i).getValues());
n.setClas(conjunto.get(i).getClas());
n.setSupport(conjunto.get(i).getSupport());
n.setParent(nodo);
children.add(n);
NumberOfNodes+=1;
}
nodo.setChildren(children);
NumberOfLeafs+=children.size();
//Remove from data the examples covered by the rules
datos = uncoveredData(data, nodo);
nodo.setData(null);
//Subtree for the else branch
if(datos.size()>0){
//create a leaf node for the else branch
Node n = new Node();
n.setParent(nodo);
n.setData(datos);
n.setSupport(datos.size());
children.add(n);
NumberOfNodes+=1;
art(datos,nodo.getChildren(nodo.numChildren()-1));
}
}else k++;
}else{
k++;
}
}
if(nodo.getAttributes().size()==0){ //If no tree was built
int index = this.mostFrequentClass(data); //index of the most frequent class
//label the node with the most frequent class
nodo.setClas(index);
nodo.setSupport(data.size());
}
}
/**
* Returns the index of the most frequent class in the data.
* @param data The data from which the information is extracted
* @return The index of the most frequent class
*/
private int mostFrequentClass(Vector<Itemset> data){
Attribute a = this.trainDataset.getClassAttribute();
int[] frequencies = new int[a.numValues()];
double index;
int max = 0;
int clas = -1;
for(int i=0; i<frequencies.length; i++){
frequencies[i] = 0;
}
for(int i=0; i<data.size(); i++){
index = data.get(i).getClassValue();
frequencies[(int)index]++;
}
for(int i=0; i<frequencies.length; i++){
if(frequencies[i]>max){
max = frequencies[i];
clas = i;
}
}
return clas;
}
/**
* Returns the data not covered by the node.
* @param data The data whose coverage must be determined
* @param n The node that has just been expanded
* @return Vector with the uncovered data
*/
private Vector uncoveredData(Vector data, Node n){
Vector datos = new Vector();
Vector<Integer> ats;
Vector<Integer> vals;
Itemset item;
boolean enc;
int j;
ats = n.getAttributes();
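// Route each itemset to the first child whose attribute values match it;
// itemsets matched by no child remain uncovered.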
for(int i=0; i<data.size(); i++){
item = (Itemset)data.get(i);
j = 0;
enc = false;
while(j<n.numChildren() && !enc){
vals = n.getChildren(j).getValues();
if(covered(ats,vals,item)){
n.getChildren(j).addData(item);
enc = true;
}
else j++;
}
if(!enc) //if the itemset is not covered by any child
datos.add(item);
}
return datos;
}
/**
* Determines whether an itemset is covered by the given attribute values.
* @param ats Vector containing the attributes to evaluate
* @param vals Vector containing the values of the attributes to evaluate
* @param item Itemset to evaluate
* @return true if the itemset satisfies all the values (false otherwise)
*/
private boolean covered(Vector<Integer> ats, Vector<Integer> vals, Itemset item){
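// Example: ats = [0, 2] and vals = [1, 0] cover any itemset whose
// attribute 0 takes value 1 and attribute 2 takes value 0.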
boolean cover = true;
int i=0;
while(i<ats.size() && cover){
if(item.getValue(ats.get(i)) != vals.get(i)){
cover = false;
}else i++;
}
return cover;
}
/** Function to write the decision tree in the form of rules.
*
* @param node The current node.
* @param tab The indentation of the current rule.
*
* @return The tree in form of rules.
*/
public String writeTree( Node node, String tab ){
String cadena = "";
Attribute classAtt = modelDataset.getClassAttribute();
String attName = classAtt.name();
Vector<Integer> ats;
Vector<Integer> vals;
try{
// Print a leaf node.
if ( node.numChildren() == 0 ) {
String value = classAtt.value(node.getClas());
// Print a rule.
if(node.getParent()!=null){
ats = node.getParent().getAttributes();
vals = node.getValues();
if(vals.size() != 0){
cadena += tab + "if( ";
for(int i=0; i<ats.size(); i++){
cadena+=modelDataset.getAttribute(ats.get(i)).name() +
"==" + modelDataset.getAttribute(ats.get(i)).value(vals.get(i))+ " ) and (";
}
cadena = cadena.substring(0, cadena.length()-5); // trim the trailing "and ("
cadena+= " then {\n";
}
}
cadena+= tab+ "\t" + attName + " = \"" + value + "\"\n";
return cadena;
}
for(int i=0; i<node.numChildren(); i++){
if(i==0){
cadena += writeTree( node.getChildren().get(i), tab);
}else{
cadena += writeTree( node.getChildren().get(i), tab + "\t" );
}
cadena += tab + "}else{\n";
}
cadena = cadena.substring(0, cadena.length()-7); // remove the trailing "}else{" added after the last child
cadena+= "\n" + tab + "}";
return cadena;
}catch( Exception e ){
System.out.println( "Error writing tree" );
}
return cadena;
}
/** Function to evaluate the class which the itemset must have according to the classification of the tree.
*
* @param itemset The itemset to evaluate.
* @param node The node that is evaluated at this time.
*
* @return The index of the predicted class.
*/
public int evaluateItemset( Itemset itemset, Node node ) {
try {
// if the node is a final leaf
if ( node.numChildren() == 0 ){
return node.getClas();
}
}catch ( Exception e){
return -1; // the itemset could not be classified
}
// Evaluate the children of the node.
int i=0;
boolean enc = false;
while(i<node.numChildren()-1 && !enc){
if(this.covered(node.getAttributes(),node.getChildren(i).getValues(),itemset))
enc = true;
else i++;
}
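// Descend into the matching branch if one covers the itemset;
// otherwise take the else branch (the last child).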
if(enc)
return(evaluateItemset(itemset,node.getChildren(i)));
else
return(evaluateItemset(itemset,node.getChildren(node.numChildren()-1)));
}
/** Function to get all the itemsets of the dataset.
*
* @return The itemsets.
*/
private Vector getItemsets()
{
Vector itemsets = new Vector( modelDataset.numItemsets());
for ( int i = 0; i < modelDataset.numItemsets(); i++ )
itemsets.addElement( modelDataset.itemset( i ) );
return itemsets;
}
/** Writes the tree and the results of the training and the test in the file.
*
* @throws IOException If the file cannot be written.
*/
public void printResult() throws IOException
{
long totalTime = ( System.currentTimeMillis() - startTime ) / 1000; // elapsed seconds
long seconds = totalTime % 60; // seconds within the minute
long minutes = ( ( totalTime - seconds ) % 3600 ) / 60; // minutes within the hour
String tree = "";
PrintWriter resultPrint;
tree += writeTree( root, "" );
tree += "\n@TotalNumberOfNodes " + NumberOfNodes;
tree += "\n@NumberOfLeafs " + NumberOfLeafs;
tree += "\n\n@NumberOfItemsetsTraining " + trainDataset.numItemsets();
tree += "\n@NumberOfCorrectlyClassifiedTraining " + correct;
tree += "\n@PercentageOfCorrectlyClassifiedTraining " + (float)(correct*100.0)/(float)trainDataset.numItemsets() + "%" ;
tree += "\n@NumberOfInCorrectlyClassifiedTraining " + (trainDataset.numItemsets()-correct);
tree += "\n@PercentageOfInCorrectlyClassifiedTraining " + (float)((trainDataset.numItemsets()-correct)*100.0)/(float)trainDataset.numItemsets() + "%" ;
tree += "\n\n@NumberOfItemsetsTest " + testDataset.numItemsets();
tree += "\n@NumberOfCorrectlyClassifiedTest " + testCorrect;
tree += "\n@PercentageOfCorrectlyClassifiedTest " + (float)(testCorrect*100.0)/(float)testDataset.numItemsets() + "%" ;
tree += "\n@NumberOfInCorrectlyClassifiedTest " + (testDataset.numItemsets()-testCorrect);
tree += "\n@PercentageOfInCorrectlyClassifiedTest " + (float)((testDataset.numItemsets()-testCorrect)*100.0)/(float)testDataset.numItemsets() + "%" ;
tree += "\n\n@ElapsedTime " + ( totalTime - minutes * 60 - seconds ) / 3600 + ":" + minutes / 60 + ":" + seconds;
resultPrint = new PrintWriter( new FileWriter ( resultFileName ) );
resultPrint.print( getHeader() + "\n@decisiontree\n\n" + tree );
resultPrint.close();
}
/**
* Evaluates the training dataset and writes the results in the file.
*
*/
public void printTrain(){
String text = getHeader();
for ( int i = 0; i < trainDataset.numItemsets(); i++ ){
try{
Itemset itemset = trainDataset.itemset( i );
int cl = evaluateItemset( itemset, root );
if ( cl == (int) itemset.getValue( trainDataset.getClassIndex() ) )
correct++;
text += trainDataset.getClassAttribute().value( cl ) + " " +
trainDataset.getClassAttribute().value( ( (int) itemset.getClassValue()) ) + "\n";
}
catch ( Exception e ){
System.err.println( e.getMessage() );
}
}
try{
PrintWriter print = new PrintWriter( new FileWriter ( trainOutputFileName ) );
print.print( text );
print.close();
}catch ( IOException e ){
System.err.println( "Can not open the training output file: " + e.getMessage() );
}
}
/** Evaluates the test dataset and writes the results in the file.
*
*/
public void printTest(){
String text = getHeader();
for ( int i = 0; i < testDataset.numItemsets(); i++){
try{
Itemset itemset = testDataset.itemset( i );
int cl = evaluateItemset( itemset, root );
if ( cl == (int) itemset.getValue( testDataset.getClassIndex() ) )
testCorrect++;
text += testDataset.getClassAttribute().value( ( (int) itemset.getClassValue()) ) + " " +
testDataset.getClassAttribute().value( cl )+ "\n";
}
catch ( Exception e ){
System.err.println( e.getMessage());
}
}
try{
PrintWriter print = new PrintWriter( new FileWriter ( testOutputFileName ) );
print.print( text );
print.close();
}catch ( IOException e ){
System.err.println( "Can not open the training output file." );
}
}
/** Main function.
*
* @param args The parameters file.
*/
public static void main(String[] args) {
if ( args.length != 1){
System.err.println("\nError: you have to specify the parameters file\n\tusage: java -jar ART.jar parameterfile.txt" );
System.exit(-1);
}else{
ART art = new ART(args[0]);
}
}
}//art