/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Decision_Trees.PUBLIC;
import java.util.StringTokenizer;
import java.util.ArrayList;
import java.util.ArrayDeque;
import java.io.*;
import org.core.Fichero;
/**
*
* File: PUBLIC.java
*
* The PUBLIC algorithm builds a decision tree model by integrating the building
* and pruning steps into a single phase; once the model is built, classification
* is performed according to that model.
*
* The algorithm takes two main parameters:
* n, the number of nodes expanded between consecutive calls to the pruning method, and
* the variant used (PUBLIC(1), PUBLIC(S) or PUBLIC(V)), which determines how the
* lower bound on the cost of a subtree is estimated.
*
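* <p>Typical usage, a minimal sketch (the script path is illustrative):
* <pre>
* PUBLIC method = new PUBLIC("config/public.conf");
* method.execute();
* </pre>
*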
* @author Written by Victoria Lopez Morales (University of Granada) 13/03/2009
* @version 0.1
* @since JDK1.5
*/
public class PUBLIC {
// Tree
/**
* Nodes of the tree built with the PUBLIC algorithm with minimal information
*/
private TreeNode root;
/**
* Nodes of the tree built with the PUBLIC algorithm with complete information (including datasets)
*/
private ArrayList <Node> all_nodes;
/**
* Queue used during the building of the tree, needed for building and pruning
*/
private ArrayDeque <Node> queue;
// Files
/**
* Array with the names of the output files for train, test and other output data
*/
private String outFile[];
/**
* Name of the file that contains the test instances
*/
private String testFile;
/**
* Name of the file that contains the original train instances
*/
private String trainFile;
/**
* Name of the file that contains the reference instances (current train instances)
*/
private String referenceFile;
// Datasets
/**
* Dataset containing all the test instances
*/
private myDataset testDataset;
/**
* Dataset containing all the original train instances
*/
private myDataset trainDataset;
/**
* Dataset containing all the reference instances (current train instances)
*/
private myDataset referenceDataset;
// Timing
/**
* Number used to store the time of the beginning of the algorithm
*/
private long initialTime;
/**
* Seconds used to classify all the training instances
*/
private double classificationTrainTime;
/**
* Seconds used to classify all the test instances
*/
private double classificationTestTime;
/**
* Seconds used to build the tree
*/
private double buildingTime;
// Classified
/**
* Number of correctly classified train instances
*/
private int correctTrain;
/**
* Number of incorrectly classified train instances
*/
private int failTrain;
/**
* Number of correctly classified test instances
*/
private int correctTest;
/**
* Number of incorrectly classified test instances
*/
private int failTest;
// Other parameters
/**
* User parameter: number of nodes that have to be processed between calls to the pruning procedure
*/
private int nodesBetweenPrune;
/**
* User parameter: kind of prune estimation used in the prune procedure
*/
private char publicPruneEstimation;
/**
* Creates a PUBLIC instance by reading the script file that contains all the information needed
* for running the algorithm
*
* @param script The configuration script which contains the parameters of the algorithm
*/
public PUBLIC (String script) {
// We start time-counting
initialTime = System.currentTimeMillis();
// Read the script file
readConfiguration(script); // Names of the input and output files
readParameters(script); // Parameters for the PUBLIC algorithm
// Reading datasets
try {
trainDataset = new myDataset(trainFile, 1);
testDataset = new myDataset(testFile, 3);
referenceDataset = new myDataset(referenceFile, 2);
} catch (Exception e) {
System.err.println(e);
System.exit(1);
}
// Start building the tree
buildTree();
}
/**
* This method builds the tree from all the data stored in the class, in a process that
* integrates the building and pruning phases into one, finally obtaining a tree that can
* be used to classify new instances
*/
private void buildTree () {
Node auxnode;
TreeNode auxtreenode;
ArrayList <Node> nodes;
Split best_split;
int numnodes, numnodesprocessed;
long buildTime;
all_nodes = new ArrayList <Node> ();
// (1) Initialize root node using data set S
System.out.println("\nInitializing root node");
buildTime = System.currentTimeMillis();
auxnode = initializeRootNode ();
System.out.println("Root node initialized");
numnodes = 1;
numnodesprocessed = 0;
// (2) Initialize queue Q to contain root node
queue = new ArrayDeque <Node>();
queue.add(auxnode);
all_nodes.add(auxnode);
// (3) While Q is not empty do
while (!queue.isEmpty()) {
// (4) Dequeue the first node N in Q
auxnode = queue.poll();
System.out.println("\nBeginning node processing...");
// (5) if N is not pure
if (!auxnode.isPure()) {
// (6) for each attribute A
// (7) Evaluate splits on attribute A
best_split = auxnode.evaluateAllSplits();
// (8) Use best split to split node N into N1 and N2
nodes = auxnode.split(best_split, numnodes);
if (nodes == null) {
// The split cannot be done
auxtreenode = root.getNode(auxnode.getIdentifier());
auxtreenode.setLeaf(true);
auxtreenode.setOutputClass(auxnode.getMajorOutputClass());
numnodesprocessed++;
}
else {
// Update the tree with this information
auxtreenode = root.getNode(auxnode.getIdentifier());
auxtreenode.setLeft(new TreeNode(numnodes+1, null, null, false, -1, null));
auxtreenode.setRight(new TreeNode(numnodes+2, null, null, false, -1, null));
auxtreenode.setCondition(new Split(best_split));
// (9) Append N1 and N2 to Q
queue.add((Node)nodes.get(0));
queue.add((Node)nodes.get(1));
all_nodes.add((Node)nodes.get(0));
all_nodes.add((Node)nodes.get(1));
numnodes += 2;
numnodesprocessed++;
}
}
else {
// This node is pure, we set it as a leaf node
auxtreenode = root.getNode(auxnode.getIdentifier());
auxtreenode.setLeaf(true);
auxtreenode.setOutputClass(auxnode.getOutputClass());
numnodesprocessed++;
}
if (numnodesprocessed % nodesBetweenPrune == 0) {
// Start pruning
System.out.println("\nBeginning pruning...");
computeCostPrunePublic(root);
System.out.println("Pruning phase finished!");
}
}
// Before finishing the tree, make sure that the tree is fully pruned
System.out.println("\nBeginning final prune...");
computeCostPrunePublic(root);
System.out.println("Last prune finished!");
// Check the time spent during the tree building
buildingTime = (double)(System.currentTimeMillis()-buildTime)/1000.0;
System.out.println("\nBuilding of the tree finished!!");
System.out.println(numnodes + " nodes generated");
}
/**
* This method performs the classification for all the instances: the train and the test sets
*/
public void execute () {
System.out.println();
System.out.println("Beginning classification...");
System.out.println();
// Classify the train (reference) set
print(referenceDataset, outFile[0], 0);
// Classify the test set
print(testDataset, outFile[1], 1);
// Print other results like the performance of the algorithm and the tree
printResults(trainDataset, outFile[2]);
System.out.println("Classification FINISHED!!");
System.out.println();
System.out.println(getStatistical());
} // end-method
/**
* Reads the configuration script, and extracts its contents.
*
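* <p>The parser below skips a first header line, then expects a line with three
* quoted input file names (train, reference and test, in that order) and a line
* with three quoted output file names. A minimal sketch (key and file names are
* illustrative; only the quoted names and their order matter to the parser):
* <pre>
* algorithm = PUBLIC
* inputData = "train.dat" "reference.dat" "test.dat"
* outputData = "result.tra" "result.tst" "result.txt"
* </pre>
*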
* @param script Name of the configuration script
*/
protected void readConfiguration (String script) {
String fichero, linea, token;
StringTokenizer lineasFichero, tokens;
byte line[];
int i, j;
outFile = new String[3];
fichero = Fichero.leeFichero (script);
lineasFichero = new StringTokenizer (fichero,"\n\r");
lineasFichero.nextToken();
linea = lineasFichero.nextToken();
tokens = new StringTokenizer (linea, "=");
tokens.nextToken();
token = tokens.nextToken();
// Getting the names of training and test files
// reference file will be used as comparison
line = token.getBytes();
for (i=0; line[i]!='\"'; i++);
i++;
for (j=i; line[j]!='\"'; j++);
trainFile = new String (line,i,j-i);
for (i=j+1; line[i]!='\"'; i++);
i++;
for (j=i; line[j]!='\"'; j++);
referenceFile = new String (line,i,j-i);
for (i=j+1; line[i]!='\"'; i++);
i++;
for (j=i; line[j]!='\"'; j++);
testFile = new String (line,i,j-i);
//Getting the path and base name of the results files
linea = lineasFichero.nextToken();
tokens = new StringTokenizer (linea, "=");
tokens.nextToken();
token = tokens.nextToken();
//Getting the names of output files
line = token.getBytes();
for (i=0; line[i]!='\"'; i++);
i++;
for (j=i; line[j]!='\"'; j++);
outFile[0] = new String (line,i,j-i);
for (i=j+1; line[i]!='\"'; i++);
i++;
for (j=i; line[j]!='\"'; j++);
outFile[1] = new String (line,i,j-i);
for (i=j+1; line[i]!='\"'; i++);
i++;
for (j=i; line[j]!='\"'; j++);
outFile[2] = new String (line,i,j-i);
} //end-method
/**
* Reads the configuration script to extract the parameters' values
*
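* <p>The parser below skips the three file-definition lines and then expects two
* "key = value" lines: an integer number of nodes between prunes, and the variant
* PUBLIC(1), PUBLIC(S) or PUBLIC(V). A minimal sketch (key names are illustrative;
* only the value after each '=', preceded by a single space, matters):
* <pre>
* nodesBetweenPrune = 25
* pruneEstimation = PUBLIC(S)
* </pre>
*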
* @param script Name of the configuration script
*
*/
protected void readParameters (String script) {
String file;
String line;
StringTokenizer fileLines, tokens;
file = Fichero.leeFichero (script);
fileLines = new StringTokenizer (file,"\n\r");
// Discard in/out files definition
fileLines.nextToken();
fileLines.nextToken();
fileLines.nextToken();
//fileLines.nextToken();
// Getting the number of nodes generated between prune phases
line = fileLines.nextToken();
tokens = new StringTokenizer (line, "=");
tokens.nextToken();
nodesBetweenPrune = Integer.parseInt(tokens.nextToken().substring(1));
if (nodesBetweenPrune < 1) {
System.err.println("Error: The minimum number of nodes that are generated between prunes is 1");
System.exit(-1);
}
// Getting the version of the PUBLIC algorithm
line = fileLines.nextToken();
tokens = new StringTokenizer (line, "=");
tokens.nextToken();
publicPruneEstimation = tokens.nextToken().substring(1).charAt(7);
if ((publicPruneEstimation != '1') && (publicPruneEstimation != 'S') && (publicPruneEstimation != 'V')) {
System.err.println("Error: The different ways to estimate the cost of the tree for pruning are PUBLIC(1), PUBLIC(S) or PUBLIC(V)");
System.exit(-1);
}
} //end-method
/**
* Initializes the root node of the tree from the current train dataset
*
* @return the root node of the tree
*/
private Node initializeRootNode () {
Node auxnode;
// Create a node with the whole dataset
auxnode = new Node (trainDataset, 1);
// Mark the node created as the root of the tree
root = new TreeNode (1, null, null, false, -1, null);
return auxnode;
}
/**
* Computes the estimated cost for the tree and prunes it according to the MDL principle
*
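* <p>A node with children is collapsed into a leaf whenever encoding it as a leaf
* is no more expensive than keeping the split, i.e. when C(S) + 1 is at most
* C_split(N) plus the costs of both subtrees, as computed below.
*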
* @param node Tree node from which we are going to calculate the cost and prune
* @return the estimated cost of encoding the tree
*/
private double computeCostPrunePublic (TreeNode node) {
double minCost1, minCost2, minCostN, aux, costV1, costV2;
Node aux_node = null;
boolean found = false;
// Get the corresponding node
for (int i=0; i<all_nodes.size() && !found; i++) {
aux_node = (Node)all_nodes.get(i);
if (aux_node.getIdentifier() == node.getIdentifier()) {
found = true;
}
}
if ((node.getLeft() == null)&&(node.getRight() == null)&&(node.isLeaf() == false)) {
// This is a "yet to be expanded" leaf, get its lower bound cost
switch (publicPruneEstimation) {
case '1':
return 1;
case 'S':
return computeMinCostS(aux_node);
case 'V':
costV1 = computeMinCostV(aux_node);
costV2 = computeMinCostV2(aux_node);
if (costV1 < costV2)
return costV1;
else
return costV2;
default:
System.err.println("The prune estimation selected isn't correct");
System.exit(-1);
break;
}
}
// Check if the tree is correctly built
if (((node.getLeft() == null) && (node.getRight() != null)) || ((node.getLeft() != null) && (node.getRight() == null))) {
System.err.println("The node " + node.getIdentifier() + " is badly built");
System.exit(-1);
}
if (node.isLeaf()) {
// This is a "pruned" or "not expandable" leaf
return (C(aux_node) + 1);
}
minCost1 = computeCostPrunePublic(node.getLeft());
minCost2 = computeCostPrunePublic(node.getRight());
minCostN = C_split(node, aux_node) + minCost1 + minCost2;
aux = C(aux_node) + 1;
if (aux < minCostN)
minCostN = aux;
if (minCostN == aux) {
ArrayList <Integer> nodesToRemove;
boolean removed;
System.out.println("Node " + node.getIdentifier() + " and its children are pruned");
// Prune child nodes N1 and N2 from tree
// Delete nodes N1 and N2 and all their descendants from Q
nodesToRemove = node.deleteDescendants(node.getIdentifier());
for (int i=0; i<nodesToRemove.size(); i++) {
removed = false;
for (int j=0; j<all_nodes.size() && !removed; j++) {
if (all_nodes.get(j).getIdentifier() == nodesToRemove.get(i)) {
queue.remove((Node)all_nodes.get(j));
all_nodes.remove(j);
removed = true;
}
}
}
// Mark node N as pruned
node.setLeaf(true);
node.setOutputClass(aux_node.getMajorOutputClass());
}
return minCostN;
}
/**
* Computes the cost of encoding the data records of a node, used to estimate the cost of the tree
*
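* <p>The cost implemented below is the MDL encoding cost
* C(S) = sum_i n_i * log2(n / n_i) + ((k - 1) / 2) * log2(n / 2)
* + log2(pi^(k/2) / Gamma(k/2)),
* where n is the number of records in the node, n_i the number of records of
* class i, and k the number of classes.
*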
* @param aux_node Node for which we compute the cost of encoding its data records
* @return cost of encoding data records in the corresponding node
*/
private double C(Node aux_node) {
double cost = 0;
int ni;
// Calculate the first term of the cost
for (int i=0; i<trainDataset.getNumClasses(); i++) {
ni = aux_node.getNumItemsClassI(i);
if (ni != 0)
cost += (double) ni * (Math.log((double)aux_node.getNumRegisters()/(double)ni)/Math.log(2.0));
}
// Calculate the second term of the cost
cost += (((double)(aux_node.getNumClasses()-1))/2.0) * (Math.log((double)aux_node.getNumRegisters()/2.0)/Math.log(2.0));
// Calculate the third term of the cost
cost += (Math.log((double)Math.pow(Math.PI,(double)aux_node.getNumClasses()/2.0)/(double)gamma(aux_node.getNumClasses(), 2))/Math.log(2.0));
return cost;
}
/**
* Computes the gamma function for a fractional number
*
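* <p>Only half-integer arguments (divisor == 2) are handled: for an even dividend m,
* Gamma(m/2) = (m/2 - 1)!, and for an odd dividend m,
* Gamma(m/2) = sqrt(pi) * (m - 2)!! / 2^((m - 1)/2).
* For example, Gamma(5/2) = sqrt(pi) * 3!! / 2^2 = (3/4) * sqrt(pi).
*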
* @param dividend Dividend of the fractional number for which we are computing the gamma function
* @param divisor Divisor of the fractional number for which we are computing the gamma function
* @return the gamma value of the fractional number given
*/
private double gamma (int dividend, int divisor) {
double gamma;
if (divisor == 2) {
if ((dividend%2) == 0) {
// Even dividend: integer argument, Gamma(n) = (n-1)!
gamma = factorial((dividend/divisor)-1);
}
else {
// Odd dividend: half-integer argument, use the double factorial formula
if (dividend != 1)
gamma = Math.sqrt(Math.PI) * ((double)double_factorial(dividend-2)/Math.pow(2.0,(dividend-1)/2.0));
else
gamma = Math.sqrt(Math.PI);
}
return gamma;
}
else {
System.err.println("This gamma function only computes integers or numbers divided by two");
System.exit(-1);
return 0.0;
}
}
/**
* Computes the factorial of a number
*
* @param x Number for which we compute the factorial
* @return the factorial of the number
*/
private int factorial (int x) {
// Base case: 0! = 1; otherwise multiply by x at each recursion step
if (x == 0)
return 1;
return x * factorial(x-1);
}
/**
* Computes the double factorial of a number
*
* @param x Number for which we compute the double factorial
* @return the double factorial of the number
*/
private int double_factorial (int x) {
// Base case: 1!! = 1; otherwise multiply by x and step down by two
if (x <= 1) {
return 1;
}
return x * double_factorial(x-2);
}
/**
* Computes the cost of the encoding of splitting a node
*
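* <p>The cost implemented below is log2(a) bits to choose the split attribute
* (a attributes in total), plus log2(2^v - 2) bits for a categorical attribute
* with v values, or log2(d - 1) bits for a numeric attribute taking d distinct
* values in the node.
*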
* @param node Node of the tree with minimal information
* @param aux_node Node of the tree with complete information
* @return Cost of encoding a split for the node
*/
private double C_split (TreeNode node, Node aux_node) {
double cost;
cost = Math.log((double)trainDataset.getNumAtr())/Math.log(2.0);
if (trainDataset.getAttributes().get(node.getCondition().getAttribute()).isNominal()) {
// The attribute is categorical
cost += Math.log((Math.pow(2.0, (double)trainDataset.getAttributes().get(node.getCondition().getAttribute()).getValues().size()))-2.0)/Math.log(2.0);
}
else {
// The attribute is numeric
int aux;
aux = aux_node.getDifferentValuesAttributeI(node.getCondition().getAttribute());
cost += Math.log(((double)aux)-1)/Math.log(2.0);
}
return cost;
}
/**
* Computes a lower bound of the cost for the node based on the possibility of a split
*
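* <p>In the PUBLIC(S) estimate implemented below, a subtree with s splits costs at
* least 2*s + 1 + s*log2(a) plus the counts of the smallest remaining classes
* (a attributes, class counts sorted in decreasing order); s is increased greedily
* while each extra split, which costs 2 + log2(a), removes a larger class count
* from the sum.
*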
* @param N node for which we are estimating a cost
* @return lower bound of the cost for the node
*/
private double computeMinCostS (Node N) {
double aux, tmpCost;
int s;
ArrayList<ArrayList<Integer>> ni;
// Obtain a list with n1,...,nk in decreasing order
ni = N.getDecreasedNI();
if (ni.get(0).size() == 1) {
// if k = 1 return (C(S) + 1)
return (C(N) + 1);
}
s = 0;
tmpCost = 2 * s + 1 + s * (Math.log((double)trainDataset.getNumAtr())/Math.log(2.0));
for (int i=s+2; i < ni.get(0).size(); i++) {
tmpCost += ni.get(1).get(i);
}
while (((s + 1) < (ni.get(0).size()-1)) && (ni.get(1).get(s+2) > (2 + (Math.log((double)trainDataset.getNumAtr())/Math.log(2.0))))) {
tmpCost = tmpCost + 2 + (Math.log((double)trainDataset.getNumAtr())/Math.log(2.0)) - ni.get(1).get(s+2);
s++;
}
aux = C(N) + 1;
if (tmpCost < aux)
aux = tmpCost;
return aux;
}
/**
* Computes a lower bound of the cost for the node based on the possibility of a split, using
* additional information
*
* @param N node for which we are estimating a cost
* @return lower bound of the cost for the node
*/
private double computeMinCostV (Node N) {
double aux, max, tmpCost, minCost, auxCost;
int k, s;
ArrayList <ArrayList <Integer>> ni;
k = N.getNumClasses();
if (k == 1) {
// if k = 1 return (C(S) + 1)
return (C(N) + 1);
}
// Obtain a list with the k classes in decreasing order of ni - V(Si)
ni = N.getDecreasedNIV();
s = 1;
tmpCost = 1;
for (int i=0; i < k; i++) {
tmpCost += ni.get(1).get(i);
}
minCost = tmpCost;
while (s <= k) {
tmpCost = tmpCost + 2 + (Math.log((double)trainDataset.getNumAtr())/Math.log(2.0)) - (ni.get(1).get(s-1) - N.V(s-1));
max = 0;
for (int w=s+1; w<=k; w++) {
if (ni.get(1).get(w-1) > max) {
max = ni.get(1).get(w-1);
}
}
auxCost = tmpCost - max;
if (auxCost < minCost) {
minCost = auxCost;
}
s++;
}
aux = C(N) + 1;
if (minCost < aux)
aux = minCost;
return aux;
}
/**
* Computes a lower bound of the cost for the node based on the possibility of a split, using
* additional information in a different way than computeMinCostV
*
* @param N node for which we are estimating a cost
* @return lower bound of the cost for the node
*/
private double computeMinCostV2 (Node N) {
double aux, tmpCost, minCost, auxCost;
int k, s;
ArrayList <ArrayList <Integer>> ni;
int [] B;
k = N.getNumClasses();
if (k == 1) {
// if k = 1 return (C(S) + 1)
return (C(N) + 1);
}
B = new int [2*k];
ni = N.getDecreasedNI();
// Initialize B
for (int i=0; i<k; i++) {
B[2*i+1] = (int)N.V(i);
B[2*i] = ni.get(1).get(i) - B[2*i+1];
}
// Sort array B in decreasing order of B[i]
for(int i=0; i<2*k -1; i++){
int current = B[i];
int w=i;
for(int j=i+1; j<B.length;j++){
if(current < B[j]){
w = j;
current = B[j];
}
}
B[w] = B[i];
B[i] = current;
}
s = 0;
tmpCost = 1;
for (int i=0; i < k; i++) {
tmpCost += ni.get(1).get(i);
}
minCost = tmpCost;
while (s < (2*k-1)) {
tmpCost = tmpCost + 2 + (Math.log((double)trainDataset.getNumAtr())/Math.log(2.0)) - B[s];
auxCost = tmpCost - B[s+1];
if (auxCost < minCost) {
minCost = auxCost;
}
s++;
}
aux = C(N) + 1;
if (minCost < aux)
aux = minCost;
return aux;
}
/**
* Gets the general information about the dataset in a string form
*
* @param dat Dataset from which we are obtaining the general information
* @return a string with the general information about the dataset
*/
private String getHeader (myDataset dat) {
String header;
ArrayList <myAttribute> attributes;
myAttribute output;
attributes = dat.getAttributes();
output = dat.getOutputAttribute();
// Get information about the dataset and the attributes
header = "@relation " + dat.getName() + "\n";
for (int i=0; i<attributes.size(); i++) {
switch (attributes.get(i).getAttributeType()) {
case 1: header += "@attribute " + attributes.get(i).getName() + " integer[" + (int)attributes.get(i).getMin() + "," + (int)attributes.get(i).getMax() + "]\n";
break;
case 2: header += "@attribute " + attributes.get(i).getName() + " real[" + attributes.get(i).getMin() + "," + attributes.get(i).getMax() + "]\n";
break;
case 3: header += "@attribute " + attributes.get(i).getName() + " {";
for (int j=0; j<attributes.get(i).getValues().size()-1; j++) {
header += attributes.get(i).getValue(j) + ",";
}
header += attributes.get(i).getValue(attributes.get(i).getValues().size()-1) + "}\n";
break;
}
}
// Get information about the output attribute
switch (output.getAttributeType()) {
case 1: header += "@attribute " + output.getName() + " integer[" + (int)output.getMin() + "," + (int)output.getMax() + "]\n";
break;
case 2: header += "@attribute " + output.getName() + " real[" + output.getMin() + "," + output.getMax() + "]\n";
break;
case 3: header += "@attribute " + output.getName() + " {";
for (int j=0; j<output.getValues().size()-1; j++) {
header += output.getValue(j) + ",";
}
header += output.getValue(output.getValues().size()-1) + "}\n";
break;
}
return header;
}
/**
* Classifies a given item with the information stored in the tree
*
* @param item Data attribute values for the item we are classifying
* @param atts Attributes in the data set that are used for building the tree and describing the
* instance given
* @return the class assigned to the given item
*/
public int evaluateItem (double [] item, ArrayList <myAttribute> atts) {
return root.evaluate(item, atts);
}
/**
* Prints to a file the result of the classification made with the tree generated by the PUBLIC
* algorithm. This can be done for the train set or the test set.
*
* @param data Dataset that we are classifying
* @param filename Name of the file that is going to store the results
* @param type 0 if we are working with a train set, 1 if we are working with a test set
*/
public void print (myDataset data, String filename, int type) {
String text = getHeader(data);
double item[];
int correct, fail;
long time;
text += "@data\n";
item = new double[data.getNumAtr()];
correct = 0;
fail = 0;
// Check the time spent
time = System.currentTimeMillis();
for (int i = 0; i < data.getNumIns(); i++) {
// Evaluate all the instances
try {
item = data.getDataItem (i);
int cl = (int) evaluateItem(item, data.getAttributes());
if (cl == (int) data.getOutputI(i)) {
correct++;
}
else {
fail++;
}
text += data.getOutputAttribute().getValue((int)data.getOutputI(i)) + " " + data.getOutputAttribute().getValue(cl)+ "\n";
} catch (Exception e) {
System.err.println(e.getMessage());
}
}
// Print the corresponding results
if (type == 0) {
classificationTrainTime = (double)(System.currentTimeMillis()-time)/1000.0;
correctTrain = correct;
failTrain = fail;
}
else if (type == 1) {
classificationTestTime = (double)(System.currentTimeMillis()-time)/1000.0;
correctTest = correct;
failTest = fail;
}
else {
System.err.println("Wrong dataset for printing results");
System.exit(-1);
}
try {
PrintWriter print = new PrintWriter(new FileWriter(filename));
print.print(text);
print.close();
} catch (IOException e) {
System.err.println("Can not open the output file " + filename + ": " + e.getMessage());
}
}
/**
* Gets the general information about the performance of the algorithm. This information includes
* the number of nodes and leaves of the tree, the performance on training and test data, and the
* time spent in the operations.
*
* @return a string with all the important information about the performance of the algorithm
*/
private String getStatistical () {
String text = "";
text = text + "@TotalNumberOfNodes " + root.getNumNodes() + "\n";
text = text + "@NumberOfLeafs " + root.getLeafs() + "\n\n";
text = text + "@NumberOfItemsetsTraining " + referenceDataset.getNumIns() + "\n";
text = text + "@NumberOfCorrectlyClassifiedTraining " + correctTrain + "\n";
text = text + "@PercentageOfCorrectlyClassifiedTraining " + ((double)correctTrain*100.0/(double)referenceDataset.getNumIns()) + "%\n";
text = text + "@NumberOfIncorrectlyClassifiedTraining " + failTrain + "\n";
text = text + "@PercentageOfIncorrectlyClassifiedTraining " + ((double)failTrain*100.0/(double)referenceDataset.getNumIns()) + "%\n\n";
text = text + "@NumberOfItemsetsTest " + testDataset.getNumIns() + "\n";
text = text + "@NumberOfCorrectlyClassifiedTest " + correctTest + "\n";
text = text + "@PercentageOfCorrectlyClassifiedTest " + ((double)correctTest*100.0/(double)testDataset.getNumIns()) + "%\n";
text = text + "@NumberOfIncorrectlyClassifiedTest " + failTest + "\n";
text = text + "@PercentageOfIncorrectlyClassifiedTest " + ((double)failTest*100.0/(double)testDataset.getNumIns()) + "%\n\n";
text = text + "@TotalElapsedTime " + (double)(System.currentTimeMillis()-initialTime)/1000.0 + "s\n";
text = text + "@BuildingElapsedTime " + buildingTime + "s\n";
text = text + "@ClassificationTrainElapsedTime " + classificationTrainTime + "s\n";
text = text + "@ClassificationTestElapsedTime " + classificationTestTime + "s\n";
return text;
}
/**
* Prints to a file the results of the PUBLIC algorithm, that is, the tree itself and the
* general information about it
*
* @param data Dataset that we are working with
* @param filename Name of the file that is going to store the results
*/
public void printResults (myDataset data, String filename) {
String text = getHeader(data);
text += "@inputs\n";
for (int i=0; i<data.getAttributes().size(); i++) {
text = text + data.getAttributes().get(i).getName() + " ";
}
text = text + "\n@outputs " + data.getOutputAttribute().getName() + "\n@data\n\n@decisiontree\n\n" + root.printTree(data.getAttributes(), data.getOutputAttribute()) + "\n";
text += getStatistical ();
try {
PrintWriter print = new PrintWriter(new FileWriter(filename));
print.print(text);
print.close();
} catch (IOException e) {
System.err.println("Can not open the output file " + filename + ": " + e.getMessage());
}
}
}