/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Manuel Moreno (Universidad de Córdoba) 01/07/2008
* @version 0.1
* @since JDK 1.5
*</p>
*/
package keel.Algorithms.Decision_Trees.CART;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.StreamTokenizer;
import keel.Algorithms.Decision_Trees.CART.dataset.DataSetManager;
import keel.Algorithms.Decision_Trees.CART.impurities.IImpurityFunction;
import keel.Algorithms.Neural_Networks.NNEP_Common.data.DoubleTransposedDataSet;
import keel.Algorithms.Neural_Networks.NNEP_Common.data.KeelDataSet;
/**
* Main class for CART algorithm.
* CART: Classification And Regression Trees (Breiman et al., 1984). CART trees are binary trees.
*
*/
public abstract class RunCART
{
    /** Algorithm */
    protected CART cartAlgorithm;

    /** The name of the file that contains the information to build the model. */
    protected static String modelFileName = "";

    /** The name of the file that contains the information to make the training. */
    protected static String trainFileName = "";

    /** Training data, read from {@link #trainFileName} by the constructor. */
    protected DoubleTransposedDataSet trainData;

    /** The name of the file that contains the information to make the test. */
    protected static String testFileName = "";

    /** Test data, read from {@link #testFileName} by the constructor. */
    protected DoubleTransposedDataSet testData;

    /** The name of the train output file. */
    protected static String trainOutputFileName;

    /** The name of the test output file. */
    protected static String testOutputFileName;

    /** The name of the result file. */
    protected static String resultFileName;

    /** Number of parameters of the algorithm (upper bound of the parameter-reading loop). */
    private int nParam = 3;

    /** The instant of starting the algorithm. */
    private long startTime;

    /** Maximum allowed depth of the tree */
    private int maxDepth;

    /** Impurity function to use. Regression or Classification depends on this */
    protected static IImpurityFunction impurityFunction;

    /**
     * Reads the parameter file, loads the training and test data sets, then
     * builds and prunes the tree.
     * <p>
     * Any exception raised along the way is caught and its stack trace is
     * printed; in that case the remaining steps are skipped and some fields
     * (for instance {@link #cartAlgorithm}) may be left unset.
     * </p>
     *
     * @param file parameter file
     * @param regression set at true if tree is used for regression
     * and if false is used for classification
     */
    public RunCART(String file, boolean regression) {
        // starts the time
        startTime = System.currentTimeMillis();
        try {
            /* Sets the options of the execution */
            BufferedReader reader = new BufferedReader(new FileReader(file));
            try {
                StreamTokenizer tokenizer = new StreamTokenizer(reader);
                initTokenizer(tokenizer);
                setOptions(tokenizer);
            } finally {
                // The parameter file is only needed while parsing the options:
                // release the file handle whether or not parsing succeeded.
                reader.close();
            }

            System.out.println("trainFileName: " +trainFileName + " testFileName: "+ testFileName);

            // open the file data
            KeelDataSet trainKeel = new KeelDataSet(trainFileName);
            trainData = new DoubleTransposedDataSet();
            trainData.read(DataSetManager.readSchema(trainFileName), trainKeel);

            KeelDataSet testKeel = new KeelDataSet(testFileName);
            testData = new DoubleTransposedDataSet();
            testData.read(DataSetManager.readSchema(testFileName), testKeel);

            // create the algorithm giving the building patterns
            cartAlgorithm = new CART(trainData);

            // configure the algorithm
            cartAlgorithm.setImpurityFunction(impurityFunction);
            cartAlgorithm.setRegression(regression);
            cartAlgorithm.setMaxDepth(maxDepth);

            // Build tree
            cartAlgorithm.build_tree();

            // Prune tree
            cartAlgorithm.prune_tree();

            System.out.println("Algorithm finished ("+(System.currentTimeMillis()-startTime)+")");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Function to initialize the stream tokenizer.
     * <p>
     * Control characters, the space and the comma are whitespace; every other
     * character up to 0xFF forms words (numbers are therefore returned as
     * words), except the two quote characters and the ordinary characters
     * {@code = { } [ ]}. End of line is reported as a token.
     * </p>
     *
     * @param tokenizer The tokenizer.
     */
    private void initTokenizer(StreamTokenizer tokenizer) {
        tokenizer.resetSyntax();
        tokenizer.whitespaceChars(0, ' ');
        tokenizer.wordChars(' ' + 1, '\u00FF');
        tokenizer.whitespaceChars(',', ',');
        tokenizer.quoteChar('"');
        tokenizer.quoteChar('\'');
        tokenizer.ordinaryChar('=');
        tokenizer.ordinaryChar('{');
        tokenizer.ordinaryChar('}');
        tokenizer.ordinaryChar('[');
        tokenizer.ordinaryChar(']');
        tokenizer.eolIsSignificant(true);
    }

    /** Function to read the options from the execution file and assign
     * the values to the parameters.
     * <p>
     * The expected layout is an {@code algorithm} line, an {@code inputData}
     * line, an {@code outputData} line and then the parameter lines, of which
     * only {@code maxDepth} is recognised.
     * </p>
     *
     * @param options The StreamTokenizer that reads the parameters file.
     *
     * @throws Exception If the format of the file is not correct.
     */
    protected void setOptions(StreamTokenizer options) throws Exception {
        options.nextToken();

        /* Checks that the file starts with the token algorithm.
         * Constant-first comparison: sval is null when the first token is not
         * a word (e.g. an empty file), which must yield the format error
         * below rather than a NullPointerException. */
        if (!"algorithm".equalsIgnoreCase(options.sval)) {
            throw new Exception("The file must start with the word " +
                    "algorithm followed of the name of the algorithm.");
        }

        /* Two tokens after "algorithm": with the expected layout these are
         * '=' and the algorithm name. The name is not validated. */
        options.nextToken();
        options.nextToken();

        /* Two more tokens: with the expected layout, the end of line and the
         * first word of the following line. */
        options.nextToken();
        options.nextToken();

        /* Reads the names of the input files */
        if (!"inputData".equalsIgnoreCase(options.sval)) {
            throw new Exception("No file test provided.");
        }
        options.nextToken();
        options.nextToken();
        modelFileName = options.sval;
        if (options.nextToken() != StreamTokenizer.TT_EOL) {
            trainFileName = options.sval;
            options.nextToken();
            testFileName = options.sval;
            if (options.nextToken() != StreamTokenizer.TT_EOL) {
                // A further token on the line: the first file is used for training.
                trainFileName = modelFileName;
                options.nextToken();
            }
        }

        /* Reads the names of the output files: skips tokens up to "outputData" */
        while (true) {
            if (options.nextToken() == StreamTokenizer.TT_EOF) {
                throw new Exception("No output file provided.");
            }
            if ("outputData".equalsIgnoreCase(options.sval)) {
                break;
            }
        }
        options.nextToken();
        options.nextToken();
        trainOutputFileName = options.sval;
        options.nextToken();
        testOutputFileName = options.sval;
        options.nextToken();
        resultFileName = options.sval;

        /* Moves to the first parameter line. getNextToken returns false when
         * nothing (or no word) follows the outputData line. */
        if (!getNextToken(options)) {
            throw new Exception("No instances provided.");
        }
        if (options.ttype == StreamTokenizer.TT_EOF) {
            return;
        }

        for (int k = 0; k < nParam; k++) {
            if (!"maxDepth".equalsIgnoreCase(options.sval)) {
                /* Any other parameter should be added here */
                // NOTE(review): an unrecognised parameter is not skipped, so
                // nothing after it is read; the tokenizer is left on it.
                break;
            }

            /* Reads the maxDepth parameter: skips '=' and parses the value once */
            options.nextToken();
            options.nextToken();
            int depth = Integer.parseInt(options.sval);
            if (depth > 0) {
                maxDepth = depth;
            }

            if (!getNextToken(options)) {
                return;
            }
        } // end for
    }

    /** Puts the tokenizer in the first token of the next non-empty line.
     * <p>
     * The rest of the current line is discarded, as are any empty lines that
     * follow it.
     * </p>
     *
     * @param tokenizer The tokenizer which reads this function.
     *
     * @return True if the tokenizer now stands on a word or quoted string.
     * False if the end of file was reached, if the next line starts with any
     * other kind of token, or if reading failed (the error message is printed
     * to the standard error stream).
     */
    private boolean getNextToken(StreamTokenizer tokenizer) {
        try {
            /* Discards the remainder of the current line. The end of file
             * must end the scan too: the tokenizer keeps returning TT_EOF
             * once the input is exhausted, so waiting only for TT_EOL would
             * never finish on a file whose last line has no line terminator. */
            int token = tokenizer.nextToken();
            while (token != StreamTokenizer.TT_EOL) {
                if (token == StreamTokenizer.TT_EOF) {
                    return false;
                }
                token = tokenizer.nextToken();
            }

            /* Skips empty lines */
            do {
                token = tokenizer.nextToken();
            } while (token == StreamTokenizer.TT_EOL);

            /* sval is only set for words and quoted strings */
            return tokenizer.sval != null;
        } catch (Exception e) {
            System.err.println(e.getMessage());
            return false;
        }
    }
}