/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Cristobal Romero (Universidad de Córdoba) 10/10/2007
* @version 0.1
* @since JDK 1.5
*</p>
*/
package keel.Algorithms.Decision_Trees.M5;
import java.lang.*;
import java.io.*;
/**
* Class for handling options
*/
public final class InformationHandler implements Serializable {

    /** =true, smoothed result; otherwise, unsmoothed result. */
    boolean smooth;

    /** Random seed for cross-validation. */
    int randomSeed;

    /** Class column. */
    int classcol;

    /** Verbosity level, 0-2. */
    int verbosity;

    /**
     * Output model type; could be linearRegression (1),
     * regressionTree (2), modelTree (3).
     */
    int model;

    /** The number of folds for cross-validation. */
    int numFolds;

    /** Pruning factor a in (n+ak)/(n-k). */
    double pruningFactor;

    /** Name of the training file. */
    String trainFile;

    /** Name of the test file. */
    String testFile;

    /** Linear model number, falling into which tested instances will be printed. */
    int lmNo;

    /**
     * The global standard deviation of the class attribute of the instances,
     * used for splitting stopping, scale determination of the class attribute.
     */
    double deviation;

    final static String VERSION = "v1.1";

    /**
     * Constructs an object holding default options, taking the class column
     * from the given dataset.
     * @param inst dataset whose class index becomes the class column
     */
    public InformationHandler(M5Instances inst) {
        smooth = true;
        randomSeed = 1;
        classcol = inst.classIndex();
        verbosity = 0;
        model = M5TreeNode.MODEL_TREE;
        numFolds = 10;
        pruningFactor = 2;
        trainFile = null;
        testFile = null;
        lmNo = 0;
    }

    /**
     * Constructs an object to store command line options and other necessary
     * information.
     *
     * Every option has the form <code>-X</code>, optionally followed by a
     * value that is either glued to the switch (<code>-Xvalue</code>) or given
     * as the next argument (<code>-X value</code>). Parsing stops at the first
     * argument that is not a switch of at least two characters.
     *
     * NOTE(review): whether <code>M5.errorMsg</code> terminates the program is
     * not visible from this file; the code does not rely on it doing so.
     *
     * @param argv command line arguments
     */
    public InformationHandler(String[] argv) {
        // -3 marks "class column not specified"; resolved in initialize().
        classcol = -3;
        pruningFactor = 1.0;
        randomSeed = 1;
        model = M5TreeNode.MODEL_TREE;
        trainFile = null;
        testFile = null;
        numFolds = 0;
        lmNo = 0;

        if (argv.length < 1 || !argv[0].startsWith("-")) {
            M5.errorMsg("no training file specified. See -help.");
        }

        int count = 0;
        while (count < argv.length && argv[count].startsWith("-")
               && argv[count].length() >= 2) {
            char sw = argv[count].charAt(1); // sw = switch

            // rest = value of the switch, or null when none was supplied
            String rest = null;
            if (argv[count].length() > 2) {
                rest = argv[count].substring(2);
            } else if (count + 1 < argv.length
                       && !argv[count + 1].startsWith("-")) {
                count++;
                rest = argv[count];
            }

            switch (sw) {
            case 'c':
                if (rest != null) {
                    char first = firstChar(rest);
                    if (first >= '1' && first <= '9') {
                        // columns are 1-based on the command line
                        classcol = Integer.parseInt(rest) - 1;
                    } else if (first == 'f') {
                        classcol = 0;
                    } else if (first == 'l') {
                        // -1 means "last column"; resolved in initialize()
                        classcol = -1;
                    } else {
                        classcol = -2;
                    }
                }
                break;
            case 'f':
                if (rest != null) {
                    pruningFactor = Double.parseDouble(rest);
                    if (pruningFactor < -0.01 || pruningFactor > 10.01) {
                        M5.errorMsg(
                                "pruning factor out of limit (0.0 - 10.0).\n" +
                                "Default value 1.0. (0.0 - 3.0) is the " +
                                "recommended range.");
                    }
                }
                break;
            case 'h':
                printValidOptions();
                // printValidOptions() exits; the break guards against an
                // accidental fall-through into -L should that ever change.
                break;
            case 'L':
                // Without a value the default linear model number is kept,
                // instead of failing inside Integer.parseInt(null).
                if (rest != null) {
                    lmNo = Integer.parseInt(rest);
                }
                break;
            case 'o':
                model = M5TreeNode.MODEL_TREE;
                if (rest != null) {
                    switch (firstChar(rest)) {
                    case '1':
                    case 'l':
                    case 'L':
                        model = M5TreeNode.LINEAR_REGRESSION;
                        break;
                    case '2':
                    case 'r':
                    case 'R':
                        model = M5TreeNode.REGRESSION_TREE;
                        break;
                    case '3':
                    case 'm':
                    case 'M':
                        model = M5TreeNode.MODEL_TREE;
                        break;
                    default:
                        M5.errorMsg("unknown model type -o " + rest +
                                    " . See -help");
                        System.exit(1);
                    }
                }
                break;
            case 's':
                numFolds = 10;
                if (rest != null) {
                    randomSeed = Integer.parseInt(rest);
                    if (randomSeed < 0) {
                        M5.errorMsg("randomization seed must be >= 0. " +
                                    "Default value is 1.");
                    }
                }
                break;
            case 't':
                trainFile = rest;
                break;
            case 'T':
                testFile = rest;
                break;
            case 'v':
                if (rest != null) {
                    verbosity = Integer.parseInt(rest);
                    if (verbosity < 0 || verbosity > 2) {
                        M5.errorMsg(
                                "verbosity level should range within (0-2). " +
                                "See -help.");
                    }
                }
                break;
            case 'x':
                numFolds = 10;
                if (rest != null) {
                    numFolds = Integer.parseInt(rest);
                    if (numFolds <= 1 || numFolds > 100) {
                        M5.errorMsg(
                                "fold number for cross-validation must be within" +
                                " (2 - 100). See -help.");
                    }
                }
                break;
            default:
                if (rest == null) {
                    System.out.println("M5' error: Invalid option -" + sw);
                } else {
                    M5.errorMsg("invalid option -" + sw + " " + rest);
                }
                System.exit(1);
            }
            count++;
        }

        if (trainFile == null) {
            M5.errorMsg("no training file specified. See -help.");
        }
    }

    /**
     * Returns the first character of an option value, or the NUL character
     * when the value is empty, so that callers never index past the end.
     * @param value a non-null option value
     * @return the first character, or '\0' for an empty string
     */
    private static char firstChar(String value) {
        return value.length() > 0 ? value.charAt(0) : '\0';
    }

    /**
     * Initializes for constructing model trees: checks the stored options
     * against the dataset and resolves the class column when it was left
     * unspecified (-3) or requested as "last" (-1).
     * @param inst a dataset
     * @exception Exception if something goes wrong
     */
    public final void initialize(M5Instances inst) throws Exception {
        if (numFolds > inst.numInstances()) {
            M5.errorMsg("fold number for cross-validation greater than the " +
                        "number of instances.");
        }
        if (classcol == -3 || classcol == -1) {
            classcol = inst.numAttributes() - 1;
        }
        if (inst.classAttribute().isNominal()) {
            M5.errorMsg("class column must be real or integer attribute.");
        }
        if (verbosity < 0 && (testFile == null || numFolds >= 1)) {
            verbosity = 0;
        }
    }

    /**
     * Returns a description of the information stored in an
     * 'InformationHandler' object, basically containing command line options.
     * @param inst a dataset; supplies the name of the class attribute
     * @return the formatted option summary
     * @exception Exception if something goes wrong
     */
    public final String toString(M5Instances inst) throws Exception {
        StringBuilder text = new StringBuilder();
        text.append(" Options:\n\n");
        text.append(" Training file : " + trainFile + "\n");
        if (testFile != null) {
            text.append(" Test file : " + testFile +
                        "\n");
        }
        text.append(" Class attribute : " +
                    inst.classAttribute().name()
                    + " (column " + (classcol + 1) + ")\n");
        if (numFolds > 1) {
            text.append(" Cross-Validation: " + numFolds +
                        "-fold with random seed " + randomSeed + "\n");
        }
        text.append(" Verbosity level : " + verbosity + "\n");
        if (model == M5TreeNode.LINEAR_REGRESSION) {
            text.append(" Output model : linear regression" +
                        "\n");
        }
        if (model == M5TreeNode.REGRESSION_TREE) {
            text.append(" Output model : regression tree" + "\n");
        }
        if (model == M5TreeNode.MODEL_TREE) {
            // the pruning factor is only reported for model trees
            text.append(" Pruning factor : " + pruningFactor + "\n");
            text.append(" Output model : model tree\n");
        }
        text.append("\n");
        return text.toString();
    }

    /**
     * Prints valid command line options and simply explains the output,
     * then terminates the JVM with exit status 1.
     */
    public final void printValidOptions() {
        System.out.println("Usage:");
        System.out.println(" M5Java [-options]\n");
        System.out.println("Options:");
        System.out.println(" -c (<num>|first|last) column to predict values " +
                           "(default last)");
        System.out.println(
                " -f <num> pruning factor 0.0 - 10.0 " +
                "(default 1.0)");
        System.out.println(" -h displays this help");
        System.out.println(" -o <l|m|r> output model: linear, " +
                           "model tree, or regression tree");
        System.out.println(" -s <num> random seed for " +
                           "cross-validation only. No randomization");
        System.out.println(" while 0 (default 1)");
        System.out.println(" -t <file> training set file ");
        System.out.println(" -T <file> test set file");
        System.out.println(" -v <num> verbosity level 0,1,2 " +
                           "(default 0)");
        System.out.println(" -x <num> cross validation " +
                           "(default 10-fold)\n");
        System.out.println("Definitions:");
        System.out.println(
                " Correlation coefficient: correlation between actual " +
                "values and predictions");
        System.out.println(
                " Mean absolute error: average absolute prediction " +
                "error");
        System.out.println(
                " Root mean squared error: square root of the average " +
                "squared prediction error");
        System.out.println(
                " Relative absolute error: ratio of the mean absolute " +
                "residuals to the absolute");
        System.out.println(" deviation of the target values");
        System.out.println(" Root relative squared error: square root of the " +
                           "ratio of the variance of the ");
        System.out.println(
                " residuals to the variance of the target values\n");
        System.out.println(
                " Note: 100% relative error is the same as would be " +
                "obtained by predicting a");
        System.out.println(" simple average\n");
        System.out.println("Description:");
        System.out.println(
                " An unsmoothed prediction is calculated directly by " +
                "the function at the leaf.");
        System.out.println(
                " A smoothed prediction uses the value calculated at " +
                "the leaf of the tree,");
        System.out.println(
                " and passes it back up the tree, smoothing at each " +
                "higher node.\n");
        System.out.println(" Let");
        System.out.println(
                "\tp' be the model passed up to the next higher node,");
        System.out.println("\tp be the model passed to this node from below,");
        System.out.println("\tq be the model at this node,");
        System.out.println(
                "\tn be the number of training instances that reach " +
                "the node below,");
        System.out.println("\tk be a constant (default value 15),\n");
        System.out.println(" then the smoothed model at this node is:\n");
        System.out.println("\tp' = (n*p+k*q) / (n+k)\n");
        System.out.println("Version:");
        System.out.println("\t" + InformationHandler.VERSION);
        System.exit(1);
    }
}