/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Cristobal Romero (Universidad de Córdoba) 10/10/2007
* @version 0.1
* @since JDK 1.5
*</p>
*/
package keel.Algorithms.Decision_Trees.M5;
import java.io.*;
import java.util.*;
public final class M5 {
/** The root nodes: m_root[0] holds the unpruned tree, m_root[1] the pruned tree */
private M5TreeNode m_root[];
/** The options */
private InformationHandler options;
/** No smoothing? */
private boolean m_UseUnsmoothed = false;
/** Pruning factor */
private double m_PruningFactor = 2;
/** Type of model */
private int m_Model = M5TreeNode.MODEL_TREE;
/** Verbosity */
private int m_Verbosity = 0;
/** Filter for replacing missing values. */
private ReplaceMissingValuesFilter m_ReplaceMissingValuesFilter;
/** Filter for replacing nominal attributes with numeric binary ones. */
private NominalToBinaryFilter m_NominalToBinaryFilter;
public static final int MODEL_LINEAR_REGRESSION = M5TreeNode.
LINEAR_REGRESSION;
public static final int MODEL_REGRESSION_TREE = M5TreeNode.REGRESSION_TREE;
public static final int MODEL_MODEL_TREE = M5TreeNode.MODEL_TREE;
public static final Association[] TAGS_MODEL_TYPES = {
new Association(MODEL_LINEAR_REGRESSION, "Simple linear regression"),
new Association(MODEL_REGRESSION_TREE, "Regression tree"),
new Association(MODEL_MODEL_TREE, "Model tree")
};
static String trainFileName, testFileName, testOutFileName,
trainOutFileName, outputFileName;
static String type, unsmoothed, pruningFactor, verbosity;
static StringBuffer lista = new StringBuffer();
/**
* Name: initTokenizer
* Configures the tokenizer that reads the KEEL input file.
*
* @param tokenizer: the tokenizer we want to configure.
*/
private static void initTokenizer(StreamTokenizer tokenizer) {
tokenizer.resetSyntax();
tokenizer.whitespaceChars(0, ' ');
tokenizer.wordChars(' ' + 1, '\u00FF');
tokenizer.whitespaceChars(',', ',');
tokenizer.quoteChar('"');
tokenizer.quoteChar('\'');
tokenizer.ordinaryChar('=');
tokenizer.ordinaryChar('{');
tokenizer.ordinaryChar('}');
tokenizer.ordinaryChar('[');
tokenizer.ordinaryChar(']');
tokenizer.eolIsSignificant(true);
}
/**
* Name: getNextToken
* Makes the tokenizer read the next token at the start of a new line.
* If that is not possible, returns false; otherwise, returns true.
*
* @param tokenizer: the tokenizer.
*
* @return true if there is a next token to read, false otherwise
*/
private static boolean getNextToken(StreamTokenizer tokenizer) {
try {
//if next token is end of file, returns false
if (tokenizer.nextToken() == StreamTokenizer.TT_EOF) {
return false;
} else {
tokenizer.pushBack();
//looking for the end of line
while (tokenizer.nextToken()
!= StreamTokenizer.TT_EOL) {
;
}
//looking for a new token
while (tokenizer.nextToken()
== StreamTokenizer.TT_EOL) {
;
}
//if there is no new token, return false
if (tokenizer.sval == null) {
return false;
} else {
return true;
}
}
} catch (Exception e) {
System.err.println(e.getMessage());
return false;
}
}
/**
* Name: setOptions
* Sets the options for the execution of the algorithm, reading them from the input configuration file.
*
* @param options: the tokenizer used to tokenize the input file.
* @exception Exception if the configuration file does not follow the expected format.
*/
protected static void setOptions(StreamTokenizer options) throws Exception {
//read the next token
options.nextToken();
//the word read must be "algorithm"
if (options.sval.equalsIgnoreCase("algorithm")) {
options.nextToken();
options.nextToken();
//the name of the algorithm must be "M5"
if (!options.sval.equalsIgnoreCase("M5")) {
throw new Exception("The name of the " +
"algorithm is not correct.");
}
options.nextToken();
options.nextToken();
//read the input data file names
if (options.sval.equalsIgnoreCase("inputData")) {
options.nextToken();
options.nextToken();
trainFileName = options.sval;
options.nextToken();
options.nextToken();
testFileName = options.sval;
getNextToken(options);
} else {
throw new Exception("The file must " +
"start with the word inputData.");
}
//read the output file names
if (options.sval.equalsIgnoreCase("outputData")) {
options.nextToken();
options.nextToken();
trainOutFileName = options.sval;
options.nextToken();
testOutFileName = options.sval;
options.nextToken();
outputFileName = options.sval;
getNextToken(options);
} else {
throw new Exception("The file must start " +
"with the word outputData.");
}
if (options.ttype == StreamTokenizer.TT_EOF) {
return;
}
//read the parameter values
for (int k = 0; k < 4; k++) {
//reading the model type
if (options.sval.equalsIgnoreCase(
"type")) {
options.nextToken();
options.nextToken();
type = options.sval;
if (!getNextToken(options)) {
return;
} else {
continue;
}
}
//reading the pruning factor
if (options.sval.equalsIgnoreCase("pruningFactor")) {
options.nextToken();
options.nextToken();
pruningFactor = options.sval;
if (!getNextToken(options)) {
return;
} else {
continue;
}
}
if (options.sval.equalsIgnoreCase("unsmoothed")) {
options.nextToken();
options.nextToken();
unsmoothed = options.sval;
if (!getNextToken(options)) {
return;
} else {
continue;
}
}
if (options.sval.equalsIgnoreCase("verbosity")) {
options.nextToken();
options.nextToken();
verbosity = options.sval;
if (!getNextToken(options)) {
return;
} else {
continue;
}
}
}
} else {
throw new Exception("The file must start with " +
"the word \"algorithm=\" followed by the " +
"name of the algorithm.");
}
}
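/*
* Illustrative sketch (not part of the original sources) of the KEEL
* configuration file that setOptions(StreamTokenizer) expects. The file names
* are hypothetical; the second inputData entry (typically the validation file)
* is read past but not stored by this parser:
*
*   algorithm = M5
*   inputData = "train.dat" "validation.dat" "test.dat"
*   outputData = "result.tra" "result.tst" "result.txt"
*   type = m
*   pruningFactor = 2
*   unsmoothed = false
*   verbosity = 0
*/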
/**
* Returns the header of a KEEL data file: every non-comment, non-blank line
* up to and including the @data line.
*
* @param fileName the name of the data file
* @return the header as a string, or an empty string if the file cannot be read
*/
public static String getHeader(String fileName) {
try {
BufferedReader br = new BufferedReader(new InputStreamReader(new
FileInputStream(fileName)));
StringBuffer sb = new StringBuffer();
String line = "", aux = "";
line = br.readLine();
if (line.length() >= 5) {
aux = line.substring(0, 5);
}
while (!aux.equalsIgnoreCase("@data")) {
if (!line.startsWith("%") && (line.length() > 1)) {
sb.append(line + "\n");
}
line = br.readLine();
if (line.length() >= 5) {
aux = line.substring(0, 5);
}
}
sb.append("@data\n");
br.close();
return sb.toString();
} catch (Exception e) {
e.printStackTrace();
}
return "";
}
/**
* Returns the header of a KEEL data file without the @data line
* (every line, including comments, that precedes @data).
*
* @param fileName the name of the data file
* @return the header as a string, or an empty string if the file cannot be read
*/
public static String getHeaderNoData(String fileName) {
try {
BufferedReader br = new BufferedReader(new InputStreamReader(new
FileInputStream(fileName)));
StringBuffer sb = new StringBuffer();
String line = "", aux = "";
line = br.readLine();
if (line.length() >= 5) {
aux = line.substring(0, 5);
}
while (!aux.equalsIgnoreCase("@data")) {
sb.append(line + "\n");
line = br.readLine();
if (line.length() >= 5) {
aux = line.substring(0, 5);
}
}
br.close();
return sb.toString();
} catch (Exception e) {
e.printStackTrace();
}
return "";
}
/**
* Constructs a model tree from the training instances
*
* @param inst training instances
* @exception Exception if the classifier can't be built
*/
public final void buildClassifier(M5Instances inst) throws Exception {
if (inst.checkForStringAttributes()) {
throw new Exception("Can't handle string attributes!");
}
options = new InformationHandler(inst);
options.model = m_Model;
options.smooth = !m_UseUnsmoothed;
options.pruningFactor = m_PruningFactor;
options.verbosity = m_Verbosity;
if (!inst.classAttribute().isNumeric()) {
throw new Exception("Class has to be numeric.");
}
inst = new M5Instances(inst);
inst.deleteWithMissingClass();
m_ReplaceMissingValuesFilter = new ReplaceMissingValuesFilter();
m_ReplaceMissingValuesFilter.setInputFormat(inst);
inst = ReplaceMissingValuesFilter.useFilter(inst,
m_ReplaceMissingValuesFilter);
m_NominalToBinaryFilter = new NominalToBinaryFilter();
m_NominalToBinaryFilter.setInputFormat(inst);
inst = NominalToBinaryFilter.useFilter(inst, m_NominalToBinaryFilter);
m_root = new M5TreeNode[2];
options.deviation = stdDev(inst.classIndex(), inst);
m_root[0] = new M5TreeNode(inst, null, options); // build an empty tree
m_root[0].split(inst); // build the unpruned initial tree
m_root[0].numLeaves(0); // number the leaves of the unpruned tree
m_root[1] = m_root[0].copy(null); // make a copy of the unpruned tree
m_root[1].prune(); // prune the tree
if (options.model != M5TreeNode.LINEAR_REGRESSION) {
m_root[1].smoothen(); // compute the smoothed linear models at the leaves
m_root[1].numLeaves(0); // number the leaves of the pruned tree
}
}
/**
* Classifies the given test instance.
*
* @param ins the instance to be classified
* @return the predicted class for the instance
* @exception Exception if the instance can't be classified
*/
public double classifyInstance(M5Instance ins) throws Exception {
m_ReplaceMissingValuesFilter.input(ins);
m_ReplaceMissingValuesFilter.batchFinished();
ins = m_ReplaceMissingValuesFilter.output();
m_NominalToBinaryFilter.input(ins);
m_NominalToBinaryFilter.batchFinished();
ins = m_NominalToBinaryFilter.output();
double prueba = m_root[1].predict(ins, !m_UseUnsmoothed);
//System.out.println(prueba);
//lista.append(""+prueba+"\n");
return prueba;
}
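/*
* Minimal usage sketch (illustrative only; assumes `train` and `test` are
* M5Instances loaded elsewhere, with a numeric class attribute):
*
*   M5 classifier = new M5();
*   classifier.setOptions(new String[] {"-O", "m", "-F", "2", "-V", "0"});
*   classifier.buildClassifier(train);
*   double prediction = classifier.classifyInstance(test.instance(0));
*/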
/**
* @return an enumeration of all the available options
*/
public Enumeration listOptions() {
Vector newVector = new Vector(4);
newVector.addElement(new Information("\tType of model to be used.\n" +
"\tl: linear regression\n" +
"\tr: regression tree\n" +
"\tm: model tree\n" +
"\t(default: m)",
"-O", 1, "-O <l|r|m>"));
newVector.addElement(new Information("\tUse unsmoothed tree.", "-U", 0,
"-U"));
newVector.addElement(new Information("\tPruning factor (default: 2).",
"-F", 1, "-F <double>"));
newVector.addElement(new Information("\tVerbosity (default: 0).",
"-V", 1, "-V <0|1|2>"));
return newVector.elements();
}
/**
* Parses a given list of options.
*
* @param options the list of options as an array of strings
* @exception Exception if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
String modelString = M5StaticUtils.getOption('O', options);
if (modelString.length() != 0) {
if (modelString.equals("l")) {
setModelType(new SelectedAssociation(MODEL_LINEAR_REGRESSION,
TAGS_MODEL_TYPES));
} else if (modelString.equals("r")) {
setModelType(new SelectedAssociation(MODEL_REGRESSION_TREE,
TAGS_MODEL_TYPES));
} else if (modelString.equals("m")) {
setModelType(new SelectedAssociation(MODEL_MODEL_TREE,
TAGS_MODEL_TYPES));
} else {
throw new Exception("Don't know model type " + modelString);
}
} else {
setModelType(new SelectedAssociation(MODEL_MODEL_TREE,
TAGS_MODEL_TYPES));
}
setUseUnsmoothed(M5StaticUtils.getFlag('U', options));
if (m_Model != M5TreeNode.MODEL_TREE) {
setUseUnsmoothed(true);
}
String pruningString = M5StaticUtils.getOption('F', options);
if (pruningString.length() != 0) {
setPruningFactor((new Double(pruningString)).doubleValue());
} else {
setPruningFactor(2);
}
String verbosityString = M5StaticUtils.getOption('V', options);
if (verbosityString.length() != 0) {
setVerbosity(Integer.parseInt(verbosityString));
} else {
setVerbosity(0);
}
}
/**
* Gets the current settings of the Classifier.
*
* @return an array of strings suitable for passing to setOptions
*/
public String[] getOptions() {
String[] options = new String[7];
int current = 0;
switch (m_Model) {
case MODEL_MODEL_TREE:
options[current++] = "-O";
options[current++] = "m";
if (m_UseUnsmoothed) {
options[current++] = "-U";
}
break;
case MODEL_REGRESSION_TREE:
options[current++] = "-O";
options[current++] = "r";
break;
case MODEL_LINEAR_REGRESSION:
options[current++] = "-O";
options[current++] = "l";
break;
}
options[current++] = "-F";
options[current++] = "" + m_PruningFactor;
options[current++] = "-V";
options[current++] = "" + m_Verbosity;
while (current < options.length) {
options[current++] = "";
}
return options;
}
/**
* Converts the output of the training process into a string
*
* @return the converted string
*/
public final String toString() {
try {
StringBuffer text = new StringBuffer();
double absDev = absDev(m_root[0].instances.classIndex(),
m_root[0].instances);
if (options.verbosity >= 1 &&
options.model != M5TreeNode.LINEAR_REGRESSION) {
switch (m_root[0].model) {
case M5TreeNode.LINEAR_REGRESSION:
break;
case M5TreeNode.REGRESSION_TREE:
text.append("@Unpruned training regression tree:\n");
break;
case M5TreeNode.MODEL_TREE:
text.append("@Unpruned training model tree:\n");
break;
}
if (m_root[0].type == false) {
text.append("\n");
}
text.append(m_root[0].treeToString(0, absDev) + "\n");
text.append("@Models at the leaves:\n\n");
// the linear models at the leaves of the unpruned tree
text.append(m_root[0].formulaeToString(false) + "\n");
}
if (m_root[0].model != M5TreeNode.LINEAR_REGRESSION) {
switch (m_root[0].model) {
case M5TreeNode.LINEAR_REGRESSION:
break;
case M5TreeNode.REGRESSION_TREE:
text.append("@Pruned training regression tree:\n");
break;
case M5TreeNode.MODEL_TREE:
text.append("@Pruned training model tree:\n");
break;
}
if (m_root[1].type == false) {
text.append("\n");
}
text.append(m_root[1].treeToString(0, absDev) + "\n"); //the pruned tree
text.append("@Models at the leaves:\n");
if ((m_root[0].model != M5TreeNode.LINEAR_REGRESSION) &&
(m_UseUnsmoothed)) {
text.append(
"@Unsmoothed linear models at the leaves of the pruned tree (simple):\n");
// the unsmoothed linear models at the leaves of the pruned tree
text.append(m_root[1].formulaeToString(false) + "\n");
}
if ((m_root[0].model == M5TreeNode.MODEL_TREE) &&
(!m_UseUnsmoothed)) {
text.append(
"@Smoothed linear models at the leaves of the pruned tree (complex):\n");
text.append(m_root[1].formulaeToString(true) + "\n");
// the smoothed linear models at the leaves of the pruned tree
}
} else {
text.append("@Training linear regression model:\n");
text.append(m_root[1].unsmoothed.toString(m_root[1].instances,
0) + "\n\n");
// print the linear regression model
}
text.append("@Number of Rules: " + m_root[1].numberOfLinearModels());
return text.toString();
} catch (Exception e) {
return "can't print m5' tree";
}
}
/**
* return the number of linear models
* @return the number of linear models
*/
public double measureNumLinearModels() {
return m_root[1].numberOfLinearModels();
}
/**
* return the number of leaves in the tree
* @return the number of leaves in the tree (same as # linear models &
* # rules)
*/
public double measureNumLeaves() {
return measureNumLinearModels();
}
/**
* return the number of rules
* @return the number of rules (same as # linear models &
* # leaves in the tree)
*/
public double measureNumRules() {
return measureNumLinearModels();
}
/**
* Returns an enumeration of the additional measure names
* @return an enumeration of the measure names
*/
public Enumeration enumerateMeasures() {
Vector newVector = new Vector(3);
newVector.addElement("measureNumLinearModels");
newVector.addElement("measureNumLeaves");
newVector.addElement("measureNumRules");
return newVector.elements();
}
/**
* Returns the value of the named measure
* @param additionalMeasureName the name of the measure to query for its value
* @return the value of the named measure
* @exception IllegalArgumentException if the named measure is not supported
*/
public double getMeasure(String additionalMeasureName) {
if (additionalMeasureName.compareTo("measureNumRules") == 0) {
return measureNumRules();
} else if (additionalMeasureName.compareTo("measureNumLinearModels") ==
0) {
return measureNumLinearModels();
} else if (additionalMeasureName.compareTo("measureNumLeaves") == 0) {
return measureNumLeaves();
} else {
throw new IllegalArgumentException(additionalMeasureName
+ " not supported (M5)");
}
}
/**
* Get the value of UseUnsmoothed.
*
* @return Value of UseUnsmoothed.
*/
public boolean getUseUnsmoothed() {
return m_UseUnsmoothed;
}
/**
* Set the value of UseUnsmoothed.
*
* @param v Value to assign to UseUnsmoothed.
*/
public void setUseUnsmoothed(boolean v) {
m_UseUnsmoothed = v;
}
/**
* Get the value of PruningFactor.
*
* @return Value of PruningFactor.
*/
public double getPruningFactor() {
return m_PruningFactor;
}
/**
* Set the value of PruningFactor.
*
* @param v Value to assign to PruningFactor.
*/
public void setPruningFactor(double v) {
m_PruningFactor = v;
}
/**
* Get the value of Model.
*
* @return Value of Model.
*/
public SelectedAssociation getModelType() {
return new SelectedAssociation(m_Model, TAGS_MODEL_TYPES);
}
/**
* Set the value of Model.
*
* @param newMethod Value to assign to Model.
*/
public void setModelType(SelectedAssociation newMethod) {
if (newMethod.getTags() == TAGS_MODEL_TYPES) {
m_Model = newMethod.getSelectedTag().getID();
}
}
/**
* Get the value of Verbosity.
*
* @return Value of Verbosity.
*/
public int getVerbosity() {
return m_Verbosity;
}
/**
* Set the value of Verbosity.
*
* @param v Value to assign to Verbosity.
*/
public void setVerbosity(int v) {
m_Verbosity = v;
}
public static M5 forName(String classifierName, String[] options) throws
Exception {
return (M5) M5StaticUtils.forName(M5.class,
classifierName,
options);
}
/**
* Tests if enumerated attribute(s) exists in the instances
* @param inst instances
* @return true if there is at least one; false if none
*/
public final static boolean hasEnumAttr(M5Instances inst) {
int j;
boolean b = false;
for (j = 0; j < inst.numAttributes(); j++) {
if (inst.attribute(j).isNominal() == true) {
b = true;
}
}
return b;
}
/**
* Tests if missing value(s) exists in the instances
* @param inst instances
* @return true if there is missing value(s); false if none
*/
public final static boolean hasMissing(M5Instances inst) {
int i, j;
boolean b = false;
for (i = 0; i < inst.numInstances(); i++) {
for (j = 0; j < inst.numAttributes(); j++) {
if (inst.instance(i).isMissing(j) == true) {
b = true;
}
}
}
return b;
}
/**
* Returns the sum of the instances values of an attribute
* @param attr an attribute
* @param inst instances
* @return the sum value
*/
public final static double sum(int attr, M5Instances inst) {
int i;
double sum = 0.0;
for (i = 0; i <= inst.numInstances() - 1; i++) {
sum += inst.instance(i).value(attr);
}
return sum;
}
/**
* Returns the squared sum of the instances values of an attribute
* @param attr an attribute
* @param inst instances
* @return the squared sum value
*/
public final static double sqrSum(int attr, M5Instances inst) {
int i;
double sqrSum = 0.0, value;
for (i = 0; i <= inst.numInstances() - 1; i++) {
value = inst.instance(i).value(attr);
sqrSum += value * value;
}
return sqrSum;
}
/**
* Returns the standard deviation value of the instances values of an attribute
* @param attr an attribute
* @param inst instances
* @return the standard deviation value
*/
public final static double stdDev(int attr, M5Instances inst) {
int i, count = 0;
double sd, va, sum = 0.0, sqrSum = 0.0, value;
for (i = 0; i <= inst.numInstances() - 1; i++) {
count++;
value = inst.instance(i).value(attr);
sum += value;
sqrSum += value * value;
}
if (count > 1) {
va = (sqrSum - sum * sum / count) / count;
va = Math.abs(va);
sd = Math.sqrt(va);
} else {
sd = 0.0;
}
return sd;
}
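// For reference: the loop above computes the population standard deviation
// in a single pass, sd = sqrt((sqrSum - sum*sum/n) / n), where n = numInstances.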
/**
* Returns the absolute deviation value of the instances values of an attribute
* @param attr an attribute
* @param inst instances
* @return the absolute deviation value
*/
public final static double absDev(int attr, M5Instances inst) {
int i;
double average = 0.0, absdiff = 0.0, absDev;
for (i = 0; i <= inst.numInstances() - 1; i++) {
average += inst.instance(i).value(attr);
}
if (inst.numInstances() > 1) {
average /= (double) inst.numInstances();
for (i = 0; i <= inst.numInstances() - 1; i++) {
absdiff += Math.abs(inst.instance(i).value(attr) - average);
}
absDev = absdiff / (double) inst.numInstances();
} else {
absDev = 0.0;
}
return absDev;
}
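// For reference: absDev is the mean absolute deviation, (1/n) * sum(|x_i - mean|),
// and is defined as 0.0 when there are fewer than two instances.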
/**
* Returns the variance value of the instances values of an attribute
* @param attr an attribute
* @param inst instances
* @return the variance value
*/
public final static double variance(int attr, M5Instances inst) {
int i, count = 0;
double value, sum = 0.0, sqrSum = 0.0, va;
for (i = 0; i <= inst.numInstances() - 1; i++) {
value = inst.instance(i).value(attr);
sum += value;
sqrSum += value * value;
count++;
}
if (count > 0) {
va = (sqrSum - sum * sum / count) / count;
} else {
va = 0.0;
}
return va;
}
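// For reference: this is the population variance, (sqrSum - sum*sum/n) / n,
// with no n-1 bias correction.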
/**
* Rounds a double
* @param value the double value
* @return the double rounded
*/
public final static long roundDouble(double value) {
long roundedValue;
roundedValue = value > 0 ? (long) (value + 0.5) :
-(long) (Math.abs(value) + 0.5);
return roundedValue;
}
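// Illustrative behaviour: roundDouble(2.4) == 2, roundDouble(2.5) == 3 and
// roundDouble(-2.5) == -3 (halves are rounded away from zero).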
/**
* Returns the largest (closest to positive infinity) long integer value that is not greater than the argument.
* @param value the double value
* @return the floor integer
*/
public final static long floorDouble(double value) {
long floorValue;
floorValue = value > 0 ? (long) value : -(long) (Math.abs(value) + 1);
return floorValue;
}
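// Illustrative behaviour: floorDouble(2.7) == 2 and floorDouble(-2.7) == -3.
// Unlike Math.floor, an exact negative integer such as -3.0 yields -4 here.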
/**
* Rounds a double and converts it into a formatted right-justified String.
* It is like %f format in C language.
* @param value the double value
* @param width the width of the string
* @param afterDecimalPoint the number of digits after the decimal point
* @return the double as a formatted string
*/
public final static String doubleToStringF(double value, int width,
int afterDecimalPoint) {
StringBuffer stringBuffer;
String resultString;
double temp;
int i, dotPosition;
long precisionValue;
if (afterDecimalPoint < 0) {
afterDecimalPoint = 0;
}
precisionValue = 0;
temp = value * Math.pow(10.0, afterDecimalPoint);
if (Math.abs(temp) < Long.MAX_VALUE) {
precisionValue = roundDouble(temp);
if (precisionValue == 0) {
resultString = String.valueOf(0);
stringBuffer = new StringBuffer(resultString);
stringBuffer.append(".");
for (i = 1; i <= afterDecimalPoint; i++) {
stringBuffer.append("0");
}
resultString = stringBuffer.toString();
} else {
resultString = String.valueOf(precisionValue);
stringBuffer = new StringBuffer(resultString);
dotPosition = stringBuffer.length() - afterDecimalPoint;
while (dotPosition < 0) {
stringBuffer.insert(0, 0);
dotPosition++;
}
stringBuffer.insert(dotPosition, ".");
if (stringBuffer.charAt(0) == '.') {
stringBuffer.insert(0, 0);
}
resultString = stringBuffer.toString();
}
} else {
resultString = "NaN";
}
// Fill in space characters.
stringBuffer = new StringBuffer(Math.max(width, resultString.length()));
for (i = 0; i < stringBuffer.capacity() - resultString.length(); i++) {
stringBuffer.append(' ');
}
stringBuffer.append(resultString);
return stringBuffer.toString();
}
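// Illustrative call: doubleToStringF(3.14159, 8, 2) yields "    3.14"
// (the value rounded to 2 decimals, right-justified in a field of width 8).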
/**
* Rounds a double and converts it into a formatted right-justified String. If the double is not equal to zero and not in the range [10e-3,10e7] it is returned in scientific format.
* It is like %g format in C language.
* @param value the double value
* @param width the width of the string
* @param precision the number of valid digits
* @return the double as a formatted string
*/
public final static String doubleToStringG(double value, int width,
int precision) {
StringBuffer stringBuffer;
String resultString;
double temp;
int i, dotPosition, exponent = 0;
long precisionValue;
if (precision <= 0) {
precision = 1;
}
precisionValue = 0;
exponent = 0;
if (value != 0.0) {
exponent = (int) floorDouble(Math.log(Math.abs(value)) /
Math.log(10));
temp = value * Math.pow(10.0, precision - exponent - 1);
precisionValue = roundDouble(temp); // then output value = precisionValue * pow(10,exponent+1-precision)
if (precision - 1 !=
(int) (Math.log(Math.abs(precisionValue) + 0.5) / Math.log(10))) {
exponent++;
precisionValue = roundDouble(precisionValue / 10.0);
}
}
if (precisionValue == 0) { // value = 0.0
resultString = String.valueOf("0");
} else {
if (precisionValue >= 0) {
dotPosition = 1;
} else {
dotPosition = 2;
}
if (exponent < -3 || precision - 1 + exponent > 7) { // Scientific format.
resultString = String.valueOf(precisionValue);
stringBuffer = new StringBuffer(resultString);
stringBuffer.insert(dotPosition, ".");
stringBuffer = deleteTrailingZerosAndDot(stringBuffer);
stringBuffer.append("e").append(String.valueOf(exponent));
resultString = stringBuffer.toString();
} else { // Fixed-point format.
resultString = String.valueOf(precisionValue);
stringBuffer = new StringBuffer(resultString);
for (i = 1; i <= -exponent; i++) {
stringBuffer.insert(dotPosition - 1, "0");
}
if (exponent <= -1) {
stringBuffer.insert(dotPosition, ".");
} else if (exponent <= precision - 1) {
stringBuffer.insert(dotPosition + exponent, ".");
} else {
for (i = 1; i <= exponent - (precision - 1); i++) {
stringBuffer.append("0");
}
stringBuffer.append(".");
}
// deleting trailing zeros and dot
stringBuffer = deleteTrailingZerosAndDot(stringBuffer);
resultString = stringBuffer.toString();
}
}
// Fill in space characters.
stringBuffer = new StringBuffer(Math.max(width, resultString.length()));
for (i = 0; i < stringBuffer.capacity() - resultString.length(); i++) {
stringBuffer.append(' ');
}
stringBuffer.append(resultString);
return stringBuffer.toString();
}
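// Illustrative calls: doubleToStringG(3.14159, 10, 3) keeps 3 significant digits
// and yields "      3.14", while doubleToStringG(0.00012345, 10, 3) falls into
// scientific format and yields "   1.23e-4".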
/**
* Deletes the trailing zeros and decimal point in a stringBuffer
* @param stringBuffer string buffer
* @return the string buffer with trailing zeros and the decimal point removed
*/
public final static StringBuffer deleteTrailingZerosAndDot(StringBuffer
stringBuffer) {
while (stringBuffer.charAt(stringBuffer.length() - 1) == '0' ||
stringBuffer.charAt(stringBuffer.length() - 1) == '.') {
if (stringBuffer.charAt(stringBuffer.length() - 1) == '0') {
stringBuffer.setLength(stringBuffer.length() - 1);
} else {
stringBuffer.setLength(stringBuffer.length() - 1);
break;
}
}
return stringBuffer;
}
/**
* Returns the smoothed value according to the smoothing formula (np+kq)/(n+k)
* @param p a double, normally the prediction of the model at the current node
* @param q a double, normally the prediction of the model at the node above
* @param n the number of instances at the node above
* @param k the smoothing constant (default 15)
* @return the smoothed value
*/
public final static double smoothenValue(double p, double q, int n, int k) {
return (n * p + k * q) / (double) (n + k);
}
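// Worked example: with p = 2.0, q = 4.0, n = 10 and k = 15 the smoothed value
// is (10*2.0 + 15*4.0) / (10 + 15) = 80.0 / 25.0 = 3.2.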
/**
* Returns the correlation coefficient of two double vectors
* @param y1 double vector 1
* @param y2 double vector 2
* @param n the length of two double vectors
* @return the correlation coefficient
*/
public final static double correlation(double y1[], double y2[], int n) {
int i;
double av1 = 0.0, av2 = 0.0, y11 = 0.0, y22 = 0.0, y12 = 0.0, c;
if (n <= 1) {
return 1.0;
}
for (i = 0; i < n; i++) {
av1 += y1[i];
av2 += y2[i];
}
av1 /= (double) n;
av2 /= (double) n;
for (i = 0; i < n; i++) {
y11 += (y1[i] - av1) * (y1[i] - av1);
y22 += (y2[i] - av2) * (y2[i] - av2);
y12 += (y1[i] - av1) * (y2[i] - av2);
}
if (y11 * y22 == 0.0) {
c = 1.0;
} else {
c = y12 / Math.sqrt(Math.abs(y11 * y22));
}
return c;
}
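// For reference: this is the Pearson correlation coefficient,
// r = sum((y1_i - mean1)*(y2_i - mean2))
//       / sqrt(sum((y1_i - mean1)^2) * sum((y2_i - mean2)^2)),
// defined as 1.0 when n <= 1 or either vector has zero variance.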
/**
* Tests if two double values are equal to each other
* @param a double 1
* @param b double 2
* @return true if equal; false if not equal
*/
public final static boolean eqDouble(double a, double b) {
if (Math.abs(a) < 1e-10 && Math.abs(b) < 1e-10) {
return true;
}
double c = Math.abs(a) + Math.abs(b);
if (Math.abs(a - b) < c * 1e-10) {
return true;
} else {
return false;
}
}
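// Illustrative behaviour: eqDouble(1.0, 1.0 + 1e-12) is true (relative difference
// below 1e-10), while eqDouble(1.0, 1.000001) is false.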
/**
* Prints error message and exits
* @param err error message
*/
public final static void errorMsg(String err) {
System.out.print("Error: ");
System.out.println(err);
System.exit(1);
}
/**
* Returns a separator line
* @return a line of dashes followed by a newline
*/
public final static String separatorToString() {
return "--------------------------------------------------------------------------------\n";
}
/**
* Returns the header line of the output
* @return a string with the M5Java version
*/
public final static String headToString() {
StringBuffer text = new StringBuffer();
text.append("M5Java version " + InformationHandler.VERSION + "\n");
return text.toString();
}
/**
* Main method for M5' algorithm
*
* @param argv command line arguments; argv[0] is the KEEL configuration file
*/
public static void main(String[] argv) {
try {
StreamTokenizer tokenizer =
new StreamTokenizer(
new BufferedReader(new FileReader(argv[0])));
initTokenizer(tokenizer);
setOptions(tokenizer);
String[] opt;
String strOpt = "-t " + trainFileName + " -T " + testFileName +
" -O " + type + " -F " + pruningFactor + " -V " +
verbosity;
//copyHeaders();
if (unsmoothed.equalsIgnoreCase("true")) {
strOpt += " -U";
}
opt = strOpt.split(" ");
String strOut = EvaluateModel.evaluateModel(new M5(), opt);
//System.out.println(strOut);
PrintWriter pw = new PrintWriter(new FileOutputStream(
outputFileName));
pw.print(strOut);
pw.flush();
pw.close();
/*PrintWriter listando = new PrintWriter(new FileOutputStream("probando.txt"));
listando.print(lista.toString());
listando.close();*/
} catch (Exception e) {
System.err.println(e.getMessage());
}
}
}