/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Associative_Classification.ClassifierCMAR;
import java.io.IOException;
import org.core.*;
/**
* It contains the implementation of the CMAR algorithm
*
* @author Written by Jesus Alcala (University of Granada) 09/02/2010
* @author Modified by Sergio Almecija (University of Granada) 23/05/2010
* @version 1.0
* @since JDK1.5
*/
public class CMAR {

    /** Training, validation and test data-sets read by the constructor. */
    myDataset train, val, test;

    /**
     * Output locations: training/test classification outputs, data-base file,
     * rule-base file, the two timing logs, and the training input file name
     * (used as a label inside the timing logs).
     */
    String outputTr, outputTst, fileDB, fileRB, fileTime, fileHora, data;

    DataBase dataBase;
    AprioriTFP_CMAR newClassification;

    /** Construction timestamp and elapsed run time, both in milliseconds. */
    long startTime, totalTime;

    /** Algorithm parameters; minConf and minSup are stored multiplied by 100. */
    double minConf, minSup;
    int delta;

    /** Set when the input could not be read or the training set has missing values. */
    private boolean somethingWrong = false;

    /**
     * It reads the data from the input files (training, validation and test) and parses all the
     * parameters from the parameters array.
     *
     * <p>A failure while reading any data-set is reported on stderr and remembered, so that
     * {@link #execute()} refuses to run; it is not rethrown.
     *
     * @param parameters parseParameters It contains the input files, output files and parameters
     */
    public CMAR(parseParameters parameters) {
        this.startTime = System.currentTimeMillis();

        this.train = new myDataset();
        this.val = new myDataset();
        this.test = new myDataset();
        try {
            System.out.println("\nReading the training set: " + parameters.getTrainingInputFile());
            this.train.readClassificationSet(parameters.getTrainingInputFile(), true);
            System.out.println("\nReading the validation set: " + parameters.getValidationInputFile());
            this.val.readClassificationSet(parameters.getValidationInputFile(), false);
            System.out.println("\nReading the test set: " + parameters.getTestInputFile());
            this.test.readClassificationSet(parameters.getTestInputFile(), false);
        } catch (IOException e) {
            System.err.println("There was a problem while reading the input data-sets: " + e);
            this.somethingWrong = true;
        }

        // Only inspect the training set when it was read successfully (short-circuit).
        this.somethingWrong = this.somethingWrong || this.train.hasMissingAttributes();

        this.outputTr = parameters.getTrainingOutputFile();
        this.outputTst = parameters.getTestOutputFile();
        this.fileDB = parameters.getOutputFile(0);
        this.fileRB = parameters.getOutputFile(1);
        this.data = parameters.getTrainingInputFile();

        // The timing logs live next to the rule-base output file.
        this.fileTime = siblingFile(this.fileRB, "time.txt");
        this.fileHora = siblingFile(this.fileRB, "hora.txt");

        // Confidence and support are scaled by 100 before being handed to the miner.
        this.minConf = Double.parseDouble(parameters.getParameter(0)) * 100.0;
        this.minSup = Double.parseDouble(parameters.getParameter(1)) * 100.0;
        this.delta = Integer.parseInt(parameters.getParameter(2));
    }

    /**
     * It builds the path of a file placed in the same directory as another file.
     *
     * <p>When {@code path} contains no {@code '/'} the bare {@code name} is returned (i.e. a path
     * relative to the working directory) instead of failing with a
     * {@code StringIndexOutOfBoundsException}.
     *
     * @param path String path of the reference file, using '/' as separator
     * @param name String name of the sibling file
     * @return String the path of the sibling file
     */
    private static String siblingFile(String path, String name) {
        int slash = path.lastIndexOf('/');
        if (slash < 0) {
            return name;
        }
        return path.substring(0, slash) + "/" + name;
    }

    /**
     * It launches the algorithm: mines the rules from the training set, saves the data base and
     * the rule base, writes the training/test classification outputs and logs the elapsed time.
     *
     * <p>Nothing is executed when the constructor detected a problem with the input data; the
     * problem is reported on stderr and the method returns normally.
     */
    public void execute() {
        if (this.somethingWrong) {
            // Deliberately no System.exit(-1): the caller decides what to do next.
            System.err.println("An error was found: either the data-sets could not be read or the training set has missing values.");
            System.err.println("Please remove the examples with missing data or apply a MV preprocessing.");
            System.err.println("Aborting the program");
            return;
        }

        this.dataBase = new DataBase(this.train);

        // Configure the miner and hand it the training and test examples.
        this.newClassification = new AprioriTFP_CMAR(this.minConf, this.minSup, this.delta);
        this.newClassification.inputDataSet(this.train, this.dataBase);
        this.newClassification.testDataSet(this.test, this.dataBase);

        // Mine the data and generate the classification rules.
        this.newClassification.startCMARclassification();

        this.dataBase.saveFile(this.fileDB);
        this.newClassification.getCurrentRuleListObject().outputCMARrules(this.fileRB);

        // Finally we fill the training and test output files.
        doOutputTra(this.outputTr);
        doOutputTst(this.outputTst);

        this.totalTime = System.currentTimeMillis() - this.startTime;
        this.writeTime();

        System.out.println("Algorithm Finished");
    }

    /**
     * It appends the elapsed time to the two timing logs: the number of whole seconds to the
     * "time" file and an HH:MM:SS stamp to the "hora" file, each followed by the training file
     * name.
     *
     * <p>The {@code totalTime} field is only read, never modified, so the method can be called
     * more than once with consistent results.
     */
    public void writeTime() {
        long totalSeconds = this.totalTime / 1000;
        Files.addToFile(this.fileTime, totalSeconds + " " + this.data + "\n");

        long seg = totalSeconds % 60;
        long totalMinutes = totalSeconds / 60;
        long min = totalMinutes % 60;
        long hor = totalMinutes / 60;

        String stamp = twoDigits(hor) + ":" + twoDigits(min) + ":" + twoDigits(seg);
        Files.addToFile(this.fileHora, stamp + " " + this.data + "\n");
    }

    /**
     * It formats a time component, prefixing a "0" to values lower than 10.
     *
     * @param value long the time component
     * @return String the padded component
     */
    private static String twoDigits(long value) {
        return (value < 10) ? "0" + value : "" + value;
    }

    /**
     * It generates the classification output for the training set and stores it in a file
     *
     * @param filename String the name of the file
     */
    private void doOutputTra(String filename) {
        writeOutput(this.train, this.newClassification.dataArray, filename);
    }

    /**
     * It generates the classification output for the test set and stores it in a file
     *
     * @param filename String the name of the file
     */
    private void doOutputTst(String filename) {
        writeOutput(this.test, this.newClassification.testDataArray, filename);
    }

    /**
     * It writes the header of a dataset followed by one line per example containing the expected
     * output and the output predicted by the classifier, separated by a blank.
     *
     * @param dataset myDataset the dataset providing the header and the expected outputs
     * @param records short[][] the miner's records; records[i] is classified for example i
     * @param filename String the name of the file
     */
    private void writeOutput(myDataset dataset, short[][] records, String filename) {
        // StringBuilder keeps this linear in the number of examples.
        StringBuilder output = new StringBuilder();
        output.append(dataset.copyHeader());
        for (int i = 0; i < dataset.getnData(); i++) {
            output.append(dataset.getOutputAsString(i))
                  .append(' ')
                  .append(this.classificationOutput(records[i]))
                  .append('\n');
        }
        Files.writeFile(filename, output.toString());
    }

    /**
     * It returns the algorithm classification output given an input example
     *
     * @param example short[] The input example, as stored in the miner's data arrays
     * @return String the class label predicted by the rule list, or "?" when the rule list
     *         returns a non-positive value for the example
     */
    private String classificationOutput(short[] example) {
        int clas = this.newClassification.currentRlist.classifyRecordWCS(example);
        if (clas > 0) {
            // NOTE(review): the offset presumably maps the miner's class item number back to
            // the index of the class value in the training set -- confirm against DataBase.
            return this.train.getOutputValue(clas - this.dataBase.getOrderClas());
        }
        return "?";
    }
}