/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Statistical_Classifiers.Naive_Bayes;
/**
* <p>Title: Algorithm</p>
*
* <p>Description: It contains the implementation of the algorithm Naive-Bayes</p>
*
* <p>Company: KEEL </p>
*
* @author Alberto Fernández (University of Granada) 02/07/2007
* @version 1.0
* @since JDK1.5
*/
import java.io.IOException;
import org.core.*;
/**
 * Naive-Bayes classifier for data-sets whose attributes are all nominal.
 *
 * <p>The model consists of the prior probability of every class and, for each
 * class/attribute pair, a Laplace-smoothed estimate of the probability of each
 * attribute value. Missing values are skipped both when the model is built and
 * when an example is classified.</p>
 */
public class Algorithm {

    /** Training, validation and test data-sets. */
    myDataset train, val, test;

    /** Names of the output files: training output, test output and model information. */
    String outputTr, outputTst, output;

    /** Prior probability of each class, indexed by class. */
    double classProb[];

    /** Smoothed conditional probabilities, indexed as [class][attribute position][attribute value]. */
    double attrProb[][][];

    /** Raw occurrence counts, indexed as [class][attribute position][attribute value]. */
    int counts[][][];

    /** Number of classes, taken from the training set when {@link #execute()} runs. */
    int nClasses;

    /** Set when the algorithm must not be run (unreadable input or numerical attributes). */
    private boolean somethingWrong = false;

    /** Set when the failure was caused by an I/O error, so that the right message is reported. */
    private boolean readFailed = false;

    /**
     * Default constructor
     */
    public Algorithm() {
    }

    /**
     * It reads the data from the input files (training, validation and test) and takes the
     * names of the output files from the parameters.
     *
     * <p>Problems are not thrown to the caller: they are recorded and reported by
     * {@link #execute()}, which then refuses to run.</p>
     *
     * @param parameters parseParameters It contains the input files, output files and parameters
     */
    public Algorithm(parseParameters parameters) {
        train = new myDataset();
        val = new myDataset();
        test = new myDataset();
        try {
            System.out.println("\nReading the training set: " +
                               parameters.getTrainingInputFile());
            train.readClassificationSet(parameters.getTrainingInputFile(), true);
            System.out.println("\nReading the validation set: " +
                               parameters.getValidationInputFile());
            val.readClassificationSet(parameters.getValidationInputFile(), false);
            System.out.println("\nReading the test set: " +
                               parameters.getTestInputFile());
            test.readClassificationSet(parameters.getTestInputFile(), false);
        } catch (IOException e) {
            System.err.println(
                    "There was a problem while reading the input data-sets: " +
                    e);
            somethingWrong = true;
            readFailed = true;
        }

        // Numerical attributes cannot be handled. The check is short-circuited
        // so that a data-set that failed to load is never inspected.
        somethingWrong = somethingWrong || train.hasNumericalAttributes();

        outputTr = parameters.getTrainingOutputFile();
        outputTst = parameters.getTestOutputFile();
        output = parameters.getOutputFile(0);
        // This algorithm has no further parameters to parse.
    }

    /**
     * It launches the algorithm: builds the model, writes the classification of the
     * validation and test sets and finally writes the model information file.
     * If a problem was detected while loading the data nothing is executed and the
     * cause is reported on the standard error stream.
     */
    public void execute() {
        if (somethingWrong) { //We do not execute the program
            if (readFailed) {
                System.err.println("An error was found while reading the input data-sets.");
            } else {
                System.err.println("An error was found, the data-set have numerical attributes. Please use a discretizer.");
            }
            System.err.println("Aborting the program");
            //We should not use the statement: System.exit(-1);
            return;
        }

        nClasses = train.getnOutputs();
        computeProbabilites();

        // The validation set is the one written to the "training" output file.
        doOutput(this.val, this.outputTr);
        doOutput(this.test, this.outputTst);
        generateOutputInfo();
        System.out.println("Algorithm Finished");
    }

    /**
     * It generates the output file from a given dataset and stores it in a file.
     * The file holds the header of the data-set followed by one line per example
     * with the expected output and the predicted one.
     *
     * @param dataset myDataset input dataset
     * @param filename String the name of the file
     */
    private void doOutput(myDataset dataset, String filename) {
        StringBuilder text = new StringBuilder();
        text.append(dataset.copyHeader()); //we insert the header in the output file
        int nExamples = dataset.getnData();
        for (int i = 0; i < nExamples; i++) {
            text.append(dataset.getOutputAsString(i))
                .append(" ")
                .append(classificationOutput(dataset.getExample(i), dataset.getMissing(i)))
                .append("\n");
        }
        Fichero.escribeFichero(filename, text.toString());
    }

    /**
     * It returns the algorithm classification output given an input example.
     *
     * <p>The class with the highest posterior is selected. The posterior is ranked
     * through log(P(C)) + sum(log(P(x_j | C))) instead of the plain product: with
     * many attributes the product underflows to zero for every class, which made
     * every example unclassifiable. Dividing by the evidence P(X) is not needed
     * because it is the same for all the classes.</p>
     *
     * @param example double[] The input example
     * @param missing boolean [] A vector that stores the possible missing attributes of the examples
     * @return String the predicted class label, or "?" when no class has a positive probability
     */
    private String classificationOutput(double[] example, boolean[] missing) {
        int bestClass = -1;
        double bestScore = Double.NEGATIVE_INFINITY;
        for (int c = 0; c < nClasses; c++) {
            // A class with prior 0 scores -infinity and is therefore never chosen;
            // ties are resolved in favour of the lowest class index.
            double score = Math.log(classProb[c])
                    + computeLogProbExampleClass(example, missing, c);
            if (score > bestScore) {
                bestScore = score;
                bestClass = c;
            }
        }
        return (bestClass != -1) ? train.getOutputValue(bestClass) : "?";
    }

    /**
     * It computes the prior probabilities of the different classes and the
     * probabilities of the attribute values given each class.
     */
    private void computeProbabilites() {
        computeClassProb(); //First the class probabilities
        computeAttrProb(); //Then the probability of the attributes to be in a certain class
    }

    /**
     * Here we compute the prior class probabilities as the fraction of training
     * examples that belong to each class.
     */
    private void computeClassProb() {
        classProb = new double[nClasses];
        train.computeInstancesPerClass();
        for (int c = 0; c < nClasses; c++) {
            classProb[c] = 1.0 * train.numberInstances(c) / train.getnData();
        }
    }

    /**
     * Here we compute the probability of an attribute value to be in a certain class.
     * It fills {@code counts} with the raw occurrences found in the training set and
     * {@code attrProb} with the Laplace-smoothed estimate
     * (count + 1) / (total count + number of values).
     */
    private void computeAttrProb() {
        int nInputs = train.getnInputs();

        attrProb = new double[nClasses][nInputs][];
        counts = new int[nClasses][nInputs][];
        for (int c = 0; c < nClasses; c++) {
            for (int a = 0; a < nInputs; a++) {
                int nValues = train.numberValues(a);
                attrProb[c][a] = new double[nValues];
                counts[c][a] = new int[nValues];
            }
        }

        // Raw counts; missing values do not contribute.
        int nExamples = train.getnData();
        for (int i = 0; i < nExamples; i++) {
            double[] example = train.getExample(i);
            int clas = train.getOutputAsInteger(i);
            for (int a = 0; a < nInputs; a++) {
                if (!train.isMissing(i, a)) {
                    counts[clas][a][(int) example[a]]++;
                }
            }
        }

        // Laplace smoothing: every value receives one extra occurrence.
        for (int c = 0; c < nClasses; c++) {
            for (int a = 0; a < nInputs; a++) {
                int[] valueCounts = counts[c][a];
                int total = valueCounts.length; // the added pseudo-counts
                for (int k = 0; k < valueCounts.length; k++) {
                    total += valueCounts[k];
                }
                for (int k = 0; k < valueCounts.length; k++) {
                    attrProb[c][a][k] = (valueCounts[k] + 1.0) / total;
                }
            }
        }
    }

    /**
     * This function computes the logarithm of the probability of an example given a class,
     * that is, the sum of log(P(x_i | class)) over the attributes that are not missing.
     *
     * @param example double[] The attribute values of the example
     * @param missing boolean [] A vector that stores the possible missing attributes of the examples
     * @param clas int The class to check
     * @return double The computed log-probability (0.0 when every attribute is missing)
     */
    private double computeLogProbExampleClass(double[] example, boolean[] missing, int clas) {
        double logProb = 0.0;
        for (int i = 0; i < example.length; i++) {
            if (!missing[i]) {
                logProb += Math.log(attrProb[clas][i][(int) example[i]]);
            }
        }
        return logProb;
    }

    /**
     * Here we generate some info about the counts of each value for the pair attribute-class
     * and store it, together with the prior probability of each class, in the output file.
     */
    private void generateOutputInfo() {
        StringBuilder info = new StringBuilder();
        for (int c = 0; c < nClasses; c++) {
            info.append("\nClass ").append(train.getOutputValue(c))
                .append(": Prior probability = ").append(classProb[c]).append("\n");
            for (int a = 0; a < counts[c].length; a++) {
                info.append(train.varName(a)).append(": Discrete Estimator. Counts = ");
                int total = 0;
                for (int k = 0; k < counts[c][a].length; k++) {
                    info.append(counts[c][a][k]).append(" ");
                    total += counts[c][a][k];
                }
                info.append("(Total = ").append(total).append(")\n");
            }
            info.append("\n\n");
        }
        Fichero.escribeFichero(output, info.toString());
    }
}