/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
*
* ISAAK: Search for the <aqui> tag to find where the probabilities are ;)
*
*/
package keel.Algorithms.Semi_Supervised_Learning.Basic;
import keel.Algorithms.Lazy_Learning.LazyAlgorithm;
import keel.Dataset.Attributes;
import keel.Dataset.InstanceSet;
import java.util.Arrays;
/**
 * Naive Bayes classifier for mixed numeric/nominal data.
 *
 * Numeric attributes are modelled with one Gaussian per class (mean and
 * population standard deviation estimated from the training data); nominal
 * attributes are modelled with Laplace-corrected relative frequencies per
 * class. The model is built inside the constructors, after the data has been
 * loaded through the readDataFiles methods of the superclass.
 *
 * Note: {@link #evaluate(double[])} and {@link #evaluate2(double[])} share the
 * internal {@code likehood} buffer, which is overwritten on every call.
 */
public class NumericalNaiveBayes extends LazyAlgorithm{

    //Additional structures

    /** isNominal[a] is true when input attribute a is handled as nominal. */
    private boolean isNominal[];
    /** Standard deviation of each numeric attribute per class, indexed [attribute][class]. */
    private double stdDev [][];
    /** Mean of each numeric attribute per class, indexed [attribute][class]. */
    private double mean [][];
    /** Number of training instances of each class. */
    private double classDistribution[];
    /** Prior of each class: classDistribution[c] / number of training instances. */
    private double classProb[];
    /** Conditional probability of each nominal value, indexed [attribute][class][value]. */
    private double attrProb[][][]; //ACV
    /** Scratch buffer with one (unnormalized) score per class; rewritten on every evaluation. */
    private double likehood[];

    /**
     * Builds the classifier from data already held in arrays.
     *
     * All five arguments are forwarded unchanged to the superclass
     * readDataFiles method; {@code clases} is additionally stored as the
     * number of classes.
     *
     * @param train training data, forwarded to readDataFiles
     * @param clasesTrain training class labels, forwarded to readDataFiles
     * @param test test data, forwarded to readDataFiles
     * @param clasesTest test class labels, forwarded to readDataFiles
     * @param clases number of classes
     *
     */
    public NumericalNaiveBayes (double [][] train, int [] clasesTrain, double [][] test, int [] clasesTest, int clases) {

        readDataFiles(train, clasesTrain, test, clasesTest, clases);

        // Three output slots are reserved; only the first two receive a name.
        outFile = new String[3];
        outFile[0] = "salida1";
        outFile[1] = "salida2";

        nClasses = clases;

        //Naming the algorithm
        name="NumericalNaiveBayes";

        allocateStructures();

        // An attribute is nominal when it reports a number of nominal values other than -1.
        for(int i=0;i<inputAtt;i++){
            isNominal[i] = Attributes.getAttribute(i).getNumNominalValues()!=-1;
        }

        trainModel();

    } //end-method

    /**
     * Builds the classifier from already loaded instance sets.
     *
     * @param script Name of the configuration script
     * @param train training instance set; also used to find out which input attributes are nominal
     * @param test test instance set, forwarded to readDataFiles
     * @param refer reference instance set, forwarded to readDataFiles
     *
     */
    public NumericalNaiveBayes (String script, InstanceSet train, InstanceSet test, InstanceSet refer) {

        readDataFiles(script, train, test, refer);

        //Naming the algorithm
        name="NumericalNaiveBayes";

        allocateStructures();

        for(int i=0;i<inputAtt;i++){
            isNominal[i] = train.getAttributeDefinitions().getInputAttribute(i).getNumNominalValues()!=-1;
        }

        trainModel();

    } //end-method

    /**
     * Builds the classifier from a configuration script.
     *
     * @param script Name of the configuration script
     *
     */
    public NumericalNaiveBayes (String script) {

        readDataFiles(script);

        //Naming the algorithm
        name="NumericalNaiveBayes";

        allocateStructures();

        // Here "train" is the member inherited from the superclass, filled by readDataFiles.
        for(int i=0;i<inputAtt;i++){
            isNominal[i] = train.getAttributeDefinitions().getInputAttribute(i).getNumNominalValues()!=-1;
        }

        trainModel();

    } //end-method

    /**
     * Allocates the model arrays. Requires inputAtt and nClasses to be set.
     * Java zero-initializes new arrays, so no explicit fill is needed.
     */
    private void allocateStructures() {

        stdDev=new double [inputAtt][nClasses];
        mean=new double [inputAtt][nClasses];
        classDistribution=new double [nClasses];
        classProb=new double [nClasses];
        likehood=new double [nClasses];
        isNominal=new boolean [inputAtt];

    } //end-method

    /**
     * Estimates the model from the training data and then starts the timer.
     * Requires isNominal to be filled in.
     */
    private void trainModel() {

        generateNormalModel();
        computeNominalAttributesProbabilities();

        //Initialization stuff ends here. So, we can start time-counting
        setInitialTime();

    } //end-method

    /**
     * Reads configuration script, to extract the parameter's values.
     * This algorithm has no parameters, so nothing is read.
     *
     * @param script Name of the configuration script
     *
     */
    protected void readParameters (String script) {

    }//end-method

    /**
     * Evaluates a instance to predict its class.
     *
     * @param example Instance evaluated
     * @return Class predicted (index of the highest score), or -1 if no class
     *         obtains a score greater than -1 (e.g. every score is NaN)
     *
     */
    protected int evaluate (double example[]) {

        computeJointLikelihoods(example);

        /*
        <aqui>
        At this point the array with the probabilities (ordered by class) is
        ready. For instance, for iris a possible array would be:

        likehood[0]= 0.2332
        likehood[1]= 0.0056
        likehood[2]= 0.0031

        Careful: they are NOT normalized. Naive Bayes does not need it, but
        other uses may (see evaluate2).
        */

        int result=-1;
        double max=-1;

        for(int c=0;c<likehood.length;c++){
            if(max<likehood[c]){
                max=likehood[c];
                result=c;
            }
        }

        return result;

    } //end-method

    /**
     * Evaluates a instance and returns a normalized score for every class.
     *
     * If the scores cannot be normalized (their sum is zero, infinite or NaN,
     * which happens when every class is ruled out or the product under- or
     * overflows), a uniform distribution is returned instead of NaN values.
     *
     * @param example Instance evaluated
     * @return New array with one probability per class
     *
     */
    protected double[] evaluate2 (double example[]) {

        computeJointLikelihoods(example);

        // Normalize
        double total=0;

        for(int c=0;c<likehood.length;c++){
            total+=likehood[c];
        }

        if(total>0.0 && total<Double.POSITIVE_INFINITY){
            for(int c=0;c<likehood.length;c++){
                likehood[c]/=total;
            }
        }
        else{
            // No usable evidence: dividing would only yield NaN.
            Arrays.fill(likehood, 1.0/likehood.length);
        }

        // Return a copy so that later evaluations do not modify the result.
        return likehood.clone();

    } //end-method

    /**
     * Fills the likehood buffer with prior * product of attribute likelihoods
     * for every class (unnormalized).
     *
     * Classes without training instances skip the attribute terms; their
     * score still ends up multiplied by their prior.
     *
     * @param example Instance evaluated
     */
    private void computeJointLikelihoods(double example[]){

        Arrays.fill(likehood, 1.0);

        for(int c=0;c<nClasses;c++){
            if(classDistribution[c]>0){
                for(int j=0;j<inputAtt;j++){
                    likehood[c]*=computeLikehood(j,c,example[j]);
                }
            }
            likehood[c]*=classProb[c];
        }

    } //end-method

    /**
     * Computes the normalized class scores of every test instance.
     *
     * @return Matrix indexed [test instance][class]
     */
    public double[][] getProbabilities(){

        double [][] probabilities = new double[this.testData.length][this.nClasses];

        for(int q = 0 ; q < this.testData.length ;q++){
            probabilities[q] = this.evaluate2(this.testData[q]);
        }

        return probabilities;
    }

    /**
     * Predicts the class of every test instance.
     *
     * @return Predicted class index of each test instance
     */
    public int[] getPredictions(){

        int [] predictions = new int[this.testData.length];

        for(int q = 0 ; q < this.testData.length ;q++){
            predictions[q] = this.evaluate(this.testData[q]);
        }

        return predictions;
    }

    /**
     * Likelihood of a single attribute value given a class.
     *
     * @param att Index of the input attribute
     * @param out Index of the class
     * @param value Value of the attribute
     * @return Gaussian density for numeric attributes, stored conditional
     *         probability for nominal ones
     */
    private double computeLikehood(int att,int out,double value){

        if(isNominal[att]){
            return attrProb[att][out][real2Nom(value,att)];
        }

        double m=mean[att][out];
        double sigma=stdDev[att][out];

        // Degenerate Gaussian: all the mass sits on the mean.
        if(sigma==0){
            return (value==m) ? 1.0 : 0.0;
        }

        double exponent=-1.0*(value-m)*(value-m)/(2.0*sigma*sigma);
        double density=1.0/Math.sqrt(2.0*Math.PI*sigma*sigma);

        return density*Math.exp(exponent);
    }

    /**
     * Estimates the per-class mean and standard deviation of the numeric
     * attributes, the class counts and the class priors.
     */
    private void generateNormalModel(){

        // First pass: accumulate sum(x) in mean and sum(x^2) in stdDev.
        for(int i=0;i<trainData.length;i++){
            int label=trainOutput[i];
            for(int j=0;j<inputAtt;j++){
                if(!isNominal[j]){
                    mean[j][label]+=trainData[i][j];
                    stdDev[j][label]+=(trainData[i][j]*trainData[i][j]);
                }
            }
            classDistribution[label]+=1.0;
        }

        // Second pass: turn the sums into mean and sqrt(E[x^2] - mean^2).
        for(int j=0;j<inputAtt;j++){
            if(!isNominal[j]){
                for(int c=0;c<nClasses;c++){
                    if(classDistribution[c]>0){
                        mean[j][c]/=classDistribution[c];
                        double variance=stdDev[j][c]/classDistribution[c];
                        variance-=(mean[j][c]*mean[j][c]);
                        // Rounding can leave a tiny negative variance for
                        // (near-)constant attributes; clamp it so that sqrt
                        // cannot return NaN.
                        stdDev[j][c]=Math.sqrt(Math.max(0.0, variance));
                    }
                }
            }
        }

        for(int c=0;c<nClasses;c++){
            classProb[c]=classDistribution[c]/trainData.length;
        }
    }

    /**
     * Maps the real-valued encoding of a nominal attribute back to the index
     * of the nominal value.
     *
     * NOTE(review): assumes nominal values arrive encoded as
     * index/(numberOfValues-1) -- confirm against the data loading code. Under
     * that encoding the product below is an integer up to rounding error, so
     * it is rounded to the nearest integer instead of truncated (truncation
     * turns e.g. 57.999999 into 57 instead of 58).
     *
     * @param real Encoded value
     * @param att Index of the input attribute
     * @return Index of the nominal value
     */
    private int real2Nom(double real,int att){

        int lastIndex=Attributes.getInputAttribute(att).getNominalValuesList().size()-1;

        return (int)Math.round(real*lastIndex);

    }//end-method

    /**
     * Estimates, for every nominal attribute, the probability of each of its
     * values given each class, using Laplace correction.
     */
    private void computeNominalAttributesProbabilities() {

        // Numeric attributes keep an unused one-element placeholder.
        attrProb = new double[inputAtt][nClasses][1];

        for (int j = 0; j < inputAtt; j++) {
            if(isNominal[j]){
                for (int c = 0; c < nClasses; c++) {
                    attrProb[j][c] = new double[Attributes.getInputAttribute(j).getNumNominalValues()];
                }
            }
        }

        // Count how often each value appears together with each class.
        for (int i = 0; i < trainData.length; i++) {
            for (int j = 0; j < inputAtt; j++) {
                if(isNominal[j]){
                    attrProb[j][trainOutput[i]][real2Nom(trainData[i][j],j)]+=1.0;
                }
            }
        }

        // Turn the counts into probabilities.
        for (int j = 0; j < attrProb.length; j++) {
            if(isNominal[j]){
                for (int c = 0; c < attrProb[j].length; c++) {
                    double[] counts = attrProb[j][c];
                    double total = 0.0;

                    for (int k = 0; k < counts.length; k++) {
                        counts[k]++; //Laplace correction
                        total += counts[k];
                    }

                    for (int k = 0; k < counts.length; k++) {
                        counts[k] /= total;
                    }
                }
            }
        }
    }

} //end-class