/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. S�nchez (luciano@uniovi.es)
J. Alcal�-Fdez (jalcala@decsai.ugr.es)
S. Garc�a (sglopez@ujaen.es)
A. Fern�ndez (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.MIL.Diverse_Density.DD;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.List;
import keel.Algorithms.MIL.AbstractMIAlgorithm;
import keel.Algorithms.MIL.Diverse_Density.Optimization.DDoptimization;
import net.sourceforge.jclec.util.dataset.IDataset;
import net.sourceforge.jclec.util.dataset.KeelDataSet;
import net.sourceforge.jclec.util.dataset.IDataset.IInstance;
/**
* Diverse Density
*
* O. Maron and T. Lozano-Perez. A Framework for Multiple Instance Learning. Neural Information Processing Systems, 10, 1998.
*/
public class DD extends AbstractMIAlgorithm
{
// ///////////////////////////////////////////////////////////////
// ---------------------------------------------------- Properties
// ///////////////////////////////////////////////////////////////
protected double[] best;
protected DDoptimization optimization = new DDoptimization(this);
/////////////////////////////////////////////////////////////////
// ---------------------- Implementing Algorithm abstract methods
/////////////////////////////////////////////////////////////////
public void execute() throws Exception
{
loadTrainDataset();
loadTestDataset();
List<Integer> biggestTrainBags = new ArrayList<Integer>();
int maxSize = 0;
for(int i = 0; i < trainInstances.size(); i++)
{
if(trainInstances.get(i).get(0).getValue(classIndex) == 1)
{
if(trainInstances.get(i).size() > maxSize)
{
biggestTrainBags.clear();
biggestTrainBags.add(i);
maxSize = trainInstances.get(i).size();
}
else if(trainInstances.get(i).size() == maxSize)
biggestTrainBags.add(i);
}
}
double[] x = new double[2*numberFeatures], aux = new double[2*numberFeatures];
double[][] y = new double[2][2*numberFeatures];
double likelihood, bestLikelihood = Double.MAX_VALUE;
for (int i = 0; i < 2*numberFeatures; i++)
{
y[0][i] = Double.NaN;
y[1][i] = Double.NaN;
}
for(int i = 0; i < biggestTrainBags.size(); i++)
{
for(int j = 0; j < trainInstances.get(biggestTrainBags.get(i)).size(); j++)
{
for (int k = 0; k < numberFeatures;k++)
{
x[2*k] = trainInstances.get(biggestTrainBags.get(i)).get(j).getValue(k+1);
x[2*k+1] = 1.0;
}
aux = optimization.minimum(x,y);
while(aux==null)
aux = optimization.minimum(optimization.getVarValues(),y);
likelihood = optimization.getMinFunction();
if(likelihood < bestLikelihood)
{
bestLikelihood = likelihood;
best = aux;
aux = new double[x.length];
}
}
}
report(trainReportFileName, trainDataset, trainInstances);
report(testReportFileName, testDataset, testInstances);
}
/////////////////////////////////////////////////////////////////
// ---------------------------------------------- Private methods
/////////////////////////////////////////////////////////////////
private void report(String reportFileName, IDataset dataset, ArrayList<ArrayList<IInstance>> instances)
{
int predictedClass = 0;
String newline = System.getProperty("line.separator");
try {
BufferedReader reader= new BufferedReader(new FileReader(((KeelDataSet) dataset).getFileName()));
BufferedWriter writer= new BufferedWriter(new FileWriter(reportFileName));
String line= reader.readLine();
while(line.compareTo("@data") != 0)
{
writer.write(line + newline);
line = reader.readLine();
}
writer.write(line + newline);
reader.close();
for(int i = 0; i < instances.size(); i++)
{
double [] dist = computeDistribution(instances.get(i));
if (dist == null) writer.write("Null distribution predicted");
double max = 0;
for (int j = 0; j < dist.length; j++)
{
if (dist[j] > max)
{
predictedClass = j;
max = dist[j];
}
}
if (max > 0)
writer.write((int)instances.get(i).get(0).getValue(classIndex) + " " + predictedClass + newline);
else
writer.write((int)instances.get(i).get(0).getValue(classIndex) + " " + "Nose pudo clasificar" + newline);
}
writer.close();
} catch (Exception e) {e.printStackTrace();}
}
private double[] computeDistribution(List<IInstance> instances)
{
int numberInstances = instances.size();
double[][] data = new double [numberInstances][numberFeatures];
for(int i = 0; i < numberInstances; i++)
for(int j = 0; j < numberFeatures; j++)
data[i][j] = instances.get(i).getValue(j+1);
double [] distribution = new double[2];
distribution[0] = 0.0;
for(int i = 0; i < numberInstances; i++)
{
double exp = 0.0;
for(int j = 0; j < numberFeatures; j++)
exp += (best[j*2]-data[i][j])*(best[j*2]-data[i][j])*best[j*2+1]*best[j*2+1];
exp = Math.exp(-exp);
distribution[0] += Math.log(1.0-exp);
}
distribution[0] = Math.exp(distribution[0]);
distribution[1] = 1.0-distribution[0];
return distribution;
}
}