/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.MIL.Diverse_Density.EMDD;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import keel.Algorithms.MIL.AbstractMIAlgorithm;
import keel.Algorithms.MIL.Diverse_Density.Optimization.EMDDoptimization;

import net.sourceforge.jclec.util.dataset.IDataset;
import net.sourceforge.jclec.util.dataset.KeelDataSet;
import net.sourceforge.jclec.util.dataset.IDataset.IInstance;

/**
 * MIEMDD
 *
 * Qi Zhang, Sally A. Goldman: EM-DD: An Improved Multiple-Instance Learning
 * Technique. In: Advances in Neural Information Processing Systems 14,
 * 1073-1080, 2001.
*/ public class EMDD extends AbstractMIAlgorithm { // /////////////////////////////////////////////////////////////// // ---------------------------------------------------- Properties // /////////////////////////////////////////////////////////////// protected double[] best; protected double[][] multiInstanceData; protected EMDDoptimization optimization = new EMDDoptimization(this); ///////////////////////////////////////////////////////////////// // ----------------------------------------------- Public Methods ///////////////////////////////////////////////////////////////// public double[][] getMultiInstanceData() { return multiInstanceData; } ///////////////////////////////////////////////////////////////// // ---------------------- Implementing Algorithm abstract methods ///////////////////////////////////////////////////////////////// public void execute() throws Exception { loadTrainDataset(); loadTestDataset(); multiInstanceData = new double[trainInstances.size()][numberFeatures]; best = new double[2*numberFeatures]; double[] x = new double[2*numberFeatures]; double[][] y = new double[2][2*numberFeatures]; double[] aux = new double[2*numberFeatures]; double[] previous = new double[2*numberFeatures]; double[] bestAux = new double[2*numberFeatures]; double minError = Double.MAX_VALUE; double likelihood, previousLikelihood; for (int i = 0; i < 2*numberFeatures; i++) { y[0][i] = Double.NaN; y[1][i] = Double.NaN; } List<Integer> list = fill(trainInstances.size()-1); for (int i = 0; i < list.size(); i++) { for (int j = 0; j < trainInstances.get(list.get(i)).size(); j++) { for (int k = 0; k < numberFeatures; k++) { x[2 * k] = trainInstances.get(list.get(i)).get(j).getValue(k+1); x[2 * k + 1] = 1.0; } previousLikelihood = Double.MAX_VALUE; likelihood = Double.MAX_VALUE/10.0; for(int k = 0; k < 10 && likelihood < previousLikelihood; k++) { previousLikelihood = likelihood; for (int l = 0; l < trainInstances.size(); l++) { int insIndex = findInstance(l, x); for (int 
attribute = 0; attribute < numberFeatures; attribute++) multiInstanceData[l][attribute] = trainInstances.get(l).get(insIndex).getValue(attribute+1); } aux = optimization.minimum(x, y); while (aux == null) aux = optimization.minimum(optimization.getVarValues(), y); likelihood = optimization.getMinFunction(); previous = x; x = aux; } if (likelihood > previousLikelihood) best = previous; else best = x; int error = 0; double distribution[] = new double[2]; for (int k = 0; k < trainInstances.size(); k++) { List<IInstance> bag = new ArrayList<IInstance>(); for(int l = 0; l < trainInstances.get(k).size(); l++) bag.add(trainInstances.get(k).get(l)); distribution = computeDistribution(bag); if (distribution[1] >= 0.5 && trainInstances.get(k).get(0).getValue(classIndex) == 0) error++; else if (distribution[1] < 0.5 && trainInstances.get(k).get(0).getValue(classIndex) == 1) error++; } if (error < minError) { bestAux = best; minError = error; } } } best = bestAux; report(trainReportFileName, trainDataset, trainInstances); report(testReportFileName, testDataset, testInstances); } ///////////////////////////////////////////////////////////////// // ---------------------------------------------- Private methods ///////////////////////////////////////////////////////////////// private void report(String reportFileName, IDataset dataset, ArrayList<ArrayList<IInstance>> instances) { int predictedClass = 0; String newline = System.getProperty("line.separator"); try { BufferedReader reader= new BufferedReader(new FileReader(((KeelDataSet) dataset).getFileName())); BufferedWriter writer= new BufferedWriter(new FileWriter(reportFileName)); String line= reader.readLine(); while(line.compareTo("@data") != 0) { writer.write(line + newline); line = reader.readLine(); } writer.write(line + newline); reader.close(); for(int i = 0; i < instances.size(); i++) { double [] dist = computeDistribution(instances.get(i)); if (dist == null) writer.write("Null distribution predicted"); double max = 0; 
for (int j = 0; j < dist.length; j++) { if (dist[j] > max) { predictedClass = j; max = dist[j]; } } if (max > 0) writer.write((int)instances.get(i).get(0).getValue(classIndex) + " " + predictedClass + newline); else writer.write((int)instances.get(i).get(0).getValue(classIndex) + " " + "Nose pudo clasificar" + newline); } writer.close(); } catch (Exception e) {e.printStackTrace();} } private double[] computeDistribution(List<IInstance> instances) { int numberInstances = instances.size(); double[][] data = new double [numberInstances][numberFeatures]; for(int i = 0; i < numberInstances; i++) for(int j = 0; j < numberFeatures; j++) data[i][j] = instances.get(i).getValue(j+1); double min = Double.MAX_VALUE; double maxProb = -1.0; for(int i = 0; i < numberInstances; i++) { double exp = 0.0; for (int j = 0; j < numberFeatures; j++) exp += (data[i][j]-best[j*2])*(data[i][j]-best[j*2])*best[j*2+1]*best[j*2+1]; if (exp < min){ min = exp; maxProb = Math.exp(-exp); } } double[] distribution = new double[2]; distribution[1] = maxProb; distribution[0] = 1.0 - distribution[1]; return distribution; } private int findInstance(int bag, double[] x) { double min = Double.MAX_VALUE; int numberInstances = trainInstances.get(bag).size(); int index = 0; for (int i = 0; i < numberInstances; i++) { double ins=0.0; for (int j = 0; j < numberFeatures; j++) ins += (trainInstances.get(bag).get(i).getValue(j+1)-x[j*2])*(trainInstances.get(bag).get(i).getValue(j+1)-x[j*2])*x[j*2+1]*x[j*2+1]; if (ins < min){ min=ins; index=i; } } return index; } private ArrayList<Integer> fill(int max) { Random random = new Random(1); ArrayList<Integer> list = new ArrayList<Integer>(); int number; for(int i = 0; i < 3; i++) { do { number = random.nextInt(max); }while(list.contains(number) && trainInstances.get(number).get(0).getValue(classIndex) == 0); list.add(new Integer(number)); } return list; } }