/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression,
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010

	F. Herrera (herrera@decsai.ugr.es)
	L. Sánchez (luciano@uniovi.es)
	J. Alcalá-Fdez (jalcala@decsai.ugr.es)
	S. García (sglopez@ujaen.es)
	A. Fernández (alberto.fernandez@ujaen.es)
	J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.MIL.APR.GFS_Kde_APR;

import java.util.ArrayList;

import keel.Algorithms.MIL.APR.AbstractAPR;
import net.sourceforge.jclec.util.dataset.IDataset.IInstance;

/**
 * GFS Kde APR algorithm for multiple-instance learning. The algorithm builds the
 * smallest axis-parallel rectangle (APR) covering all positive instances, iteratively
 * shrinks it to exclude the negative instances it still covers (choosing the cheapest
 * exclusion according to a Gaussian kernel density estimate), applies greedy feature
 * selection and finally writes the train and test reports.
 */
public class GFS_Kde_APR extends AbstractAPR
{
	/////////////////////////////////////////////////////////////////
	// --------------------------------------------------- Properties
	/////////////////////////////////////////////////////////////////

	/** Weight of the excluded instance in the kernel-based exclusion cost */
	private double alpha = 10.0;

	/** Normalization constant of the standard normal density, sqrt(2*pi) */
	private double SQRT2PI = Math.sqrt(2*Math.PI);

	/////////////////////////////////////////////////////////////////
	// ----------------------------------------------- Public Methods
	/////////////////////////////////////////////////////////////////

	/**
	 * Runs the algorithm: loads the datasets, builds the positive rectangle,
	 * shrinks it until no negative instance is covered, performs greedy feature
	 * selection and writes the train and test reports.
	 */
	public void execute() throws Exception
	{
		loadTrainDataset();
		loadTestDataset();

		double[][] positiveRectangle = new double[numberFeatures][2];

		for(int i = 0; i < numberFeatures; i++)
		{
			positiveRectangle[i][0] = min(i,0);	// Min value for feature i from positive instances
			positiveRectangle[i][1] = max(i,0);	// Max value for feature i from positive instances
		}

		removeNegativeInstances(positiveRectangle);

		greedyFeatureSelection(positiveRectangle,1);

		report(trainReportFileName, trainDataset, trainInstancesCopy, positiveRectangle, 0, bestFeatures);
		report(testReportFileName, testDataset, testInstances, positiveRectangle, 0, bestFeatures);
	}

	/**
	 * Sets the alpha parameter used by the exclusion cost.
	 *
	 * @param alpha new alpha value
	 */
	public void setAlpha(double alpha)
	{
		this.alpha = alpha;
	}

	/////////////////////////////////////////////////////////////////
	// --------------------------------------------- Private Methods
	/////////////////////////////////////////////////////////////////
	/**
	 * Shrinks the positive rectangle until it no longer covers any negative instance.
	 * In each iteration the covered negative instance and feature with the lowest
	 * exclusion cost are selected, the affected instances are removed from the bags
	 * and the rectangle bounds are recomputed from the remaining positive instances.
	 */
	private void removeNegativeInstances(double[][] positiveRectangle)
	{
		int numNegativeInstances = 0;

		do
		{
			ArrayList<double[]> count = new ArrayList<double[]>();
			ArrayList<IInstance> negativeInstances = new ArrayList<IInstance>();

			// Negative instances still covered by the rectangle and their exclusion costs
			for(int i = 0; i < trainInstances.size(); i++)
				for(int j = 0; j < trainInstances.get(i).size(); j++)
					if(trainInstances.get(i).get(j).getValue(classIndex) == 1 && contains(positiveRectangle, trainInstances.get(i).get(j)))
					{
						negativeInstances.add(trainInstances.get(i).get(j));
						count.add(excludeCost(trainInstances.get(i).get(j), positiveRectangle));
					}

			numNegativeInstances = negativeInstances.size();

			if(numNegativeInstances != 0)
			{
				double min = Double.MAX_VALUE;
				int bestInstance = -1;
				int feature = -1;

				// Instance/feature pair with the minimum exclusion cost
				for(int i = 0; i < count.size(); i++)
					for(int j = 0; j < numberFeatures; j++)
						if(count.get(i)[j] < min)
						{
							feature = j;
							min = count.get(i)[j];
							bestInstance = i;
						}

				removeAffectedInstances(negativeInstances.get(bestInstance), feature, positiveRectangle);

				for(int i = 0; i < trainInstances.size(); i++)
					if(trainInstances.get(i).contains(negativeInstances.get(bestInstance)))
						trainInstances.get(i).remove(negativeInstances.get(bestInstance));

				// Recompute the bounds from the remaining positive instances
				for(int i = 0; i < numberFeatures; i++)
				{
					positiveRectangle[i][0] = min(i,0);
					positiveRectangle[i][1] = max(i,0);
				}
			}
		} while(numNegativeInstances != 0);
	}

	/**
	 * Dual of removeNegativeInstances: shrinks a negative rectangle until it no longer
	 * covers any positive instance. Currently not used by execute().
	 */
	@SuppressWarnings("unused")
	private void removePositiveInstances(double[][] negativeRectangle)
	{
		int numPositiveInstances = 0;

		do
		{
			ArrayList<double[]> count = new ArrayList<double[]>();
			ArrayList<IInstance> positiveInstances = new ArrayList<IInstance>();

			for(int i = 0; i < trainInstances.size(); i++)
				for(int j = trainInstances.get(i).size()-1; j >= 0; j--)
					if(trainInstances.get(i).get(j).getValue(classIndex) == 0 && contains(negativeRectangle, trainInstances.get(i).get(j)))
					{
						positiveInstances.add(trainInstances.get(i).get(j));
						count.add(excludeCost(trainInstances.get(i).get(j), negativeRectangle));
					}

			numPositiveInstances = positiveInstances.size();

			if(numPositiveInstances != 0)
			{
				double min = Double.MAX_VALUE;
				int bestInstance = -1;
				int feature = -1;

				for(int i = 0; i < count.size(); i++)
					for(int j = 0; j < numberFeatures; j++)
						if(count.get(i)[j] < min)
						{
							min = count.get(i)[j];
							feature = j;
							bestInstance = i;
						}

				removeAffectedInstances(positiveInstances.get(bestInstance), feature, negativeRectangle);

				for(int i = 0; i < trainInstances.size(); i++)
					if(trainInstances.get(i).contains(positiveInstances.get(bestInstance)))
						trainInstances.get(i).remove(positiveInstances.get(bestInstance));

				for(int i = 0; i < numberFeatures; i++)
				{
					negativeRectangle[i][0] = min(i,1);
					negativeRectangle[i][1] = max(i,1);
				}

				count.clear();
				positiveInstances.clear();
			}
		} while(numPositiveInstances != 0);
	}

	/**
	 * Computes, for each feature, the cost of excluding the given instance from the
	 * rectangle by tightening the bound (lower or upper) closest to it. A Gaussian is
	 * fitted to each feature of each bag (sample mean and unbiased variance) and the
	 * cost accumulates cost() over every instance of the opposite class that would be
	 * excluded together with the given instance.
	 *
	 * @return array with the exclusion cost for each feature
	 */
	private double[] excludeCost(IInstance instance, double[][] rectangle)
	{
		double[][] mean = new double[trainInstances.size()][numberFeatures];
		double[][] variance = new double[trainInstances.size()][numberFeatures];

		// Per-bag, per-feature sample mean and variance
		for(int i = 0; i < trainInstances.size(); i++)
		{
			for(int j = 0; j < numberFeatures; j++)
			{
				mean[i][j] = variance[i][j] = 0.0;

				for(int k = 0; k < trainInstances.get(i).size(); k++)
				{
					mean[i][j] += trainInstances.get(i).get(k).getValue(j+1);
					variance[i][j] += trainInstances.get(i).get(k).getValue(j+1) * trainInstances.get(i).get(k).getValue(j+1);
				}

				mean[i][j] = mean[i][j] / trainInstances.get(i).size();
				variance[i][j] = (variance[i][j] - trainInstances.get(i).size() * mean[i][j] * mean[i][j]) / (trainInstances.get(i).size()-1);
			}
		}

		int Class = (int) instance.getValue(classIndex);

		double[] cost = new double[numberFeatures];

		for(int i = 0; i < numberFeatures; i++)
		{
			// Tighten the bound that is closer to the instance on this feature
			if(Math.abs(instance.getValue(i+1) - rectangle[i][0]) < Math.abs(rectangle[i][1] - instance.getValue(i+1)))
			{
				for(int j = 0; j < trainInstances.size(); j++)
					for(int k = 0; k < trainInstances.get(j).size(); k++)
						if(trainInstances.get(j).get(k).getValue(classIndex) != Class && trainInstances.get(j).get(k).getValue(i+1) <= instance.getValue(i+1))
							cost[i] += cost(i, j, k, mean[j][i], variance[j][i]);
			}
			else
			{
				for(int j = 0; j < trainInstances.size(); j++)
					for(int k = 0; k < trainInstances.get(j).size(); k++)
						if(trainInstances.get(j).get(k).getValue(classIndex) != Class && trainInstances.get(j).get(k).getValue(i+1) >= instance.getValue(i+1))
							cost[i] += cost(i, j, k, mean[j][i], variance[j][i]);
			}
		}

		return cost;
	}
	/**
	 * Removes from the training bags every instance of the opposite class that falls
	 * outside the rectangle once it has been tightened along the given feature to
	 * exclude the given instance.
	 */
	private void removeAffectedInstances(IInstance instance, int feature, double[][] rectangle)
	{
		int Class = (int) instance.getValue(classIndex);

		if(Math.abs(instance.getValue(feature+1) - rectangle[feature][0]) < Math.abs(rectangle[feature][1] - instance.getValue(feature+1)))
		{
			for(int j = 0; j < trainInstances.size(); j++)
				for(int k = trainInstances.get(j).size()-1; k >= 0; k--)
					if(trainInstances.get(j).get(k).getValue(classIndex) != Class && trainInstances.get(j).get(k).getValue(feature+1) <= instance.getValue(feature+1))
						trainInstances.get(j).remove(k);
		}
		else
		{
			for(int j = 0; j < trainInstances.size(); j++)
				for(int k = trainInstances.get(j).size()-1; k >= 0; k--)
					if(trainInstances.get(j).get(k).getValue(classIndex) != Class && trainInstances.get(j).get(k).getValue(feature+1) >= instance.getValue(feature+1))
						trainInstances.get(j).remove(k);
		}
	}

	/**
	 * Kernel-based cost of excluding one instance of a bag along one attribute:
	 * alpha times the kernel density of the excluded instance minus the sum of the
	 * kernel densities of the remaining instances of the bag, where the kernel is a
	 * Gaussian with the given mean (media) and variance (varianza).
	 */
	private double cost(int attribute, int bag, int instance, double media, double varianza)
	{
		double cost = 0.0;

		for(int i = 0; i < trainInstances.get(bag).size(); i++)
			if(i != instance)
				cost += probabilityDensityFunction(trainInstances.get(bag).get(i).getValue(attribute+1), media, varianza);

		cost = - cost + alpha * probabilityDensityFunction(trainInstances.get(bag).get(instance).getValue(attribute+1), media, varianza);

		return cost;
	}

	/**
	 * Standard normal kernel evaluated at the z-score of the value:
	 * exp(-z*z/2) / sqrt(2*pi), with z = (value - mean) / sqrt(variance).
	 * When the value equals the mean the kernel maximum 1/sqrt(2*pi) is returned
	 * directly, which also covers bags whose variance is zero.
	 */
	private double probabilityDensityFunction(double value, double mean, double variance)
	{
		if(value == mean)
			return 1.0/SQRT2PI;

		double z = (value - mean) / Math.sqrt(variance);

		return Math.exp(-(z*z)/2) / SQRT2PI;
	}
}
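
/*
 * Usage sketch (illustrative only). In KEEL this algorithm is normally driven by the
 * experiments framework through a parameter file handled by AbstractAPR, so the exact
 * configuration calls are not shown in this file; only setAlpha() and execute() are
 * defined here, and the dataset/report settings below are assumed to come from the
 * inherited configuration mechanism. A hypothetical direct invocation could look like:
 *
 *   GFS_Kde_APR algorithm = new GFS_Kde_APR();
 *   algorithm.setAlpha(10.0);   // weight of the excluded instance in the exclusion cost
 *   // ... train/test dataset paths and report file names configured via AbstractAPR
 *   //     (API assumed, not shown in this file) ...
 *   algorithm.execute();        // builds the APR and writes the train and test reports
 */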