/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.MIL.APR.IteratedDiscrimination;
import java.util.ArrayList;
import keel.Algorithms.MIL.APR.AbstractAPR;
import net.sourceforge.jclec.util.dataset.IDataset.IInstance;
/**
 * Iterated-discrimination axis-parallel rectangle (APR) learner for
 * multi-instance data.
 *
 * <p>{@link #execute()} loads the train and test datasets, picks a seed
 * instance of class 0, repeatedly grows a rectangle around it while selecting
 * discriminating features, expands the final rectangle statistically and
 * reports the result on both datasets.</p>
 *
 * <p>Throughout this class feature {@code i} of an instance is read with
 * {@code getValue(i + 1)}, and the class of a bag is taken from its first
 * instance.</p>
 */
public class IteratedDiscrimination extends AbstractAPR
{
    /////////////////////////////////////////////////////////////////
    // --------------------------------------------------- Properties
    /////////////////////////////////////////////////////////////////

    /**
     * Tabulated cumulative probabilities scanned by {@link #densityEstimation()}.
     * The values match the standard normal CDF sampled every 0.1 starting at 0.0.
     */
    private static final double[] NORMAL_DISTRIBUTION = new double[]{0.5, 0.5398, 0.5793, 0.6179, 0.6554, 0.6915, 0.7257, 0.758, 0.7881, 0.8159, 0.8413, 0.8643, 0.8849, 0.9032, 0.9192, 0.9332, 0.9452, 0.9554, 0.9641, 0.9713, 0.9772, 0.9821, 0.9861, 0.9893, 0.9918, 0.9938, 0.9953, 0.9965, 0.9974, 0.9981, 0.9987, 0.999, 0.9993, 0.9995, 0.9997, 0.9998, 0.9998, 0.9999, 0.9999, 1};

    /** Instances whose bounding box is the rectangle currently being built. */
    private ArrayList<IInstance> instancesCovered = new ArrayList<IInstance>();

    /** Per class, per feature: [0] = min over bags of the bag maximum, [1] = max over bags of the bag minimum. */
    private double[][][] minmaxRectangles;

    /** Flag per training bag: true once one of its instances has been added to {@link #instancesCovered}. */
    private boolean[] bagsCovered;

    /** Multiplier of the rectangle width used as the discrimination margin. */
    private double alpha = 1.0;

    /** Combined with {@link #tau} to obtain the probability looked up in the table. */
    private double epsilon = 0.01;

    /** Combined with {@link #epsilon} to obtain the probability looked up in the table. */
    private double tau = 0.99;

    /** Negative multiplier of the standard deviation computed once by {@link #densityEstimation()}. */
    private double densityEstimation;

    /////////////////////////////////////////////////////////////////
    // ----------------------------------------------- Public Methods
    /////////////////////////////////////////////////////////////////

    /**
     * Runs the whole algorithm: loads the data, learns the rectangle for
     * class 0 and writes the train and test reports.
     */
    public void execute()
    {
        loadTrainDataset();
        loadTestDataset();

        densityEstimation = densityEstimation();

        // Only the entry for class 0 is filled in; it is used to choose the seed instance.
        minmaxRectangles = new double[2][numberFeatures][2];
        for (int i = 0; i < numberFeatures; i++)
        {
            minmaxRectangles[0][i][0] = minmax(i, 0);
            minmaxRectangles[0][i][1] = maxmin(i, 0);
        }

        double[][] positiveRectangle = iterateDiscrim(0);

        report(trainReportFileName, trainDataset, trainInstancesCopy, positiveRectangle, 0, bestFeatures);
        report(testReportFileName, testDataset, testInstances, positiveRectangle, 0, bestFeatures);
    }

    /**
     * Sets the margin multiplier used when deciding whether a feature
     * discriminates an instance.
     *
     * @param alpha new margin multiplier
     */
    public void setAlpha(double alpha)
    {
        this.alpha = alpha;
    }

    /**
     * Sets the epsilon parameter used by the density estimation.
     *
     * @param epsilon new epsilon value
     */
    public void setEpsilon(double epsilon)
    {
        this.epsilon = epsilon;
    }

    /**
     * Sets the tau parameter used by the density estimation.
     *
     * @param tau new tau value
     */
    public void setTau(double tau)
    {
        this.tau = tau;
    }

    /////////////////////////////////////////////////////////////////
    // --------------------------------------------- Private Methods
    /////////////////////////////////////////////////////////////////

    /**
     * Learns the rectangle for the given class.
     *
     * <p>The seed is the instance of that class closest to the centre of the
     * class' min-max rectangle. {@link #iterate(int, int, int)} is then
     * repeated until the rectangle size over the selected features stops
     * changing, and the result is finally expanded.</p>
     *
     * @param Class class value the rectangle is learned for
     * @return the learned (and expanded) rectangle, one {@code [min, max]} pair per feature
     */
    private double[][] iterateDiscrim(int Class)
    {
        double minDistance = Double.MAX_VALUE;
        int seedBag = 0, seedInstance = 0;

        for (int i = 0; i < trainInstances.size(); i++)
        {
            for (int j = 0; j < trainInstances.get(i).size(); j++)
            {
                IInstance candidate = trainInstances.get(i).get(j);
                if (candidate.getValue(classIndex) == Class)
                {
                    double distance = distanceRectangle(candidate, minmaxRectangles[Class]);
                    if (distance < minDistance)
                    {
                        minDistance = distance;
                        seedBag = i;
                        seedInstance = j;
                    }
                }
            }
        }

        double[][] rectangle = iterate(seedBag, seedInstance, Class);
        double size = size(rectangle, bestFeatures);

        // Repeat until the size over the selected features is stable.
        while (true)
        {
            rectangle = iterate(seedBag, seedInstance, Class);
            double newSize = size(rectangle, bestFeatures);
            if (size == newSize)
            {
                break;
            }
            size = newSize;
        }

        expand(rectangle);

        return rectangle;
    }

    /**
     * Replaces the bounds of every selected feature by an interval centred on
     * the mean of that feature over all remaining training instances, with a
     * half-width of {@code |densityEstimation|} sample standard deviations.
     *
     * <p>A feature is left untouched when there is no training instance; the
     * variance is taken as 0 when there is a single instance and is clamped at
     * 0 to absorb negative rounding errors.</p>
     *
     * @param rectangle rectangle to modify in place
     */
    private void expand(double[][] rectangle)
    {
        for (int i = 0; i < bestFeatures.size(); i++)
        {
            int feature = bestFeatures.get(i);

            double sum = 0.0;
            double sumSquares = 0.0;
            int numInstances = 0;

            for (int j = 0; j < trainInstances.size(); j++)
            {
                for (int k = 0; k < trainInstances.get(j).size(); k++)
                {
                    double value = trainInstances.get(j).get(k).getValue(feature + 1);
                    sum += value;
                    sumSquares += value * value;
                    numInstances++;
                }
            }

            if (numInstances == 0)
            {
                // No data to estimate from: keep the bounds as they are.
                continue;
            }

            double mean = sum / numInstances;
            double variance = 0.0;
            if (numInstances > 1)
            {
                variance = Math.max(0.0, (sumSquares - numInstances * mean * mean) / (numInstances - 1));
            }

            // densityEstimation is negative, so this is the lower bound.
            rectangle[feature][0] = densityEstimation * Math.sqrt(variance) + mean;
            rectangle[feature][1] = mean + Math.abs(mean - rectangle[feature][0]);
        }
    }

    /**
     * Performs one grow/discriminate pass starting from the seed instance.
     *
     * <p>The rectangle is grown by {@link #backfitting(int)}; then, while some
     * not-yet-selected feature discriminates at least one instance of the
     * other class, the feature with the highest count is selected and the
     * instances it discriminates are removed from the training data.</p>
     *
     * @param seedBag      index of the bag holding the seed instance
     * @param seedInstance index of the seed instance inside its bag
     * @param Class        class value the rectangle is learned for
     * @return the rectangle produced by the grow step
     */
    private double[][] iterate(int seedBag, int seedInstance, int Class)
    {
        bagsCovered = new boolean[trainInstances.size()];
        bagsCovered[seedBag] = true;

        instancesCovered = new ArrayList<IInstance>();
        instancesCovered.add(trainInstances.get(seedBag).get(seedInstance));

        int otherClass = (Class == 0) ? 1 : 0;

        double[][] rectangle = backfitting(Class);

        int maxCount;
        do
        {
            int[] features = discriminatingFeatures(rectangle, otherClass);

            maxCount = 0;
            int bestFeature = 0;
            for (int i = 0; i < numberFeatures; i++)
            {
                if (features[i] > maxCount)
                {
                    maxCount = features[i];
                    bestFeature = i;
                }
            }

            if (maxCount > 0)
            {
                removeInstances(rectangle, otherClass, bestFeature);
                bestFeatures.add(bestFeature);
            }
        }
        while (maxCount > 0);

        return rectangle;
    }

    /**
     * Counts, for every feature not yet selected, how many training instances
     * of the given class it discriminates.
     *
     * <p>A feature discriminates an instance when the instance lies at least
     * {@code alpha * width} away from the nearest bound of the rectangle on
     * that feature, or when it is the feature returned by
     * {@link #furtherFeature(double[][], IInstance)} for that instance.</p>
     *
     * @param rectangle current rectangle
     * @param Class     class value of the instances to discriminate
     * @return one counter per feature; features already selected stay at 0
     */
    @Override
    protected int[] discriminatingFeatures(double[][] rectangle, int Class)
    {
        int[] features = new int[numberFeatures];

        boolean[] alreadySelected = new boolean[numberFeatures];
        for (int i = 0; i < numberFeatures; i++)
        {
            alreadySelected[i] = bestFeatures.contains(i);
        }

        for (int j = 0; j < trainInstances.size(); j++)
        {
            for (int k = 0; k < trainInstances.get(j).size(); k++)
            {
                IInstance instance = trainInstances.get(j).get(k);
                if (instance.getValue(classIndex) != Class)
                {
                    continue;
                }

                // The furthest feature does not depend on i: compute it at most once per instance.
                boolean furthestKnown = false;
                int furthest = -1;

                for (int i = 0; i < numberFeatures; i++)
                {
                    if (alreadySelected[i])
                    {
                        continue;
                    }

                    double distance = boundDistance(rectangle[i], instance.getValue(i + 1));
                    if (distance >= alpha * (rectangle[i][1] - rectangle[i][0]))
                    {
                        features[i]++;
                    }
                    else
                    {
                        if (!furthestKnown)
                        {
                            furthest = furtherFeature(rectangle, instance);
                            furthestKnown = true;
                        }
                        if (i == furthest)
                        {
                            features[i]++;
                        }
                    }
                }
            }
        }

        return features;
    }

    /**
     * Removes from the training bags every instance of the given class that
     * the given feature discriminates (same criterion as
     * {@link #discriminatingFeatures(double[][], int)}).
     *
     * @param rectangle current rectangle
     * @param Class     class value of the instances to remove
     * @param feature   index of the discriminating feature
     */
    @Override
    protected void removeInstances(double[][] rectangle, int Class, int feature)
    {
        double margin = alpha * (rectangle[feature][1] - rectangle[feature][0]);

        for (int j = 0; j < trainInstances.size(); j++)
        {
            // Walk each bag backwards so removing by index never shifts an unvisited element.
            for (int k = trainInstances.get(j).size() - 1; k >= 0; k--)
            {
                IInstance instance = trainInstances.get(j).get(k);
                if (instance.getValue(classIndex) != Class)
                {
                    continue;
                }

                double distance = boundDistance(rectangle[feature], instance.getValue(feature + 1));
                if (distance >= margin || feature == furtherFeature(rectangle, instance))
                {
                    trainInstances.get(j).remove(k);
                }
            }
        }
    }

    /**
     * Finds the feature on which the instance lies furthest outside the
     * rectangle.
     *
     * @param rectangle current rectangle
     * @param instance  instance to examine
     * @return index of that feature, or -1 when the instance is inside the
     *         rectangle on every feature
     */
    private int furtherFeature(double[][] rectangle, IInstance instance)
    {
        // Double.MIN_VALUE is the smallest POSITIVE double, so it is not a valid "no maximum yet" sentinel.
        double max = Double.NEGATIVE_INFINITY;
        int feature = -1;

        for (int i = 0; i < numberFeatures; i++)
        {
            double value = instance.getValue(i + 1);

            // If the negative instance falls inside the bounds, this feature does not
            // discriminate it; hence not every negative instance is removed: those
            // lying inside the rectangle remain.
            if (value < rectangle[i][0] || value > rectangle[i][1])
            {
                double distance = boundDistance(rectangle[i], value);
                if (distance > max)
                {
                    max = distance;
                    feature = i;
                }
            }
        }

        return feature;
    }

    /**
     * Grows the set of covered instances until every non-empty bag of the
     * given class contributes one instance, revisiting earlier choices after
     * each addition.
     *
     * <p>After each addition, every covered instance except the first (the
     * seed) and the last (just added) is taken out and replaced by the
     * instance of its bag that yields the smallest rectangle.</p>
     *
     * @param Class class value of the bags to cover
     * @return bounding rectangle of the covered instances
     */
    private double[][] backfitting(int Class)
    {
        while (true)
        {
            if (!addClosestUncoveredInstance(Class))
            {
                // Nothing could be added: stop instead of re-adding an arbitrary instance.
                break;
            }

            for (int i = 1; i < instancesCovered.size() - 1; i++)
            {
                IInstance revisedInstance = instancesCovered.remove(i);

                // Fall back to the instance itself if no candidate turns out to be usable.
                IInstance replacement = revisedInstance;
                double minSize = Double.MAX_VALUE;

                for (int j = 0; j < trainInstances.size(); j++)
                {
                    if (trainInstances.get(j).contains(revisedInstance))
                    {
                        for (IInstance inst : trainInstances.get(j))
                        {
                            double size = sizeWithCandidate(inst);
                            if (size < minSize)
                            {
                                minSize = size;
                                replacement = inst;
                            }
                        }
                        // Only the first bag containing the instance is considered.
                        break;
                    }
                }

                instancesCovered.add(i, replacement);
            }

            if (allBagsCovered(Class))
            {
                break;
            }
        }

        return coveredRectangle();
    }

    /**
     * Same growing procedure as {@link #backfitting(int)} but without the
     * revisiting step. Currently not called.
     *
     * @param Class class value of the bags to cover
     * @return bounding rectangle of the covered instances
     */
    @SuppressWarnings("unused")
    private double[][] grow(int Class)
    {
        while (true)
        {
            if (!addClosestUncoveredInstance(Class))
            {
                break;
            }
            if (allBagsCovered(Class))
            {
                break;
            }
        }

        return coveredRectangle();
    }

    /**
     * Adds to the covered set the instance, taken from the uncovered non-empty
     * bags of the given class, that yields the smallest rectangle, and marks
     * its bag as covered.
     *
     * @param Class class value of the bags to look at
     * @return true if an instance was added, false if no candidate was found
     */
    private boolean addClosestUncoveredInstance(int Class)
    {
        int bag = -1, instance = -1;
        double minSize = Double.MAX_VALUE;

        for (int i = 0; i < trainInstances.size(); i++)
        {
            if (bagsCovered[i]
                    || trainInstances.get(i).size() == 0
                    || trainInstances.get(i).get(0).getValue(classIndex) != Class)
            {
                continue;
            }

            for (int j = 0; j < trainInstances.get(i).size(); j++)
            {
                double size = sizeWithCandidate(trainInstances.get(i).get(j));
                if (size < minSize)
                {
                    minSize = size;
                    bag = i;
                    instance = j;
                }
            }
        }

        if (bag < 0)
        {
            return false;
        }

        instancesCovered.add(trainInstances.get(bag).get(instance));
        bagsCovered[bag] = true;
        return true;
    }

    /**
     * Computes the size the rectangle would have if the candidate were added
     * to the covered instances. The covered set is left unchanged.
     *
     * @param candidate instance to try
     * @return size over all features while no feature has been selected yet,
     *         otherwise size over the selected features only
     */
    private double sizeWithCandidate(IInstance candidate)
    {
        instancesCovered.add(candidate);

        double[][] rectangle = coveredRectangle();
        double size;
        if (bestFeatures.size() == 0)
        {
            size = size(rectangle);
        }
        else
        {
            size = size(rectangle, bestFeatures);
        }

        instancesCovered.remove(instancesCovered.size() - 1);

        return size;
    }

    /**
     * Tells whether every non-empty bag of the given class is already covered.
     *
     * @param Class class value of the bags to check
     * @return true when no uncovered, non-empty bag of that class remains
     */
    private boolean allBagsCovered(int Class)
    {
        for (int i = 0; i < bagsCovered.length; i++)
        {
            if (!bagsCovered[i]
                    && trainInstances.get(i).size() != 0
                    && trainInstances.get(i).get(0).getValue(classIndex) == Class)
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Builds the bounding rectangle of the covered instances.
     *
     * @return one {@code [min, max]} pair per feature
     */
    private double[][] coveredRectangle()
    {
        double[][] rectangle = new double[numberFeatures][2];
        for (int k = 0; k < numberFeatures; k++)
        {
            rectangle[k][0] = min(instancesCovered, k);
            rectangle[k][1] = max(instancesCovered, k);
        }
        return rectangle;
    }

    /**
     * Distance from a value to the nearest of the two bounds of an interval.
     *
     * @param bounds {@code [min, max]} pair
     * @param value  value to measure
     * @return {@code min(|min - value|, |value - max|)}
     */
    private static double boundDistance(double[] bounds, double value)
    {
        return Math.min(Math.abs(bounds[0] - value), Math.abs(value - bounds[1]));
    }

    /**
     * Sum of the side lengths of the rectangle over all features.
     *
     * @param rectangle rectangle to measure
     * @return sum of {@code max - min} over every feature
     */
    private double size(double[][] rectangle)
    {
        double size = 0;
        for (int i = 0; i < rectangle.length; i++)
        {
            size += rectangle[i][1] - rectangle[i][0];
        }
        return size;
    }

    /**
     * Sum of the side lengths of the rectangle over the given features.
     *
     * @param rectangle rectangle to measure
     * @param features  indices of the features to include
     * @return sum of {@code max - min} over the listed features
     */
    private double size(double[][] rectangle, ArrayList<Integer> features)
    {
        double size = 0;
        for (int i = 0; i < features.size(); i++)
        {
            int feature = features.get(i);
            size += rectangle[feature][1] - rectangle[feature][0];
        }
        return size;
    }

    /**
     * Manhattan distance from an instance to the centre of a rectangle.
     *
     * <p>The centre of each side is {@code (lower + upper) / 2}, and feature
     * {@code i} is read with {@code getValue(i + 1)} like everywhere else in
     * this class.</p>
     *
     * @param instance  instance to measure
     * @param rectangle one pair of bounds per feature
     * @return sum over the features of the absolute distance to the centre
     */
    private double distanceRectangle(IInstance instance, double[][] rectangle)
    {
        double distance = 0;
        for (int i = 0; i < rectangle.length; i++)
        {
            double centre = (rectangle[i][0] + rectangle[i][1]) / 2;
            distance += Math.abs(instance.getValue(i + 1) - centre);
        }
        return distance;
    }

    /**
     * Smallest, over the non-empty bags of the given class, of the largest
     * value the attribute takes inside the bag.
     *
     * @param attribute feature index
     * @param Class     class value of the bags to consider
     * @return the min-max value, or {@code Double.MAX_VALUE} if no bag qualifies
     */
    protected double minmax(int attribute, int Class)
    {
        double min = Double.MAX_VALUE;

        for (int i = 0; i < trainInstances.size(); i++)
        {
            if (trainInstances.get(i).size() != 0 && trainInstances.get(i).get(0).getValue(classIndex) == Class)
            {
                double max = -Double.MAX_VALUE;
                for (int j = 0; j < trainInstances.get(i).size(); j++)
                {
                    double value = trainInstances.get(i).get(j).getValue(attribute + 1);
                    if (value > max)
                    {
                        max = value;
                    }
                }

                if (max < min)
                {
                    min = max;
                }
            }
        }

        return min;
    }

    /**
     * Largest, over the non-empty bags of the given class, of the smallest
     * value the attribute takes among the bag's instances of that class.
     *
     * @param attribute feature index
     * @param Class     class value of the bags to consider
     * @return the max-min value, or {@code -Double.MAX_VALUE} if no bag qualifies
     */
    protected double maxmin(int attribute, int Class)
    {
        double max = -Double.MAX_VALUE;

        for (int i = 0; i < trainInstances.size(); i++)
        {
            if (trainInstances.get(i).size() != 0 && trainInstances.get(i).get(0).getValue(classIndex) == Class)
            {
                double min = Double.MAX_VALUE;
                for (int j = 0; j < trainInstances.get(i).size(); j++)
                {
                    IInstance instance = trainInstances.get(i).get(j);
                    if (instance.getValue(classIndex) == Class && instance.getValue(attribute + 1) < min)
                    {
                        min = instance.getValue(attribute + 1);
                    }
                }

                if (min > max)
                {
                    max = min;
                }
            }
        }

        return max;
    }

    /**
     * Smallest value of the attribute among the given instances.
     *
     * @param instancesCovered instances to scan
     * @param attribute        feature index
     * @return the minimum, or {@code Double.MAX_VALUE} for an empty list
     */
    protected double min(ArrayList<IInstance> instancesCovered, int attribute)
    {
        double min = Double.MAX_VALUE;
        for (int i = 0; i < instancesCovered.size(); i++)
        {
            double value = instancesCovered.get(i).getValue(attribute + 1);
            if (value < min)
            {
                min = value;
            }
        }
        return min;
    }

    /**
     * Largest value of the attribute among the given instances.
     *
     * @param instancesCovered instances to scan
     * @param attribute        feature index
     * @return the maximum, or {@code -Double.MAX_VALUE} for an empty list
     */
    protected double max(ArrayList<IInstance> instancesCovered, int attribute)
    {
        double max = -Double.MAX_VALUE;
        for (int i = 0; i < instancesCovered.size(); i++)
        {
            double value = instancesCovered.get(i).getValue(attribute + 1);
            if (value > max)
            {
                max = value;
            }
        }
        return max;
    }

    /**
     * Looks up {@code tau + epsilon / 2} in the probability table and returns
     * the negated abscissa of the first entry strictly above it.
     *
     * @return {@code -(i + 1) / 10} for the table interval {@code [i, i + 1)}
     *         containing the probability, or -3.3 when the probability falls
     *         outside the table
     */
    private double densityEstimation()
    {
        double probability = tau + epsilon / 2.0;

        for (int i = 0; i < NORMAL_DISTRIBUTION.length - 1; i++)
        {
            if (NORMAL_DISTRIBUTION[i] <= probability && NORMAL_DISTRIBUTION[i + 1] > probability)
            {
                return -(i + 1) / 10.0;
            }
        }

        return -3.3;
    }
}