/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
A. Fernández (alberto.fernandez@ujaen.es)
S. García (sglopez@ujaen.es)
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
*
* File: ISW.java
*
* An implementation of a rough-set-based Instance Selection Wrapper class for EFS_RPS.
*
* @author Written by Joaquín Derrac (University of Granada) 13/11/2011
* @version 1.0
* @since JDK1.5
*
*/
package keel.Algorithms.RST_Learning.EFS_RPS;
import java.util.Arrays;
/**
 * Rough-set-based Instance Selection Wrapper (ISW).
 *
 * <p>All state is held in static fields, so the class behaves as a single
 * shared selector. Typical use: call {@link #setData(double[][], boolean[])}
 * and {@link #setOutput(int[])} first, optionally adjust the feature mask and
 * the fuzzy operators, then call {@link #computeISW()}.
 *
 * <p>Features and instances are selected through 0/1 masks: a value of 1 means
 * "selected", 0 means "not selected".
 */
public class ISW{

    /** Private copy of the training data, one row per instance. */
    private static double data[][];

    /** Feature mask (1 = selected, 0 = not selected). */
    private static int FS[];

    /** Number of entries of {@link #FS} currently set to 1. */
    private static int featuresSelected;

    /** Instance mask (1 = selected, 0 = not selected). */
    private static int IS[];

    /** Number of entries of {@link #IS} currently set to 1. */
    private static int instancesSelected;

    /** Number of training instances (rows of {@link #data}). */
    private static int instances;

    /** Number of features (columns of {@link #data}). */
    private static int features;

    /** For every instance, index of its nearest selected instance other than itself. */
    private static int neighbors[];

    /** Class label of every training instance. */
    private static int output[];

    /** Per-feature flag: true when the feature is nominal. */
    private static boolean nominal [];

    /** Per-instance value computed by {@link #getPosregionsInstances()}. */
    private static double posRegions[];

    /** Instance indices ordered by decreasing {@link #posRegions} value. */
    private static int orderRegions[];

    /** Implicator in use: {@link #LUKASIEWICZ} or {@link #KLEENE_DIENES}. */
    private static int implicator;

    /** T-norm in use: {@link #LUKASIEWICZ}, {@link #MIN} or {@link #PRODUCT}. */
    private static int tnorm;

    /** Accumulated evaluation cost; each neighbor search adds 1/instances. */
    private static double evaluations;

    public static final int LUKASIEWICZ = 0;
    public static final int KLEENE_DIENES = 1;
    public static final int MIN = 2;
    public static final int PRODUCT = 3;

    /**
     * Selects the implicator.
     *
     * @param val {@link #KLEENE_DIENES} to use Kleene-Dienes; any other value
     *            selects Lukasiewicz
     */
    public static void setImplicator(int val){
        implicator = (val==KLEENE_DIENES) ? KLEENE_DIENES : LUKASIEWICZ;
    }

    /**
     * Selects the t-norm.
     *
     * @param val {@link #MIN} or {@link #PRODUCT}; any other value selects
     *            Lukasiewicz
     */
    public static void setTNorm(int val){
        if(val==MIN){
            tnorm=MIN;
        }
        else if(val==PRODUCT){
            tnorm=PRODUCT;
        }
        else{
            tnorm=LUKASIEWICZ;
        }
    }

    /**
     * Loads the training data into the classifier and selects every feature
     * and every instance.
     *
     * <p>NOTE(review): {@link #similarity(int, int)} computes
     * {@code 1 - (difference)^2} for continuous features, so the values are
     * presumably expected to be normalized to [0,1] - confirm with callers.
     *
     * @param newData    Data represented with continuous values, one row per
     *                   instance; the number of features is taken from the
     *                   first row
     * @param vecNominal For every feature, whether it is nominal
     */
    public static void setData(double newData[][],boolean vecNominal []){

        instances = newData.length;
        features = newData[0].length;

        // keep a private copy so later changes by the caller do not leak in
        data = new double [instances][features];
        for(int i=0;i<instances;i++){
            System.arraycopy(newData[i], 0, data[i], 0, features);
        }

        nominal = new boolean[features];
        System.arraycopy(vecNominal, 0, nominal, 0, features);

        FS = new int [features];
        IS = new int [instances];

        Arrays.fill(FS, 1);
        featuresSelected=features;

        Arrays.fill(IS, 1);
        instancesSelected=instances;

    }//end-method

    /**
     * Loads the training output into the classifier. Must be called after
     * {@link #setData(double[][], boolean[])}, because the number of labels
     * copied is the number of loaded instances.
     *
     * @param newOutput Output attribute of the training data
     */
    public static void setOutput(int newOutput[]){

        output=new int [data.length];
        System.arraycopy(newOutput,0,output, 0, data.length);

    }//end-method

    /**
     * Tells whether a feature is currently selected.
     *
     * @param att Feature index
     *
     * @return false when the mask entry is 0, true otherwise
     */
    public static boolean isAttribute(int att){
        return FS[att]!=0;
    }

    /**
     * Selects a feature; the selected-feature count only grows if the feature
     * was not selected before.
     *
     * @param att Feature index
     */
    public static void setAttribute(int att){
        if(FS[att]==0){
            FS[att]=1;
            featuresSelected++;
        }
    }

    /**
     * Unselects a feature; the selected-feature count only shrinks if the
     * feature was selected before.
     *
     * @param att Feature index
     */
    public static void unsetAttribute(int att){
        if(FS[att]==1){
            FS[att]=0;
            featuresSelected--;
        }
    }

    /**
     * Replaces the whole feature mask and recounts the selected features.
     *
     * @param atts New mask, one entry per feature (1 = selected)
     */
    public static void setAttributes(int [] atts){
        featuresSelected=0;
        for(int i=0;i<features;i++){
            FS[i]=atts[i];
            if(atts[i]==1){
                featuresSelected++;
            }
        }
    }

    /**
     * Replaces the whole instance mask and recounts the selected instances.
     *
     * @param ins New mask, one entry per instance (1 = selected)
     */
    public static void setInstances(int [] ins){
        instancesSelected=0;
        for(int i=0; i< instances;i++){
            IS[i]=ins[i];
            if(ins[i]==1){
                instancesSelected++;
            }
        }
    }

    /**
     * Returns a copy of the feature mask.
     *
     * @return New array with one entry per feature
     */
    public static int [] getAttributes(){
        int newFS [] = new int [features];
        for(int i=0;i<features;i++){
            newFS[i]=FS[i];
        }
        return newFS;
    }

    /**
     * Returns a copy of the instance mask.
     *
     * @return New array with one entry per instance
     */
    public static int [] getInstances(){
        int newIS [] = new int [instances];
        for(int i=0;i<instances;i++){
            newIS[i]=IS[i];
        }
        return newIS;
    }

    /** Unselects every feature. */
    public static void clearAttributes(){
        featuresSelected=0;
        Arrays.fill(FS, 0);
    }

    /** Unselects every instance. */
    public static void clearInstances(){
        instancesSelected=0;
        Arrays.fill(IS, 0);
    }

    /**
     * @return Number of features currently selected
     */
    public static int getnFeatures(){
        return featuresSelected;
    }

    /**
     * @return Number of instances currently selected
     */
    public static int getnInstances(){
        return instancesSelected;
    }

    /** Selects every instance and updates the selected-instance count. */
    public static void setAllInstances(){
        Arrays.fill(IS, 1);
        // keep the counter in sync with the mask, as every other mutator does
        instancesSelected=instances;
    }

    /** Selects every feature and updates the selected-feature count. */
    public static void setAllAttributes(){
        Arrays.fill(FS, 1);
        // keep the counter in sync with the mask, as every other mutator does
        featuresSelected=features;
    }

    /**
     * Fills {@link #orderRegions} with the instance indices ordered by
     * decreasing {@link #posRegions} value. The sort works on a copy of the
     * values, so {@link #posRegions} itself is left untouched.
     */
    private static void sortRegionsDec(){

        int n = posRegions.length;
        double values[] = new double [n];

        orderRegions=new int [n];
        System.arraycopy(posRegions, 0, values, 0, n);

        for(int i=0;i<n;i++){
            orderRegions[i]=i;
        }

        // exchange sort, moving the index array together with the values
        for(int i=0;i<n;i++){
            for(int j=i+1;j<n;j++){
                if(values[i]<values[j]){
                    double auxV=values[i];
                    values[i]=values[j];
                    values[j]=auxV;

                    int auxI=orderRegions[i];
                    orderRegions[i]=orderRegions[j];
                    orderRegions[j]=auxI;
                }
            }
        }
    }

    /**
     * Finds the nearest selected instance to the given one (excluding itself)
     * and stores it in {@link #neighbors}. If no candidate is closer than
     * {@code Double.MAX_VALUE}, the previous entry is kept. Every call adds
     * 1/instances to {@link #evaluations}.
     *
     * @param index Instance whose neighbor is searched
     */
    private static void searchNeighbor(int index){

        double minD=Double.MAX_VALUE;

        for(int i=0;i<instances;i++){
            if(IS[i]==1 && index!=i){
                double dist = euclideanDistance(index,i);
                if (dist < minD) {
                    minD = dist;
                    neighbors[index]=i;
                }
            }
        }

        evaluations+=1.0/instances;
    }

    /** Rebuilds the whole neighbor table from the current instance mask. */
    private static void computeNeighbors(){

        neighbors=new int [instances];

        for(int index=0;index<instances;index++){
            searchNeighbor(index);
        }
    }

    /**
     * Computes the fraction of instances (selected or not) whose class label
     * matches the label of their stored neighbor.
     *
     * @return hits / instances
     */
    private static double computeAcc(){

        double hits=0.0;

        for(int i=0;i<instances;i++){
            if(output[i]==output[neighbors[i]]){
                hits+=1.0;
            }
        }

        return hits/(double)instances;
    }

    /**
     * Runs the instance selection wrapper.
     *
     * <p>Starting from all instances selected, instances are removed in
     * decreasing order of their {@link #posRegions} value (instances sharing
     * the same value are removed together). After every removal step the
     * neighbor-based accuracy is recomputed; the last mask whose accuracy is
     * greater than or equal to the best seen so far is kept. The state in
     * which no instance is left is never evaluated. On return, the instance
     * mask and the selected-instance count reflect the kept subset.
     *
     * @return Accumulated evaluation cost (each neighbor search adds
     *         1/instances)
     */
    public static double computeISW(){

        double acc,bestAcc;
        int bestInstances [];

        evaluations=0.0;

        //compute the per-instance values (using the current feature mask)
        getPosregionsInstances();

        //generate an order for instances (decreasing)
        sortRegionsDec();

        /* After this, there is an order defined on the instances */

        bestInstances=new int [instances];

        Arrays.fill(IS, 1);
        Arrays.fill(bestInstances, 1);
        instancesSelected=instances;

        // First: all instances
        computeNeighbors();
        bestAcc = computeAcc();

        //iterate through all instances
        for(int i=0;i<instances;i++){

            //remove instance
            IS[orderRegions[i]]=0;

            //remove more if they have the same value
            while(i+1<orderRegions.length && posRegions[orderRegions[i]]==posRegions[orderRegions[i+1]]){
                i++;
                IS[orderRegions[i]]=0;
            }

            //we need more than 0 instances!!
            if(i!=instances-1){

                //update neighbors table: only entries pointing to a removed instance
                for(int x=0;x<instances;x++){
                    if(IS[neighbors[x]]==0){
                        searchNeighbor(x);
                    }
                }

                //compute accuracy
                acc = computeAcc();

                if(acc>=bestAcc){
                    bestAcc = acc;
                    System.arraycopy(IS, 0, bestInstances, 0,instances);
                }
            }
        }

        //restore the best mask found and recount the selected instances
        System.arraycopy(bestInstances, 0, IS, 0,instances);

        instancesSelected=0;
        for(int i=0;i<instances;i++){
            if(IS[i]==1){
                instancesSelected++;
            }
        }

        return evaluations;
    }

    /**
     * Computes one value per instance and stores it in {@link #posRegions}:
     * the minimum, over every instance y, of the implicator applied to the
     * similarity between the instance and y and to 1 (same class label) or
     * 0 (different class label).
     */
    private static void getPosregionsInstances() {

        posRegions = new double[instances];

        for(int i=0;i<instances;i++){

            double min = Double.MAX_VALUE;

            for(int y=0;y<data.length;y++){

                double sameClass = (output[i]==output[y]) ? 1 : 0;
                double impl = calcimpl(similarity(i,y),sameClass);

                if(impl<min){
                    min = impl;
                }
            }

            posRegions[i] = min;
        }
    }

    /**
     * Computes the similarity between two instances: per-feature similarities
     * are combined with the configured t-norm.
     *
     * <p>Unselected features contribute 1.0. Nominal features contribute 1.0
     * when the values are equal and 0.0 otherwise. Continuous features
     * contribute 1 minus the squared difference.
     *
     * @param x First instance
     * @param y Second instance
     *
     * @return Combined similarity
     */
    private static double similarity(int x, int y){

        double [] similarity= new double[features];

        for(int i=0;i<features;i++){

            if(FS[i]==0){
                similarity[i]=1.0;
                continue;
            }

            double dist;

            if(nominal[i]){
                dist = (data[x][i]==data[y][i]) ? 0 : 1;
            }
            else{
                dist = data[x][i]-data[y][i];
                dist = dist*dist;
            }

            similarity[i]=1.0-dist;
        }

        return calctnorm(similarity);
    }

    /**
     * Folds the configured t-norm over an array of values.
     *
     * @param args Values to combine (at least one element)
     *
     * @return The single element for a one-element array; otherwise the
     *         t-norm applied successively to all elements
     */
    private static double calctnorm(double[] args){

        if(args.length==1){
            return args[0];
        }

        double tnormd = calctnorm(args[0],args[1]);

        for(int i=2;i<args.length;i++){
            tnormd = calctnorm(args[i],tnormd);
        }

        return tnormd;
    }

    /**
     * Applies the configured t-norm to two values.
     *
     * @param a First value
     * @param b Second value
     *
     * @return min(a,b) for MIN, a*b for PRODUCT, max(0,a+b-1) for
     *         LUKASIEWICZ, and 0 for any other setting
     */
    private static double calctnorm(double a, double b){

        if(tnorm==MIN){
            return Math.min(a,b);
        }
        else if(tnorm==PRODUCT){
            return a*b;
        }
        else if(tnorm==LUKASIEWICZ){
            return Math.max(0,a+b-1);
        }

        return 0;
    }

    /**
     * Applies the configured implicator to two values.
     *
     * @param a Antecedent
     * @param b Consequent
     *
     * @return min(1,1-a+b) for LUKASIEWICZ, max(1-a,b) for KLEENE_DIENES,
     *         and 0 for any other setting
     */
    private static double calcimpl(double a, double b){

        if(implicator==LUKASIEWICZ){
            return Math.min(1.0,1.0-a+b);
        }
        else if(implicator==KLEENE_DIENES){
            return Math.max(1-a, b);
        }

        return 0;
    }

    /**
     * Squared Euclidean distance between two training instances, computed
     * over the features whose mask entry is 1.
     *
     * @param a First instance
     * @param b Second instance
     *
     * @return Sum of squared differences (no square root is taken)
     */
    private static double euclideanDistance(int a,int b){

        double length=0.0;

        for (int i=0; i<data[b].length; i++) {
            if(FS[i]==1){
                double value = data[a][i]-data[b][i];
                length += value*value;
            }
        }

        return length;

    }//end-method

} //end-class