/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
*
* File: KStar.java
*
* The KStar Algorithm.
* A new instance-based classifier which, rather than using distance, defines
* a "transformation probability" from the test instance to each train instance.
* It accumulates all the "transformation probabilities" per class and selects
* the class with the highest total
*
* @author Written by Joaquín Derrac (University of Granada) 14/11/2008
* @version 1.0
* @since JDK1.5
*
*/
package keel.Algorithms.Lazy_Learning.KStar;
import keel.Algorithms.Lazy_Learning.LazyAlgorithm;
import java.util.*;
import org.core.*;
public class KStar extends LazyAlgorithm {

    // Parameters read from the configuration script
    int selectionMethod;      // RANDOM or FIXED class-selection strategy
    double blendFactor;       // blend parameter b in [0,1]: fraction of non-nearest instances included in the sphere

    // Additional structures
    double trainDistances[];  // per-attribute |train - test| distances, reused by calcScale/calculateSphereSize
    double classProb[];       // accumulated transformation probability per class for the current test instance
    ArrayList<Hashtable<Double, Double>> scaleTable; // per-feature cache: test value -> computed scale

    // Constants
    private static final double EPSILON = 1e-5;
    private static final double ROOT_FINDER_ACCURACY = 0.01;
    private static final int ROOT_FINDER_MAX_ITER = 30;
    private static final int RANDOM = 1;
    private static final int FIXED = 2;

    /**
     * The main method of the class. Reads the data files and the
     * configuration script, and initializes the auxiliary structures.
     *
     * @param script Name of the configuration script
     */
    public KStar(String script) {

        readDataFiles(script);

        // Naming the algorithm
        name = "KStar";

        // Initialization of auxiliary structures
        classProb = new double[nClasses];
        trainDistances = new double[trainData.length];

        // One scale cache per attribute; keyed by the test value of that attribute
        scaleTable = new ArrayList<Hashtable<Double, Double>>();
        for (int i = 0; i < trainData.length; i++) {
            scaleTable.add(new Hashtable<Double, Double>());
        }

        // Initialization of random generator (used by the RANDOM selection method)
        Randomize.setSeed(seed);

        // Initialization stuff ends here. So, we can start time-counting
        setInitialTime();

    } // end-method

    /**
     * Reads the configuration script, to extract the parameter's values.
     * Expected layout (one per line, after the three in/out file lines):
     * seed, selection method ("Random" or anything else = fixed), blend factor.
     *
     * @param script Name of the configuration script
     */
    protected void readParameters(String script) {

        String file;
        String line;
        StringTokenizer fileLines, tokens;

        file = Files.readFile(script);
        fileLines = new StringTokenizer(file, "\n\r");

        // Discard in/out files definition
        fileLines.nextToken();
        fileLines.nextToken();
        fileLines.nextToken();

        // Getting the seed (substring(1) skips the space after '=')
        line = fileLines.nextToken();
        tokens = new StringTokenizer(line, "=");
        tokens.nextToken();
        seed = Long.parseLong(tokens.nextToken().substring(1));

        // Getting the selectionMethod
        line = fileLines.nextToken();
        tokens = new StringTokenizer(line, "=");
        tokens.nextToken();
        selectionMethod = tokens.nextToken().substring(1).equalsIgnoreCase("Random") ? RANDOM : FIXED;

        // Getting the blendFactor
        line = fileLines.nextToken();
        tokens = new StringTokenizer(line, "=");
        tokens.nextToken();
        blendFactor = Double.parseDouble(tokens.nextToken().substring(1));

    } // end-method

    /**
     * Evaluates an instance to predict its class: accumulates the
     * transformation probability of the test instance into each class
     * and delegates the final choice to the configured selection method.
     *
     * @param example Instance evaluated
     * @return Class predicted (-1 if none could be selected)
     */
    protected int evaluate(double example[]) {

        int output;
        double probability;

        output = -1;

        for (int i = 0; i < classProb.length; i++) {
            classProb[i] = 0.0;
        }

        probability = 0.0;

        // find class transformation probability distribution
        for (int i = 0; i < trainData.length; i++) {
            probability = calcTransProb(i, example);
            classProb[trainOutput[i]] += probability;
        }

        switch (selectionMethod) {
            case RANDOM:
                output = findRandomOutput(example);
                break;
            case FIXED:
                output = findFixedOutput(example);
                break;
        }

        return output;

    } // end-method

    /**
     * Evaluates an instance to predict its class.
     * Selects the most probable class.
     *
     * @param example Instance evaluated
     * @return Class predicted
     */
    private int findFixedOutput(double example[]) {

        int output = -1;
        // BUGFIX: was Double.MIN_VALUE, which is the smallest POSITIVE double,
        // not the most negative value; a class whose accumulated probability is
        // 0.0 (or below MIN_VALUE) could never be selected and -1 was returned.
        double max = Double.NEGATIVE_INFINITY;

        for (int i = 0; i < classProb.length; i++) {
            if (max < classProb[i]) {
                max = classProb[i];
                output = i;
            }
        }

        return output;

    } // end-method

    /**
     * Evaluates an instance to predict its class.
     * Selects a random class, using the previously estimated
     * probabilities as a (non-normalized) discrete distribution.
     *
     * @param example Instance evaluated
     * @return Class predicted
     */
    private int findRandomOutput(double example[]) {

        int output = -1;
        double sum;
        double value;
        boolean found = false;

        // total mass of the probability vector
        sum = 0.0;
        for (int i = 0; i < classProb.length; i++) {
            sum += classProb[i];
        }

        // draw a random value in [0, sum) and find the class it falls into
        value = Randomize.Randdouble(0.0, sum);

        sum = 0.0;
        for (int i = 0; i < classProb.length && !found; i++) {
            sum += classProb[i];
            if (sum > value) {
                output = i;
                found = true;
            }
        }

        return output;

    } // end-method

    /**
     * Estimates the "transformation probability" from the test instance
     * to a train instance, as the sum of the per-attribute probabilities.
     *
     * @param instance Index to train instance
     * @param example Instance evaluated
     * @return Transformation probability for the instance
     */
    private double calcTransProb(int instance, double example[]) {

        double probability = 0.0;

        for (int i = 0; i < example.length; i++) {
            probability += calcAttTransProb(trainData[instance][i], example[i], i);
        }

        return probability;

    } // end-method

    /**
     * Estimates the "transformation probability" from an attribute of the
     * test instance to an attribute of a train instance.
     *
     * @param train Attribute value of the train instance
     * @param test Attribute value of the test instance
     * @param feature Attribute to be evaluated
     * @return Transformation probability for the attribute
     */
    private double calcAttTransProb(double train, double test, int feature) {

        double probability = 0.0;
        double distance;
        double scale;

        // Check if we had calculated that scale before. The scale only depends
        // on the test value and the feature (calcScale ignores its train
        // argument), so caching by test value is sound.
        Hashtable<Double, Double> auxTable = scaleTable.get(feature);

        if (auxTable.containsKey(test)) {
            scale = auxTable.get(test);
        } else {
            scale = calcScale(train, test, feature);
            auxTable.put(test, scale);
        }

        // calculate distance
        distance = Math.abs(test - train);

        // calculate probability
        probability = PStar(distance, scale);

        return probability;

    } // end-method

    /**
     * The PStar distribution: an exponential kernel on the distance.
     *
     * @param x In value (distance)
     * @param scale Scale used
     * @return Value of PStar distribution
     */
    private double PStar(double x, double scale) {

        double value;

        value = scale * Math.exp(-2.0 * x * scale);

        return value;

    } // end-method

    /**
     * Calculates a scale value to select the appropriate number of
     * instances to be measured by the PStar distance, via bisection
     * on the effective sphere size.
     *
     * @param train Value for train instance (unused; kept for interface stability)
     * @param test Value for test instance
     * @param feature Attribute selected
     * @return Value of scale
     */
    private double calcScale(double train, double test, int feature) {

        double nearest;
        double lowest;
        int lowestcount;
        double scale;
        double root;
        double desiredInstances;
        double bottomSphere;
        double bottomRoot;
        double upRoot;
        double upSphere;
        double actualSphere;
        double zero;
        double best;
        boolean finish = false;
        int iterations;

        lowest = -1.0;   // there is no lowest instance yet
        nearest = -1.0;  // there is no nearest (second-lowest) instance yet
        lowestcount = 0;
        scale = -1.0;

        // Find the smallest distance (lowest), how many instances share it
        // (lowestcount, within EPSILON), and the next-smallest distance (nearest)
        for (int i = 0; i < trainData.length; i++) {

            trainDistances[i] = Math.abs(trainData[i][feature] - test);

            if ((trainDistances[i] + EPSILON) < nearest || nearest == -1.0) {
                if ((trainDistances[i] + EPSILON) < lowest || lowest == -1.0) {
                    nearest = lowest;
                    lowest = trainDistances[i];
                    lowestcount = 1;
                } else {
                    if (Math.abs(trainDistances[i] - lowest) < EPSILON) {
                        lowestcount++;
                    } else {
                        nearest = trainDistances[i];
                    }
                }
            }
        }

        // check if data values are all the same
        if (nearest == -1.0 || lowest == -1.0) {
            scale = 1.0;
        } else {

            // root finding algorithm

            // initial root
            root = 1.0 / (nearest - lowest);

            // final sphere size desired (blend % of non-lowest instances)
            desiredInstances = lowestcount + ((trainData.length - lowestcount) * blendFactor);

            if (blendFactor == 0) {
                desiredInstances += 1.0;
            }

            // root is bracketed in interval [bottomRoot, upRoot]
            bottomRoot = 0.0 + ROOT_FINDER_ACCURACY / 2.0;
            upRoot = root * 16; // a great value

            bottomSphere = calculateSphereSize(bottomRoot);
            upSphere = calculateSphereSize(upRoot);

            if (bottomSphere < 0) {
                // Couldn't include that many instances - going for max possible
                scale = bottomRoot;
            }
            if (upSphere > 0) {
                // Couldn't include that few, going for min possible
                scale = upRoot;
            }

            if (scale == -1.0) {

                scale = 1.0;

                // start of the iterative process (bisection)
                best = Double.MAX_VALUE;
                iterations = 0;

                while (!finish) {

                    actualSphere = calculateSphereSize(root);
                    zero = actualSphere - desiredInstances;

                    // keep the best root seen so far
                    if (Math.abs(zero) < best) {
                        best = Math.abs(zero);
                        scale = root;
                    }

                    if (Math.abs(zero) <= ROOT_FINDER_ACCURACY) {
                        // the algorithm has converged to a solution!
                        finish = true;
                    }

                    if (zero > 0.0) {
                        // sphere too big: shrink it by increasing the scale
                        bottomRoot = root;
                        root = (root + upRoot) / 2.0;
                    } else {
                        upRoot = root;
                        root = (root + bottomRoot) / 2.0;
                    }

                    // BUGFIX: iterations was never incremented, so the
                    // ROOT_FINDER_MAX_ITER guard could never fire and the loop
                    // could spin forever on non-convergence. Count the passes
                    // and bail out (keeping the best scale found) when exceeded.
                    iterations++;
                    if (iterations > ROOT_FINDER_MAX_ITER) {
                        System.out.println("Warning: ROOT_FINDER_MAX_ITER exceeded");
                        finish = true;
                    }

                } // end of the iterative process
            }

        } // end-root finding algorithm

        return scale;

    } // end-method

    /**
     * Calculates the sphereSize (effective number of instances affected)
     * for a given scale, as sum(P*)^2 / sum(P*^2).
     *
     * @param scale Scale used
     * @return Sphere size
     */
    private double calculateSphereSize(double scale) {

        double sphereSize;
        double pstar;               // P*(b|a)
        double pstarSum = 0.0;      // sum(P*)
        double pstarSquareSum = 0.0; // sum(P*^2)
        double inc;

        for (int i = 0; i < trainData.length; i++) {
            pstar = PStar(trainDistances[i], scale);
            inc = pstar;
            pstarSum += inc;
            pstarSquareSum += inc * inc;
        }

        // guard against division by zero when all probabilities vanish
        if (pstarSquareSum != 0) {
            sphereSize = pstarSum * pstarSum / pstarSquareSum;
        } else {
            sphereSize = 0.0;
        }

        return sphereSize;

    } // end-method

} // end-class