/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.ImbalancedClassification.ImbalancedAlgorithms.GP_COACH_H;
import java.util.ArrayList;
import java.util.Collections;
import org.core.Randomize;
/**
* <p>Title: CHC </p>
*
* <p>Description: Uses a CHC algorithm to select the rules used in the GP-COACH-H algorithm </p>
*
* <p>Company: KEEL </p>
*
* @author Written by Victoria Lopez (University of Granada) 26/04/2011
* @version 1.5
* @since JDK1.5
*/
public class CHC {
    /**
     * The search loop of {@link #runCHC()} keeps going only while the number of
     * consecutive restarts without a new best fitness does not exceed this value.
     */
    private static final int MAX_RESTARTS_WITHOUT_IMPROVEMENT = 3;

    /** Rules the chromosomes select from; one selection gene per rule. */
    private ArrayList <Rule> rule_population;
    /** Training dataset handed to every chromosome evaluation. */
    private myDataset dataset;
    /** GP-COACH-H object handed to every chromosome evaluation. */
    private GP_COACH_H gp_coach_obj;
    /** Alpha value forwarded to the chromosome evaluation and to the rule update. */
    private double raw_alpha;
    /** Current CHC population. */
    private ArrayList <CHC_Chromosome> population;
    /** Evaluation budget: the search loop stops once n_eval reaches it. */
    private int max_eval;
    /** Number of chromosome evaluations performed in the current run. */
    private int n_eval;
    /** Number of chromosomes in the population. */
    private int pop_length;
    /** Bits per gene, used by the Hamming distance and by the threshold. */
    private int bitsgene;
    /** Number of lateral tuning genes: (labels of the granularities in use) * (number of inputs). */
    private int tuning_size;
    /** True when at least one rule has granularity nLabelsLow. */
    private boolean has_low_granularity;
    /** True when at least one rule has granularity nLabelsHigh. */
    private boolean has_high_granularity;
    /** Number of labels of the low granularity level. */
    private int nLabelsLow;
    /** Number of labels of the high granularity level. */
    private int nLabelsHigh;
    /** Crossover threshold: two parents are only crossed when half their Hamming distance exceeds it. */
    private double threshold;
    /** Best fitness seen since the last reset. */
    private double best_fitness;
    /** Consecutive restarts performed without finding a new best fitness. */
    private int n_restart_not_improving;

    /**
     * Default constructor. The resulting object has no dataset nor rules, so it cannot
     * run the algorithm; it only guarantees that the population is an empty list instead of null.
     */
    public CHC () {
        population = new ArrayList <CHC_Chromosome> ();
        best_fitness = -1.0;
    }

    /**
     * Creates a CHC object with its parameters
     *
     * @param current_dataset Training dataset used in this algorithm
     * @param gp_coach_data GP-COACH-H object that we will use to train an element in the training set with our rules
     * @param alpha Alpha of the raw_fitness evaluation function
     * @param pop Population of rules we want to select
     * @param n_low Number of fuzzy labels of the low granularity level (compared against the granularity of each rule)
     * @param n_high Number of fuzzy labels of the high granularity level (compared against the granularity of each rule)
     * @param eval Maximum number of evaluations in the CHC algorithm
     * @param popLength Size of the population in the CHC algorithm
     * @param bits_per_gene Bits per gene for the gray code associated to the real part of the CHC algorithm (lateral tuning)
     */
    public CHC (myDataset current_dataset, GP_COACH_H gp_coach_data, double alpha, ArrayList <Rule> pop, int n_low, int n_high, int eval, int popLength, int bits_per_gene) {
        dataset = current_dataset;
        gp_coach_obj = gp_coach_data;
        rule_population = pop;
        max_eval = eval;
        pop_length = popLength;
        bitsgene = bits_per_gene;
        raw_alpha = alpha;
        population = new ArrayList <CHC_Chromosome> (pop_length);
        best_fitness = -1.0;
        nLabelsLow = n_low;
        nLabelsHigh = n_high;
    }

    /**
     * Runs the CHC algorithm over the rule population given in the constructor.
     * <p>
     * The search stops when the evaluation budget is spent, when the best fitness reaches 1.0,
     * or when too many consecutive restarts have not produced a new best fitness. On return the
     * population is sorted, so the results can be read with {@link #obtainNewRuleBase()} and
     * {@link #obtainLateralTuning()}.
     * <p>
     * Every call starts a fresh search: counters, best fitness and population are reset.
     */
    public void runCHC () {
        ArrayList <CHC_Chromosome> C_population;
        ArrayList <CHC_Chromosome> Cr_population;
        boolean pop_changes;
        int total_labels = 0;

        // Reset the state of the run, so that a second call does not inherit anything from the first
        n_eval = 0;
        n_restart_not_improving = 0;
        best_fitness = -1.0;

        // Compute the number of fuzzy labels we have to tune
        // First of all, we check which granularities appear in the rule base
        has_low_granularity = false;
        has_high_granularity = false;
        for (int r=0; ((r<rule_population.size()) && (!has_low_granularity || !has_high_granularity)); r++) {
            Rule current_rule = rule_population.get(r);
            if (current_rule.getGranularity() == nLabelsLow) {
                has_low_granularity = true;
            }
            if (current_rule.getGranularity() == nLabelsHigh) {
                has_high_granularity = true;
            }
        }

        if (has_low_granularity) {
            total_labels += nLabelsLow;
        }
        if (has_high_granularity) {
            total_labels += nLabelsHigh;
        }

        tuning_size = total_labels * dataset.getnInputs();
        threshold = initialThreshold();

        initPopulation();
        evalPopulation();

        do {
            // Select for crossover
            C_population = randomSelection();

            // Cross selected individuals
            Cr_population = recombine (C_population);

            // Evaluate new population
            evaluate (Cr_population);

            // Select individuals for new population
            pop_changes = selectNewPopulation (Cr_population);

            // If no descendant entered the population, relax the crossover threshold
            if (!pop_changes) {
                threshold -= bitsgene;
            }

            // If we do not improve our current population for several trials, then we should restart the population
            if (threshold < 0) {
                restartPopulation();
                threshold = initialThreshold();
                // Recomputed from the restarted population by evalPopulation()
                best_fitness = -1.0;
                n_restart_not_improving++;
                evalPopulation();
            }
        } while ((n_eval < max_eval) && (best_fitness < 1.0) && (n_restart_not_improving <= MAX_RESTARTS_WITHOUT_IMPROVEMENT));

        // The evaluations have finished now, so we sort the population to leave the best individual first
        Collections.sort(population);
    }

    /**
     * Computes the starting value of the crossover threshold:
     * (tuning_size * bitsgene + number of rules) / 4.
     *
     * @return initial crossover threshold for the current rule base and tuning size
     */
    private double initialThreshold () {
        return (double)(tuning_size * bitsgene + rule_population.size())/4.0;
    }

    /**
     * Builds a fresh population. The first individual is created with the "all true" constructor
     * flag and the midpoint of the lateral tuning range; the remaining ones use the
     * two-argument chromosome constructor.
     */
    private void initPopulation () {
        // Start from an empty list: appending to a previous population would break the pop_length invariant
        population = new ArrayList <CHC_Chromosome> (pop_length);

        double middle_tuning = CHC_Chromosome.MIN_LATERAL_TUNING + ((double)(CHC_Chromosome.MAX_LATERAL_TUNING - CHC_Chromosome.MIN_LATERAL_TUNING)/2.0);
        population.add(new CHC_Chromosome (rule_population.size(), true, tuning_size, middle_tuning));

        for (int i=1; i<pop_length; i++) {
            population.add(new CHC_Chromosome (rule_population.size(), tuning_size));
        }
    }

    /**
     * Evaluates the given chromosome unless it reports it has already been evaluated,
     * counting the evaluation against the budget.
     *
     * @param chromosome Individual to evaluate
     */
    private void evaluateIfNeeded (CHC_Chromosome chromosome) {
        if (chromosome.not_eval()) {
            chromosome.evaluate(dataset, rule_population, gp_coach_obj, has_low_granularity, has_high_granularity, nLabelsLow, nLabelsHigh, raw_alpha);
            n_eval++;
        }
    }

    /**
     * Evaluates the population individuals and updates the best fitness found.
     * If a chromosome was previously evaluated we do not evaluate it again
     */
    private void evalPopulation () {
        for (int i = 0; i < pop_length; i++) {
            CHC_Chromosome individual = population.get(i);
            evaluateIfNeeded(individual);

            double ind_fitness = individual.getFitness();
            if (ind_fitness > best_fitness) {
                best_fitness = ind_fitness;
            }
        }
    }

    /**
     * Copies all the members of the current population to a new population ArrayList in random order
     *
     * @return copies of the current population members in random order
     */
    private ArrayList <CHC_Chromosome> randomSelection() {
        ArrayList <CHC_Chromosome> C_population = new ArrayList <CHC_Chromosome> (pop_length);
        int [] order = new int[pop_length];

        for (int i=0; i<pop_length; i++) {
            order[i] = i;
        }

        // Shuffle the indexes by swapping each position with a randomly chosen later one.
        // NOTE(review): if Randomize.Randint excludes its upper bound, the last index is never
        // moved and the shuffle is biased - confirm against org.core.Randomize.
        for (int i=0; i<pop_length; i++) {
            int pos = Randomize.Randint(i, pop_length-1);
            int tmp = order[i];
            order[i] = order[pos];
            order[pos] = tmp;
        }

        for (int i=0; i<pop_length; i++) {
            C_population.add(new CHC_Chromosome(population.get(order[i])));
        }

        return C_population;
    }

    /**
     * Obtains the descendants of the given population. Parents are paired in order
     * (0 with 1, 2 with 3, ...) and a pair only produces its two descendants when half of
     * its Hamming distance is greater than the current threshold.
     *
     * @param original_population Original parents used to create the descendants population
     * @return Population of descendants of the given population (possibly empty)
     */
    private ArrayList <CHC_Chromosome> recombine (ArrayList <CHC_Chromosome> original_population) {
        // With an odd population size the last individual has no partner
        int n_descendants = pop_length;
        if ((n_descendants%2)!=0) {
            n_descendants--;
        }

        ArrayList <CHC_Chromosome> Cr_population = new ArrayList <CHC_Chromosome> (n_descendants);

        for (int i=0; i<n_descendants; i+=2) {
            CHC_Chromosome main_parent = original_population.get(i);
            CHC_Chromosome second_parent = original_population.get(i+1);

            int distHamming = main_parent.hammingDistance (second_parent, bitsgene);

            if ((distHamming/2.0) > threshold) {
                ArrayList <CHC_Chromosome> descendants = main_parent.createDescendants(second_parent);
                Cr_population.add(descendants.get(0));
                Cr_population.add(descendants.get(1));
            }
        }

        return Cr_population;
    }

    /**
     * Evaluates the given individuals. If a chromosome was previously evaluated we do not evaluate it again
     *
     * @param pop Population of individuals we want to evaluate
     */
    private void evaluate (ArrayList <CHC_Chromosome> pop) {
        for (int i = 0; i < pop.size(); i++) {
            evaluateIfNeeded(pop.get(i));
        }
    }

    /**
     * Replaces the current population with the best individuals of the given population and the current population.
     * Both lists are sorted in place; the code reads index 0 as the best individual and the last
     * index as the worst one (this relies on the natural ordering of CHC_Chromosome).
     *
     * @param pop Population of new individuals we want to introduce in the current population
     * @return true, if any element of the current population is changed with other element of the new population; false, otherwise
     */
    private boolean selectNewPopulation (ArrayList <CHC_Chromosome> pop) {
        // First, we sort the old and the new population
        Collections.sort(population);
        Collections.sort(pop);

        // Without descendants nothing can change
        if (pop.isEmpty()) {
            return false;
        }

        double worst_old_population = population.get(population.size()-1).getFitness();
        double best_new_population = pop.get(0).getFitness();

        // No descendant beats the worst member of the current population
        if (worst_old_population >= best_new_population) {
            return false;
        }

        ArrayList <CHC_Chromosome> new_pop = new ArrayList <CHC_Chromosome> (pop_length);
        CHC_Chromosome current_chromosome;
        double current_fitness;
        int i = 0;
        int i_pop = 0;
        boolean copy_old_population = true;
        boolean small_new_pop = false;

        // Copy the members of the old population better than the members of the new population
        do {
            current_chromosome = population.get(i);
            current_fitness = current_chromosome.getFitness();

            if (current_fitness < best_new_population) {
                // Check if we have enough members in the new population to create the final population
                if ((pop_length-i) > pop.size()) {
                    // Not enough descendants to fill the remaining slots: keep this old member
                    new_pop.add(current_chromosome);
                    i++;
                    small_new_pop = true;
                }
                else {
                    copy_old_population = false;
                }
            }
            else {
                new_pop.add(current_chromosome);
                i++;
            }
        } while ((i < pop_length) && (copy_old_population));

        // Fill the remaining slots with the best descendants
        while (i < pop_length) {
            new_pop.add(pop.get(i_pop));
            i++;
            i_pop++;
        }

        // Old members worse than the best descendant were copied before it, so the order must be rebuilt
        if (small_new_pop) {
            Collections.sort(new_pop);
        }

        current_fitness = new_pop.get(0).getFitness();
        if (best_fitness < current_fitness) {
            best_fitness = current_fitness;
            n_restart_not_improving = 0;
        }

        population = new_pop;
        return true;
    }

    /**
     * Creates a new population for a restart: the first individual of the sorted current
     * population is kept and the rest are newly created chromosomes
     */
    private void restartPopulation () {
        ArrayList <CHC_Chromosome> new_pop = new ArrayList <CHC_Chromosome> (pop_length);

        Collections.sort(population);
        new_pop.add(population.get(0));

        for (int i=1; i<pop_length; i++) {
            new_pop.add(new CHC_Chromosome (rule_population.size(), tuning_size));
        }

        population = new_pop;
    }

    /**
     * Returns the first individual of the population, which is the best one once
     * {@link #runCHC()} has finished and sorted the population.
     *
     * @return first chromosome of the population
     * @throws IllegalStateException if the population is empty, i.e. the algorithm has not been run
     */
    private CHC_Chromosome bestChromosome () {
        if ((population == null) || population.isEmpty()) {
            throw new IllegalStateException("The CHC population is empty: runCHC() must be executed before asking for its results");
        }
        return population.get(0);
    }

    /**
     * Splits the flat lateral tuning array into a [input][label] matrix for one granularity level.
     *
     * @param lateral_tuning Flat tuning array of the best chromosome
     * @param n_inputs Number of input variables (rows of the matrix)
     * @param stride Number of tuning values stored per input variable in the flat array
     * @param offset Position of the first label of this granularity inside the values of one input
     * @param n_labels Number of labels of this granularity (columns of the matrix)
     * @return tuning matrix for the requested granularity
     */
    private double [][] extractTuning (double [] lateral_tuning, int n_inputs, int stride, int offset, int n_labels) {
        double [][] tuning = new double [n_inputs][n_labels];
        for (int i=0; i<n_inputs; i++) {
            System.arraycopy(lateral_tuning, i*stride+offset, tuning[i], 0, n_labels);
        }
        return tuning;
    }

    /**
     * Obtains the best set of rules from the genetic rule selection process. Each selected rule
     * is copied and its fuzzy labels are updated with the lateral tuning of its granularity.
     * The process terminates the JVM (exit code -1) when the chromosome and the rule base are inconsistent.
     *
     * @return best set of rules
     * @throws IllegalStateException if the algorithm has not been run yet
     */
    public ArrayList <Rule> obtainNewRuleBase() {
        CHC_Chromosome best_solution = bestChromosome();
        boolean [] selected_rules = best_solution.obtainSelectedRules();
        double [] lateral_tuning = best_solution.obtainLateralTuning();

        if (selected_rules.length != rule_population.size()) {
            System.err.println("The CHC procedure obtained a different rule base size than the original one");
            System.exit(-1);
        }

        // For every input variable the flat array stores the labels of the granularities in use:
        // first the low granularity labels (if any) and then the high granularity labels (if any)
        int labels_per_input = 0;
        if (has_low_granularity) {
            labels_per_input += nLabelsLow;
        }
        if (has_high_granularity) {
            labels_per_input += nLabelsHigh;
        }

        // Create the low granularity tuning matrix
        double [][] low_granularity_tuning = null;
        if (has_low_granularity) {
            low_granularity_tuning = extractTuning(lateral_tuning, dataset.getnInputs(), labels_per_input, 0, nLabelsLow);
        }

        // Create the high granularity tuning matrix
        double [][] high_granularity_tuning = null;
        if (has_high_granularity) {
            int high_offset = has_low_granularity ? nLabelsLow : 0;
            high_granularity_tuning = extractTuning(lateral_tuning, dataset.getnInputs(), labels_per_input, high_offset, nLabelsHigh);
        }

        ArrayList <Rule> new_population = new ArrayList <Rule> (rule_population.size());

        for (int i=0; i<selected_rules.length; i++) {
            if (selected_rules[i]) {
                // Work on a copy so the original rule base is not modified
                Rule new_aux_i = new Rule(rule_population.get(i));

                // Update the rule according to the chromosome selected
                if (new_aux_i.getGranularity() == nLabelsLow) {
                    if (!has_low_granularity) {
                        System.err.println("We are selecting a low granularity rule when this rule base hasn't got any low granularity rules");
                        System.err.println("Rule{" + new_aux_i.getLevel() + "}: " + new_aux_i.printString(dataset.varNames(), dataset.classNames()));
                        System.exit(-1);
                    }
                    new_aux_i.updateFuzzyLabels(low_granularity_tuning, dataset, raw_alpha);
                }
                else if (new_aux_i.getGranularity() == nLabelsHigh) {
                    if (!has_high_granularity) {
                        System.err.println("We are selecting a high granularity rule when this rule base hasn't got any high granularity rules");
                        System.err.println("Rule{" + new_aux_i.getLevel() + "}: " + new_aux_i.printString(dataset.varNames(), dataset.classNames()));
                        System.exit(-1);
                    }
                    new_aux_i.updateFuzzyLabels(high_granularity_tuning, dataset, raw_alpha);
                }
                else {
                    System.err.println("This rule has an unknown granularity not considered in this algorithm");
                    System.exit(-1);
                }

                new_population.add(new_aux_i);
            }
        }

        return new_population;
    }

    /**
     * Obtains the best lateral tuning from the genetic tuning process
     *
     * @return flat real array with the lateral tuning of the best chromosome
     * @throws IllegalStateException if the algorithm has not been run yet
     */
    public double [] obtainLateralTuning () {
        return bestChromosome().obtainLateralTuning();
    }
}