/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Genetic_Rule_Learning.olexGA;
import itk.exeura.learner.engine.basic.Configuration;
import itk.exeura.learner.engine.geneticAlgorithm.GARepAlgoTypes;
import itk.exeura.learner.engine.geneticAlgorithm.GASelectionAlgoTypes;
import itk.exeura.learner.engine.geneticAlgorithm.LearnerParameterSet;
import itk.exeura.learner.wrapper.core.OlexGAparameters;
import itk.exeura.learner.wrapper.core.SFManager;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.core.Files;
/**
 * KEEL front end for the Olex-GA learner (a genetic algorithm for text
 * classification rule induction, see {@link #getTechnicalInformation()}).
 *
 * <p>The class reads a KEEL configuration file through a
 * {@code ParametersParser}, copies the recognised options into the static
 * fields of {@code OlexGAparameters}, delegates learning and validation to
 * {@code WrapperManager} and writes the training/test outputs.
 *
 * <p>Note that every option is stored in static fields of
 * {@code OlexGAparameters} (and, for the crossover and mutation rates, of
 * {@code LearnerParameterSet}); the settings are therefore shared by all
 * instances of this class.
 *
 * @author Adriana Pietramala
 */
public class OlexGA {

    // Kept from the original source; this class does not declare Serializable.
    private static final long serialVersionUID = 8647630661819994437L;

    /***********************************************/
    /*** START LIST OF AVAILABLE OLEX-GA OPTIONS ***/
    /***********************************************/
    public static final String SCORING_FUNCTION = "scoringFunction";
    public static final String POSITIVE_TERMS_SIZE = "numOfFeatures";
    public static final String XOVER_METHOD = "Xover";
    public static final String XOVER_RATE = "XOverRate";
    public static final String MUTATION_RATE = "mutationRate";
    public static final String SELECTION_ALGO = "selectionAlgorithm";
    public static final String ELITISM_PROPORTION = "elitismRate";
    public static final String POPULATION_SIZE = "populationSize";
    public static final String GENERATIONS = "numOfGenerations";
    public static final String ATTEMPTS = "numOfRuns";
    public static final String LEARNED_CATEGORY_INDEX = "classIndex";
    /***********************************************/
    /**** END LIST OF AVAILABLE OLEX-GA OPTIONS ****/
    /***********************************************/

    /** Number of name/value pairs reported by {@link #getOptions()}. */
    private final int numOfOptions = 11;

    /** Learned configurations; {@code null} until {@link #buildClassifier(String)} succeeds. */
    private List<Configuration> configurations = null;

    protected OlexGAparameters olexGAParams = null;

    /** Training data loaded by {@link #buildClassifier(String)}. */
    protected Dataset m_Instances = null;

    /** Parser of the KEEL configuration file (options and input/output file names). */
    ParametersParser pp = null;

    /**
     * Creates a learner with an empty parameters parser; the options keep
     * whatever values {@code OlexGAparameters} currently holds.
     */
    public OlexGA() {
        pp = new ParametersParser();
    }

    /**
     * Creates a learner and loads its options from a configuration file.
     *
     * <p>Loading is best-effort: a failure is reported on the standard error
     * stream (stack trace) and the instance is still returned.
     *
     * @param configurationFile path of the configuration file to parse
     */
    public OlexGA(String configurationFile) {
        this();
        try {
            this.setOptions(configurationFile);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the model.
     *
     * @param inputFile file from which the training {@code Dataset} is read
     * @throws Exception if the dataset cannot be loaded, if the dataset check
     *             fails ("Dataset Check failed") or if learning fails
     */
    public void buildClassifier(String inputFile) throws Exception {
        this.m_Instances = new Dataset(inputFile, true);
        if (!DatasetChecker.testWithFail(m_Instances)) {
            throw new Exception("Dataset Check failed");
        }
        configurations = WrapperManager.doLearning(m_Instances);
    }

    /**
     * Parses the configuration file and applies every recognised option.
     *
     * <p>Options that are absent or empty leave the corresponding parameter
     * unchanged. The learned class index ({@link #LEARNED_CATEGORY_INDEX}) is
     * not read from the file.
     *
     * @param configurationFile path of the configuration file to parse
     * @throws Exception if the parser fails on the file
     */
    public void setOptions(String configurationFile) throws Exception {
        pp.parseConfigurationFile(configurationFile);

        setScoringFunction(pp.getParameterValue(SCORING_FUNCTION));
        setXOver(pp.getParameterValue(XOVER_METHOD));
        setNumOfFeatures(pp.getParameterValue(POSITIVE_TERMS_SIZE));
        setXOverRate(pp.getParameterValue(XOVER_RATE));
        setMutationRate(pp.getParameterValue(MUTATION_RATE));
        setSelectionAlgorithm(pp.getParameterValue(SELECTION_ALGO));
        setPopulationSize(pp.getParameterValue(POPULATION_SIZE));
        setNumOfGenerations(pp.getParameterValue(GENERATIONS));
        setNumOfRuns(pp.getParameterValue(ATTEMPTS));
        setElitismRate(pp.getParameterValue(ELITISM_PROPORTION));
    }

    /**
     * Returns the current option values.
     *
     * @return an array alternating {@code "-" + optionName} and the value
     *         currently stored in {@code OlexGAparameters} for that option
     */
    public String[] getOptions() {
        String[] options = new String[2 * numOfOptions];
        int current = 0;
        options[current++] = "-" + SCORING_FUNCTION;
        options[current++] = "" + OlexGAparameters.SCORING_FUNCTION;
        options[current++] = "-" + POSITIVE_TERMS_SIZE;
        options[current++] = "" + OlexGAparameters.POSITIVE_TERMS_SIZE;
        options[current++] = "-" + XOVER_METHOD;
        options[current++] = "" + OlexGAparameters.XOVER_METHOD;
        options[current++] = "-" + XOVER_RATE;
        options[current++] = "" + OlexGAparameters.XOVER_RATE;
        options[current++] = "-" + MUTATION_RATE;
        options[current++] = "" + OlexGAparameters.MUTATION_RATE;
        options[current++] = "-" + SELECTION_ALGO;
        options[current++] = "" + OlexGAparameters.SELECTION_ALGORITHM;
        options[current++] = "-" + POPULATION_SIZE;
        options[current++] = "" + OlexGAparameters.POP_SIZE;
        options[current++] = "-" + GENERATIONS;
        options[current++] = "" + OlexGAparameters.GENERATIONS;
        options[current++] = "-" + ATTEMPTS;
        options[current++] = "" + OlexGAparameters.ATTEMPTS;
        options[current++] = "-" + ELITISM_PROPORTION;
        options[current++] = "" + OlexGAparameters.ELITE_PROPORTION;
        options[current++] = "-" + LEARNED_CATEGORY_INDEX;
        options[current++] = "" + OlexGAparameters.LEARNED_CLASS_VALUE_INDEX;
        // Pad any slot left unused so that callers never see null entries.
        while (current < options.length) {
            options[current++] = "";
        }
        return options;
    }

    /**
     * Returns a textual description of the technical background of this
     * class (the paper the algorithm is based on), one field per line.
     *
     * @return the technical information about this class
     */
    public String getTechnicalInformation() {
        StringBuilder result = new StringBuilder("REFERENCES:");
        result.append("\nTITLE: A Genetic Algorithm for Text Classification Rule Induction");
        result.append("\nAUTHORS: A. Pietramala, Veronica L. Policicchio, P. Rullo, I. Sidhu");
        result.append("\nIN PROCEEDINGS of "
                + "LNAI - Machine Learning and Knowledge Discovery in Databases - Part II");
        result.append("\nYEAR: 2008");
        result.append("\nPAGES: 188-203");
        result.append("\nWEB SITE: " + "http://www.unical.it/Olex-GA/");
        return result.toString();
    }

    /**
     * @return the same text as {@link #getTechnicalInformation()}
     */
    @Override
    public String toString() {
        return getTechnicalInformation();
    }

    /**
     * Sets the number of features (positive terms size).
     *
     * @param tmpStr the value as text; {@code null} or empty leaves the
     *            parameter unchanged, unparsable text selects 50
     */
    public void setNumOfFeatures(String tmpStr) {
        if (hasValue(tmpStr)) {
            OlexGAparameters.POSITIVE_TERMS_SIZE = parseIntOrDefault(tmpStr, 50,
                    "num of features");
        }
    }

    public int getNumOfFeatures() {
        return OlexGAparameters.POSITIVE_TERMS_SIZE;
    }

    /**
     * Sets the scoring function by name.
     *
     * @param tmpStr the scoring function name; {@code null} or empty leaves
     *            the parameter unchanged, an unknown name selects index 0
     */
    public void setScoringFunction(String tmpStr) {
        if (hasValue(tmpStr)) {
            int id = SFManager.getScoringFunctionInternalIndex(tmpStr);
            if (id == -1) {
                id = 0;
            }
            OlexGAparameters.SCORING_FUNCTION = id;
        }
    }

    public String getScoringFunction() {
        return SFManager.getScoringFunctions()[OlexGAparameters.SCORING_FUNCTION];
    }

    /**
     * Sets the crossover (reproduction) method by name.
     *
     * @param tmpStr the method name; {@code null} or empty leaves the
     *            parameter unchanged, an unknown name selects index 0
     */
    public void setXOver(String tmpStr) {
        if (hasValue(tmpStr)) {
            int id = GARepAlgoTypes.getXoverIndex(tmpStr);
            if (id == -1) {
                id = 0;
                System.out.println("ga reprod type not found --> using " + id
                        + " instead");
            }
            OlexGAparameters.XOVER_METHOD = id;
        }
    }

    public String getXOver() {
        return GARepAlgoTypes.getRepAlgoritms()[OlexGAparameters.XOVER_METHOD];
    }

    /**
     * Sets the selection algorithm by name.
     *
     * @param tmpStr the algorithm name; {@code null} or empty leaves the
     *            parameter unchanged, an unknown name selects index 0
     */
    public void setSelectionAlgorithm(String tmpStr) {
        if (hasValue(tmpStr)) {
            int id = GASelectionAlgoTypes.getSelectAlgoIndex(tmpStr);
            if (id == -1) {
                System.out.println("ga selction type not found --> using 0");
                id = 0;
            }
            OlexGAparameters.SELECTION_ALGORITHM = id;
        }
    }

    public String getSelectionAlgorithm() {
        return GASelectionAlgoTypes.getSelectionAlgorithmTypes()[OlexGAparameters.SELECTION_ALGORITHM];
    }

    /**
     * Sets the crossover rate and mirrors the resulting value into
     * {@code LearnerParameterSet.xOverRate}.
     *
     * @param tmpStr the value as text; {@code null} or empty leaves the
     *            parameter unchanged, unparsable text selects 1.0
     */
    public void setXOverRate(String tmpStr) {
        if (hasValue(tmpStr)) {
            OlexGAparameters.XOVER_RATE = parseDoubleOrDefault(tmpStr, 1.0,
                    "xover rate");
        }
        // Always re-synchronised, even when the value was left unchanged.
        LearnerParameterSet.xOverRate = OlexGAparameters.XOVER_RATE;
    }

    public double getXOverRate() {
        return OlexGAparameters.XOVER_RATE;
    }

    /**
     * Sets the mutation rate and mirrors the resulting value into
     * {@code LearnerParameterSet.mutationRate}.
     *
     * @param tmpStr the value as text; {@code null} or empty leaves the
     *            parameter unchanged, unparsable text selects 1.0
     */
    public void setMutationRate(String tmpStr) {
        if (hasValue(tmpStr)) {
            OlexGAparameters.MUTATION_RATE = parseDoubleOrDefault(tmpStr, 1.0,
                    "mutation rate");
        }
        // Always re-synchronised, even when the value was left unchanged.
        LearnerParameterSet.mutationRate = OlexGAparameters.MUTATION_RATE;
    }

    public double getMutationRate() {
        return OlexGAparameters.MUTATION_RATE;
    }

    /**
     * Sets the elitism rate.
     *
     * @param tmpStr the value as text; {@code null} or empty leaves the
     *            parameter unchanged, unparsable text selects 0.001
     */
    public void setElitismRate(String tmpStr) {
        if (hasValue(tmpStr)) {
            OlexGAparameters.ELITE_PROPORTION = parseDoubleOrDefault(tmpStr,
                    0.001, "elitism rate");
        }
    }

    public double getElitismRate() {
        return OlexGAparameters.ELITE_PROPORTION;
    }

    /**
     * Sets the population size.
     *
     * @param tmpStr the value as text; {@code null} or empty leaves the
     *            parameter unchanged, unparsable text selects 500
     */
    public void setPopulationSize(String tmpStr) {
        if (hasValue(tmpStr)) {
            OlexGAparameters.POP_SIZE = parseIntOrDefault(tmpStr, 500,
                    "pop size");
        }
    }

    public int getPopulationSize() {
        return OlexGAparameters.POP_SIZE;
    }

    /**
     * Sets the number of generations.
     *
     * @param tmpStr the value as text; {@code null} or empty leaves the
     *            parameter unchanged, unparsable text selects 200
     */
    public void setNumOfGenerations(String tmpStr) {
        if (hasValue(tmpStr)) {
            OlexGAparameters.GENERATIONS = parseIntOrDefault(tmpStr, 200,
                    "num of generations");
        }
    }

    public int getNumOfGenerations() {
        return OlexGAparameters.GENERATIONS;
    }

    /**
     * Sets the number of runs (attempts) of the genetic algorithm.
     *
     * @param tmpStr the value as text; {@code null} or empty leaves the
     *            parameter unchanged, unparsable text selects 1
     */
    public void setNumOfRuns(String tmpStr) {
        if (hasValue(tmpStr)) {
            OlexGAparameters.ATTEMPTS = parseIntOrDefault(tmpStr, 1,
                    "num of runs");
        }
    }

    public int getNumOfRuns() {
        return OlexGAparameters.ATTEMPTS;
    }

    /**
     * Classifies every itemset of a dataset with the learned configuration
     * at index {@code OlexGAparameters.LEARNED_CLASS_VALUE_INDEX}.
     *
     * @param dataset the itemsets to classify
     * @return one {@code OlexResult} per itemset, in dataset order
     * @throws IllegalStateException if no model has been learned yet, i.e.
     *             {@link #buildClassifier(String)} has not completed
     */
    public List<OlexResult> classify(Dataset dataset) {
        if (configurations == null) {
            throw new IllegalStateException(
                    "No learned model available: call buildClassifier before classify");
        }
        return classifyWith(dataset, configurations
                .get(OlexGAparameters.LEARNED_CLASS_VALUE_INDEX));
    }

    /**
     * Classifies every itemset of a dataset with an explicit configuration.
     *
     * <p>An itemset is assigned the learned category when
     * {@code WrapperManager.doValidation} returns a non-zero value, and the
     * complementary class value otherwise.
     */
    private List<OlexResult> classifyWith(Dataset dataset,
            Configuration configuration) {
        List<OlexResult> res = new LinkedList<OlexResult>();
        OlexGA_Attribute classAt = dataset.getClassAttribute();
        String category = classAt
                .value(OlexGAparameters.LEARNED_CLASS_VALUE_INDEX);
        int complIndex = WrapperManager
                .computesComplementaryIndex(OlexGAparameters.LEARNED_CLASS_VALUE_INDEX);
        String complementary = classAt.value(complIndex);

        for (int j = 0; j < dataset.numItemsets(); j++) {
            Itemset inst = dataset.itemset(j);
            double d = WrapperManager.doValidation(inst, configuration,
                    category);
            String classAs = (d != 0.0) ? category : complementary;
            res.add(new OlexResult(category, classAt.value((int) inst
                    .getClassValue()), classAs));
        }
        return res;
    }

    public String populationSizeTipText() {
        return "Number of chromosomes or individuals";
    }

    public String xOverRateTipText() {
        return "The chance that two chromosomes will swap their bits.";
    }

    public String xOverMethodTipText() {
        return "The method used for individuals reproduction.";
    }

    public String elitismProportionTipText() {
        return "The percentage of the best chromosomes to be copied into the new population.";
    }

    public String generationsTipText() {
        return "Number of times the process of going from the current population to the next on is executed.";
    }

    public String attemptsTipText() {
        return "Number of times the genetic algorithm is executed.";
    }

    public String selectionAlgorithmTipText() {
        return "The method of choosing members from the population of chromosomes.";
    }

    public String scoringFunctionTipText() {
        return "If featureSelection is FALSE, the scoring function that will be used to perform feature selection."
                + "If featureSelection is TRUE, the scoring function that you have used to perform feature selection.";
    }

    public String initializationTypeTipText() {
        return "The method used to initialize the population.";
    }

    public String performFeatureSelectionTipText() {
        return "TRUE if you already perform feature selection, FALSE otherwise.";
    }

    public String mutationRateTipText() {
        return "The chance that a bit within a chromosome will be flipped.";
    }

    public String maxPositiveTermsSizeTipText() {
        return "The max size of the subset of terms of the given vocabulary used as candidate positive.";
    }

    /**
     * Computes the confusion counts and precision, recall and F-measure of a
     * list of classification results with respect to the learned class.
     *
     * @return a report with one {@code NAME=value} line for each of TP, FP,
     *         FN, TN, P, R and F
     */
    private String computeMeasures(List<OlexResult> trainClass) {
        int TP = 0, TN = 0, FN = 0, FP = 0;
        for (OlexResult olexResult : trainClass) {
            String expected = olexResult.getExpected();
            String predicted = olexResult.getPredicted();
            boolean predictedLearnedClass = predicted
                    .equalsIgnoreCase(olexResult.getClassLearned());
            boolean correct = predicted.equalsIgnoreCase(expected);
            if (predictedLearnedClass) {
                if (correct) {
                    TP++;
                } else {
                    FP++;
                }
            } else if (correct) {
                TN++;
            } else {
                FN++;
            }
        }

        double P = 0, R = 0, F = 0;
        // With no true positive all three measures are defined as 0, which
        // also avoids the 0/0 divisions below.
        if (TP != 0) {
            P = (double) TP / (TP + FP);
            R = (double) TP / (TP + FN);
            F = 2 * P * R / (P + R);
        }

        StringBuilder report = new StringBuilder();
        report.append("TP=" + TP + "\n");
        report.append("FP=" + FP + "\n");
        report.append("FN=" + FN + "\n");
        report.append("TN=" + TN + "\n");
        report.append("P=" + P + "\n");
        report.append("R=" + R + "\n");
        report.append("F=" + F + "\n");
        return report.toString();
    }

    /**
     * Command-line entry point: learns from the training file named in the
     * configuration file, classifies the training and test files, and writes
     * the two classification outputs plus a measures report.
     *
     * @param args {@code args[0]} is the path of the configuration file; when
     *            it contains {@code "./"} only the part starting at the last
     *            occurrence is used
     * @throws Exception if learning, classification or file output fails
     */
    public static void main(String args[]) throws Exception {
        if (args.length == 0) {
            throw new IllegalArgumentException(
                    "Usage: OlexGA <configuration file>");
        }
        String configurationFile = args[0];
        int relativeStart = configurationFile.lastIndexOf("./");
        // Paths without "./" (absolute or bare file names) are used as given.
        if (relativeStart >= 0) {
            configurationFile = configurationFile.substring(relativeStart);
        }
        System.out.println("conf file " + configurationFile);

        OlexGA olex = new OlexGA(configurationFile);
        // A learning failure is propagated: nothing below can work without
        // the learned configurations.
        olex.buildClassifier(olex.pp.getTrainingInputFile());
        System.out.println("Learning finito...ora bisogna classificare");

        Dataset dataset = new Dataset(olex.pp.getTrainingInputFile(), false);
        List<OlexResult> trainClass = olex.classify(dataset);
        String string = dataset.copyHeader() + olex.toString(trainClass);
        Configuration configuration = olex.configurations
                .get(OlexGAparameters.LEARNED_CLASS_VALUE_INDEX);
        String details = "***** Measures on Training Set ***** \nF-measure: "
                + configuration.getFmeasure();
        String additionalInfo = olex.computeMeasures(trainClass);
        String info = details + "\n" + additionalInfo;
        info += "\nLEARN TIME: " + configuration.getLearning_time() + " sec.";
        Files.writeFile(olex.pp.getTrainingOutputFile(), string);

        dataset = new Dataset(olex.pp.getTestInputFile(), false);
        long startClassificationtime = System.currentTimeMillis();
        List<OlexResult> testClass = olex.classify(dataset);
        long final_ct = elapsedSeconds(startClassificationtime);
        info += "\n***** Measures on Test Set ***** \n"
                + olex.computeMeasures(testClass);
        info += "\nVALIDATION TIME: " + final_ct + " sec.";
        String string2 = dataset.copyHeader() + olex.toString(testClass);
        Files.writeFile(olex.pp.getTestOutputFile(), string2);
        System.out.println("scrittura file di output");
        Files.writeFile(olex.pp.getOutputFile(0), info);
    }

    /**
     * Classifies a test file with an already learned configuration and
     * prints the resulting measures and the validation time.
     *
     * @param testfile file from which the test {@code Dataset} is read
     * @param conf the learned configuration used to classify the itemsets
     * @throws Exception if the dataset cannot be loaded
     */
    public static void testClassification(String testfile, Configuration conf) throws Exception {
        OlexGA olex = new OlexGA();
        Dataset dataset = new Dataset(testfile, false);
        long startClassificationtime = System.currentTimeMillis();
        // The fresh instance has learned nothing itself, so the supplied
        // configuration is the model to validate.
        List<OlexResult> testClass = olex.classifyWith(dataset, conf);
        long final_ct = elapsedSeconds(startClassificationtime);
        String info = "\n***** Measures on Test Set ***** \n"
                + olex.computeMeasures(testClass);
        info += "\nVALIDATION TIME: " + final_ct + " sec.";
        System.out.println(info);
    }

    /**
     * Formats classification results as one {@code "expected predicted"}
     * line per result.
     */
    private String toString(List<OlexResult> trainClass) {
        StringBuilder lines = new StringBuilder();
        for (OlexResult olexResult : trainClass) {
            lines.append(olexResult.getExpected() + " "
                    + olexResult.getPredicted() + "\n");
        }
        return lines.toString();
    }

    /** Tells whether an option value is present (neither null nor empty). */
    private static boolean hasValue(String text) {
        // Compare by content: "==" / "!=" on strings only compares references.
        return text != null && text.length() != 0;
    }

    /** Parses an int option, reporting and returning the fallback when the text is not a number. */
    private static int parseIntOrDefault(String text, int fallback,
            String label) {
        try {
            return Integer.parseInt(text);
        } catch (NumberFormatException e) {
            System.out.println("ga " + label + " not found --> using "
                    + fallback + " instead");
            return fallback;
        }
    }

    /** Parses a double option, reporting and returning the fallback when the text is not a number. */
    private static double parseDoubleOrDefault(String text, double fallback,
            String label) {
        try {
            return Double.parseDouble(text);
        } catch (NumberFormatException e) {
            System.out.println("ga " + label + " not found --> using "
                    + fallback + " instead");
            return fallback;
        }
    }

    /** Whole seconds (rounded up) elapsed since the given {@code System.currentTimeMillis()} reading. */
    private static long elapsedSeconds(long startMillis) {
        return (long) Math
                .ceil(((double) System.currentTimeMillis() - startMillis) / 1000);
    }
}