/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Genetic_Rule_Learning.olexGA;
import itk.exeura.learner.engine.basic.Category;
import itk.exeura.learner.engine.basic.Configuration;
import itk.exeura.learner.engine.basic.DiscriminativeTerm;
import itk.exeura.learner.engine.basic.DocumentSet;
import itk.exeura.learner.engine.basic.TermConjunction;
import itk.exeura.learner.engine.control.DTSelectionLoop;
import itk.exeura.learner.engine.exception.RunExperimentException;
import itk.exeura.learner.engine.optimization.ExperimentConfiguration;
import itk.exeura.learner.engine.termSelection.VocabularyReducer;
import itk.exeura.learner.wrapper.core.OlexGAparameters;
import itk.exeura.learner.wrapper.core.SFManager;
import itk.exeura.learner.wrapper.core.ScoringFunctionTermsComparator;
import java.util.Collections;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.Vector;
/**
*
* @author Adriana Pietramala
*
*/
public class WrapperManager {
// Per-category lookup tables of positive terms (category name -> term name -> true).
// Built lazily in doValidation() so repeated validations avoid re-scanning the rule.
private static HashMap<String, HashMap<String, Boolean>> catPosTerms = null;
// Per-category lookup tables of negative terms; same structure as catPosTerms.
private static HashMap<String, HashMap<String, Boolean>> catNegTerms = null;
// Indices of every itemset of the current training set (filled by computesTCs).
private static List<Integer> trainingSetDocumentsID = null;
// The categories (one per class value) of the current learning problem.
private static Vector<Category> categories = null;
/**
 * Returns the category list built by the last call to {@link #doLearning}.
 */
public static Vector<Category> getCategories() {
return categories;
}
/**
 * Replaces the cached category list.
 */
public static void setCategories(Vector<Category> categories) {
WrapperManager.categories = categories;
}
/**
 * Returns the training-set document indices built by {@link #computesTCs}.
 */
public static List<Integer> getTrainingSetDocumentsID() {
return trainingSetDocumentsID;
}
/**
 * Replaces the cached training-set document index list.
 */
public static void setTrainingSetDocumentsID(
List<Integer> trainingSetDocumentsID) {
WrapperManager.trainingSetDocumentsID = trainingSetDocumentsID;
}
/**
 * Partitions the training set by class value.
 * <p>
 * Side effect: (re)initializes {@code trainingSetDocumentsID} with the
 * index of every itemset in {@code trainingSet}.
 *
 * @param trainingSet the training data
 * @return a table mapping each class value (category name) to the vector of
 *         itemset indices that belong to that class
 */
public static Hashtable<String, Vector<Integer>> computesTCs(
Dataset trainingSet) {
OlexGA_Attribute classAttribute = trainingSet.getClassAttribute();
Hashtable<String, Vector<Integer>> TCs = new Hashtable<String, Vector<Integer>>();
trainingSetDocumentsID = new LinkedList<Integer>();
for (int i = 0; i < trainingSet.numItemsets(); i++) {
// the current itemset
Itemset inst = trainingSet.itemset(i);
// class value index of this itemset
int classAttrV = (int) inst.getClassValue(); // inst.value(classAttribute);
// record this itemset in the training-set document list
trainingSetDocumentsID.add(i);
Vector<Integer> classAttributeDocuments = null;
// create the per-class vector the first time this class value is seen
if ((classAttributeDocuments = TCs.get(classAttribute
.value(classAttrV))) == null) {
classAttributeDocuments = new Vector<Integer>();
TCs.put(classAttribute.value(classAttrV),
classAttributeDocuments);
}
// add the document to its class vector
classAttributeDocuments.add(i);
}
return TCs;
}
/**
 * Runs the complete Olex-GA learning phase on the given training set.
 * <p>
 * Steps: builds the category list, partitions documents by class
 * ({@link #computesTCs}), selects the learned class
 * ({@link #setLearnedCategory}), builds the per-category vocabularies
 * (stored in the static {@code VocabularyReducer.vocabularies}, which is
 * cleared again before returning) and finally runs
 * {@code DTSelectionLoop.loop}.
 *
 * @param trainingSet the training data
 * @return the learned configurations, or {@code null} if the learning loop
 *         failed with a {@code RunExperimentException} (which is only
 *         printed, not rethrown)
 * @throws Exception propagated from the learning engine
 */
public static List<Configuration> doLearning(Dataset trainingSet)
throws Exception {
// reset the validation caches left over from any previous run
catPosTerms = null;
catNegTerms = null;
// Step 1: builds the set of categories, one per class value
OlexGA_Attribute classAttribute = trainingSet.getClassAttribute();
categories = new Vector<Category>();
for (int i = 0; i < classAttribute.numValues(); i++) {
String className = classAttribute.value(i);
Category cat = new Category();
cat.setCategoryName(className);
cat.setCategoryNumber(i);
categories.add(cat);
}
// Step 2: builds TCs <category, set of category documents>
Hashtable<String, Vector<Integer>> TCs = WrapperManager
.computesTCs(trainingSet);
setLearnedCategory(trainingSet, TCs);
Hashtable<String, Vector<Integer>> ACs = new Hashtable<String, Vector<Integer>>();
// Step 3: builds the vocabulary of each category (published through the
// static field read by the learning engine)
VocabularyReducer.vocabularies = WrapperManager.computesVocabulary(
trainingSet, OlexGAparameters.SCORING_FUNCTION,
OlexGAparameters.POSITIVE_TERMS_SIZE,
OlexGAparameters.LEARNED_CLASS_VALUE_INDEX);
ExperimentConfiguration expConf = new ExperimentConfiguration(
WrapperManager.getTrainingSetDocumentsID(), TCs, ACs);
// stores the learning results
List<Configuration> configurations = null;
// Step 4: run the learning phase
try {
configurations = DTSelectionLoop.loop(expConf, WrapperManager
.getCategories(), OlexGAparameters.FMEASURE,
OlexGAparameters.MAX_CONJ_LENGHT,
OlexGAparameters.INITIALIZATION_TYPE,
OlexGAparameters.SELECTION_ALGORITHM,
OlexGAparameters.NEGATIVE_TERMS_SIZE,
OlexGAparameters.POP_SIZE, OlexGAparameters.GENERATIONS,
OlexGAparameters.ATTEMPTS, OlexGAparameters.XOVER_METHOD,
OlexGAparameters.LEARNED_CLASS_VALUE_INDEX);
} catch (RunExperimentException e) {
e.printStackTrace();
}
// release the vocabularies so the next run starts clean
VocabularyReducer.vocabularies = null;
return configurations;
}
/**
 * Chooses the class to learn and logs the choice.
 * <p>
 * Finds the minority class (fewest training documents) — but
 * NOTE(review): the learned index is hard-coded to 1; the lookup of
 * {@code learningFor}'s actual index is commented out, so the minority
 * class only affects the log message. Confirm this is intentional.
 */
private static void setLearnedCategory(Dataset inst,
Hashtable<String, Vector<Integer>> tCs) {
String learningFor = "";
int min = Integer.MAX_VALUE;
// scan all categories, keeping the one with the fewest documents
for (Iterator<String> it = tCs.keySet().iterator(); it.hasNext(); ) {
String cat = it.next();
if (tCs.get(cat).size() < min) {
learningFor = cat;
min = tCs.get(cat).size();
}
}
OlexGAparameters.LEARNED_CLASS_VALUE_INDEX = 1;//inst.getClassAttribute().valueIndex(learningFor);
System.out.println("Learning for category: INDEX " + OlexGAparameters.LEARNED_CLASS_VALUE_INDEX + " LABEL " + learningFor);
}
/**
 * Converts an interval given by two millisecond timestamps into whole
 * seconds, rounded to the nearest second.
 */
public static long timeConverter(long startTime, long stopTime) {
return Math.round(((double) (stopTime - startTime)) / 1000);
}
/**
 * Returns the index of the other class of a binary problem
 * (asserts that exactly two categories exist).
 */
public static int computesComplementaryIndex(int classForIRStatistics) {
assert (categories.size() == 2);
return (classForIRStatistics == 1 ? 0 : 1);
}
/**
 * Builds the vocabulary (list of scored discriminative terms) of each
 * category.
 *
 * @param inst the training data
 * @param functionType scoring-function identifier forwarded to
 *        {@code SFManager.computesFunctionValue}
 * @param numTerms requested vocabulary size — NOTE(review): not used in
 *        this method body
 * @param classForIRStatistics index of the class treated as "positive"
 * @return a table mapping each category name to its term list
 */
public static Hashtable<String, List<DiscriminativeTerm>> computesVocabulary(
Dataset inst, int functionType, int numTerms,
int classForIRStatistics) {
Hashtable<String, List<DiscriminativeTerm>> vocabularies = new Hashtable<String, List<DiscriminativeTerm>>();
Hashtable<String, List<Integer>> termsDocumentSet = new Hashtable<String, List<Integer>>();
// the class attribute
OlexGA_Attribute classAttribute = inst.getClassAttribute();
// build the term/document statistics and the resulting vocabularies
vocabularies = computesTermsDocuments(inst, functionType,
classAttribute, termsDocumentSet, classForIRStatistics,
computesComplementaryIndex(classForIRStatistics));
return vocabularies;
}
/**
 * Scans every non-class attribute and computes, for each term, the
 * contingency counts with respect to the positive class:
 * A = positive documents containing the term, C = positive documents
 * missing it, B = negative documents containing the term, D = negative
 * documents missing it. Terms with A != 0 are scored via
 * {@code SFManager.computesFunctionValue} and added to the positive-class
 * vocabulary; terms with B != 0 are added (unscored) to the complementary
 * class vocabulary. The positive-class vocabulary is finally sorted with
 * {@code ScoringFunctionTermsComparator}.
 */
private static Hashtable<String, List<DiscriminativeTerm>> computesTermsDocuments(
Dataset dataset, int functionType, OlexGA_Attribute classAttribute,
Hashtable<String, List<Integer>> termsDocumentSet,
int classForIRStatistics, int complementClassForIRStatistics) {
Hashtable<String, List<DiscriminativeTerm>> vocabularies = new Hashtable<String, List<DiscriminativeTerm>>();
// prepare one (initially empty) term list per class value
for (int i = 0; i < classAttribute.numValues(); i++) {
String categoryName = classAttribute.value(i);
vocabularies
.put(categoryName, new LinkedList<DiscriminativeTerm>());
}
for (int i = 0; i < dataset.numAttributes(); i++) {
OlexGA_Attribute at = dataset.getAttribute(i);
if (at.equals(classAttribute))
continue;
// contingency counts for this term (see method javadoc)
int A = 0;
int B = 0;
int C = 0;
int D = 0;
DiscriminativeTerm dt = new DiscriminativeTerm(at.name(), i + 1);
dt.setDocumentSet(new DocumentSet(dataset.numItemsets()));
for (int j = 0; j < dataset.numItemsets(); j++) {
Itemset inst = dataset.itemset(j);
double termAttribute = inst.getValue(i);
int classAttrValue = ((int) inst.getClassValue());
// skip this itemset if the attribute value is missing
if (inst.isMissing(i)) {
continue;
}
// itemset belongs to the positive class
if (classAttrValue == classForIRStatistics) {
// the term occurs in this positive document
if (termAttribute > 0.0) { //was != 0
A++;
dt.getDocumentSet().addElement(j);
} else
C++;
}
// itemset belongs to the complementary (negative) class
else {
// the term occurs in this negative document
if (termAttribute > 0.0) { //WAS !=0
B++;
dt.getDocumentSet().addElement(j);
} else
D++;
}
}
if (A != 0) {
dt.setScoreValue(SFManager.computesFunctionValue(A, B, C, D, (A
+ B + C + D), functionType));
List<DiscriminativeTerm> terms1 = null;
if ((terms1 = vocabularies.get(classAttribute
.value(classForIRStatistics))) != null) {
terms1.add(dt);
} else {
System.err.println("A computation: Empty list of terms");
}
}
if (B != 0) {
List<DiscriminativeTerm> terms1 = null;
if ((terms1 = vocabularies.get(classAttribute
.value(complementClassForIRStatistics))) != null) {
terms1.add(dt);
} else {
System.err.println("B computation: Empty list of terms");
}
}
}
// sort the positive-class vocabulary by scoring-function value
List<DiscriminativeTerm> terms = vocabularies.get(classAttribute
.value(classForIRStatistics));
Collections.sort(terms, new ScoringFunctionTermsComparator());
return vocabularies;
}
/**
 * Validates a single itemset against a learned configuration.
 * <p>
 * The positive/negative terms of {@code conf} are cached per category in
 * {@code catPosTerms}/{@code catNegTerms} for fast repeated lookups.
 *
 * @param inst the itemset to classify
 * @param conf the learned rule (positive and negative term conjunctions)
 * @param className name of the category being checked
 * @return 1 if the itemset contains at least one positive term and no
 *         negative term, 0 otherwise
 */
public static double doValidation(Itemset inst, Configuration conf,
String className) {
double belongs = 0;
if ((catPosTerms == null) && (catNegTerms == null)) {
catPosTerms = new HashMap<String, HashMap<String, Boolean>>();
catNegTerms = new HashMap<String, HashMap<String, Boolean>>();
}
HashMap<String, Boolean> pTerms = catPosTerms.get(className);
// only for fast access
if (pTerms == null) {
pTerms = buildsTemporaryDTermsHashMap(conf.getPositiveTerms());
catPosTerms.put(className, pTerms);
}
HashMap<String, Boolean> nTerms = catNegTerms.get(className);
// only for fast access
if (nTerms == null) {
nTerms = buildsTemporaryDTermsHashMap(conf.getNegativeTerms());
catNegTerms.put(className, nTerms);
}
int i = 0;
for (; i < inst.values.length; i++) {
OlexGA_Attribute at = inst.getAttribute(i);
// the attribute occurs in the current itemset
if (inst.getValue(i) != 0.0) {
// the attribute's name
String attributeName = at.name();
// check whether the attribute is a positive term
if (pTerms.get(attributeName) != null)
// the current itemset contains AT LEAST one positive term
break;
}
}
// the current itemset contains no positive term at all,
// so it cannot be classified as belonging to the category
if (i == inst.values.length) {
return belongs;
}
// the current itemset contains at least one positive term
int j = 0;
for (; j < inst.values.length; j++) {
OlexGA_Attribute at = inst.getAttribute(j);
// check that the attribute occurs in the itemset under examination
if (inst.getValue(j) != 0.0) {
String attributeName = at.name();
// the current attribute is a negative term
if (nTerms.get(attributeName) != null) {
break;
}
}
}
// all attributes of the itemset were scanned
// and none of them is a negative term
if (j == inst.values.length) {
belongs = 1;
}
return belongs;
}
/**
 * Flattens the terms of the given conjunctions into a name -> true map
 * for O(1) membership tests; quote characters are stripped from term
 * names.
 */
private static HashMap<String, Boolean> buildsTemporaryDTermsHashMap(
Vector<TermConjunction> posTerms) {
HashMap<String, Boolean> pTerms = new HashMap<String, Boolean>();
Iterator<TermConjunction> posTIter = posTerms.iterator();
while (posTIter.hasNext()) {
TermConjunction tc = posTIter.next();
Set<DiscriminativeTerm> discrTerm = tc.getTerms();
Iterator<DiscriminativeTerm> discrTermIterator = discrTerm
.iterator();
while (discrTermIterator.hasNext()) {
DiscriminativeTerm dt = discrTermIterator.next();
String clearDTName = dt.getTermValue().replace("\"", "");
pTerms.put(clearDTName, true);
}
}
return pTerms;
}
}