/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
package keel.Algorithms.Rule_Learning.AQ;
import java.util.*;
import org.core.*;
import java.io.IOException;
/**
* <p>Title: Main class of the algorithm</p>
* <p>Description: It contains the essential methods for the AQ algorithm</p>
* <p>Created: November 26th 2004</p>
* <p>Copyright: Copyright (c) 2004</p>
* <p>Company: KEEL</p>
* @author Alberto Fernández (University of Granada) 11/26/2004
* @since JDK1.5
* @version 1.6
*/
public class AQ {

    // Names of the I/O files
    private String outputFile;
    private String outputFileTr;
    private String outputFileTst;

    private int nClasses, nClassesTr; // Maximum number of classes (overall / declared by the training set)
    private Complex selectors; // It stores all possible selectors
    private int starSize; // Maximum size of the star (beam search)
    private ruleSet finalRuleSet; // Final rule set
    private evaluateRuleQuality evReg; // To evaluate the performance of the rules

    // Data-sets
    private myDataset trainData;
    private myDataset testData;
    private myDataset evalData;

    private long seed;
    private String miSalida; // To store the data-set header

    // Number of stored instances per class, indexed by class value.
    // NOTE(review): class values are assumed to be 0-based indices; the rest of
    // this class already relies on that (classNames[clas], counts[getClas()]).
    private int instancesClassTrain[];
    private int instancesClassEval[];
    private int instancesClassTest[];

    private int accuracy; // 1 -> disjunctive ("=" over several values) selectors are also generated
    private String[] attributeNames;
    private String[] classNames;
    private boolean continousValues = false;
    private boolean problem = false;

    /**
     * It checks if some of the preconditions are not satisfied: there is any continuous value or
     * there was a problem while reading the data files
     * @return boolean true if the algorithm can run normally, false in other case
     */
    public boolean everythingOK() {
        return ((!continousValues) && (!problem));
    }

    /**
     * Default builder
     */
    public AQ() {
    }

    /**
     * AQ class builder<br/>
     * It does a local copy of the filenames for their posterior use.<br/>
     * Then, it obtains all data from file and stores it in a format recognizable for the program.<br/>
     * If the data could not be read, or the training/validation data contain continuous
     * attributes, the object is left uninitialised and {@link #everythingOK()} returns false.
     * @param ftrain Name of the input training file
     * @param feval Name of the input validation file
     * @param ftest Name of the input test file
     * @param foutputTr Name of the output training file
     * @param foutputTst Name of the output test file
     * @param fsal Name of the output information file
     * @param seed Seed for the random number generator
     * @param starSize It is the maximum size for the star in the search process (beam search)
     * @param _accuracy When it is 1, disjunctive selectors (several values for one attribute) are also built
     */
    public AQ(String ftrain, String feval, String ftest, String foutputTr,
              String foutputTst,
              String fsal, long seed, int starSize, int _accuracy) {
        outputFile = fsal;
        outputFileTr = foutputTr;
        outputFileTst = foutputTst;
        this.seed = seed;
        accuracy = _accuracy;

        // We obtain the data
        Dataset train = new Dataset();
        Dataset eval = new Dataset();
        Dataset test = new Dataset();
        try {
            train.readSet(ftrain, true);
            flagContinuousAttributes(train);
            eval.readSet(feval, false);
            flagContinuousAttributes(eval);
            test.readSet(ftest, false);
        } catch (IOException e) {
            System.err.println(
                    "There was a problem while trying to read the dataset files:");
            System.err.println("-> " + e);
            problem = true;
        }
        if (!this.everythingOK()) {
            return; // the caller is expected to check everythingOK() before execute()
        }

        miSalida = test.copiaCabeceraTest();
        train.calculaMasComunes();
        test.calculaMasComunes();
        System.out.println("\nGenerating datasets");
        trainData = creaConjunto(train); // We read the training data
        evalData = creaConjunto(eval);
        testData = creaConjunto(test); // Idem TEST
        this.starSize = starSize;

        nClassesTr = train.getnClasses();
        nClasses = nClassesTr;
        if (test.getnClasses() > nClassesTr) {
            nClasses = test.getnClasses();
        }

        // The counts are taken from the instances that were actually stored
        // (creaConjunto drops the rows with missing values) and are indexed by
        // class value, which is how the rest of the class reads them.
        instancesClassTrain = countInstancesPerClass(trainData, nClasses);
        instancesClassEval = countInstancesPerClass(evalData, nClasses);
        instancesClassTest = countInstancesPerClass(testData, nClasses);

        attributeNames = train.dameNombres();
        classNames = train.dameClases();
        // The null test must come first: the length cannot be read from a null array.
        if ((classNames == null) || (classNames.length < nClasses)) {
            classNames = test.dameClases();
        }
        if (classNames == null) {
            // No class labels available: the class index is used as its name
            classNames = new String[nClasses];
            for (int i = 0; i < nClasses; i++) {
                classNames[i] = "" + i;
            }
        }
    }

    /**
     * It reports and records that a data-set contains continuous attributes, which AQ cannot handle
     * @param data A data-set already read from file
     */
    private void flagContinuousAttributes(Dataset data) {
        if (data.hayAtributosContinuos()) {
            System.err.println(
                    "AQ may not handle continuous attributes.\nPlease discretize the data-set");
            continousValues = true;
        }
    }

    /**
     * It counts how many instances of a data-set belong to each class
     * @param data The data-set whose instances are counted
     * @param nClasses Number of classes (size of the result)
     * @return An array where position "c" holds the number of instances whose class value is "c".
     * Class values outside [0, nClasses) are ignored.
     */
    private static int[] countInstancesPerClass(myDataset data, int nClasses) {
        int[] counts = new int[nClasses];
        for (int i = 0; i < data.size(); i++) {
            int clas = data.getData(i).getClas();
            if ((clas >= 0) && (clas < nClasses)) {
                counts[clas]++;
            }
        }
        return counts;
    }

    /**
     * It creates a dataset (attributes/class) according to those obtained from a data-file.
     * The examples with any missing value are discarded.
     * @param myData It must be a dataset read from file
     * @return The new dataset created, that is, a list of "Instance" objects
     */
    private myDataset creaConjunto(Dataset myData) {
        myDataset datos = new myDataset();
        int tam = myData.getnentradas();
        double[][] X = myData.getX();
        int[] C = myData.getC();
        for (int i = 0; i < myData.getndatos(); i++) {
            // A fresh array per example: whether Instance copies its input cannot be
            // seen from here, so the buffer is never shared between instances.
            double[] vars = new double[tam];
            boolean salir = false;
            for (int j = 0; (j < tam) && (!salir); j++) {
                if (myData.isMissing(i, j)) {
                    salir = true;
                } else {
                    vars[j] = X[i][j];
                }
            }
            if (!salir) {
                Instance m = new Instance(vars, C[i], tam);
                m.setPosFile(i); // position of the example in the original file
                datos.addData(m);
            }
        }
        return datos;
    }

    /**
     * We execute here the AQ algorithm and we create the necessary output data
     */
    public void execute() {
        Randomize.setSeed(seed);
        algorithmAQ();
        evReg = new evaluateRuleQuality(finalRuleSet, evalData, testData,
                                        instancesClassEval, instancesClassTest,
                                        classNames); // We evaluate the quality of the rules ...
        generaSalida(); // We write the output files
    }

    /**
     * Main process of the AQ algorithm: it learns a cover for every class that has training
     * examples and joins all the (non repeated) rules in the final rule set
     */
    private void algorithmAQ() {
        makeSelectors();
        finalRuleSet = new ruleSet();
        finalRuleSet.addClassNames(classNames);
        // The name stored after the last input attribute is the one of the class attribute
        finalRuleSet.addClassName(attributeNames[trainData.getData(0).
                                  getNattributes()]);
        System.out.println("\nExecuting AQ: #" + nClassesTr + " Classes");
        for (int i = 0; i < nClasses; i++) {
            if (instancesClassTrain[i] > 0) {
                myDataset auxTrain = trainData.copyDataSet(); // back up
                ruleSet reg = AQforOneClass(i, auxTrain); // It computes the rules that define this class
                for (int j = 0; j < reg.size(); j++) {
                    // We skip the rules that are already in the final rule set
                    boolean seguir = true;
                    for (int k = 0; (k < finalRuleSet.size()) && (seguir); k++) {
                        seguir = !(reg.getRule(j).same(finalRuleSet.getRule(k)));
                    }
                    if (seguir) {
                        finalRuleSet.addRule(reg.getRule(j));
                    }
                }
            }
        }
        finalRuleSet.print();
    }

    /**
     * It obtains the rule set for one class by means of the AQ algorithm
     * @param clas class value (0,1,2...)
     * @param auxTrain A "copy" of the original training set
     * @return The rule set that defines class "clas" (ONE COVER -> OR of complexes)
     */
    private ruleSet AQforOneClass(int clas, myDataset auxTrain) {
        ruleSet reg = new ruleSet();
        System.out.println("\nExtracting rules for class " + classNames[clas]);
        myDataset positives = new myDataset();
        myDataset negatives = new myDataset();
        for (int i = 0; i < auxTrain.size(); i++) {
            Instance m = auxTrain.getData(i);
            if (m.getClas() == clas) { // If it is from the same class -> positive
                positives.addData(m);
            } else {
                negatives.addData(m); // it is from other class -> negative
            }
        }
        System.out.println("\n***** #POSITIVE instances: " + positives.size());
        while (positives.size() > 0) { // While there is any positive example not covered by any rule
            Complex c = STAR(positives, negatives); // It computes the star and returns the best complex
            c.setClass(clas);
            c.addAttributeNames(attributeNames);
            reg.addRule(c);
            // We delete the positive examples covered by the best complex. The index only
            // advances when nothing is deleted, because deleting shifts the next example
            // into the current position.
            for (int i = 0; i < positives.size(); ) {
                if (c.covered(positives.getData(i))) {
                    positives.deleteData(i);
                } else {
                    i++;
                }
            }
        }
        return reg;
    }

    /**
     * It generates and searches for the best condition:<br/>
     * <ol>
     * <li> It selects (randomly) the seed among the positive examples</li>
     * <li> It generates a star that covers the seed and none of the negative examples</li>
     * <li> It selects the best complex of the star according to the order given by the weights</li></ol>
     * @param ejpositives Positive examples set
     * @param ejnegatives Negative examples set (the examples equal to the seed are removed from it)
     * @return the best complex of the star
     */
    private Complex STAR(myDataset ejpositives, myDataset ejnegatives) {
        ruleSet star = new ruleSet(); // The star is a rule set
        Instance seedExample = ejpositives.getData(Randomize.RandintClosed(0,
                ejpositives.size() - 1));

        /** First solution -> Star with 1 selector complexes **/
        Instance ejNegativo = cercano(seedExample, ejnegatives, star); // negative example nearest to the seed
        if (ejNegativo == null) {
            // No negative example left: the most specific complex is returned
            return makeComplex(seedExample);
        }
        star = extension(ejNegativo, seedExample); // selectors that cover the seed but not ejNegativo
        calculameValorComplex(star, ejnegatives, ejpositives);
        Collections.sort(star.getruleSet()); // we sort according to weight
        Complex mejorCompl = star.getRule(0).copyRule();

        /** New star: **/
        boolean negatives = evaluateStar(mejorCompl, ejnegatives);
        while (negatives) { // While the best complex covers any negative example
            ejNegativo = cercano(seedExample, ejnegatives, star);
            if (ejNegativo == null) {
                return star.getRule(0).copyRule();
            }
            // We specialise the complexes of the star so that they exclude the negative example
            ruleSet ext = extension(ejNegativo, seedExample);
            star = conjuncion(ext, star); // x ^ y -> (x \in STAR, y \in EXT)
            calculameValorComplex(star, ejnegatives, ejpositives);
            Collections.sort(star.getruleSet());
            eliminaPeores(star);
            if (star.size() > 0) {
                if ((evaluateStar(mejorCompl, ejnegatives)) ||
                    (mejorCompl.getWeight() > star.getRule(0).getWeight())) {
                    mejorCompl = star.getRule(0).copyRule();
                }
                negatives = evaluateStar(mejorCompl, ejnegatives);
            } else {
                negatives = false; // No other star can be created (we must exit)
            }
        }
        return mejorCompl;
    }

    /**
     * This function generates the output: the rule set plus its quality measures in
     * "outputFile", and the classification results in "outputFileTr" and "outputFileTst"
     */
    private void generaSalida() {
        Files f = new Files();
        String cad = "";
        // One call is enough: printString() already describes the whole rule set
        if (finalRuleSet.size() > 0) {
            cad = finalRuleSet.printString();
        }
        cad += "\n\n" + evReg.printString();
        f.writeFile(outputFile, cad);
        f.writeFile(outputFileTr, miSalida + evReg.salida(evalData));
        f.writeFile(outputFileTst, miSalida + evReg.salida(testData));
    }

    /**
     * It selects the negative example closest to the positive example. The distance is the
     * absolute difference between the mean attribute values of both examples.<br/>
     * When the star is not empty, only the negative examples covered by some complex of the
     * star are considered. A considered negative example that is equal to the seed is treated
     * as noise: it is deleted from "datos" and the counter of its class is decreased.
     * @param example The positive example "seed"
     * @param datos The data-set (containing only negative examples); it may be modified
     * @param star The complexes star computed at this moment
     * @return The closest negative example to the seed, or null if there is no candidate
     */
    private Instance cercano(Instance example, myDataset datos,
                             ruleSet star) {
        boolean filterByStar = star.size() > 0;
        double valorEj = meanAttributeValue(example);
        double total = Double.MAX_VALUE;
        Instance nearest = null;

        int i = 0;
        while (i < datos.size()) {
            Instance negativo = datos.getData(i);
            if (filterByStar && !coveredByStar(star, negativo)) {
                i++;
                continue;
            }
            if (example.compare(negativo)) {
                // Noise: same description as the seed but a different class.
                // The index is not advanced, since the next example now occupies position i.
                datos.deleteData(i);
                int clas = negativo.getClas();
                if ((clas >= 0) && (clas < instancesClassTrain.length)) {
                    instancesClassTrain[clas]--;
                }
                continue;
            }
            double dist = Math.abs(valorEj - meanAttributeValue(negativo));
            if (dist < total) { // strict comparison: the first of the nearest ones is kept
                total = dist;
                nearest = negativo;
            }
            i++;
        }
        return nearest;
    }

    /**
     * It computes the mean of the attribute values of an example
     * @param inst The example
     * @return Sum of the values divided by the number of attributes
     */
    private static double meanAttributeValue(Instance inst) {
        double sum = 0;
        for (int j = 0; j < inst.getMuest().length; j++) {
            sum += inst.getMuest()[j];
        }
        return sum / inst.getNattributes();
    }

    /**
     * It checks if an example is covered by any complex of the star
     * @param star Rule set "star"
     * @param inst The example
     * @return True if at least one complex of the star covers the example
     */
    private static boolean coveredByStar(ruleSet star, Instance inst) {
        for (int k = 0; k < star.size(); k++) {
            if (star.getRule(k).covered(inst)) {
                return true;
            }
        }
        return false;
    }

    /**
     * It computes the weight of each complex of the star as the number of positive
     * examples covered plus the number of negative examples excluded:<br/>
     * Weight = covered positives + (total negatives - covered negatives)
     * @param star Rule set "star"
     * @param negatives Negative examples set
     * @param positives Positive examples set
     */
    private void calculameValorComplex(ruleSet star,
                                       myDataset negatives,
                                       myDataset positives) {
        for (int i = 0; i < star.size(); i++) {
            int pos = evaluaComplex(star.getRule(i), positives);
            int negs = evaluaComplex(star.getRule(i), negatives);
            int excl = negatives.size() - negs; // Excluded = total - covered
            double peso = pos + excl;
            star.getRule(i).setWeight(peso);
        }
    }

    /**
     * It evaluates a complex to check if it covers any example of a set
     * @param c Complex to evaluate
     * @param e Examples set (usually the negative examples)
     * @return True if it covers any example of "e"; false in other case.
     */
    private boolean evaluateStar(Complex c, myDataset e) {
        for (int i = 0; i < e.size(); i++) {
            if (c.covered(e.getData(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * It removes the last rules of the star until it has at most "starSize" rules [beam search]
     * @param star Rule set "star" (already sorted)
     */
    private void eliminaPeores(ruleSet star) {
        for (int i = star.size() - 1; i >= starSize; i--) {
            star.deleteRule(i);
        }
    }

    /**
     * It counts the examples of a set that are covered by a complex. The class of every
     * covered example is also added to the class distribution of the complex.
     * @param c Selected complex
     * @param e Examples set
     * @return Number of examples from "e" that are covered by "c"
     */
    private int evaluaComplex(Complex c, myDataset e) {
        int contador = 0;
        for (int i = 0; i < e.size(); i++) {
            if (c.covered(e.getData(i))) {
                contador++;
                c.addClassDistribution(e.getData(i).getClas());
            }
        }
        return contador;
    }

    /**
     * It computes a complexes set called "extension", that covers the seed but not the negative example
     * @param negative The negative example that is closer to the seed
     * @param seed The selected positive example
     * @return A set of one-selector complexes that cover "seed" but not "negative"
     */
    private ruleSet extension(Instance negative, Instance seed) {
        ruleSet ext = new ruleSet();
        for (int i = 0; i < selectors.size(); i++) {
            Complex c = new Complex(selectors.getSelector(i), nClasses);
            c.setClass(seed.getClas());
            if ((!c.covered(negative)) && (c.covered(seed))) {
                ext.addRule(c);
            }
        }
        return ext;
    }

    /**
     * It does the conjunction between the extension and the star (an AND of each pair of complexes).
     * A pair is discarded when the complex of the star already has a selector "s2" for which
     * s2.compareTo(selector of the extension) is lower than 2.
     * @param ext Extension set (complexes with only one selector)
     * @param star Rule set star
     * @return star AND extension; "ext" itself when the star is empty
     */
    private ruleSet conjuncion(ruleSet ext, ruleSet star) {
        if (star.size() == 0) { // Initial case
            return ext;
        }
        ruleSet starAux = new ruleSet();
        if (ext.size() == 0) {
            return starAux; // nothing to combine with
        }
        int clase = ext.getRule(0).getClas();
        for (int i = 0; i < ext.size(); i++) {
            Selector s = ext.getRule(i).getSelector(0); // extension has only one selector!
            for (int j = 0; j < star.size(); j++) {
                Complex original = star.getRule(j);
                Complex aux = new Complex(nClasses);
                boolean sigue = true;
                for (int h = 0; (h < original.size()) && (sigue); h++) {
                    Selector s2 = original.getSelector(h);
                    aux.addSelector(s2);
                    // NOTE(review): a result below 2 presumably means "same attribute" - see Selector.compareTo
                    if (s2.compareTo(s) < 2) {
                        sigue = false; // not added
                    }
                }
                if (sigue) {
                    aux.addSelector(s);
                    aux.setClass(clase);
                    starAux.addRule(aux);
                }
            }
        }
        return starAux;
    }

    /**
     * To remove every complex subsumed by other, when the star is bigger than "starSize".<br/>
     * It is currently not called by the algorithm.
     * @param star The star
     */
    private void eliminaSubsumidos(ruleSet star) {
        if (star.size() > starSize) {
            star.deleteSubsumed(starSize);
        }
    }

    /**
     * It builds the total set of selectors to obtain all possible rules: for every attribute
     * and every different value found in the training set, one selector per operator code
     * (0 to 3). If "accuracy" is 1, selectors with operator 0 over several values
     * (disjunctions) are also built.
     */
    private void makeSelectors() {
        int totalAtributos = trainData.getData(0).getNattributes();
        int examples = trainData.size();

        // lista[k][a] = k-th different value found for attribute "a" (in order of appearance).
        // Each column is terminated by an infinite value that works as end mark.
        double[][] lista = new double[examples + 1][totalAtributos];
        for (int a = 0; a < totalAtributos; a++) {
            lista[0][a] = trainData.getData(0).getMuest()[a];
            lista[1][a] = Double.POSITIVE_INFINITY; // end mark
        }
        for (int a = 0; a < totalAtributos; a++) {
            for (int j = 1; j < examples; j++) {
                double valor = trainData.getData(j).getMuest()[a];
                int k = 0;
                while (!(Double.isInfinite(lista[k][a])) && (lista[k][a] != valor)) {
                    k++;
                }
                if (Double.isInfinite(lista[k][a])) { // new value: it is appended
                    lista[k][a] = valor;
                    lista[k + 1][a] = Double.POSITIVE_INFINITY;
                }
            }
        }

        selectors = new Complex(nClasses);
        for (int a = 0; a < totalAtributos; a++) {
            for (int h = 0; h < examples; h++) {
                if (Double.isInfinite(lista[h][a])) {
                    break;
                }
                // One selector per operator code. Code 0 is the one used for equality in this
                // class; NOTE(review): the other three are presumably <>, <= and > - see Selector.
                for (int op = 0; op < 4; op++) {
                    selectors.addSelector(new Selector(a, op, lista[h][a]));
                }
            }
        }

        // Operator = (disjunct values)
        if (accuracy == 1) {
            for (int a = 0; a < totalAtributos; a++) {
                int total; // number of different values of the attribute
                for (total = 0; !(Double.isInfinite(lista[total][a])); total++) {
                    ;
                }
                List<double[]> pending = new ArrayList<double[]>();
                List<double[]> lastLevel = new ArrayList<double[]>();

                // Disjunctions of two values
                for (int j = 0; j < total - 1; j++) {
                    for (int k = j + 1; k < total; k++) {
                        double[] valores = new double[2];
                        valores[0] = lista[j][a];
                        valores[1] = lista[k][a];
                        lastLevel.add(valores);
                        selectors.addSelector(new Selector(a, 0, valores));
                    }
                }

                // Disjunctions of "l" values: two disjunctions of (l - 1) values that share
                // their first (l - 2) values are joined. The scan for one disjunction stops
                // at the first one whose prefix differs.
                for (int l = 3; l < total - 2; l++) {
                    pending.addAll(lastLevel);
                    lastLevel.clear();
                    while (!pending.isEmpty()) {
                        boolean salir = false;
                        double[] first = pending.remove(0);
                        for (int j = 0; (j < pending.size()) && (!salir); j++) {
                            double[] other = pending.get(j);
                            for (int k = 0; (k < first.length - 1) && (!salir);
                                         k++) {
                                salir = !(first[k] == other[k]);
                            }
                            if (!salir) {
                                double[] valores = new double[l];
                                for (int k = 0; k < l - 1; k++) {
                                    valores[k] = first[k];
                                }
                                valores[l - 1] = other[l - 2];
                                lastLevel.add(valores);
                                selectors.addSelector(new Selector(a, 0, valores));
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * It builds the specific complex for an example: one selector with operator 0
     * per attribute, using the value of the example
     * @param example Input example
     * @return The most specific complex that covers the example, labelled with its class
     */
    private Complex makeComplex(Instance example) {
        Selector s = new Selector(0, 0, example.getAttribute(0));
        Complex complex = new Complex(s, nClasses);
        for (int i = 1; i < example.getNattributes(); i++) {
            s = new Selector(i, 0, example.getAttribute(i));
            complex.addSelector(s);
        }
        complex.setClass(example.getClas());
        return complex;
    }
}