/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.UnsupervisedLearning.AssociationRules.IntervalRuleLearning.Alatasetal; /** * <p> * @author Written by Nicol� Flugy Pap� (Politecnico di Milano) 24/03/2009 * @author Modified by Diana Mart�n (dmartin@ceis.cujae.edu.cu) * @version 1.0 * @since JDK1.6 * </p> */ import java.io.PrintWriter; import java.math.BigDecimal; import java.util.*; import org.core.Randomize; public class AlatasetalProcess { /** * <p> * It provides the implementation of the algorithm to be run in a process * </p> */ private final int ATTRIBUTE_NOT_COVERED = -1; private final int ATTRIBUTE_COVERED_BY_ANTECEDENT = 0; private final int ATTRIBUTE_COVERED_BY_CONSEQUENT = 1; private final int ATTRIBUTE_COVERED_BY_BOTH = 2; private myDataset dataset; private int nTrials, trials; private int randomChromosomes; private int r; private int tournamentSize; private double pc; private double pmMin; private double pmMax; private double a1; private double a2; private double a3; private double a4; private double a5; private double af; private int uPopSize; private int nAttr; private int nTrans; private double[] maxAmplitudes; private double minFitnessValue; private ArrayList<Chromosome> uPop; /** * <p> * It creates a new process for the algorithm by setting up its parameters * </p> * @param dataset The instance of the dataset for dealing with its records * @param nGen The maximum number of generations to reach before completing the whole evolutionary learning * @param randomChromosomes The number of initial random chromosomes * @param r The number of parts in which each random chromosome is divided to generate the others by doing inversions * @param tournamentSize The size of tournament to select the fittest chromosome in the current population * @param pc The probability for the crossover operator * @param pmMin The minimum probability for the adaptive mutation operator * @param pmMax The maximum probability for the adaptive mutation operator * @param a1 The factor determining the importance of the rules support * @param a2 The factor determining the importance of the rules confidence * @param a3 The factor determining the importance of the number of involved attributes * @param a4 The factor determining the importance of the amplitude of intervals * @param a5 The factor determining the importance of the number of rules already covered * @param af The factor of amplitude for each of the dataset attribute */ public AlatasetalProcess(myDataset dataset, int nTrials, int randomChromosomes, int r, int tournamentSize, double pc, double pmMin, double pmMax, double a1, double a2, double a3, double a4, double a5, double af) { int i; double sum_max_amp = 0.0; this.dataset = dataset; this.nTrials = nTrials; this.randomChromosomes = randomChromosomes; this.r = r; this.tournamentSize = tournamentSize; this.pc = pc; this.pmMin = pmMin; this.pmMax = pmMax; this.a1 = a1; this.a2 = a2; this.a3 = a3; this.a4 = a4; this.a5 = a5; this.af = af; this.uPopSize = (int)Math.pow(2, this.r) * this.randomChromosomes; this.nAttr = this.dataset.getnVars(); this.nTrans = this.dataset.getnTrans(); this.maxAmplitudes = new double[this.nAttr]; for (i=0; i < this.maxAmplitudes.length; i++) { this.maxAmplitudes[i] = this.dataset.getMax(i) - this.dataset.getMin(i); sum_max_amp += this.maxAmplitudes[i]; } this.minFitnessValue = -(this.nAttr + sum_max_amp + 100.0); } /** * <p> * It runs the evolutionary learning for mining association rules * </p> */ public void run() { int i, j, nGn = 0; this.trials = 0; Chromosome c1, c2; System.out.print("Initializing Uniform Population... "); this.uPop = this.initializeUniformPopulation(); this.evaluate(this.uPop, 0, this.uPop.size()); Collections.sort(this.uPop); System.out.print("done.\n"); while (this.trials < this.nTrials) { System.out.print("Computing Generation " + (nGn + 1) + "... "); while (this.uPop.size() < (this.uPopSize * 2)) { if (this.uPop.size() != this.uPopSize) { if (Randomize.Rand() < 0.5) this.crossover(this.uPop); else this.mutate(this.uPop); } else this.uniformOperator(this.uPop); } this.evaluate(this.uPop, this.uPopSize, this.uPop.size()); this.computeAdjustedFitness(this.uPop); for (i=0; i < this.uPop.size(); i++) { c1 = this.uPop.get(i); for (j=this.uPop.size()-1; j > i+1; j--) { c2 = this.uPop.get(j); if (c1.equals(c2)) this.uPop.remove(j); } } Collections.sort(this.uPop); while (this.uPop.size() > this.uPopSize) this.uPop.remove(this.uPopSize); nGn++; System.out.print("done.\n"); } this.adjustIntervals(this.uPop); this.removeRedundant(this.uPop); } public void removeRedundant (ArrayList<Chromosome> upop) { int i, j; boolean stop; Chromosome chromo1, chromo2; // Collections.sort(upop); this.sortByAmplitude(upop); for (i=0; i < upop.size(); i++) { stop = false; for (j = upop.size()-1; j >=0 && !stop; j--) { if (j != i) { chromo1 = upop.get(i); chromo2 = upop.get(j); if (chromo1.getnAnts() == chromo2.getnAnts()) { if (chromo1.isSubChromo(chromo2)) { upop.remove(j); if (j < i) i--; } } else if (chromo1.getnAnts() > chromo2.getnAnts()) stop = true; } } } } public void sortByAmplitude( ArrayList<Chromosome> pop) { for ( int i = 0; i < pop.size()-1; i++ ) { for (int j = i+1; j < pop.size(); j++) { if(pop.get(j).getnAnts() < pop.get(i).getnAnts()) { Chromosome temp = pop.get( i ); pop.set(i, pop.get(j)); pop.set(j, temp); } } } } private boolean equalChromotoPop(Chromosome chromo_a, ArrayList<Chromosome> pop){ boolean equal_chr = false; int i=0; while((!equal_chr)&&(i<pop.size())){ if(chromo_a.equals(pop.get(i))) equal_chr = true; i++; } return equal_chr; } /** * <p> * It constructs a rules set once the whole evolutionary learning has been carried out. * From the last population it filters those chromosomes which satisfy both confidence and support thresholds * </p> * @param minConfidence The user-specified minimum confidence for the mined association rules * @param minSupport The user-specified minimum support for the mined association rules * @return An array of association rules having both minimum confidence and support */ public ArrayList<AssociationRule> generateRulesSet(double minSupport) { int r; Chromosome chr; ArrayList<AssociationRule> rules = new ArrayList<AssociationRule>(); ArrayList<Chromosome> chrFinal = new ArrayList<Chromosome>(); for (r=0; r < this.uPop.size(); r++) { chr = this.uPop.get(r); if (chr.getRuleSupport() >= minSupport) { rules.add( new AssociationRule(chr) ); chrFinal.add(chr); } } return rules; } /** * <p> * It prints out on screen relevant information regarding the mined association rules * </p> * @param rules The array of association rules from which gathering relevant information */ public void printReport(ArrayList<AssociationRule> rules) { int i, r, t, cnt_cov_rec = 0; double avg_yulesQ = 0.0, avg_sup = 0.0, avg_conf = 0.0, avg_ant_length = 0.0, avg_lift = 0.0,avg_conv = 0.0, avg_CF = 0.0, avg_netConf = 0.0; boolean[] cov_rec; ArrayList<Integer> cov_tids; cov_rec = new boolean[this.nTrans]; for (i=0; i < cov_rec.length; i++) cov_rec[i] = false; AssociationRule ar; for (r=0; r < rules.size(); r++) { ar = rules.get(r); avg_sup += ar.getSupport(); avg_conf += ar.getConfidence(); avg_ant_length += ar.getIdOfAntecedents().size()+ ar.getIdOfConsequents().size(); avg_lift += ar.getLift(); avg_conv += ar.getConv(); avg_CF += ar.getCF(); avg_netConf += ar.getnetConf(); avg_yulesQ += ar.getyulesQ(); cov_tids = ar.getCoveredTIDs(); for (i=0; i < cov_tids.size(); i++) { t = cov_tids.get(i); if (! cov_rec[t]) { cov_rec[t] = true; cnt_cov_rec++; } } } System.out.println("\nNumber of Frequent Itemsets found: " + "-"); System.out.println("\nNumber of Association Rules generated: " + rules.size()); if (! rules.isEmpty()) { System.out.println("Average Support: " + roundDouble(( avg_sup / rules.size() ),2)); System.out.println("Average Confidence: " + roundDouble(( avg_conf / rules.size() ),2)); System.out.println("Average Lift: " + roundDouble(( avg_lift / rules.size() ),2)); System.out.println("Average Conviction: " + roundDouble(( avg_conv/ rules.size() ),2)); System.out.println("Average Certain Factor: " + roundDouble(( avg_CF/ rules.size()),2)); System.out.println("Average Netconf: " + roundDouble(( avg_netConf/ rules.size()),2)); System.out.println("Average YulesQ: " + roundDouble(( avg_yulesQ/ rules.size()),2)); System.out.println("Average Number of Antecedents: " + roundDouble(( avg_ant_length / rules.size() ),2)); System.out.println("Number of Covered Records (%): " + roundDouble(( (100.0 * cnt_cov_rec) / this.nTrans ), 2)); } } public static double roundDouble(double number, int decimalPlace){ double numberRound; if(!Double.isInfinite(number)&&(!Double.isNaN(number))){ BigDecimal bd = new BigDecimal(number); bd = bd.setScale(decimalPlace, BigDecimal.ROUND_UP); numberRound = bd.doubleValue(); return numberRound; }else return number; } public String printRules(ArrayList<AssociationRule> rules) { int i, lenghtrule; boolean stop; String rulesList; stop = false; rulesList = ""; rulesList += ("Support\tantecedent_support\tconsequent_support\tConfidence\tLift\tConv\tCF\tNetConf\tYulesQ\tnAttributes\n"); for (i=0; i < rules.size() && !stop; i++) { lenghtrule = rules.get(i).getAntecedents().length+ rules.get(i).getConsequents().length; rulesList += ("" + roundDouble(rules.get(i).getSupport(),2) + "\t" + roundDouble(rules.get(i).getAntecedentSupport(),2) + "\t" + roundDouble(rules.get(i).getConsequentSupport(),2) + "\t" + roundDouble(rules.get(i).getConfidence(),2) + "\t" + roundDouble(rules.get(i).getLift(),2) + "\t" + roundDouble(rules.get(i).getConv(),2) + "\t" + roundDouble(rules.get(i).getCF(),2) + "\t" + roundDouble(rules.get(i).getnetConf(),2) + "\t" + roundDouble(rules.get(i).getyulesQ(),2) + "\t" + lenghtrule + "\n"); } return rulesList; } public void saveReport(ArrayList<AssociationRule> rules,PrintWriter w) { int i, r, t, cnt_cov_rec = 0; double avg_yulesQ = 0.0, avg_sup = 0.0, avg_conf = 0.0, avg_ant_length = 0.0, avg_lift = 0.0,avg_conv = 0.0, avg_CF = 0.0, avg_netConf = 0.0; boolean[] cov_rec; ArrayList<Integer> cov_tids; cov_rec = new boolean[this.nTrans]; for (i=0; i < cov_rec.length; i++) cov_rec[i] = false; AssociationRule ar; for (r=0; r < rules.size(); r++) { ar = rules.get(r); avg_sup += ar.getSupport(); avg_conf += ar.getConfidence(); avg_ant_length += ar.getIdOfAntecedents().size()+ ar.getIdOfConsequents().size(); avg_lift += ar.getLift(); avg_conv += ar.getConv(); avg_CF += ar.getCF(); avg_netConf += ar.getnetConf(); avg_yulesQ += ar.getyulesQ(); cov_tids = ar.getCoveredTIDs(); for (i=0; i < cov_tids.size(); i++) { t = cov_tids.get(i); if (! cov_rec[t]) { cov_rec[t] = true; cnt_cov_rec++; } } } w.println("\nNumber of Frequent Itemsets found: " + "-"); System.out.println("\nNumber of Frequent Itemsets found: " + "-"); w.println("\nNumber of Association Rules generated: " + rules.size()); System.out.println("\nNumber of Association Rules generated: " + rules.size()); if (! rules.isEmpty()) { w.println("Average Support: " + roundDouble(( avg_sup / rules.size() ),2)); System.out.println("Average Support: " + roundDouble(( avg_sup / rules.size() ),2)); w.println("Average Confidence: " + roundDouble(( avg_conf / rules.size() ),2)); System.out.println("Average Confidence: " + roundDouble(( avg_conf / rules.size() ),2)); w.println("Average Lift: " + roundDouble(( avg_lift / rules.size() ),2)); System.out.println("Average Lift: " + roundDouble(( avg_lift / rules.size() ),2)); w.println("Average Conviction: " + roundDouble(( avg_conv / rules.size() ),2)); System.out.println("Average Conviction: " + roundDouble(( avg_conv/ rules.size() ),2)); w.println("Average Certain Factor: " + roundDouble(( avg_CF/ rules.size() ),2)); System.out.println("Average Certain Factor: " + roundDouble(( avg_CF/ rules.size()),2)); w.println("Average Netconf: " + roundDouble(( avg_netConf/ rules.size() ),2)); System.out.println("Average Netconf: " + roundDouble(( avg_netConf/ rules.size()),2)); w.println("Average YulesQ: " + roundDouble(( avg_yulesQ/ rules.size() ),2)); System.out.println("Average YulesQ: " + roundDouble(( avg_yulesQ/ rules.size()),2)); w.println("Average Number of Antecedents: " + roundDouble(( avg_ant_length / rules.size() ),2)); System.out.println("Average Number of Antecedents: " + roundDouble(( avg_ant_length / rules.size() ),2)); w.println("Number of Covered Records (%): " + roundDouble(( (100.0 * cnt_cov_rec) / this.nTrans ), 2)); System.out.println("Number of Covered Records (%): " + roundDouble(( (100.0 * cnt_cov_rec) / this.nTrans ), 2)); } } private ArrayList<Chromosome> initializeUniformPopulation() { int cnt_chr, g, type_attr, step, mod; double lb, ub, top, min_attr, max_attr; step = this.nAttr / this.r; mod = this.nAttr % this.r; ArrayList<Chromosome> popInit = new ArrayList<Chromosome>(); Gene[] rnd_genes = new Gene[this.nAttr]; for (cnt_chr=0; cnt_chr < this.randomChromosomes; cnt_chr++) { for (g=0; g < rnd_genes.length; g++) { rnd_genes[g] = new Gene(); type_attr = this.dataset.getAttributeType(g); min_attr = this.dataset.getMin(g); max_attr = this.dataset.getMax(g); if ( type_attr != myDataset.NOMINAL ) { if ( type_attr == myDataset.REAL ) { lb = Randomize.RanddoubleClosed(min_attr, max_attr); top = Math.min(lb + this.maxAmplitudes[g], max_attr); ub = Randomize.RanddoubleClosed(lb + 0.0001, top); } else { lb = Randomize.RandintClosed((int)min_attr, (int)max_attr); top = Math.min(lb + this.maxAmplitudes[g], max_attr); ub = Randomize.RandintClosed((int)lb + 1, (int)top); } } else lb = ub = Randomize.RandintClosed((int)min_attr, (int)max_attr); rnd_genes[g].setLowerBound(lb); rnd_genes[g].setUpperBound(ub); rnd_genes[g].setAttr (g); rnd_genes[g].setType(this.dataset.getAttributeType(g)); rnd_genes[g].setMin_attr(this.dataset.getMin(g)); rnd_genes[g].setMax_attr(this.dataset.getMax(g)); rnd_genes[g].setIsPositiveInterval( (Randomize.RandintClosed(0, 1) == 1) ? true : false ); rnd_genes[g].setActAs( Randomize.RandintClosed(Gene.NOT_INVOLVED, Gene.CONSEQUENT) ); } this.buildAllChromosomes(popInit, new Chromosome(rnd_genes), new boolean[this.r], 0, this.r, step, mod); } return popInit; } private void buildAllChromosomes(ArrayList<Chromosome> upop, Chromosome orig_chr, boolean[] mask, int p, int r, int step, int mod) { Chromosome chromo,chromo1; if (p == r - 1) { mask[p] = false; chromo = this.buildChromosome(orig_chr, mask, step, mod); if(!equalChromotoPop(chromo, upop)) upop.add(chromo.copy()); mask[p] = true; chromo1 = this.buildChromosome(orig_chr, mask, step, mod); if(!equalChromotoPop(chromo1, upop)) upop.add(chromo1.copy()); } else { mask[p] = false; this.buildAllChromosomes(upop, orig_chr, mask, p + 1, r, step, mod); mask[p] = true; this.buildAllChromosomes(upop, orig_chr, mask, p + 1, r, step, mod); } } private Chromosome buildChromosome(Chromosome c, boolean[] mask, int step, int mod) { int i, g, start, end; Chromosome c_tmp; c_tmp = new Chromosome( c.getGenes() ); end = 0; for (i=1; i <= mask.length; i++) { start = end; end = ( (mask.length - i) >= mod ) ? (start + step) : (start + step + 1); if ( mask[i - 1] ) { for (g=start; g < end; g++) c_tmp.getGene(g).invert(this.dataset.getAttributeType(g), this.dataset.getMin(g), this.dataset.getMax(g)); } } c_tmp.forceConsistency(); return c_tmp; } private void evaluate(ArrayList<Chromosome> upop, int start_index, int end_index) { for (int i=start_index; i < end_index; i++){ this.computeFitness( upop.get(i) ); } } private void crossover(ArrayList<Chromosome> upop) { int g; Chromosome parent1, parent2, offspring; Gene[] genes_offspring; if (Randomize.Rand() < this.pc) { parent1 = this.tournamentSelection(upop); parent2 = this.tournamentSelection(upop); if (! parent1.equals(parent2)) { genes_offspring = new Gene[this.nAttr]; for (g=0; g < this.nAttr; g++) genes_offspring[g] = (Randomize.Rand() < 0.5) ? parent1.getGene(g).copy() : parent2.getGene(g).copy(); offspring = new Chromosome(genes_offspring); offspring.forceConsistency(); } else offspring = new Chromosome( parent1.getGenes() ); // if(!equalChromotoPop(offspring, upop)) upop.add(offspring); } } private void mutate(ArrayList<Chromosome> upop) { int i, g, cnt_hit = 0; double adaptive_ps, type_attr, min_attr, max_attr, top; Gene gene; Chromosome chr, best_chr; best_chr = upop.get(0); for (i=this.uPopSize; i < upop.size(); i++) if ( upop.get(i).equals(best_chr) ) cnt_hit++; for (i=0; (i < this.uPopSize) && (upop.size() < (this.uPopSize * 2)); i++) { adaptive_ps = this.pmMin + cnt_hit * ((this.pmMax - this.pmMin) / (upop.size() - this.uPopSize)); if (Randomize.Rand() < adaptive_ps) { chr = new Chromosome( upop.get(i).getGenes() ); g = Randomize.Randint(0, this.nAttr); gene = chr.getGene(g); type_attr = this.dataset.getAttributeType(g); min_attr = this.dataset.getMin(g); max_attr = this.dataset.getMax(g); if (type_attr != myDataset.NOMINAL) { if (type_attr == myDataset.REAL) { if (Randomize.Rand() < 0.5) { if (Randomize.Rand() < 0.5) { top = Math.max(gene.getUpperBound() - this.maxAmplitudes[g], min_attr); gene.setLowerBound(Randomize.RanddoubleClosed(top, gene.getLowerBound())); } else gene.setLowerBound(Randomize.Randdouble(gene.getLowerBound(), gene.getUpperBound())); } else { if (Randomize.Rand() < 0.5) { top = Math.min(gene.getLowerBound() + this.maxAmplitudes[g], max_attr); gene.setUpperBound(Randomize.RanddoubleClosed(gene.getUpperBound(), top)); } else gene.setUpperBound(Randomize.RanddoubleClosed(gene.getLowerBound()+0.0001, gene.getUpperBound())); } } else { if (Randomize.Rand() < 0.5) { if (Randomize.Rand() < 0.5) { top = Math.max(gene.getUpperBound() - this.maxAmplitudes[g], min_attr); gene.setLowerBound(Randomize.RandintClosed((int)top, (int)gene.getLowerBound())); } else gene.setLowerBound(Randomize.Randint((int)gene.getLowerBound(), (int)gene.getUpperBound())); } else { if (Randomize.Rand() < 0.5) { top = Math.min(gene.getLowerBound() + this.maxAmplitudes[g], max_attr); gene.setUpperBound(Randomize.RandintClosed((int)gene.getUpperBound(), (int)top)); } else gene.setUpperBound(Randomize.RandintClosed((int)gene.getLowerBound() + 1, (int)gene.getUpperBound())); } } } else { top = Randomize.RandintClosed((int)min_attr, (int)max_attr); gene.setLowerBound(top); gene.setUpperBound(top); } gene.setIsPositiveInterval( (Randomize.RandintClosed(0, 1) == 1) ? true : false ); gene.setActAs( Randomize.RandintClosed(Gene.NOT_INVOLVED, Gene.CONSEQUENT) ); chr.forceConsistency(); //if(!equalChromotoPop(chr, upop)) upop.add(chr); if ( chr.equals(best_chr) ) cnt_hit++; } } } private void uniformOperator(ArrayList<Chromosome> upop) { int r, step, mod; ArrayList<Integer> diff_pos; ArrayList<Double> new_values; Chromosome chr1, chr2; r = 2; chr1 = upop.get(0); chr2 = upop.get(1); diff_pos = this.getDifferentPositions(chr1, chr2); new_values = this.buildNewValues(chr1, chr2, diff_pos); step = new_values.size() / r; mod = new_values.size() % r; this.buildCombinationsOfNewValues(upop, chr1, new_values, diff_pos, new boolean[r], 0, r, step, mod); } private ArrayList<Integer> getDifferentPositions(Chromosome chr1, Chromosome chr2) { int g; ArrayList<Integer> diff_pos = new ArrayList<Integer>(); for (g=0; g < this.nAttr; g++) { if ( chr1.getGene(g).getActAs() != chr2.getGene(g).getActAs() ) diff_pos.add(g * 4); if ( chr1.getGene(g).getIsPositiveInterval() != chr2.getGene(g).getIsPositiveInterval() ) diff_pos.add((g * 4) + 1); if ( chr1.getGene(g).getLowerBound() != chr2.getGene(g).getLowerBound() ) diff_pos.add((g * 4) + 2); if ( chr1.getGene(g).getUpperBound() != chr2.getGene(g).getUpperBound() ) diff_pos.add((g * 4) + 3); } return diff_pos; } private ArrayList<Double> buildNewValues(Chromosome chr1, Chromosome chr2, ArrayList<Integer> diff_pos) { int d, n, g, p; ArrayList<Double> new_values = new ArrayList<Double>(); for (d=0; d < diff_pos.size(); d++) { n = diff_pos.get(d); p = n % 4; g = (n - p) / 4; switch (p) { case 0: new_values.add( (double)Randomize.RandintClosed(Gene.NOT_INVOLVED, Gene.CONSEQUENT) ); break; case 1: new_values.add( (double)Randomize.RandintClosed(0, 1) ); break; case 2: if (this.dataset.getAttributeType(g) == myDataset.REAL) new_values.add( (chr1.getGene(g).getLowerBound() + chr2.getGene(g).getLowerBound()) / 2.0 ); else new_values.add( (double)Math.round( (chr1.getGene(g).getLowerBound() + chr2.getGene(g).getLowerBound()) / 2.0 ) ); break; case 3: if (this.dataset.getAttributeType(g) == myDataset.REAL) new_values.add( (chr1.getGene(g).getUpperBound() + chr2.getGene(g).getUpperBound()) / 2.0 ); else new_values.add( (double)Math.round( (chr1.getGene(g).getUpperBound() + chr2.getGene(g).getUpperBound()) / 2.0 ) ); } } return new_values; } private void buildCombinationsOfNewValues(ArrayList<Chromosome> upop, Chromosome orig_chr, ArrayList<Double> orig_values, ArrayList<Integer> diff_pos, boolean[] mask, int p, int r, int step, int mod) { Chromosome chromo, chromo1; if (p == r - 1) { mask[p] = false; chromo = this.buildChromosomeFromDifferentValues(orig_chr, orig_values, diff_pos, mask, step, mod); // if(!equalChromotoPop(chromo, upop)) upop.add(chromo.copy()); mask[p] = true; chromo1 = this.buildChromosomeFromDifferentValues(orig_chr, orig_values, diff_pos, mask, step, mod); // if(!equalChromotoPop(chromo1, upop)) upop.add(chromo.copy()); } else { mask[p] = false; this.buildCombinationsOfNewValues(upop, orig_chr, orig_values, diff_pos, mask, p + 1, r, step, mod); mask[p] = true; this.buildCombinationsOfNewValues(upop, orig_chr, orig_values, diff_pos, mask, p + 1, r, step, mod); } } private Chromosome buildChromosomeFromDifferentValues(Chromosome orig_chr, ArrayList<Double> orig_values, ArrayList<Integer> diff_pos, boolean[] mask, int step, int mod) { int i, d, n, p, g, start, end; double v; Chromosome c_tmp; c_tmp = new Chromosome( orig_chr.getGenes() ); if (step != 0.0) { end = 0; for (i=1; i <= mask.length; i++) { start = end; end = ( (mask.length - i) >= mod ) ? (start + step) : (start + step + 1); for (d=start; d < end; d++) { n = diff_pos.get(d); v = orig_values.get(d); p = n % 4; g = (n - p) / 4; switch (p) { case 0: if ( mask[i - 1] ) { switch ( (int)v ) { case Gene.NOT_INVOLVED: c_tmp.getGene(g).setActAs( Gene.ANTECEDENT ); break; case Gene.ANTECEDENT: c_tmp.getGene(g).setActAs( Gene.CONSEQUENT); break; case Gene.CONSEQUENT: c_tmp.getGene(g).setActAs( Gene.NOT_INVOLVED ); } } else c_tmp.getGene(g).setActAs( (int)v ); break; case 1: if ( mask[i - 1] ) c_tmp.getGene(g).setIsPositiveInterval( (v == 1.0) ? false : true ); else c_tmp.getGene(g).setIsPositiveInterval( (v == 1.0) ? true : false ); break; case 2: if ( mask[i - 1] ) { if (this.dataset.getAttributeType(g) != myDataset.NOMINAL) { if (this.dataset.getAttributeType(g) == myDataset.REAL) c_tmp.getGene(g).setLowerBound( Randomize.RandClosed() * (v - this.dataset.getMin(g)) + this.dataset.getMin(g) ); else c_tmp.getGene(g).setLowerBound( Randomize.RandintClosed((int)this.dataset.getMin(g), (int)v) ); } else { if (v == this.dataset.getMax(g)) c_tmp.getGene(g).setLowerBound(this.dataset.getMin(g)); else c_tmp.getGene(g).setLowerBound(v + 1); } } else c_tmp.getGene(g).setLowerBound(v); break; case 3: if ( mask[i - 1] ) { if (this.dataset.getAttributeType(g) != myDataset.NOMINAL) { if (this.dataset.getAttributeType(g) == myDataset.REAL) c_tmp.getGene(g).setUpperBound( Randomize.RandClosed() * (this.dataset.getMax(g) - v) + v ); else c_tmp.getGene(g).setUpperBound( Randomize.RandintClosed((int)v, (int)this.dataset.getMax(g)) ); } else { if (v == this.dataset.getMax(g)) c_tmp.getGene(g).setUpperBound(this.dataset.getMin(g)); else c_tmp.getGene(g).setUpperBound(v + 1); } } else c_tmp.getGene(g).setUpperBound(v); } } } c_tmp.forceConsistency(); } return c_tmp; } private Chromosome tournamentSelection(ArrayList<Chromosome> upop) { int rnd_index, cnt = 0; ArrayList<Chromosome> rnd_chrs = new ArrayList<Chromosome>(); while ( cnt < this.tournamentSize ) { rnd_index = Randomize.Randint(0, this.uPopSize); rnd_chrs.add( upop.get(rnd_index) ); cnt++; } Collections.sort(rnd_chrs); return ( rnd_chrs.get(0) ); } private void computeFitness(Chromosome c) { double yulesQ, numeratorYules, denominatorYules, all_sup, ant_sup, conf, cons_sup,lift, conv, CF, netConf; ArrayList<Integer> involved_attrs, covered_tids; involved_attrs = c.getIndexOfInvolvedGenes(); covered_tids = this.countSupport(c.getGenes(), involved_attrs); all_sup = (double)covered_tids.size() / (double)this.nTrans; if (all_sup > 0.0) { ant_sup = (double)this.countSupport(c.getGenes(), c.getIndexOfAntecedentGenes()).size() / (double)this.nTrans; cons_sup = (double)this.countSupport(c.getGenes(), c.getIndexOfConsequentGenes()).size() / (double)this.nTrans; conf = all_sup / ant_sup; c.setFitness( (this.a1 * all_sup) + (this.a2 * conf) - (this.a3 * involved_attrs.size()) - (this.a4 * this.sumInterval(c.getGenes(), involved_attrs)) ); //compute lift if((cons_sup == 0) || (ant_sup == 0)) lift = 1; else lift = all_sup / (ant_sup*cons_sup); //compute conviction if((cons_sup == 1)||(ant_sup == 0)) conv = 1; else conv = (ant_sup*(1-cons_sup))/(ant_sup-all_sup); //compute netconf if((ant_sup == 0)||(ant_sup == 1)||(Math.abs((ant_sup * (1-ant_sup))) <= 0.001)) netConf = 0; else netConf = (all_sup - (ant_sup*cons_sup))/(ant_sup * (1-ant_sup)); //compute yulesQ numeratorYules = ((all_sup * (1 - cons_sup - ant_sup + all_sup)) - ((ant_sup - all_sup)* (cons_sup - all_sup))); denominatorYules = ((all_sup * (1 - cons_sup - ant_sup + all_sup)) + ((ant_sup - all_sup)* (cons_sup - all_sup))); if((ant_sup == 0)||(ant_sup == 1)|| (cons_sup == 0)||(cons_sup == 1)||(Math.abs(denominatorYules) <= 0.001)) yulesQ = 0; else yulesQ = numeratorYules/denominatorYules; //compute Certain Factor(CF) CF = 0; if(conf > cons_sup) CF = (conf - cons_sup)/(1-cons_sup); else if(conf < cons_sup) CF = (conf - cons_sup)/(cons_sup); c.setRuleSupport(all_sup); c.setAntecedentSupport(ant_sup); c.setConsequentSupport(cons_sup); c.setRuleConfidence(conf); c.setRuleLift(lift); c.setRuleConv(conv); c.setRuleCF(CF); c.setRuleNetconf(netConf); c.setRuleYulesQ(yulesQ); for (int t=0; t < covered_tids.size(); t++) c.addCoveredTID( covered_tids.get(t) ); } else c.setFitness(this.minFitnessValue); this.trials++; } private double sumInterval(Gene[] genes, ArrayList<Integer> index_list) { double lb, ub, amp, interval, sum_interval = 0.0; int g; for (int i=0; i < index_list.size(); i++) { g = index_list.get(i); lb = genes[g].getLowerBound(); ub = genes[g].getUpperBound(); amp = ub - lb; interval = ( genes[g].getIsPositiveInterval() ) ? amp / this.af : (this.maxAmplitudes[g] - amp) / this.af; sum_interval += interval; } return sum_interval; } private void computeAdjustedFitness(ArrayList<Chromosome> upop) { int m1, m2, i, j, k, t, a, sum_marked; boolean ok; Chromosome c; ArrayList<Integer> ant_attrs, cons_attrs, covered_tids; int[][] marked_attr = new int[this.nTrans][this.nAttr]; for (m1=0; m1 < marked_attr.length; m1++) for (m2=0; m2 < marked_attr[m1].length; m2++) marked_attr[m1][m2] = this.ATTRIBUTE_NOT_COVERED; for (i=0; i < this.uPopSize; i++) { c = upop.get(i); covered_tids = c.getCoveredTIDs(); ant_attrs = c.getIndexOfAntecedentGenes(); cons_attrs = c.getIndexOfConsequentGenes(); for (j=0; j < covered_tids.size(); j++) { t = covered_tids.get(j); for (k=0; k < ant_attrs.size(); k++) { a = ant_attrs.get(k); if ( marked_attr[t][a] == this.ATTRIBUTE_NOT_COVERED ) marked_attr[t][a] = this.ATTRIBUTE_COVERED_BY_ANTECEDENT; else if ( marked_attr[t][a] == this.ATTRIBUTE_COVERED_BY_CONSEQUENT ) marked_attr[t][a] = this.ATTRIBUTE_COVERED_BY_BOTH; } for (k=0; k < cons_attrs.size(); k++) { a = cons_attrs.get(k); if ( marked_attr[t][a] == this.ATTRIBUTE_NOT_COVERED ) marked_attr[t][a] = this.ATTRIBUTE_COVERED_BY_CONSEQUENT; else if ( marked_attr[t][a] == this.ATTRIBUTE_COVERED_BY_ANTECEDENT ) marked_attr[t][a] = this.ATTRIBUTE_COVERED_BY_BOTH; } } } for (i=this.uPopSize; i < upop.size(); i++) { c = upop.get(i); covered_tids = c.getCoveredTIDs(); ant_attrs = c.getIndexOfAntecedentGenes(); cons_attrs = c.getIndexOfConsequentGenes(); sum_marked = 0; for (j=0; j < covered_tids.size(); j++) { t = covered_tids.get(j); ok = true; for (k=0; k < ant_attrs.size() && ok; k++) { a = ant_attrs.get(k); if ( ( marked_attr[t][a] == this.ATTRIBUTE_NOT_COVERED ) || (marked_attr[t][a] == this.ATTRIBUTE_COVERED_BY_CONSEQUENT) ) ok = false; } for (k=0; k < cons_attrs.size() && ok; k++) { a = cons_attrs.get(k); if ( ( marked_attr[t][a] == this.ATTRIBUTE_NOT_COVERED ) || (marked_attr[t][a] == this.ATTRIBUTE_COVERED_BY_ANTECEDENT) ) ok = false; } if (ok) sum_marked++; } if (sum_marked > 0) c.setFitness(c.getFitness() - (this.a5 * sum_marked)); } } private ArrayList<Integer> countSupport(Gene[] genes, ArrayList<Integer> index_list) { ArrayList<Integer> tid_list = new ArrayList<Integer>(); double[][] trans = dataset.getTrueTransactions(); int t, i, g; double lb, ub; boolean ok; for (t=0; t < this.nTrans; t++) { ok = true; for (i=0; i < index_list.size() && ok; i++) { g = index_list.get(i); lb = genes[g].getLowerBound(); ub = genes[g].getUpperBound(); if ( genes[g].getIsPositiveInterval() ) { if ((trans[t][g] < lb) || (trans[t][g] > ub)) ok = false; } else { if ((trans[t][g] >= lb) && (trans[t][g] <= ub)) ok = false; } } if (ok) tid_list.add(t); } return tid_list; } private void adjustIntervals(ArrayList<Chromosome> upop) { int i, g; Chromosome chr; Gene[] genes; ArrayList<Integer> cov_tids; double[][] trans = this.dataset.getTrueTransactions(); for (i=0; i < upop.size(); i++) { chr = upop.get(i); genes = chr.getGenes(); cov_tids = chr.getCoveredTIDs(); for (g=0; g < genes.length; g++) { if ( (this.dataset.getAttributeType(g) != myDataset.NOMINAL) && (genes[g].getActAs() != Gene.NOT_INVOLVED) ) { if ( genes[g].getIsPositiveInterval() ) this.adjustPositiveInterval(genes[g], g, cov_tids, trans); else { if (this.dataset.getAttributeType(g) == myDataset.REAL) this.adjustNegativeInterval(genes[g], g, cov_tids, trans, 0.0001); else this.adjustNegativeInterval(genes[g], g, cov_tids, trans, 1.0); } } } } } private void adjustPositiveInterval(Gene gene, int g, ArrayList<Integer> cov_tids, double[][] trans) { int r, t; double min, max; min = gene.getUpperBound(); max = gene.getLowerBound(); for (r=0; r < cov_tids.size(); r++) { t = cov_tids.get(r); if (trans[t][g] < min) min = trans[t][g]; if (trans[t][g] > max) max = trans[t][g]; } gene.setLowerBound(min); gene.setUpperBound(max); } private void adjustNegativeInterval(Gene gene, int g, ArrayList<Integer> cov_tids, double[][] trans, double delta) { int r, t; double min, max; min = this.dataset.getMax(g) + delta; max = this.dataset.getMin(g) - delta; for (r=0; r < cov_tids.size(); r++) { t = cov_tids.get(r); if ( (trans[t][g] < min) && (trans[t][g] > gene.getUpperBound()) ) min = trans[t][g]; if ( (trans[t][g] > max) && (trans[t][g] < gene.getLowerBound()) ) max = trans[t][g]; } gene.setLowerBound(max + delta); gene.setUpperBound(min - delta); } }