/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.UnsupervisedLearning.AssociationRules.IntervalRuleLearning.MOEA_Ghosh; import java.io.PrintWriter; import java.math.BigDecimal; import java.util.*; import org.core.Randomize; public class MOEA_GoshProcess { /** * <p> * It provides the implementation of the algorithm to be run in a process * </p> */ private String paretos; private myDataset dataset; private int nTrials; private int pointCrossover; private int numObjectives; private double pc; private double pm; private double af; private int uPopSize; private int nAttr; private int nTrans; private int trials; private int max_rank; private ArrayList<Chromosome> uPop; private ArrayList<Chromosome> child_pop; private ArrayList<Chromosome> separate_pop; /** * <p> * It creates a new process for the algorithm by setting up its parameters * </p> * @param dataset The instance of the dataset for dealing with its records * @param nTrials The maximum number of generations to reach before completing the whole evolutionary learning * @param popSize The number of chromosomes in the population * @param pc The probability for the crossover operator * @param pointCrossover The number of point crossover to uses in crossover operator * @param pm The probability for the mutation operator * @param af The factor of amplitude for each of the dataset attribute */ //variant 1 public MOEA_GoshProcess (myDataset dataset, int numObjectives, int nTrials, int popSize, int pointCrossover, double pc, double pm, double af) { this.dataset = dataset; this.nTrials = nTrials; this.uPopSize = popSize; this.pointCrossover = pointCrossover; this.pc = pc; this.pm = pm; this.af = af; this.numObjectives = numObjectives; this.nAttr = this.dataset.getnVars(); this.nTrans = this.dataset.getnTrans(); this.trials = 0; this.paretos = new String(""); this.uPop = new ArrayList<Chromosome>(); this.child_pop = new ArrayList<Chromosome>(); this.separate_pop = new ArrayList<Chromosome>(); } /** * <p> * It runs the evolutionary learning for mining association rules * </p> */ public void run(){ int nGn = 0; int rescue = 10000; this.trials = 0; this.paretos = new String(""); this.initializePopulation(); do { System.out.println("Computing Generation " + (nGn + 1) + "... "); this.assign_rank(this.uPop); this.assign_fitness(); this.update_separate_pop(); this.selection(); this.genetic_operators(); nGn++; if (this.trials > rescue && rescue < this.nTrials) { rescue += 10000; } } while (this.trials < this.nTrials); printPareto(); System.out.println("done.\n"); System.out.println("Number of trials = " + this.trials + "\n"); } private void initializePopulation() { int i, j, tmp, pos, operator; double value; double[] example; int[] sample; ArrayList<Integer> tr_not_marked; Gene[] rnd_genes; Chromosome chromo; this.uPop.clear(); tr_not_marked = new ArrayList<Integer> (); rnd_genes = new Gene[this.nAttr]; sample = new int[this.nAttr]; this.trials = 0; for(i=0; i < this.nAttr; i++) rnd_genes[i] = new Gene(); for(i=0; i < this.nAttr; i++) sample[i] = i; for(i=0; i < this.nTrans; i++) tr_not_marked.add(i); while(this.uPop.size() <= this.uPopSize){ if(tr_not_marked.size() == 0) for(i=0; i<this.nTrans; i++) tr_not_marked.add(i); pos = tr_not_marked.get(Randomize.Randint(0, tr_not_marked.size())); example = this.dataset.getExample(pos); for(i=0; i < this.nAttr; i++) { j = Randomize.Randint(0, this.nAttr); tmp = sample[i]; sample[i] = sample[j]; sample[j] = tmp; } for (i=0; i < nAttr; i++) { rnd_genes[sample[i]].setAttr (sample[i]); rnd_genes[sample[i]].setActAs (Randomize.RandintClosed(Gene.NOT_INVOLVED, Gene.CONSEQUENT)); value = example[sample[i]]; if (this.dataset.getAttributeType(sample[i]) != myDataset.NOMINAL) { if (this.dataset.getAttributeType(sample[i]) == myDataset.REAL) { operator = Randomize.RandintClosed(Gene.MINOR, Gene.MAJOR); } else { operator = Randomize.RandintClosed(Gene.MINOR, Gene.MAJOR); } } else operator = Randomize.RandintClosed(Gene.EQUAL, Gene.UNEQUAL); rnd_genes[sample[i]].setValue(value); rnd_genes[sample[i]].setOperator(operator); } chromo = new Chromosome(rnd_genes, this.numObjectives); chromo.forceConsistency(); chromo.computeObjetives (this.dataset); this.trials++; if(!equalChromotoPop(chromo, this.uPop)) { this.uPop.add(chromo); this.deleteTransCovered(chromo, tr_not_marked); } } } private void deleteTransCovered (Chromosome chromo, ArrayList<Integer> tr_not_marked){ int i; double [] example; for (i = tr_not_marked.size()-1; i >= 0; i--) { example = this.dataset.getExample(tr_not_marked.get(i)); if(chromo.isCovered(example)) tr_not_marked.remove(i); } } private boolean equalChromotoPop(Chromosome chromo, ArrayList<Chromosome> pop){ int i; boolean value; Chromosome aux; value = false; for (i=0; (!value) && (i < pop.size()); i++) { aux = pop.get(i); if(chromo.equals(aux)) value = true; } return value; } /** * Selection * */ private void selection(){ Chromosome chr; List<LimitRoulette> Listlimit; this.child_pop.clear(); Listlimit = this.getLimitRoulette(); while (this.child_pop.size() < this.uPopSize) { chr = this.roulette(Listlimit); this.child_pop.add(chr); } this.uPop.clear(); this.uPop = (ArrayList<Chromosome>) this.child_pop.clone(); } private void genetic_operators(){ int i; Chromosome dad, mom; ArrayList <Chromosome> children; //crossover for(i=0; i<this.uPop.size(); i+=2){ dad = this.uPop.get(i); mom = this.uPop.get(i+1); if (Randomize.Rand() < this.pc){ children = multipoint_crossover(dad, mom); this.uPop.set(i, children.get(0)); this.uPop.set(i+1,children.get(1)); } } } /** * Routine to perform non-dominated sorting * @param mixed_pop Mixed population * @param new_pop new population */ private void update_separate_pop (){ boolean finish_rank = false; int i; Collections.sort(this.uPop); for(i=0; i<this.uPop.size()&& !finish_rank;i++){ if(this.uPop.get(i).getRank() == 1){ if(!equalChromotoPop(this.uPop.get(i),this.separate_pop)) this.separate_pop.add(this.uPop.get(i).copy()); } else finish_rank = true; } this.assign_rank(this.separate_pop); finish_rank = false; Collections.sort(this.separate_pop); for(i= this.separate_pop.size()-1; i > 0 && !finish_rank;i--){ if(this.separate_pop.get(i).getRank() != 1){ this.separate_pop.remove(i); } else finish_rank = true; } } /** * Exchange the genes of the father and mother according crossing points at intervals * odd ex.> 1-2, 3-4.... * @param dad * @param mom */ private ArrayList<Chromosome> multipoint_crossover (Chromosome dad, Chromosome mom) { Gene[] genesSon1; Gene[] genesSon2; Chromosome son1, son2; genesSon1 = new Gene[this.nAttr]; genesSon2 = new Gene[this.nAttr]; ArrayList<Integer> crosspoints = cross_points(); ArrayList<Chromosome> children = new ArrayList<Chromosome>(); int posini = 0; int posfin = crosspoints.get(0); boolean crossover = true; for (int i = 0;i < crosspoints.size();i++) { if (crossover) { for (int j = posini; j < posfin; j++) { genesSon1[j] = dad.getGene(j).copy(); genesSon2[j] = mom.getGene(j).copy(); } } else { for (int j = posini; j < posfin; j++) { genesSon2[j] = dad.getGene(j).copy(); genesSon1[j] = mom.getGene(j).copy(); } } if(i < crosspoints.size()-1){ crossover = !crossover; posini = crosspoints.get(i); posfin = crosspoints.get(i+1); } } son1 = new Chromosome(genesSon1, this.numObjectives); son2 = new Chromosome(genesSon2, this.numObjectives); if (Randomize.Rand() < this.pm) this.mutate (son1); if (Randomize.Rand() < this.pm) this.mutate (son2); son1.forceConsistency(); son2.forceConsistency(); son1.computeObjetives (this.dataset); son2.computeObjetives (this.dataset); this.trials += 2; children.add(son1); children.add(son2); return children; } private ArrayList<Integer> cross_points(){ Integer num; ArrayList<Integer>points = new ArrayList<Integer>(); for (int i = 0; i< this.pointCrossover;i++){ for (num = Randomize.Randint(0, this.nAttr); points.contains(num); num = Randomize.Randint(0, this.nAttr)); int j = 0; boolean found = false; while ((j<i)&&(!found)) { if(points.get(j) > num){ found = true; } else j++; } points.add(j,num); } points.add(this.nAttr); return points; } private void mutate (Chromosome chr) { int i; double type_attr, min_attr, max_attr; Gene gene; i = Randomize.Randint(0, this.nAttr); gene = chr.getGene(i); type_attr = this.dataset.getAttributeType(i); min_attr = this.dataset.getMin(i); max_attr = this.dataset.getMax(i); if (type_attr != myDataset.NOMINAL) { if (type_attr == myDataset.REAL) { gene.setValue(Randomize.RanddoubleClosed (min_attr, max_attr)); } else { gene.setValue(Randomize.RandintClosed((int)min_attr, (int)max_attr)); } gene.setOperator(gene.randOperatorNumeric()); } else { gene.setValue(Randomize.RandintClosed((int)min_attr, (int)max_attr)); gene.setOperator(gene.randOperatorNominal()); } gene.setActAs (gene.randAct()); chr.forceConsistency(); chr.computeObjetives(this.dataset); } private List<LimitRoulette> getLimitRoulette(){ double totalWeight = 0,probF; for (int i = 0; i < this.uPop.size(); i++) { totalWeight = this.uPop.get(i).getFitness_rank() + totalWeight; } List<Double> listProb = new ArrayList<Double>(); for (int i = 0; i < this.uPop.size(); i++) { probF = this.uPop.get(i).getFitness_rank() / totalWeight; listProb.add(probF); } List<LimitRoulette> listLimit = new ArrayList<LimitRoulette>(); double limitHigh = 0; double limitLow = 0; for (int i = 0; i < listProb.size(); i++) { LimitRoulette limitRoulette = new LimitRoulette(); limitHigh = listProb.get(i) + limitHigh; limitRoulette.setLimitHigh(limitHigh); limitRoulette.setLimitLow(limitLow); limitLow = limitHigh; limitRoulette.setChromosome(this.uPop.get(i)); listLimit.add(limitRoulette); } return listLimit; } private Chromosome roulette(List<LimitRoulette> listLimit) { double numbAleatory = Randomize.Rand(); boolean find = false; int i = 0; while ((find == false) && (i < listLimit.size())){ if((listLimit.get(i).getLimitLow() <= numbAleatory) && (numbAleatory <= listLimit.get(i).getLimitHigh())) find = true; else i++; } return listLimit.get(i).getChromosome(); } /** * Routine for usual non-domination checking * @param a chromosome a * @param b chromosome b * @return 1 if a dominates b, -1 if b dominates a and 0 if both a and b are non-dominated */ private int check_dominance (Chromosome a, Chromosome b){ int i; int flag1; int flag2; flag1 = 0; flag2 = 0; for (i=0; i<this.numObjectives; i++){ if (a.getObjective(i) > b.getObjective(i)) flag1 = 1; else if (a.getObjective(i) < b.getObjective(i)) flag2 = 1; } if ((flag1 == 1) && (flag2 == 0)) return (1); else if ((flag1 == 0) && (flag2 == 1)) return (-1); else return (0); } /** * Function to assign rank to a population of size pop_size * @param new_pop population */ private void assign_rank (ArrayList<Chromosome> new_pop){ int flag; int i; int end; int front_size; int rank=1; Lists orig; Lists cur; Lists temp1, temp2; orig = new Lists(); cur = new Lists(); front_size = 0; temp1 = orig; for (i=0; i< new_pop.size(); i++) { temp1.insert (temp1,i); temp1 = temp1.child; } do { if (orig.child.child == null) { new_pop.get(orig.child.index).rank = rank; break; } temp1 = orig.child; temp1.insert (cur, temp1.index); front_size = 1; temp2 = cur.child; temp1 = temp1.del (temp1); temp1 = temp1.child; do { temp2 = cur.child; do { end = 0; flag = check_dominance (new_pop.get(temp1.index), new_pop.get(temp2.index)); if (flag == 1) { temp1.insert (orig, temp2.index); temp2 = temp2.del (temp2); front_size--; temp2 = temp2.child; } if (flag == 0) { temp2 = temp2.child; } if (flag == -1) { end = 1; } } while (end!=1 && temp2!=null); if (flag == 0 || flag == 1) { temp1.insert (cur, temp1.index); front_size++; temp1 = temp1.del (temp1); } temp1 = temp1.child; } while (temp1 != null); temp2 = cur.child; do { new_pop.get(temp2.index).rank = rank; temp2 = temp2.child; } while (temp2 != null); temp2 = cur.child; do { temp2 = temp2.del (temp2); temp2 = temp2.child; } while (cur.child !=null); rank++; } while (orig.child != null); this.max_rank = rank; return; } private void assign_fitness(){ int i; for(i=0; i<this.uPop.size(); i++){ this.uPop.get(i).computeFitness(this.max_rank); } } public ArrayList<AssociationRule> generateRulesPareto() { int i; boolean stop; Chromosome chromo; ArrayList<AssociationRule> rulesPareto = new ArrayList<AssociationRule>(); stop = false; for (i=0; i < this.separate_pop.size() && !stop; i++) { chromo = this.separate_pop.get(i); if (chromo.getRank() < 2) { rulesPareto.add (new AssociationRule(chromo)); } else stop = true; } return rulesPareto; } static double roundDouble(double number, int decimalPlace){ double numberRound; if(!Double.isInfinite(number)&&(!Double.isNaN(number))){ BigDecimal bd = new BigDecimal(number); bd = bd.setScale(decimalPlace, BigDecimal.ROUND_UP); numberRound = bd.doubleValue(); return numberRound; }else return number; } public void saveReport(ArrayList<AssociationRule> rules,PrintWriter w) { int i, j, r, cnt_cov_rec; double avg_yulesQ = 0.0, avg_sup = 0.0, avg_conf = 0.0, avg_ant_length = 0.0, avg_lift = 0.0, avg_conv = 0.0, avg_CF = 0.0, avg_netConf = 0.0; int[] covered; AssociationRule rule; covered = new int[this.nTrans]; for (i=0; i < this.nTrans; i++) covered[i] = 0; for (r=0; r < rules.size(); r++) { rule = rules.get(r); avg_sup += rule.getSupport(); avg_conf += rule.getConfidence(); avg_lift += rule.getLift(); avg_ant_length += (rule.getnAnts()+rule.getConsequents().size()); avg_conv += rule.getConv(); avg_CF += rule.getCF(); avg_netConf += rule.getNetConf(); avg_yulesQ += rule.getYulesQ(); for (j=0; j < this.nTrans; j++) { if (covered[j] < 1) { if (rule.isCovered(this.dataset.getExample(j))) covered[j] = 1; } } } cnt_cov_rec = 0; for (i=0; i < this.nTrans; i++) cnt_cov_rec += covered[i]; w.println("\nNumber of Frequent Itemsets found:\"" + "-"); System.out.println("\nNumber of Frequent Itemsets found: " + "-"); w.println("\nNumber of Association Rules generated:\"" + rules.size()); System.out.println("\nNumber of Association Rules generated: " + rules.size()); if (! rules.isEmpty()) { w.println("Average Support: " + roundDouble(( avg_sup / rules.size() ),2)); System.out.println("Average Support: " + roundDouble(( avg_sup / rules.size() ),2)); w.println("Average Confidence: " + roundDouble(( avg_conf / rules.size() ),2)); System.out.println("Average Confidence: " + roundDouble(( avg_conf / rules.size() ),2)); w.println("Average Lift: " + roundDouble(( avg_lift / rules.size() ),2)); System.out.println("Average Lift: " + roundDouble(( avg_lift / rules.size() ),2)); w.println("Average Conviction: " + roundDouble(( avg_conv / rules.size() ),2)); System.out.println("Average Conviction: " + roundDouble(( avg_conv/ rules.size() ),2)); w.println("Average Certain Factor: " + roundDouble(( avg_CF/ rules.size() ),2)); System.out.println("Average Certain Factor: " + roundDouble(( avg_CF/ rules.size()),2)); w.println("Average Netconf: " + roundDouble(( avg_netConf/ rules.size() ),2)); System.out.println("Average Netconf: " + roundDouble(( avg_netConf/ rules.size()),2)); w.println("Average YulesQ: " + roundDouble(( avg_yulesQ/ rules.size() ),2)); System.out.println("Average YulesQ: " + roundDouble(( avg_yulesQ/ rules.size()),2)); w.println("Average Number of Antecedents: " + roundDouble((avg_ant_length / rules.size() ),2)); System.out.println("Average Number of Antecedents: " + roundDouble(( avg_ant_length / rules.size() ),2)); w.println("Number of Covered Records (%): " + roundDouble((100.0 * cnt_cov_rec) / this.nTrans, 2)); System.out.println("Number of Covered Records (%): " + roundDouble((100.0 * cnt_cov_rec) / this.nTrans, 2)); } else System.out.println("No Statistics."); } public void printPareto() { int i; boolean stop; Chromosome chromo; stop = false; this.paretos += ""; this.paretos += ("Support\tantecedent_support\tconsequent_support\tConfidence\tLift\tConv\tCF\tNetConf\tYulesQ\tnAttributes\n"); for (i=0; i < this.separate_pop.size() && !stop; i++) { chromo = this.separate_pop.get(i); if (chromo.getRank() < 2) { this.paretos += ("" + roundDouble(chromo.getSupport(),2) + "\t" + roundDouble(chromo.getAntsSupport(),2) + "\t" + roundDouble(chromo.getConsSupport(),2) + "\t" + roundDouble(chromo.getConfidence(),2) + "\t" + roundDouble(chromo.getLift(),2) + "\t" + roundDouble(chromo.getConv(),2) + "\t" + roundDouble(chromo.getCF(),2) + "\t" + roundDouble(chromo.getNetConf(),2) + "\t" + roundDouble(chromo.getYulesQ(),2) + "\t" + (chromo.getnAnts()+1) + "\n"); } else stop = true; } } public String getParetos() { return (this.paretos); } }