/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Apriori.java
 *    Copyright (C) 1999 Eibe Frank, Mark Hall
 *
 */

package weka.associations;

import java.io.*;
import java.util.*;
import weka.core.*;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

/**
 * Class implementing an Apriori-type algorithm. Iteratively reduces the minimum
 * support until it finds the required number of rules with the given minimum
 * confidence. <p>
 *
 * Reference: R. Agrawal, R. Srikant (1994). <i>Fast algorithms for
 * mining association rules in large databases</i>. Proc
 * International Conference on Very Large Databases,
 * pp. 478-499. Santiago, Chile: Morgan Kaufmann, Los Altos, CA. <p>
 *
 * Valid options are:<p>
 *
 * -N required number of rules <br>
 * The required number of rules (default: 10). <p>
 *
 * -T type of metric by which to sort rules <br>
 * 0 = confidence | 1 = lift | 2 = leverage | 3 = conviction. <p>
 *
 * -C minimum confidence of a rule <br>
 * The minimum confidence of a rule (default: 0.9). <p>
 *
 * -D delta for minimum support <br>
 * The delta by which the minimum support is decreased in
 * each iteration (default: 0.05). <p>
 *
 * -U upper bound for minimum support <br>
 * The upper bound for minimum support. Don't explicitly look for
 * rules with more than this level of support. <p>
 *
 * -M lower bound for minimum support <br>
 * The lower bound for the minimum support (default = 0.1). <p>
 *
 * -S significance level <br>
 * If used, rules are tested for significance at
 * the given level. Slower (default = no significance testing). <p>
 *
 * -R <br>
 * If set then columns that contain all missing values are removed from
 * the data. <p>
 *
 * -I <br>
 * If set the itemsets found are also output (default = no). <p>
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 1.1.1.1 $
 */
public class Apriori extends Associator implements OptionHandler {

  /** The minimum support. */
  protected double m_minSupport;

  /** The upper bound on the support. */
  protected double m_upperBoundMinSupport;

  /** The lower bound for the minimum support. */
  protected double m_lowerBoundMinSupport;

  /** Metric types. */
  protected static final int CONFIDENCE = 0;
  protected static final int LIFT = 1;
  protected static final int LEVERAGE = 2;
  protected static final int CONVICTION = 3;
  public static final Tag [] TAGS_SELECTION = {
    new Tag(CONFIDENCE, "Confidence"),
    new Tag(LIFT, "Lift"),
    new Tag(LEVERAGE, "Leverage"),
    new Tag(CONVICTION, "Conviction")
  };

  /** The selected metric type. */
  protected int m_metricType = CONFIDENCE;

  /** The minimum metric score. */
  protected double m_minMetric;

  /** The maximum number of rules that are output. */
  protected int m_numRules;

  /** Delta by which m_minSupport is decreased in each iteration. */
  protected double m_delta;

  /** Significance level for optional significance test.
   */
  protected double m_significanceLevel;

  /** Number of cycles used before required number of rules was found. */
  protected int m_cycles;

  /** The set of all sets of itemsets L. */
  protected FastVector m_Ls;

  /** The same information stored in hash tables. */
  protected FastVector m_hashtables;

  /** The list of all generated rules. */
  protected FastVector[] m_allTheRules;

  /** The instances (transactions) to be used for generating
      the association rules. */
  protected Instances m_instances;

  /** Output itemsets found? */
  protected boolean m_outputItemSets;

  protected boolean m_removeMissingCols;

  /** Report progress iteratively. */
  protected boolean m_verbose;

  /**
   * Returns a string describing this associator
   * @return a description of the associator suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Finds association rules.";
  }

  /**
   * Constructor that sets default values for the minimum confidence
   * and the maximum number of rules.
   */
  public Apriori() {
    resetOptions();
  }

  /**
   * Resets the options to the default values.
   */
  public void resetOptions() {
    m_removeMissingCols = false;
    m_verbose = false;
    m_delta = 0.05;
    m_minMetric = 0.90;
    m_numRules = 10;
    m_lowerBoundMinSupport = 0.1;
    m_upperBoundMinSupport = 1.0;
    m_significanceLevel = -1;
    m_outputItemSets = false;
  }

  /**
   * Removes columns that are all missing from the data.
   * @param instances the instances
   * @return a new set of instances with all missing columns removed
   */
  private Instances removeMissingColumns(Instances instances)
    throws Exception {

    int numInstances = instances.numInstances();
    StringBuffer deleteString = new StringBuffer();
    int removeCount = 0;
    boolean first = true;
    int maxCount = 0;

    for (int i = 0; i < instances.numAttributes(); i++) {
      AttributeStats as = instances.attributeStats(i);
      if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
        // see if we can decrease this by looking for the most frequent value
        int [] counts = as.nominalCounts;
        if (counts[Utils.maxIndex(counts)] > maxCount) {
          maxCount = counts[Utils.maxIndex(counts)];
        }
      }
      if (as.missingCount == numInstances) {
        if (first) {
          deleteString.append((i+1));
          first = false;
        } else {
          deleteString.append(","+(i+1));
        }
        removeCount++;
      }
    }
    if (m_verbose) {
      System.err.println("Removed : "+removeCount+" columns with all missing "
                         +"values.");
    }
    if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
      m_upperBoundMinSupport = (double)maxCount / (double)numInstances;
      if (m_verbose) {
        System.err.println("Setting upper bound min support to : "
                           +m_upperBoundMinSupport);
      }
    }

    if (deleteString.toString().length() > 0) {
      Remove af = new Remove();
      af.setAttributeIndices(deleteString.toString());
      af.setInvertSelection(false);
      af.setInputFormat(instances);
      Instances newInst = Filter.useFilter(instances, af);

      return newInst;
    }
    return instances;
  }

  /**
   * Method that generates all large itemsets with a minimum support, and from
   * these all association rules with a minimum confidence.
   *
   * @param instances the instances to be used for generating the associations
   * @exception Exception if rules can't be built successfully
   */
  public void buildAssociations(Instances instances) throws Exception {

    double[] confidences, supports;
    int[] indices;
    FastVector[] sortedRuleSet;
    int necSupport = 0;

    if (instances.checkForStringAttributes()) {
      throw new Exception("Can't handle string attributes!");
    }

    if (m_removeMissingCols) {
      instances = removeMissingColumns(instances);
    }

    // Decrease minimum support until desired number of rules found.
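    // Worked example with the defaults (upper bound 1.0, delta 0.05, lower
    // bound 0.1): the first pass runs at minimum support 1.0 - 0.05 = 0.95,
    // then 0.90, 0.85, ... until either m_numRules rules meeting the minimum
    // metric score have been found or the support falls below the lower bound.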
    m_cycles = 0;
    m_minSupport = m_upperBoundMinSupport - m_delta;
    m_minSupport = (m_minSupport < m_lowerBoundMinSupport)
      ? m_lowerBoundMinSupport
      : m_minSupport;

    do {

      // Reserve space for variables
      m_Ls = new FastVector();
      m_hashtables = new FastVector();
      m_allTheRules = new FastVector[6];
      m_allTheRules[0] = new FastVector();
      m_allTheRules[1] = new FastVector();
      m_allTheRules[2] = new FastVector();
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
        m_allTheRules[3] = new FastVector();
        m_allTheRules[4] = new FastVector();
        m_allTheRules[5] = new FastVector();
      }
      sortedRuleSet = new FastVector[6];
      sortedRuleSet[0] = new FastVector();
      sortedRuleSet[1] = new FastVector();
      sortedRuleSet[2] = new FastVector();
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
        sortedRuleSet[3] = new FastVector();
        sortedRuleSet[4] = new FastVector();
        sortedRuleSet[5] = new FastVector();
      }

      // Find large itemsets and rules
      findLargeItemSets(instances);
      if (m_significanceLevel != -1 || m_metricType != CONFIDENCE)
        findRulesBruteForce();
      else
        findRulesQuickly();

      // Sort rules according to their support
      supports = new double[m_allTheRules[2].size()];
      for (int i = 0; i < m_allTheRules[2].size(); i++)
        supports[i] = (double)((ItemSet)m_allTheRules[1].elementAt(i)).support();
      indices = Utils.stableSort(supports);
      for (int i = 0; i < m_allTheRules[2].size(); i++) {
        sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[i]));
        sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[i]));
        sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[i]));
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
          sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[i]));
          sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[i]));
          sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[i]));
        }
      }

      // Sort rules according to their confidence
      m_allTheRules[0].removeAllElements();
      m_allTheRules[1].removeAllElements();
      m_allTheRules[2].removeAllElements();
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
        m_allTheRules[3].removeAllElements();
        m_allTheRules[4].removeAllElements();
        m_allTheRules[5].removeAllElements();
      }
      confidences = new double[sortedRuleSet[2].size()];
      int sortType = 2 + m_metricType;
      for (int i = 0; i < sortedRuleSet[2].size(); i++)
        confidences[i] = ((Double)sortedRuleSet[sortType].elementAt(i)).doubleValue();
      indices = Utils.stableSort(confidences);
      for (int i = sortedRuleSet[0].size() - 1;
           (i >= (sortedRuleSet[0].size() - m_numRules)) && (i >= 0); i--) {
        m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
        m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
        m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
          m_allTheRules[3].addElement(sortedRuleSet[3].elementAt(indices[i]));
          m_allTheRules[4].addElement(sortedRuleSet[4].elementAt(indices[i]));
          m_allTheRules[5].addElement(sortedRuleSet[5].elementAt(indices[i]));
        }
      }
      m_minSupport -= m_delta;
      m_minSupport = (m_minSupport < m_lowerBoundMinSupport) ?
        0 : m_minSupport;
      necSupport = (int)(m_minSupport * (double)instances.numInstances() + 0.5);

      m_cycles++;
      if (m_verbose) {
        if (m_Ls.size() > 1) {
          System.out.println(toString());
        }
      }
    } while ((m_allTheRules[0].size() < m_numRules) &&
             (m_minSupport >= m_lowerBoundMinSupport)
             /* (necSupport >= lowerBoundNumInstancesSupport)*/
             /* (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport)) */ &&
             (necSupport >= 1));
    m_minSupport += m_delta;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    String string1 = "\tThe required number of rules. (default = " + m_numRules + ")",
      string2 = "\tThe minimum confidence of a rule. (default = " + m_minMetric + ")",
      string3 = "\tThe delta by which the minimum support is decreased in\n",
      string4 = "\teach iteration. (default = " + m_delta + ")",
      string5 = "\tThe lower bound for the minimum support. (default = "
        + m_lowerBoundMinSupport + ")",
      string6 = "\tIf used, rules are tested for significance at\n",
      string7 = "\tthe given level. Slower. (default = no significance testing)",
      string8 = "\tIf set the itemsets found are also output. (default = no)",
      stringType = "\tThe metric type by which to rank rules. (default = "
        + "confidence)";

    FastVector newVector = new FastVector(9);

    newVector.addElement(new Option(string1, "N", 1,
                                    "-N <required number of rules output>"));
    newVector.addElement(new Option(stringType, "T", 1,
                                    "-T <0=confidence | 1=lift | "
                                    + "2=leverage | 3=Conviction>"));
    newVector.addElement(new Option(string2, "C", 1,
                                    "-C <minimum metric score of a rule>"));
    newVector.addElement(new Option(string3 + string4, "D", 1,
                                    "-D <delta for minimum support>"));
    newVector.addElement(new Option("\tUpper bound for minimum support. "
                                    + "(default = 1.0)", "U", 1,
                                    "-U <upper bound for minimum support>"));
    newVector.addElement(new Option(string5, "M", 1,
                                    "-M <lower bound for minimum support>"));
    newVector.addElement(new Option(string6 + string7, "S", 1,
                                    "-S <significance level>"));
    newVector.addElement(new Option(string8, "I", 0,
                                    "-I"));
    newVector.addElement(new Option("\tRemove columns that contain "
                                    + "all missing values (default = no)",
                                    "R", 0,
                                    "-R"));
    newVector.addElement(new Option("\tReport progress iteratively. (default "
                                    + "= no)", "V", 0,
                                    "-V"));

    return newVector.elements();
  }

  /**
   * Parses a given list of options. Valid options are:<p>
   *
   * -N required number of rules <br>
   * The required number of rules (default: 10). <p>
   *
   * -T type of metric by which to sort rules <br>
   * 0 = confidence | 1 = lift | 2 = leverage | 3 = conviction. <p>
   *
   * -C minimum metric score of a rule <br>
   * The minimum confidence of a rule (default: 0.9). <p>
   *
   * -D delta for minimum support <br>
   * The delta by which the minimum support is decreased in
   * each iteration (default: 0.05). <p>
   *
   * -U upper bound for minimum support <br>
   * The upper bound for minimum support. Don't explicitly look for
   * rules with more than this level of support. <p>
   *
   * -M lower bound for minimum support <br>
   * The lower bound for the minimum support (default = 0.1). <p>
   *
   * -S significance level <br>
   * If used, rules are tested for significance at
   * the given level. Slower (default = no significance testing). <p>
   *
   * -I <br>
   * If set the itemsets found are also output (default = no). <p>
   *
   * -V <br>
   * If set then progress is reported iteratively during execution. <p>
   *
   * -R <br>
   * If set then columns that contain all missing values are removed from
   * the data.
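   * <p>
   *
   * As an illustration (the data set name below is only a placeholder), the
   * command line <br>
   * <code>java weka.associations.Apriori -t some-nominal-data.arff -N 20 -T 1 -C 1.1 -M 0.2</code> <br>
   * asks for the 20 best rules ranked by lift, with a minimum lift of 1.1 and
   * a lower bound of 0.2 on the minimum support.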
   * <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    resetOptions();
    String numRulesString = Utils.getOption('N', options),
      minConfidenceString = Utils.getOption('C', options),
      deltaString = Utils.getOption('D', options),
      maxSupportString = Utils.getOption('U', options),
      minSupportString = Utils.getOption('M', options),
      significanceLevelString = Utils.getOption('S', options);
    String metricTypeString = Utils.getOption('T', options);
    if (metricTypeString.length() != 0) {
      setMetricType(new SelectedTag(Integer.parseInt(metricTypeString),
                                    TAGS_SELECTION));
    }

    if (numRulesString.length() != 0) {
      m_numRules = Integer.parseInt(numRulesString);
    }
    if (minConfidenceString.length() != 0) {
      m_minMetric = (new Double(minConfidenceString)).doubleValue();
    }
    if (deltaString.length() != 0) {
      m_delta = (new Double(deltaString)).doubleValue();
    }
    if (maxSupportString.length() != 0) {
      setUpperBoundMinSupport((new Double(maxSupportString)).doubleValue());
    }
    if (minSupportString.length() != 0) {
      m_lowerBoundMinSupport = (new Double(minSupportString)).doubleValue();
    }
    if (significanceLevelString.length() != 0) {
      m_significanceLevel = (new Double(significanceLevelString)).doubleValue();
    }
    m_outputItemSets = Utils.getFlag('I', options);
    m_verbose = Utils.getFlag('V', options);
    setRemoveAllMissingCols(Utils.getFlag('R', options));
  }

  /**
   * Gets the current settings of the Apriori object.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    String [] options = new String [16];
    int current = 0;

    if (m_outputItemSets) {
      options[current++] = "-I";
    }
    if (getRemoveAllMissingCols()) {
      options[current++] = "-R";
    }
    options[current++] = "-N"; options[current++] = "" + m_numRules;
    options[current++] = "-T"; options[current++] = "" + m_metricType;
    options[current++] = "-C"; options[current++] = "" + m_minMetric;
    options[current++] = "-D"; options[current++] = "" + m_delta;
    options[current++] = "-U"; options[current++] = "" + m_upperBoundMinSupport;
    options[current++] = "-M"; options[current++] = "" + m_lowerBoundMinSupport;
    options[current++] = "-S"; options[current++] = "" + m_significanceLevel;

    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * Outputs the size of all the generated sets of itemsets and the rules.
   */
  public String toString() {

    StringBuffer text = new StringBuffer();

    if (m_Ls.size() <= 1)
      return "\nNo large itemsets and rules found!\n";
    text.append("\nApriori\n=======\n\n");
    text.append("Minimum support: " + Utils.doubleToString(m_minSupport,2)
                + '\n');
    text.append("Minimum metric <");
    switch(m_metricType) {
    case CONFIDENCE:
      text.append("confidence>: ");
      break;
    case LIFT:
      text.append("lift>: ");
      break;
    case LEVERAGE:
      text.append("leverage>: ");
      break;
    case CONVICTION:
      text.append("conviction>: ");
      break;
    }
    text.append(Utils.doubleToString(m_minMetric,2)+'\n');
    if (m_significanceLevel != -1)
      text.append("Significance level: "+
                  Utils.doubleToString(m_significanceLevel,2)+'\n');
    text.append("Number of cycles performed: " + m_cycles+'\n');
    text.append("\nGenerated sets of large itemsets:\n");
    for (int i = 0; i < m_Ls.size(); i++) {
      text.append("\nSize of set of large itemsets L("+(i+1)+"): "+
                  ((FastVector)m_Ls.elementAt(i)).size()+'\n');
      if (m_outputItemSets) {
        text.append("\nLarge Itemsets L("+(i+1)+"):\n");
        for (int j = 0; j < ((FastVector)m_Ls.elementAt(i)).size(); j++)
          text.append(((ItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).
                      toString(m_instances)+"\n");
      }
    }
    text.append("\nBest rules found:\n\n");
    for (int i = 0; i < m_allTheRules[0].size(); i++) {
      text.append(Utils.doubleToString((double)i+1,
                                       (int)(Math.log(m_numRules)/Math.log(10)+1),0)+
                  ". " + ((ItemSet)m_allTheRules[0].elementAt(i)).
                  toString(m_instances)
                  + " ==> "
                  + ((ItemSet)m_allTheRules[1].elementAt(i)).
                  toString(m_instances)
                  + " conf:("+
                  Utils.doubleToString(((Double)m_allTheRules[2].
                                        elementAt(i)).doubleValue(),2)+")");
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
        text.append((m_metricType == LIFT ? " <" : "")+" lift:("+
                    Utils.doubleToString(((Double)m_allTheRules[3].
                                          elementAt(i)).doubleValue(),2)
                    +")"+(m_metricType == LIFT ? ">" : ""));
        text.append((m_metricType == LEVERAGE ? " <" : "")+" lev:("+
                    Utils.doubleToString(((Double)m_allTheRules[4].
                                          elementAt(i)).doubleValue(),2)
                    +")");
        text.append(" ["+
                    (int)(((Double)m_allTheRules[4].elementAt(i))
                          .doubleValue() * (double)m_instances.numInstances())
                    +"]"+(m_metricType == LEVERAGE ? ">" : ""));
        text.append((m_metricType == CONVICTION ? " <" : "")+" conv:("+
                    Utils.doubleToString(((Double)m_allTheRules[5].
                                          elementAt(i)).doubleValue(),2)
                    +")"+(m_metricType == CONVICTION ? ">" : ""));
      }
      text.append('\n');
    }
    return text.toString();
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String removeAllMissingColsTipText() {
    return "Remove columns with all missing values.";
  }

  /**
   * Remove columns containing all missing values.
   * @param r true if cols are to be removed.
   */
  public void setRemoveAllMissingCols(boolean r) {
    m_removeMissingCols = r;
  }

  /**
   * Returns whether columns containing all missing values are to be removed
   * @return true if columns are to be removed.
   */
  public boolean getRemoveAllMissingCols() {
    return m_removeMissingCols;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String upperBoundMinSupportTipText() {
    return "Upper bound for minimum support. Start iteratively decreasing "
      + "minimum support from this value.";
  }

  /**
   * Get the value of upperBoundMinSupport.
   *
   * @return Value of upperBoundMinSupport.
   */
  public double getUpperBoundMinSupport() {
    return m_upperBoundMinSupport;
  }

  /**
   * Set the value of upperBoundMinSupport.
   *
   * @param v Value to assign to upperBoundMinSupport.
   */
  public void setUpperBoundMinSupport(double v) {
    m_upperBoundMinSupport = v;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String lowerBoundMinSupportTipText() {
    return "Lower bound for minimum support.";
  }

  /**
   * Get the value of lowerBoundMinSupport.
   *
   * @return Value of lowerBoundMinSupport.
   */
  public double getLowerBoundMinSupport() {
    return m_lowerBoundMinSupport;
  }

  /**
   * Set the value of lowerBoundMinSupport.
   *
   * @param v Value to assign to lowerBoundMinSupport.
   */
  public void setLowerBoundMinSupport(double v) {
    m_lowerBoundMinSupport = v;
  }

  /**
   * Get the metric type
   *
   * @return the type of metric to use for ranking rules
   */
  public SelectedTag getMetricType() {
    return new SelectedTag(m_metricType, TAGS_SELECTION);
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String metricTypeTipText() {
    return "Set the type of metric by which to rank rules. Confidence is "
      + "the proportion of the examples covered by the premise that are also "
      + "covered by the consequence. Lift is confidence divided by the "
      + "proportion of all examples that are covered by the consequence. This "
      + "is a measure of the importance of the association that is independent "
      + "of support. Leverage is the proportion of additional examples covered "
      + "by both the premise and consequence above those expected if the "
      + "premise and consequence were independent of each other. The total "
      + "number of examples that this represents is presented in brackets "
      + "following the leverage. Conviction is "
      + "another measure of departure from independence. Conviction is given "
      + "by P(premise)P(!consequence)/P(premise, !consequence).";
  }

  /**
   * Set the metric type for ranking rules
   *
   * @param d the type of metric
   */
  public void setMetricType (SelectedTag d) {
    if (d.getTags() == TAGS_SELECTION) {
      m_metricType = d.getSelectedTag().getID();
    }

    if (m_significanceLevel != -1 && m_metricType != CONFIDENCE) {
      m_metricType = CONFIDENCE;
    }

    if (m_metricType == CONFIDENCE) {
      setMinMetric(0.9);
    }

    if (m_metricType == LIFT || m_metricType == CONVICTION) {
      setMinMetric(1.1);
    }

    if (m_metricType == LEVERAGE) {
      setMinMetric(0.1);
    }
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String minMetricTipText() {
    return "Minimum metric score. Consider only rules with scores higher than "
      + "this value.";
  }

  /**
   * Get the value of minMetric.
   *
   * @return Value of minMetric.
   */
  public double getMinMetric() {
    return m_minMetric;
  }

  /**
   * Set the value of minMetric.
   *
   * @param v Value to assign to minMetric.
   */
  public void setMinMetric(double v) {
    m_minMetric = v;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numRulesTipText() {
    return "Number of rules to find.";
  }

  /**
   * Get the value of numRules.
   *
   * @return Value of numRules.
   */
  public int getNumRules() {
    return m_numRules;
  }

  /**
   * Set the value of numRules.
   *
   * @param v Value to assign to numRules.
   */
  public void setNumRules(int v) {
    m_numRules = v;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String deltaTipText() {
    return "Iteratively decrease support by this factor. "
      + "Reduces support until min support is reached or required number "
      + "of rules has been generated.";
  }

  /**
   * Get the value of delta.
   *
   * @return Value of delta.
   */
  public double getDelta() {
    return m_delta;
  }

  /**
   * Set the value of delta.
   *
   * @param v Value to assign to delta.
   */
  public void setDelta(double v) {
    m_delta = v;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String significanceLevelTipText() {
    return "Significance level. Significance test (confidence metric only).";
  }

  /**
   * Get the value of significanceLevel.
   *
   * @return Value of significanceLevel.
   */
  public double getSignificanceLevel() {
    return m_significanceLevel;
  }

  /**
   * Set the value of significanceLevel.
   *
   * @param v Value to assign to significanceLevel.
   */
  public void setSignificanceLevel(double v) {
    m_significanceLevel = v;
  }

  /**
   * Method that finds all large itemsets for the given set of instances.
   *
   * @param instances the instances to be used
   * @exception Exception if an attribute is numeric
   */
  private void findLargeItemSets(Instances instances) throws Exception {

    FastVector kMinusOneSets, kSets;
    Hashtable hashtable;
    int necSupport, necMaxSupport, i = 0;

    m_instances = instances;

    // Find large itemsets

    // minimum support
    necSupport = (int)(m_minSupport * (double)instances.numInstances() + 0.5);
    necMaxSupport = (int)(m_upperBoundMinSupport * (double)instances.numInstances() + 0.5);

    kSets = ItemSet.singletons(instances);
    ItemSet.upDateCounters(kSets, instances);
    kSets = ItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
    if (kSets.size() == 0)
      return;
    do {
      m_Ls.addElement(kSets);
      kMinusOneSets = kSets;
      kSets = ItemSet.mergeAllItemSets(kMinusOneSets, i, instances.numInstances());
      hashtable = ItemSet.getHashtable(kMinusOneSets, kMinusOneSets.size());
      m_hashtables.addElement(hashtable);
      kSets = ItemSet.pruneItemSets(kSets, hashtable);
      ItemSet.upDateCounters(kSets, instances);
      kSets = ItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
      i++;
    } while (kSets.size() > 0);
  }

  /**
   * Method that finds all association rules and performs significance test.
   *
   * @exception Exception if an attribute is numeric
   */
  private void findRulesBruteForce() throws Exception {

    FastVector[] rules;

    // Build rules
    for (int j = 1; j < m_Ls.size(); j++) {
      FastVector currentItemSets = (FastVector)m_Ls.elementAt(j);
      Enumeration enumItemSets = currentItemSets.elements();
      while (enumItemSets.hasMoreElements()) {
        ItemSet currentItemSet = (ItemSet)enumItemSets.nextElement();
        rules = currentItemSet.
          generateRulesBruteForce(m_minMetric, m_metricType,
                                  m_hashtables, j+1,
                                  m_instances.numInstances(),
                                  m_significanceLevel);
        for (int k = 0; k < rules[0].size(); k++) {
          m_allTheRules[0].addElement(rules[0].elementAt(k));
          m_allTheRules[1].addElement(rules[1].elementAt(k));
          m_allTheRules[2].addElement(rules[2].elementAt(k));
          m_allTheRules[3].addElement(rules[3].elementAt(k));
          m_allTheRules[4].addElement(rules[4].elementAt(k));
          m_allTheRules[5].addElement(rules[5].elementAt(k));
        }
      }
    }
  }

  /**
   * Method that finds all association rules.
   *
   * @exception Exception if an attribute is numeric
   */
  private void findRulesQuickly() throws Exception {

    FastVector[] rules;

    // Build rules
    for (int j = 1; j < m_Ls.size(); j++) {
      FastVector currentItemSets = (FastVector)m_Ls.elementAt(j);
      Enumeration enumItemSets = currentItemSets.elements();
      while (enumItemSets.hasMoreElements()) {
        ItemSet currentItemSet = (ItemSet)enumItemSets.nextElement();
        rules = currentItemSet.generateRules(m_minMetric, m_hashtables, j + 1);
        for (int k = 0; k < rules[0].size(); k++) {
          m_allTheRules[0].addElement(rules[0].elementAt(k));
          m_allTheRules[1].addElement(rules[1].elementAt(k));
          m_allTheRules[2].addElement(rules[2].elementAt(k));
        }
      }
    }
  }

  /**
   * Main method for testing this class.
   */
  public static void main(String[] options) {

    String trainFileString;
    StringBuffer text = new StringBuffer();
    Apriori apriori = new Apriori();
    Reader reader;

    try {
      text.append("\n\nApriori options:\n\n");
      text.append("-t <training file>\n");
      text.append("\tThe name of the training file.\n");
      // 'enum' is a reserved word from Java 5 onwards, so the enumeration
      // variable is named 'enu' here.
      Enumeration enu = apriori.listOptions();
      while (enu.hasMoreElements()) {
        Option option = (Option)enu.nextElement();
        text.append(option.synopsis()+'\n');
        text.append(option.description()+'\n');
      }
      trainFileString = Utils.getOption('t', options);
      if (trainFileString.length() == 0)
        throw new Exception("No training file given!");
      apriori.setOptions(options);
      reader = new BufferedReader(new FileReader(trainFileString));
      apriori.buildAssociations(new Instances(reader));
      System.out.println(apriori);
    } catch(Exception e) {
      e.printStackTrace();
      System.out.println("\n"+e.getMessage()+text);
    }
  }
}
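
/*
 * A minimal sketch of programmatic use, mirroring what main() does above.
 * The ARFF file name is only a placeholder; any nominal data set will do.
 *
 *   Reader reader = new BufferedReader(new FileReader("some-nominal-data.arff"));
 *   Apriori apriori = new Apriori();
 *   apriori.setNumRules(20);                                           // -N 20
 *   apriori.setMetricType(new SelectedTag(1, Apriori.TAGS_SELECTION)); // rank by lift (-T 1)
 *   apriori.setMinMetric(1.1);                                         // -C 1.1
 *   apriori.buildAssociations(new Instances(reader));
 *   System.out.println(apriori);
 */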