/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    HoeffdingTree.java
 *    Copyright (C) 2013 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.trees;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.UpdateableClassifier;
import weka.classifiers.trees.ht.ActiveHNode;
import weka.classifiers.trees.ht.GiniSplitMetric;
import weka.classifiers.trees.ht.HNode;
import weka.classifiers.trees.ht.InactiveHNode;
import weka.classifiers.trees.ht.InfoGainSplitMetric;
import weka.classifiers.trees.ht.LeafNode;
import weka.classifiers.trees.ht.LearningNode;
import weka.classifiers.trees.ht.NBNode;
import weka.classifiers.trees.ht.NBNodeAdaptive;
import weka.classifiers.trees.ht.SplitCandidate;
import weka.classifiers.trees.ht.SplitMetric;
import weka.classifiers.trees.ht.SplitNode;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.CapabilitiesHandler;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

/**
 <!-- globalinfo-start -->
 * A Hoeffding tree (VFDT) is an incremental, anytime
 * decision tree induction algorithm that is capable of learning from massive
 * data streams, assuming that the distribution generating examples does not
 * change over time. Hoeffding trees exploit the fact that a small sample can
 * often be enough to choose an optimal splitting attribute. This idea is
 * supported mathematically by the Hoeffding bound, which quantifies the number
 * of observations (in our case, examples) needed to estimate some statistics
 * within a prescribed precision (in our case, the goodness of an attribute).<br/>
 * <br/>
 * A theoretically appealing feature of Hoeffding Trees not shared by
 * otherincremental decision tree learners is that it has sound guarantees of
 * performance. Using the Hoeffding bound one can show that its output is
 * asymptotically nearly identical to that of a non-incremental learner using
 * infinitely many examples. For more information see: <br/>
 * <br/>
 * Geoff Hulten, Laurie Spencer, Pedro Domingos: Mining time-changing data
 * streams. In: ACM SIGKDD Intl. Conf. on Knowledge Discovery and Data Mining,
 * 97-106, 2001.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 *
 * <pre>
 * &#64;inproceedings{Hulten2001,
 *    author = {Geoff Hulten and Laurie Spencer and Pedro Domingos},
 *    booktitle = {ACM SIGKDD Intl. Conf. on Knowledge Discovery and Data Mining},
 *    pages = {97-106},
 *    publisher = {ACM Press},
 *    title = {Mining time-changing data streams},
 *    year = {2001}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are:
 * <p/>
 *
 * <pre>
 * -L
 *  The leaf prediction strategy to use. 0 = majority class, 1 = naive Bayes, 2 = naive Bayes adaptive.
 *  (default = 2)
 * </pre>
 *
 * <pre>
 * -S
 *  The splitting criterion to use. 0 = Gini, 1 = Info gain
 *  (default = 1)
 * </pre>
 *
 * <pre>
 * -E
 *  The allowable error in a split decision - values closer to zero will take longer to decide
 *  (default = 1e-7)
 * </pre>
 *
 * <pre>
 * -H
 *  Threshold below which a split will be forced to break ties
 *  (default = 0.05)
 * </pre>
 *
 * <pre>
 * -M
 *  Minimum fraction of weight required down at least two branches for info gain splitting
 *  (default = 0.01)
 * </pre>
 *
 * <pre>
 * -G
 *  Grace period - the number of instances a leaf should observe between split attempts
 *  (default = 200)
 * </pre>
 *
 * <pre>
 * -N
 *  The number of instances (weight) a leaf should observe before allowing naive Bayes to make predictions (NB or NB adaptive only)
 *  (default = 0)
 * </pre>
 *
 * <pre>
 * -P
 *  Print leaf models when using naive Bayes at the leaves.
 * </pre>
 <!-- options-end -->
 *
 * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 * @version $Revision: 9766 $
 */
public class HoeffdingTree extends AbstractClassifier implements
  UpdateableClassifier, WeightedInstancesHandler, OptionHandler,
  CapabilitiesHandler, RevisionHandler, TechnicalInformationHandler,
  Drawable, Serializable {

  /**
   * For serialization
   */
  private static final long serialVersionUID = 7117521775722396251L;

  /** ID of the Gini split criterion */
  public static final int GINI_SPLIT = 0;

  /** ID of the info gain split criterion */
  public static final int INFO_GAIN_SPLIT = 1;

  /** Tags for selecting the split criterion */
  public static final Tag[] TAGS_SELECTION = {
    new Tag(GINI_SPLIT, "Gini split"),
    new Tag(INFO_GAIN_SPLIT, "Info gain split") };

  /** ID of the majority class leaf prediction strategy */
  public static final int LEAF_MAJ_CLASS = 0;

  /** ID of the naive Bayes leaf prediction strategy */
  public static final int LEAF_NB = 1;

  /** ID of the naive Bayes adaptive leaf prediction strategy */
  public static final int LEAF_NB_ADAPTIVE = 2;

  /** Tags for selecting the leaf prediction strategy */
  public static final Tag[] TAGS_SELECTION2 = {
    new Tag(LEAF_MAJ_CLASS, "Majority class"),
    new Tag(LEAF_NB, "Naive Bayes"),
    new Tag(LEAF_NB_ADAPTIVE, "Naive Bayes adaptive") };

  /** Header (structure only, no instances) of the training data */
  protected Instances m_header;

  /** Root of the tree; null until the first training instance is seen */
  protected HNode m_root;

  /** The number of instances a leaf should observe between split attempts */
  protected double m_gracePeriod = 200;

  /**
   * The allowable error in a split decision. Values closer to zero will take
   * longer to decide
   */
  protected double m_splitConfidence = 0.0000001;

  /** Threshold below which a split will be forced to break ties */
  protected double m_hoeffdingTieThreshold = 0.05;

  /**
   * The minimum fraction of weight required down at least two branches for info
   * gain splitting
   */
  protected double m_minFracWeightForTwoBranchesGain = 0.01;

  /** The splitting metric to use */
  protected int m_selectedSplitMetric = INFO_GAIN_SPLIT;

  /**
   * The split metric instance; re-created from the selected criterion each
   * time buildClassifier() is called
   */
  protected SplitMetric m_splitMetric = new InfoGainSplitMetric(
    m_minFracWeightForTwoBranchesGain);

  /** The leaf prediction strategy to use */
  protected int m_leafStrategy = LEAF_NB_ADAPTIVE;

  /**
   * The number of instances (total weight) a leaf should observe before
   * allowing naive Bayes to make predictions
   */
  protected double m_nbThreshold = 0;

  /** Number of active (growable) leaves currently in the tree */
  protected int m_activeLeafCount;

  /** Number of inactive (deactivated) leaves currently in the tree */
  protected int m_inactiveLeafCount;

  /** Number of decision (split) nodes currently in the tree */
  protected int m_decisionNodeCount;

  /**
   * Print out leaf models in the case of naive Bayes or naive Bayes adaptive
   * leaves
   */
  protected boolean m_printLeafModels;

  /**
   * Returns a string describing classifier
   *
   * @return a description suitable for displaying in the explorer/experimenter
   *         gui
   */
  public String globalInfo() {
    return "A Hoeffding tree (VFDT) is an incremental, anytime decision tree induction algorithm"
      + " that is capable of learning from massive data streams, assuming that the"
      + " distribution generating examples does not change over time. Hoeffding trees"
      + " exploit the fact that a small sample can often be enough to choose an optimal"
      + " splitting attribute. This idea is supported mathematically by the Hoeffding"
      + " bound, which quantifies the number of observations (in our case, examples)"
      + " needed to estimate some statistics within a prescribed precision (in our"
      + " case, the goodness of an attribute).\n\nA theoretically appealing feature "
      + " of Hoeffding Trees not shared by otherincremental decision tree learners is that "
      + " it has sound guarantees of performance. Using the Hoeffding bound one can show that "
      + " its output is asymptotically nearly identical to that of a non-incremental learner "
      + " using infinitely many examples. For more information see: \n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing detailed
   * information about the technical background of this class, e.g., paper
   * reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  @Override
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result = new TechnicalInformation(Type.INPROCEEDINGS);

    result.setValue(Field.AUTHOR,
      "Geoff Hulten and Laurie Spencer and Pedro Domingos");
    result.setValue(Field.TITLE, "Mining time-changing data streams");
    result.setValue(Field.BOOKTITLE,
      "ACM SIGKDD Intl. Conf. on Knowledge Discovery and Data Mining");
    result.setValue(Field.YEAR, "2001");
    result.setValue(Field.PAGES, "97-106");
    result.setValue(Field.PUBLISHER, "ACM Press");

    return result;
  }

  /**
   * Discards the current tree and zeroes the node counters.
   */
  protected void reset() {
    m_root = null;

    m_activeLeafCount = 0;
    m_inactiveLeafCount = 0;
    m_decisionNodeCount = 0;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return the capabilities of this classifier
   */
  @Override
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    // an incremental learner may be initialised with no instances at all
    result.setMinimumNumberInstances(0);

    return result;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration<Option> listOptions() {
    Vector<Option> options = new Vector<Option>();

    options.add(new Option("\tThe leaf prediction strategy to use. 0 = "
      + "majority class, 1 = naive Bayes, 2 = naive Bayes adaptive.\n\t"
      + "(default = 2)", "L", 1, "-L"));
    options.add(new Option("\tThe splitting criterion to use. 0 = "
      + "Gini, 1 = Info gain\n\t" + "(default = 1)", "S", 1, "-S"));
    options.add(new Option("\tThe allowable error in a split decision "
      + "- values closer to zero will take longer to decide\n\t"
      + "(default = 1e-7)", "E", 1, "-E"));
    options.add(new Option(
      "\tThreshold below which a split will be forced to "
        + "break ties\n\t(default = 0.05)", "H", 1, "-H"));
    options.add(new Option(
      "\tMinimum fraction of weight required down at least two "
        + "branches for info gain splitting\n\t(default = 0.01)", "M", 1,
      "-M"));
    options.add(new Option("\tGrace period - the number of instances "
      + "a leaf should observe between split attempts\n\t"
      + "(default = 200)", "G", 1, "-G"));
    options.add(new Option("\tThe number of instances (weight) a leaf "
      + "should observe before allowing naive Bayes to make "
      + "predictions (NB or NB adaptive only)\n\t(default = 0)", "N", 1,
      "-N"));
    options.add(new Option("\tPrint leaf models when using naive Bayes "
      + "at the leaves.", "P", 0, "-P"));

    return options.elements();
  }

  /**
   * Parses a given list of options. Any existing tree is discarded first.
   * <p/>
   *
   <!-- options-start -->
   * Valid options are:
   * <p/>
   *
   * <pre>
   * -L
   *  The leaf prediction strategy to use. 0 = majority class, 1 = naive Bayes, 2 = naive Bayes adaptive.
   *  (default = 2)
   * </pre>
   *
   * <pre>
   * -S
   *  The splitting criterion to use. 0 = Gini, 1 = Info gain
   *  (default = 1)
   * </pre>
   *
   * <pre>
   * -E
   *  The allowable error in a split decision - values closer to zero will take longer to decide
   *  (default = 1e-7)
   * </pre>
   *
   * <pre>
   * -H
   *  Threshold below which a split will be forced to break ties
   *  (default = 0.05)
   * </pre>
   *
   * <pre>
   * -M
   *  Minimum fraction of weight required down at least two branches for info gain splitting
   *  (default = 0.01)
   * </pre>
   *
   * <pre>
   * -G
   *  Grace period - the number of instances a leaf should observe between split attempts
   *  (default = 200)
   * </pre>
   *
   * <pre>
   * -N
   *  The number of instances (weight) a leaf should observe before allowing naive Bayes to make predictions (NB or NB adaptive only)
   *  (default = 0)
   * </pre>
   *
   * <pre>
   * -P
   *  Print leaf models when using naive Bayes at the leaves.
   * </pre>
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {
    reset();

    super.setOptions(options);

    String opt = Utils.getOption('L', options);
    if (opt.length() > 0) {
      setLeafPredictionStrategy(new SelectedTag(Integer.parseInt(opt),
        TAGS_SELECTION2));
    }

    opt = Utils.getOption('S', options);
    if (opt.length() > 0) {
      setSplitCriterion(new SelectedTag(Integer.parseInt(opt), TAGS_SELECTION));
    }

    opt = Utils.getOption('E', options);
    if (opt.length() > 0) {
      setSplitConfidence(Double.parseDouble(opt));
    }

    opt = Utils.getOption('H', options);
    if (opt.length() > 0) {
      setHoeffdingTieThreshold(Double.parseDouble(opt));
    }

    opt = Utils.getOption('M', options);
    if (opt.length() > 0) {
      setMinimumFractionOfWeightInfoGain(Double.parseDouble(opt));
    }

    opt = Utils.getOption('G', options);
    if (opt.length() > 0) {
      setGracePeriod(Double.parseDouble(opt));
    }

    opt = Utils.getOption('N', options);
    if (opt.length() > 0) {
      setNaiveBayesPredictionThreshold(Double.parseDouble(opt));
    }

    m_printLeafModels = Utils.getFlag('P', options);
  }

  /**
   * Gets the current settings of the Classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  @Override
  public String[] getOptions() {
    List<String> options = new ArrayList<String>();

    options.add("-L");
    options.add("" + getLeafPredictionStrategy().getSelectedTag().getID());

    options.add("-S");
    options.add("" + getSplitCriterion().getSelectedTag().getID());

    options.add("-E");
    options.add("" + getSplitConfidence());

    options.add("-H");
    options.add("" + getHoeffdingTieThreshold());

    options.add("-M");
    options.add("" + getMinimumFractionOfWeightInfoGain());

    options.add("-G");
    options.add("" + getGracePeriod());

    options.add("-N");
    options.add("" + getNaiveBayesPredictionThreshold());

    if (m_printLeafModels) {
      options.add("-P");
    }

    return options.toArray(new String[0]);
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String printLeafModelsTipText() {
    return "Print leaf models (naive bayes leaves only)";
  }

  /**
   * Set whether leaf models are printed by toString().
   *
   * @param p true if leaf models should be printed
   */
  public void setPrintLeafModels(boolean p) {
    m_printLeafModels = p;
  }

  /**
   * Get whether leaf models are printed by toString().
   *
   * @return true if leaf models are printed
   */
  public boolean getPrintLeafModels() {
    return m_printLeafModels;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String minimumFractionOfWeightInfoGainTipText() {
    return "Minimum fraction of weight required down at least two branches "
      + "for info gain splitting.";
  }

  /**
   * Set the minimum fraction of weight required down at least two branches for
   * info gain splitting
   *
   * @param m the minimum fraction of weight
   */
  public void setMinimumFractionOfWeightInfoGain(double m) {
    m_minFracWeightForTwoBranchesGain = m;
  }

  /**
   * Get the minimum fraction of weight required down at least two branches for
   * info gain splitting
   *
   * @return the minimum fraction of weight
   */
  public double getMinimumFractionOfWeightInfoGain() {
    return m_minFracWeightForTwoBranchesGain;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String gracePeriodTipText() {
    return "Number of instances (or total weight of instances) a leaf "
      + "should observe between split attempts.";
  }

  /**
   * Set the number of instances (or total weight of instances) a leaf should
   * observe between split attempts
   *
   * @param grace the grace period
   */
  public void setGracePeriod(double grace) {
    m_gracePeriod = grace;
  }

  /**
   * Get the number of instances (or total weight of instances) a leaf should
   * observe between split attempts
   *
   * @return the grace period
   */
  public double getGracePeriod() {
    return m_gracePeriod;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String hoeffdingTieThresholdTipText() {
    return "Theshold below which a split will be forced to break ties.";
  }

  /**
   * Set the threshold below which a split will be forced to break ties
   *
   * @param ht the threshold
   */
  public void setHoeffdingTieThreshold(double ht) {
    m_hoeffdingTieThreshold = ht;
  }

  /**
   * Get the threshold below which a split will be forced to break ties
   *
   * @return the threshold
   */
  public double getHoeffdingTieThreshold() {
    return m_hoeffdingTieThreshold;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String splitConfidenceTipText() {
    return "The allowable error in a split decision. Values closer to zero "
      + "will take longer to decide.";
  }

  /**
   * Set the allowable error in a split decision. Values closer to zero will
   * take longer to decide.
   *
   * @param sc the split confidence
   */
  public void setSplitConfidence(double sc) {
    m_splitConfidence = sc;
  }

  /**
   * Get the allowable error in a split decision. Values closer to zero will
   * take longer to decide.
   *
   * @return the split confidence
   */
  public double getSplitConfidence() {
    return m_splitConfidence;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String splitCriterionTipText() {
    return "The splitting criterion to use";
  }

  /**
   * Set the split criterion to use (either Gini or info gain). Tags that do
   * not come from {@link #TAGS_SELECTION} are ignored.
   *
   * @param crit the criterion to use
   */
  public void setSplitCriterion(SelectedTag crit) {
    if (crit.getTags() == TAGS_SELECTION) {
      m_selectedSplitMetric = crit.getSelectedTag().getID();
    }
  }

  /**
   * Get the split criterion to use (either Gini or info gain).
   *
   * @return the criterion to use
   */
  public SelectedTag getSplitCriterion() {
    return new SelectedTag(m_selectedSplitMetric, TAGS_SELECTION);
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String leafPredictionStrategyTipText() {
    return "The leaf prediction strategy to use";
  }

  /**
   * Set the leaf prediction strategy to use (majority class, naive Bayes or
   * naive Bayes adaptive). Tags that do not come from
   * {@link #TAGS_SELECTION2} are ignored.
   *
   * @param strat the strategy to use
   */
  public void setLeafPredictionStrategy(SelectedTag strat) {
    if (strat.getTags() == TAGS_SELECTION2) {
      m_leafStrategy = strat.getSelectedTag().getID();
    }
  }

  /**
   * Get the leaf prediction strategy to use (majority class, naive Bayes or
   * naive Bayes adaptive)
   *
   * @return the strategy to use
   */
  public SelectedTag getLeafPredictionStrategy() {
    return new SelectedTag(m_leafStrategy, TAGS_SELECTION2);
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String naiveBayesPredictionThresholdTipText() {
    return "The number of instances (weight) a leaf should observe "
      + "before allowing naive Bayes (adaptive) to make predictions";
  }

  /**
   * Set the number of instances (weight) a leaf should observe before allowing
   * naive Bayes to make predictions
   *
   * @param n the number/weight of instances
   */
  public void setNaiveBayesPredictionThreshold(double n) {
    m_nbThreshold = n;
  }

  /**
   * Get the number of instances (weight) a leaf should observe before allowing
   * naive Bayes to make predictions
   *
   * @return the number/weight of instances
   */
  public double getNaiveBayesPredictionThreshold() {
    return m_nbThreshold;
  }

  /**
   * Computes the Hoeffding bound sqrt(max^2 * ln(1 / confidence) / (2 *
   * weight)).
   *
   * @param max the range of the split metric
   * @param confidence the allowable error in a split decision
   * @param weight the total weight observed
   * @return the Hoeffding bound
   */
  protected static double computeHoeffdingBound(double max, double confidence,
    double weight) {
    return Math.sqrt(((max * max) * Math.log(1.0 / confidence))
      / (2.0 * weight));
  }

  /**
   * Builds the classifier. Any existing tree is discarded and a new one is
   * grown by feeding each instance of the data to
   * {@link #updateClassifier(Instance)}.
   *
   * @param data the data to train with
   * @throws Exception if classifier can't be built successfully
   */
  @Override
  public void buildClassifier(Instances data) throws Exception {
    // can classifier handle the data? Check before touching any state so
    // that unsupported data is rejected up front rather than after training
    getCapabilities().testWithFail(data);

    reset();

    m_header = new Instances(data, 0);
    if (m_selectedSplitMetric == GINI_SPLIT) {
      m_splitMetric = new GiniSplitMetric();
    } else {
      m_splitMetric = new InfoGainSplitMetric(m_minFracWeightForTwoBranchesGain);
    }

    // work on a copy so that the caller's data is not modified
    data = new Instances(data);
    data.deleteWithMissingClass();
    for (int i = 0; i < data.numInstances(); i++) {
      updateClassifier(data.instance(i));
    }
  }

  /**
   * Updates the classifier with the given instance. Instances with a missing
   * class value are ignored.
   *
   * @param inst the new training instance to include in the model
   * @throws Exception if the instance could not be incorporated in the model.
   */
  @Override
  public void updateClassifier(Instance inst) throws Exception {
    if (inst.classIsMissing()) {
      return;
    }

    if (m_root == null) {
      m_root = newLearningNode();
      // the root starts out as the single active leaf
      m_activeLeafCount = 1;
    }

    LeafNode l = m_root.leafForInstance(inst, null, null);
    HNode actualNode = l.m_theNode;
    if (actualNode == null) {
      // no child exists yet down this branch: create one that honours the
      // configured leaf strategy, as every other leaf in the tree does
      actualNode = newLearningNode();
      l.m_parentNode.setChild(l.m_parentBranch, actualNode);
      m_activeLeafCount++;
    }

    if (actualNode instanceof LearningNode) {
      actualNode.updateNode(inst);

      if (actualNode instanceof ActiveHNode) {
        ActiveHNode active = (ActiveHNode) actualNode;
        double totalWeight = active.totalWeight();
        if (totalWeight - active.m_weightSeenAtLastSplitEval > m_gracePeriod) {
          // try a split
          trySplit(active, l.m_parentNode, l.m_parentBranch);
          active.m_weightSeenAtLastSplitEval = totalWeight;
        }
      }
    }
  }

  /**
   * Returns class probabilities for an instance. If no tree has been grown
   * yet, all class values are reported as equally likely.
   *
   * @param inst the instance to compute the distribution for
   * @return the class probabilities
   * @throws Exception if distribution can't be computed successfully
   */
  @Override
  public double[] distributionForInstance(Instance inst) throws Exception {
    Attribute classAtt = inst.classAttribute();
    double[] pred = new double[classAtt.numValues()];

    if (m_root != null) {
      LeafNode l = m_root.leafForInstance(inst, null, null);
      HNode actualNode = l.m_theNode;

      if (actualNode == null) {
        // no leaf down this branch - fall back to the parent split node
        actualNode = l.m_parentNode;
      }

      pred = actualNode.getDistribution(inst, classAtt);
    } else {
      // all class values equally likely
      for (int i = 0; i < classAtt.numValues(); i++) {
        pred[i] = 1;
      }
      Utils.normalize(pred);
    }

    return pred;
  }

  /**
   * Deactivate (prevent growth) from the supplied node
   *
   * @param toDeactivate the node to deactivate
   * @param parent the node's parent
   * @param parentBranch the branch leading to the node
   */
  protected void deactivateNode(ActiveHNode toDeactivate, SplitNode parent,
    String parentBranch) {
    HNode leaf = new InactiveHNode(toDeactivate.m_classDistribution);

    if (parent == null) {
      m_root = leaf;
    } else {
      parent.setChild(parentBranch, leaf);
    }

    m_activeLeafCount--;
    m_inactiveLeafCount++;
  }

  /**
   * Activate (allow growth) the supplied node
   *
   * @param toActivate the node to activate
   * @param parent the node's parent
   * @param parentBranch the branch leading to the node
   */
  protected void activateNode(InactiveHNode toActivate, SplitNode parent,
    String parentBranch) {
    HNode leaf = new ActiveHNode();
    leaf.m_classDistribution = toActivate.m_classDistribution;

    if (parent == null) {
      m_root = leaf;
    } else {
      parent.setChild(parentBranch, leaf);
    }

    m_activeLeafCount++;
    m_inactiveLeafCount--;
  }

  /**
   * Try a split from the supplied node. The node is split on the best
   * candidate when that candidate beats the runner-up by more than the
   * Hoeffding bound, or when the bound has dropped below the tie threshold.
   * If the best candidate has no split test, the node is deactivated instead.
   *
   * @param node the node to split
   * @param parent the parent of the node
   * @param parentBranch the branch leading to the node
   * @throws Exception if a problem occurs
   */
  protected void trySplit(ActiveHNode node, SplitNode parent,
    String parentBranch) throws Exception {

    // non-pure?
    if (node.numEntriesInClassDistribution() <= 1) {
      return;
    }

    // sorted ascending by merit, so the best candidate is last
    List<SplitCandidate> bestSplits = node.getPossibleSplits(m_splitMetric);
    Collections.sort(bestSplits);

    boolean doSplit = false;
    if (bestSplits.size() < 2) {
      doSplit = bestSplits.size() > 0;
    } else {
      // compute the Hoeffding bound
      double metricMax = m_splitMetric.getMetricRange(node.m_classDistribution);
      double hoeffdingBound = computeHoeffdingBound(metricMax,
        m_splitConfidence, node.totalWeight());

      SplitCandidate best = bestSplits.get(bestSplits.size() - 1);
      SplitCandidate secondBest = bestSplits.get(bestSplits.size() - 2);

      if (best.m_splitMerit - secondBest.m_splitMerit > hoeffdingBound
        || hoeffdingBound < m_hoeffdingTieThreshold) {
        doSplit = true;
      }

      // TODO - remove poor attributes stuff?
    }

    if (!doSplit) {
      return;
    }

    SplitCandidate best = bestSplits.get(bestSplits.size() - 1);

    if (best.m_splitTest == null) {
      // preprune
      deactivateNode(node, parent, parentBranch);
      return;
    }

    SplitNode newSplit = new SplitNode(node.m_classDistribution,
      best.m_splitTest);

    // the split attribute is the same for every branch
    Attribute splitAtt = m_header.attribute(best.m_splitTest.splitAttributes()
      .get(0));

    for (int i = 0; i < best.numSplits(); i++) {
      ActiveHNode newChild = newLearningNode();
      newChild.m_classDistribution = best.m_postSplitClassDistributions.get(i);
      newChild.m_weightSeenAtLastSplitEval = newChild.totalWeight();

      String branchName;
      if (splitAtt.isNumeric()) {
        branchName = i == 0 ? "left" : "right";
      } else {
        branchName = splitAtt.value(i);
      }
      newSplit.setChild(branchName, newChild);
    }

    // the split leaf becomes a decision node with numSplits() new leaves
    m_activeLeafCount--;
    m_decisionNodeCount++;
    m_activeLeafCount += best.numSplits();

    if (parent == null) {
      m_root = newSplit;
    } else {
      parent.setChild(parentBranch, newSplit);
    }
  }

  /**
   * Create a new learning node (either majority class, naive Bayes or naive
   * Bayes adaptive)
   *
   * @return a new learning node
   * @throws Exception if a problem occurs
   */
  protected ActiveHNode newLearningNode() throws Exception {
    if (m_leafStrategy == LEAF_MAJ_CLASS) {
      return new ActiveHNode();
    }

    if (m_leafStrategy == LEAF_NB) {
      return new NBNode(m_header, m_nbThreshold);
    }

    return new NBNodeAdaptive(m_header, m_nbThreshold);
  }

  /**
   * Return a textual description of the model
   *
   * @return a String describing the model
   */
  @Override
  public String toString() {
    if (m_root == null) {
      return "No model built yet!";
    }

    return m_root.toString(m_printLeafModels);
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 9766 $");
  }

  /**
   * Main method for running this classifier from the command line.
   *
   * @param args the command line options
   */
  public static void main(String[] args) {
    runClassifier(new HoeffdingTree(), args);
  }

  /**
   * Returns the type of graph this classifier produces.
   *
   * @return Drawable.TREE
   */
  @Override
  public int graphType() {
    return Drawable.TREE;
  }

  /**
   * Returns the tree as a graph description that starts with
   * "digraph HoeffdingTree {".
   *
   * @return the graph description
   * @throws Exception if no model has been built yet
   */
  @Override
  public String graph() throws Exception {
    if (m_root == null) {
      throw new Exception("No model built yet!");
    }

    m_root.installNodeNums(0);

    // HNode.graphTree() is called with a StringBuffer, so that type is kept
    StringBuffer buff = new StringBuffer();
    buff.append("digraph HoeffdingTree {\n");
    m_root.graphTree(buff);
    buff.append("}\n");

    return buff.toString();
  }
}