/* * DecisionStump.java * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package tr.gov.ulakbim.jDenetX.classifiers; import tr.gov.ulakbim.jDenetX.classifiers.attributes.AttributeClassObserver; import tr.gov.ulakbim.jDenetX.classifiers.attributes.AttributeSplitSuggestion; import tr.gov.ulakbim.jDenetX.classifiers.attributes.GaussianNumericAttributeClassObserver; import tr.gov.ulakbim.jDenetX.classifiers.attributes.NominalAttributeClassObserver; import tr.gov.ulakbim.jDenetX.classifiers.splits.SplitCriterion; import tr.gov.ulakbim.jDenetX.core.AutoExpandVector; import tr.gov.ulakbim.jDenetX.core.DoubleVector; import tr.gov.ulakbim.jDenetX.core.Measurement; import tr.gov.ulakbim.jDenetX.options.ClassOption; import tr.gov.ulakbim.jDenetX.options.FlagOption; import tr.gov.ulakbim.jDenetX.options.IntOption; import weka.core.Instance; public class DecisionStump extends AbstractClassifier { private static final long serialVersionUID = 1L; public IntOption gracePeriodOption = new IntOption("gracePeriod", 'g', "The number of instances to observe between model changes.", 1000, 0, Integer.MAX_VALUE); public FlagOption binarySplitsOption = new FlagOption("binarySplits", 'b', "Only allow binary splits."); public ClassOption splitCriterionOption = new ClassOption("splitCriterion", 'c', "Split criterion to use.", SplitCriterion.class, "InfoGainSplitCriterion"); protected AttributeSplitSuggestion bestSplit; protected DoubleVector observedClassDistribution; protected AutoExpandVector<AttributeClassObserver> attributeObservers; protected double weightSeenAtLastSplit; @Override public void resetLearningImpl() { this.bestSplit = null; this.observedClassDistribution = new DoubleVector(); this.attributeObservers = new AutoExpandVector<AttributeClassObserver>(); this.weightSeenAtLastSplit = 0.0; } @Override protected Measurement[] getModelMeasurementsImpl() { Measurement[] measurement = null; return measurement; } @Override public void getModelDescription(StringBuilder out, int indent) { // TODO Auto-generated method stub } @Override public void trainOnInstanceImpl(Instance inst) { this.observedClassDistribution.addToValue((int) inst.classValue(), inst .weight()); for (int i = 0; i < inst.numAttributes() - 1; i++) { int instAttIndex = modelAttIndexToInstanceAttIndex(i, inst); AttributeClassObserver obs = this.attributeObservers.get(i); if (obs == null) { obs = inst.attribute(instAttIndex).isNominal() ? newNominalClassObserver() : newNumericClassObserver(); this.attributeObservers.set(i, obs); } obs.observeAttributeClass(inst.value(instAttIndex), (int) inst .classValue(), inst.weight()); } if (this.trainingWeightSeenByModel - this.weightSeenAtLastSplit >= this.gracePeriodOption .getValue()) { this.bestSplit = findBestSplit((SplitCriterion) getPreparedClassOption(this.splitCriterionOption)); this.weightSeenAtLastSplit = this.trainingWeightSeenByModel; } } public double[] getVotesForInstance(Instance inst) { if (this.bestSplit != null) { int branch = this.bestSplit.splitTest.branchForInstance(inst); if (branch >= 0) { return this.bestSplit .resultingClassDistributionFromSplit(branch); } } return this.observedClassDistribution.getArrayCopy(); } public boolean isRandomizable() { return false; } protected AttributeClassObserver newNominalClassObserver() { return new NominalAttributeClassObserver(); } protected AttributeClassObserver newNumericClassObserver() { return new GaussianNumericAttributeClassObserver(); } protected AttributeSplitSuggestion findBestSplit(SplitCriterion criterion) { AttributeSplitSuggestion bestFound = null; double bestMerit = Double.NEGATIVE_INFINITY; double[] preSplitDist = this.observedClassDistribution.getArrayCopy(); for (int i = 0; i < this.attributeObservers.size(); i++) { AttributeClassObserver obs = this.attributeObservers.get(i); if (obs != null) { AttributeSplitSuggestion suggestion = obs .getBestEvaluatedSplitSuggestion(criterion, preSplitDist, i, this.binarySplitsOption .isSet()); if (suggestion.merit > bestMerit) { bestMerit = suggestion.merit; bestFound = suggestion; } } } return bestFound; } }