package org.apache.samoa.moa.classifiers.core.attributeclassobservers; /* * #%L * SAMOA * %% * Copyright (C) 2014 - 2015 Apache Software Foundation * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import org.apache.samoa.moa.classifiers.core.AttributeSplitSuggestion; import org.apache.samoa.moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest; import org.apache.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; import org.apache.samoa.moa.core.AutoExpandVector; import org.apache.samoa.moa.core.DoubleVector; import org.apache.samoa.moa.core.GreenwaldKhannaQuantileSummary; import org.apache.samoa.moa.core.ObjectRepository; import org.apache.samoa.moa.core.Utils; import org.apache.samoa.moa.options.AbstractOptionHandler; import org.apache.samoa.moa.tasks.TaskMonitor; import com.github.javacliparser.IntOption; /** * Class for observing the class data distribution for a numeric attribute using Greenwald and Khanna methodology. This * observer monitors the class distribution of a given attribute. Used in naive Bayes and decision trees to monitor data * statistics on leaves. * * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) * @version $Revision: 7 $ */ public class GreenwaldKhannaNumericAttributeClassObserver extends AbstractOptionHandler implements NumericAttributeClassObserver { private static final long serialVersionUID = 1L; protected AutoExpandVector<GreenwaldKhannaQuantileSummary> attValDistPerClass = new AutoExpandVector<>(); public IntOption numTuplesOption = new IntOption("numTuples", 'n', "The number of tuples.", 10, 1, Integer.MAX_VALUE); @Override public void observeAttributeClass(double attVal, int classVal, double weight) { if (!Utils.isMissingValue(attVal)) { GreenwaldKhannaQuantileSummary valDist = this.attValDistPerClass.get(classVal); if (valDist == null) { valDist = new GreenwaldKhannaQuantileSummary(this.numTuplesOption.getValue()); this.attValDistPerClass.set(classVal, valDist); } // TODO: not taking weight into account valDist.insert(attVal); } } @Override public double probabilityOfAttributeValueGivenClass(double attVal, int classVal) { // TODO: NaiveBayes broken until implemented return 0.0; } @Override public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( SplitCriterion criterion, double[] preSplitDist, int attIndex, boolean binaryOnly) { AttributeSplitSuggestion bestSuggestion = null; for (GreenwaldKhannaQuantileSummary qs : this.attValDistPerClass) { if (qs != null) { double[] cutpoints = qs.getSuggestedCutpoints(); for (double cutpoint : cutpoints) { double[][] postSplitDists = getClassDistsResultingFromBinarySplit(cutpoint); double merit = criterion.getMeritOfSplit(preSplitDist, postSplitDists); if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { bestSuggestion = new AttributeSplitSuggestion( new NumericAttributeBinaryTest(attIndex, cutpoint, true), postSplitDists, merit); } } } } return bestSuggestion; } // assume all values equal to splitValue go to lhs public double[][] getClassDistsResultingFromBinarySplit(double splitValue) { DoubleVector lhsDist = new DoubleVector(); DoubleVector rhsDist = new DoubleVector(); for (int i = 0; i < this.attValDistPerClass.size(); i++) { GreenwaldKhannaQuantileSummary estimator = this.attValDistPerClass.get(i); if (estimator != null) { long countBelow = estimator.getCountBelow(splitValue); lhsDist.addToValue(i, countBelow); rhsDist.addToValue(i, estimator.getTotalCount() - countBelow); } } return new double[][] { lhsDist.getArrayRef(), rhsDist.getArrayRef() }; } @Override public void getDescription(StringBuilder sb, int indent) { // TODO Auto-generated method stub } @Override protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { // TODO Auto-generated method stub } @Override public void observeAttributeTarget(double attVal, double target) { throw new UnsupportedOperationException("Not supported yet."); } }