NominalConditionalSufficientStats.java example

Explorer
TimeSeriesClassification-master
- TimeSeriesClassification
  - src
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    NominalConditionalSufficientStats.java
 *    Copyright (C) 2013 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.trees.ht;

import java.io.Serializable;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import weka.core.Utils;

/**
 * Maintains sufficient stats for the distribution of a nominal attribute.
 * <p>
 * For every class value seen so far a {@link ValueDistribution} is kept in the
 * inherited {@code m_classLookup} map; it records the accumulated weight of
 * each attribute value observed together with that class.
 * 
 * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 * @version $Revision: 9705 $
 */
public class NominalConditionalSufficientStats extends
    ConditionalSufficientStats implements Serializable {

  /**
   * For serialization
   */
  private static final long serialVersionUID = -669902060601313488L;

  /**
   * Inner class that implements a discrete distribution: a map from attribute
   * value index to accumulated weight, plus the running sum of all weights.
   * <p>
   * Implements {@link Serializable} because instances are stored in the
   * class lookup of the (serializable) enclosing object; without it, writing
   * that lookup through Java serialization fails with a
   * {@code NotSerializableException}.
   * 
   * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
   * 
   */
  protected class ValueDistribution implements Serializable {

    /**
     * For serialization
     */
    private static final long serialVersionUID = 1L;

    /** Accumulated weight per attribute value index, in first-seen order */
    protected final Map<Integer, WeightMass> m_dist = new LinkedHashMap<Integer, WeightMass>();

    /** Sum of all weights currently held in {@link #m_dist} */
    private double m_sum;

    /**
     * Adds weight to the entry for the given attribute value. An entry that
     * does not exist yet is created with an initial weight of 1.0 (also added
     * to the sum) before the supplied weight is applied -- presumably a
     * Laplace-style correction.
     * 
     * @param val the index of the attribute value
     * @param weight the weight to add
     */
    public void add(int val, double weight) {
      WeightMass count = m_dist.get(val);
      if (count == null) {
        count = new WeightMass();
        count.m_weight = 1.0;
        m_sum += 1.0;
        m_dist.put(val, count);
      }
      count.m_weight += weight;
      m_sum += weight;
    }

    /**
     * Removes weight from the entry for the given attribute value. Does
     * nothing if the value has never been added.
     * 
     * @param val the index of the attribute value
     * @param weight the weight to subtract
     */
    public void delete(int val, double weight) {
      WeightMass count = m_dist.get(val);
      if (count != null) {
        count.m_weight -= weight;
        m_sum -= weight;
      }
    }

    /**
     * Gets the weight stored for the given attribute value.
     * 
     * @param val the index of the attribute value
     * @return the stored weight, or 0.0 if the value has never been added
     */
    public double getWeight(int val) {
      WeightMass count = m_dist.get(val);
      if (count != null) {
        return count.m_weight;
      }

      return 0.0;
    }

    /**
     * Gets the sum of all weights in this distribution.
     * 
     * @return the running sum of weights
     */
    public double sum() {
      return m_sum;
    }
  }

  /** Total weight passed to {@link #update}, missing values included */
  protected double m_totalWeight;

  /** Weight passed to {@link #update} with a missing attribute value */
  protected double m_missingWeight;

  /**
   * Updates the statistics with one observation. A missing attribute value
   * only contributes to the missing weight; otherwise the weight is added to
   * the distribution kept for the class value (created on first use). The
   * total weight is increased in both cases.
   * 
   * @param attVal the attribute value (index of the nominal value, or the
   *          missing value marker)
   * @param classVal the class value of the observation
   * @param weight the weight of the observation
   */
  @Override
  public void update(double attVal, String classVal, double weight) {
    if (Utils.isMissingValue(attVal)) {
      m_missingWeight += weight;
    } else {
      ValueDistribution valDist = (ValueDistribution) m_classLookup
          .get(classVal);
      if (valDist == null) {
        // first observation for this class value
        valDist = new ValueDistribution();
        m_classLookup.put(classVal, valDist);
      }
      valDist.add((int) attVal, weight);
    }

    m_totalWeight += weight;
  }

  /**
   * Computes the weight recorded for the attribute value under the given
   * class divided by the summed weight recorded for that class.
   * 
   * @param attVal the attribute value (truncated to an int index)
   * @param classVal the class value to condition on
   * @return the ratio described above, or 0 if the class value has not been
   *         seen
   */
  @Override
  public double probabilityOfAttValConditionedOnClass(double attVal,
      String classVal) {
    ValueDistribution valDist = (ValueDistribution) m_classLookup.get(classVal);
    if (valDist != null) {
      return valDist.getWeight((int) attVal) / valDist.sum();
    }

    return 0;
  }

  /**
   * Re-arranges the per-class attribute value distributions into one class
   * distribution per observed attribute value, i.e. the class distributions
   * that result from branching on every value of the attribute.
   * 
   * @return a list holding one class distribution (class value to weight) for
   *         each attribute value that has been observed
   */
  protected List<Map<String, WeightMass>> classDistsAfterSplit() {

    // att index keys to class distribution
    Map<Integer, Map<String, WeightMass>> splitDists = new HashMap<Integer, Map<String, WeightMass>>();

    for (Map.Entry<String, Object> cls : m_classLookup.entrySet()) {
      String classVal = cls.getKey();
      ValueDistribution attDist = (ValueDistribution) cls.getValue();

      for (Map.Entry<Integer, WeightMass> att : attDist.m_dist.entrySet()) {
        Integer attVal = att.getKey();
        WeightMass attCount = att.getValue();

        Map<String, WeightMass> clsDist = splitDists.get(attVal);
        if (clsDist == null) {
          clsDist = new HashMap<String, WeightMass>();
          splitDists.put(attVal, clsDist);
        }

        WeightMass clsCount = clsDist.get(classVal);
        if (clsCount == null) {
          clsCount = new WeightMass();
          clsDist.put(classVal, clsCount);
        }

        clsCount.m_weight += attCount.m_weight;
      }
    }

    // only the distributions are needed; the attribute indexes are dropped
    return new LinkedList<Map<String, WeightMass>>(splitDists.values());
  }

  /**
   * Evaluates the multiway split on this attribute with the supplied metric.
   * 
   * @param splitMetric the metric used to score the split
   * @param preSplitDist the class distribution before splitting
   * @param attName the name of the attribute these stats belong to
   * @return a split candidate holding the multiway split, the post-split
   *         class distributions and the merit computed by the metric
   */
  @Override
  public SplitCandidate bestSplit(SplitMetric splitMetric,
      Map<String, WeightMass> preSplitDist, String attName) {

    List<Map<String, WeightMass>> postSplitDists = classDistsAfterSplit();
    double merit = splitMetric.evaluateSplit(preSplitDist, postSplitDists);

    return new SplitCandidate(new UnivariateNominalMultiwaySplit(attName),
        postSplitDists, merit);
  }
}