/*
* avenir: Predictive analytic based on Hadoop Map Reduce
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.avenir.util;
import java.util.HashMap;
import java.util.Map;
import org.apache.log4j.Logger;
/**
* Information content statistic based on either entropy or the Gini index
* @author pranab
*
*/
public class InfoContentStat {
    private static final Logger LOG = Logger.getLogger(InfoContentStat.class);

    /** Natural log of 2, used to convert natural logarithms to base 2. */
    private static final double LOG_2 = Math.log(2);

    /** Accumulated count per class value. */
    private final Map<String, Integer> classValCount = new HashMap<String, Integer>();

    /** Probability per class value, filled in by {@link #processStat(boolean)}. */
    private final Map<String, Double> classValPr = new HashMap<String, Double>();

    /** Sum of all class value counts, computed by {@link #processStat(boolean)}. */
    private int totalCount;

    private String predicate;

    /** Result of the most recent {@link #processStat(boolean)} call. */
    private double stat;

    /**
     * Resets the accumulated counts, probabilities, total count and the
     * computed stat so the instance can be reused. The predicate is left
     * untouched.
     */
    public void initialize() {
        classValCount.clear();
        classValPr.clear();
        totalCount = 0;
        // also drop the previous result so getStat() does not report a value
        // belonging to counts that no longer exist
        stat = 0.0;
    }

    /**
     * Increments the count of a class value by one.
     * @param classVal class value
     */
    public void incrClassValCount(String classVal) {
        countClassVal(classVal, 1);
    }

    /**
     * Adds to the count of a class value, registering the class value on
     * first use.
     * @param classVal class value
     * @param count amount added to the current count of the class value
     */
    public void countClassVal(String classVal, int count) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("counting InfoContentStat " + " classVal:" + classVal + " count:" + count);
        }
        Integer current = classValCount.get(classVal);
        classValCount.put(classVal, (null == current ? 0 : current) + count);
    }

    /**
     * Calculates the info stat from the accumulated class value counts and
     * stores the per class probabilities.
     * <p>
     * Entropy is computed in bits as {@code -sum(p * log2(p))}; a class with
     * zero probability contributes nothing, which is the limit of
     * {@code p * log(p)} as {@code p} goes to 0. The Gini index is computed
     * as {@code 1 - sum(p * p)}. When the total count is zero every
     * probability is taken as 0, giving 0 for entropy and 1 for the Gini
     * index, instead of NaN.
     * @param isAlgoEntropy true for entropy, false for gini index
     * @return the calculated stat, also available through {@link #getStat()}
     */
    public double processStat(boolean isAlgoEntropy) {
        totalCount = 0;
        for (int count : classValCount.values()) {
            totalCount += count;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("processing total count:" + totalCount);
        }

        double entropy = 0.0;
        double prSquare = 0.0;
        for (Map.Entry<String, Integer> entry : classValCount.entrySet()) {
            int count = entry.getValue();
            // guard against 0/0 when only zero counts have been recorded
            double pr = totalCount == 0 ? 0.0 : (double) count / totalCount;
            classValPr.put(entry.getKey(), pr);

            if (isAlgoEntropy) {
                // Math.log(0) is -Infinity and 0 * -Infinity is NaN, so a
                // zero probability term has to be skipped explicitly
                if (pr > 0.0) {
                    entropy -= pr * Math.log(pr) / LOG_2;
                }
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("class val:" + entry.getKey() + " count:" + count);
                }
                prSquare += pr * pr;
            }
        }

        stat = isAlgoEntropy ? entropy : 1.0 - prSquare;
        return stat;
    }

    /**
     * @return sum of all class value counts as of the last
     * {@link #processStat(boolean)} call
     */
    public int getTotalCount() {
        return totalCount;
    }

    /**
     * @return stat calculated by the last {@link #processStat(boolean)} call
     */
    public double getStat() {
        return stat;
    }

    /**
     * @return the internal map of class value to probability populated by
     * {@link #processStat(boolean)}; it is not a copy
     */
    public Map<String, Double> getClassValPr() {
        return classValPr;
    }

    /**
     * @return predicate associated with this stat
     */
    public String getPredicate() {
        return predicate;
    }

    /**
     * @param predicate predicate associated with this stat
     */
    public void setPredicate(String predicate) {
        this.predicate = predicate;
    }
}