/*
* visitante: Web analytic using Hadoop Map Reduce
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.visitante.mr.bda;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Computes, for every integer feature value in the observed range, the
 * posterior probability of class 1 in a two class problem, using per class
 * histograms read from a comma separated text file.
 *
 * <p>Two kinds of input lines are recognized:
 * <ul>
 * <li>{@code class,total} - total number of samples for the class</li>
 * <li>{@code class,value,count} - number of samples of the class having the
 * given feature value</li>
 * </ul>
 * The class field is used directly as an index into a two element array, so
 * it must be 0 or 1.
 *
 * @author pranab
 */
public class ClassBoundary {
    private Histogram[] hists = new Histogram[2];

    /**
     * Reads the histogram file, builds the two class conditional
     * distributions and prints one {@code value<TAB>posterior} line to
     * standard output for every integer between the smallest and largest
     * feature value found in the file.
     *
     * <p>If the file can not be read, the failure is reported on standard
     * error and the method returns without producing any result.
     *
     * @param fileName path of the histogram file
     * @throws IllegalArgumentException if a non blank line has fewer than two fields
     * @throws NumberFormatException if a field is not a valid integer
     */
    public void process(String fileName) {
        List<String> lines;
        try {
            lines = readLines(fileName);
        } catch (IOException e) {
            // report instead of silently swallowing; still no exception for the caller
            System.err.println("ClassBoundary: failed to read " + fileName + ": " + e);
            return;
        }

        hists[0] = new Histogram();
        hists[1] = new Histogram();
        int totalCount = 0;
        int min = Integer.MAX_VALUE;
        int max = Integer.MIN_VALUE;
        for (String line : lines) {
            if (line.trim().isEmpty()) {
                // tolerate blank lines, e.g. a stray empty line at the end of the file
                continue;
            }
            String[] items = line.split(",");
            if (items.length < 2) {
                throw new IllegalArgumentException(
                        "expected class,total or class,value,count but found: " + line);
            }
            int classVal = Integer.parseInt(items[0]);
            if (items.length == 2) {
                // class total line
                int classCount = Integer.parseInt(items[1]);
                Histogram hist = hists[classVal];
                hist.setTotal(classCount);
                totalCount += classCount;
            } else {
                // per value count line; track the overall value range
                int value = Integer.parseInt(items[1]);
                int count = Integer.parseInt(items[2]);
                Histogram hist = hists[classVal];
                hist.addCount(value, count);
                if (value < min) {
                    min = value;
                }
                if (value > max) {
                    max = value;
                }
            }
        }

        for (Histogram hist : hists) {
            hist.setSampleTotal(totalCount);
            hist.calculateProbability();
        }
        findBoundary(min, max);
    }

    /**
     * Reads all lines of a text file, closing the reader even when reading fails.
     *
     * @param fileName path of the file to read
     * @return the lines of the file in order
     * @throws IOException if the file can not be opened or read
     */
    private static List<String> readLines(String fileName) throws IOException {
        List<String> lines = new ArrayList<String>();
        BufferedReader in = new BufferedReader(new FileReader(fileName));
        try {
            String str;
            while ((str = in.readLine()) != null) {
                lines.add(str);
            }
        } finally {
            in.close();
        }
        return lines;
    }

    /**
     * Prints the posterior probability of class 1 for each integer value in
     * the closed range. The posterior is {@code h1 / (h0 + h1)} where each
     * {@code h} is the class conditional probability of the value multiplied
     * by the class prior.
     *
     * <p>A value that was observed for neither class has {@code h0 + h1 == 0},
     * so its posterior is undefined and is printed as {@code NaN}.
     *
     * @param min smallest feature value to report
     * @param max largest feature value to report
     */
    private void findBoundary(int min, int max) {
        Histogram hist0 = hists[0];
        Histogram hist1 = hists[1];
        // long counter so that the loop terminates even when max is Integer.MAX_VALUE
        for (long current = min; current <= max; ++current) {
            int value = (int) current;
            double h0 = hist0.getProbability(value) * hist0.getClassProb();
            double h1 = hist1.getProbability(value) * hist1.getClassProb();
            double postprob = h1 / (h0 + h1);
            System.out.println("" + value + "\t" + postprob);
        }
    }

    /**
     * Count distribution of feature values for one class, along with the
     * probabilities derived from it.
     */
    private static class Histogram {
        private Map<Integer, Integer> countDist = new HashMap<Integer, Integer>();
        private Map<Integer, Double> probDensity = new HashMap<Integer, Double>();
        private int total;
        private int sampleTotal;
        private double classProb;

        /**
         * Sets the number of samples belonging to this class and echoes it
         * to standard output.
         *
         * @param total the total to set
         */
        public void setTotal(int total) {
            this.total = total;
            System.out.println("total: " + total);
        }

        /**
         * Records the count for a feature value, replacing any count stored
         * earlier for the same value.
         *
         * @param value feature value
         * @param count number of samples with that value
         */
        public void addCount(int value, int count) {
            countDist.put(value, count);
        }

        /**
         * Derives the class prior (class total over sample total) and the
         * probability of each recorded value (count over class total).
         * Must be called after the totals and counts have been set.
         */
        public void calculateProbability() {
            classProb = ((double) total) / sampleTotal;
            for (Map.Entry<Integer, Integer> entry : countDist.entrySet()) {
                double prob = ((double) entry.getValue().intValue()) / total;
                probDensity.put(entry.getKey(), prob);
            }
        }

        /**
         * Returns the probability of the value within this class.
         *
         * @param value feature value
         * @return the computed probability, or 0 when the value was never
         *         recorded for this class
         */
        public double getProbability(int value) {
            Double prob = probDensity.get(value);
            // a missing entry means no sample of this class had the value
            return prob == null ? 0.0 : prob.doubleValue();
        }

        /**
         * @param sampleTotal the sampleTotal to set
         */
        public void setSampleTotal(int sampleTotal) {
            this.sampleTotal = sampleTotal;
        }

        /**
         * @return the classProb
         */
        public double getClassProb() {
            return classProb;
        }
    }

    /**
     * Command line entry point.
     *
     * @param args first element is the path of the histogram file
     */
    public static void main(String[] args) {
        new ClassBoundary().process(args[0]);
    }
}