package com.tlabs.speechalyzer.classifier;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.Vector;
import com.felix.util.ArrayUtil;
import com.felix.util.FileUtil;
import com.felix.util.StatsUtil;
import com.felix.util.StringUtil;
import com.felix.util.Util;
import com.tlabs.speechalyzer.AudioFileManager;
import com.tlabs.speechalyzer.RecFile;
/**
 * Evaluates classification results (truth vs. hypothesis per sample) in a
 * separate thread and renders a textual summary containing per-class recall,
 * precision and F1, their unweighted averages, the overall accuracy, the class
 * distribution and the confusion matrix.
 *
 * <p>
 * The summary is published when {@link #run()} finishes; use
 * {@link #isRunning()} or {@link Thread#join()} to wait for it.
 */
public class EvaluatorThread extends Thread {
    /**
     * True from {@link #start()} until {@link #run()} has finished. Volatile
     * because it is polled from threads other than the worker.
     */
    private volatile boolean isRunning = false;
    private AudioFileManager _afm;
    private Categories _categories;
    /** Summary of the most recent run; written by the worker, read by others. */
    private volatile String _summary = "";
    private Vector<Evaluatable> _evaluatables;

    /**
     * Constructor from inside with AudioFileManager and Categories. One
     * evaluatable is created per audio file, pairing the file's string label
     * (truth) with the winning category of its classification result
     * (hypothesis).
     *
     * @param afm
     *            Source of the audio files to evaluate.
     * @param categories
     *            Category definitions used to index the confusion matrix.
     */
    public EvaluatorThread(AudioFileManager afm, Categories categories) {
        super();
        _afm = afm;
        _categories = categories;
        _evaluatables = new Vector<Evaluatable>();
        // NOTE(review): presumably every file already carries a classification
        // result with a winner; a missing one would fail here - confirm.
        for (RecFile recFile : _afm.getAudioFiles()) {
            _evaluatables.add(new Evaluatable(recFile.getStringLabel(),
                    recFile.getClassificationResult().getWinner().getCat()));
        }
    }

    /**
     * Constructor from outside with categories String descriptor and file with
     * samples.
     *
     * <p>
     * Loading is best effort: any problem (unreadable file, malformed line,
     * non-numeric truth value) is printed to standard error and loading stops
     * at that point, keeping the samples read so far.
     *
     * @param fileName
     *            Samples, format, one sample each line:
     *            {@code <samplename> <truth> <hypothesis>}
     * @param categories
     *            Category descriptor handed to the {@code Categories}
     *            constructor.
     */
    public EvaluatorThread(String fileName, String categories) {
        super();
        _categories = new Categories(categories);
        _evaluatables = new Vector<Evaluatable>();
        try {
            Vector<String> lines = FileUtil
                    .getFileLinesWithoutComments(fileName);
            for (String line : lines) {
                String[] fields = StringUtil.stringToArray(line);
                // Report short lines with context instead of a bare
                // ArrayIndexOutOfBoundsException.
                if (fields == null || fields.length < 3) {
                    throw new IllegalArgumentException(
                            "Malformed sample line in " + fileName
                                    + " (expected <samplename> <truth> <hypothesis>): "
                                    + line);
                }
                // The truth column holds a numeric judgement that is mapped
                // to its category name.
                String truth = _categories.getCategoryForJudgement(Double
                        .parseDouble(fields[1]));
                _evaluatables.add(new Evaluatable(truth, fields[2]));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Starts the evaluation thread. The running flag is raised before the
     * worker is scheduled, so a caller that polls {@link #isRunning()} right
     * after this call never observes a stale {@code false}.
     */
    @Override
    public synchronized void start() {
        isRunning = true;
        try {
            super.start();
        } catch (IllegalThreadStateException e) {
            // The thread had been started before; reflect its real state.
            isRunning = isAlive();
            throw e;
        }
    }

    /**
     * Computes the evaluation summary, prints it to standard output and makes
     * it available through {@link #getSummary()}. Exceptions are printed to
     * standard error; whatever part of the summary was produced up to that
     * point is still published.
     */
    @Override
    public void run() {
        isRunning = true;
        StringBuilder summary = new StringBuilder();
        try {
            // number of real categories; index 0 of the category array is
            // skipped
            int catNum = _categories.getCatNumber() - 1;
            // get the category names
            String[] catArray = Util.subStringArray(_categories
                    .getCategoryArray(), 1, catNum + 1);
            // initialize a square confusion matrix including "not available"
            // values
            int[][] fullMatrix = ArrayUtil.getZeroQuadraticArray(catNum + 1);
            // fill the confusion matrix: row = truth, column = hypothesis
            for (Evaluatable sample : _evaluatables) {
                fullMatrix[_categories.getCategoryIndex(sample._truth)][_categories
                        .getCategoryIndex(sample._hypothesis)]++;
            }
            // ignore the "NA" values
            int[][] confMatrix = ArrayUtil.subMatrix(fullMatrix, 1, catNum, 1,
                    catNum);
            // number of samples
            int allNum = ArrayUtil.sum(confMatrix);
            // number of correct samples
            int correctNum = ArrayUtil.diagSum(confMatrix);
            // tab separated category names, used as header of every table
            String header = Util.arrayToString(catArray).replace(" ", "\t")
                    + "\n";

            summary.append("Recall\n");
            summary.append(header);
            // NOTE(review): a class without any samples yields NaN here (0/0)
            // which propagates into the averages - confirm this is intended.
            double[] recall = new double[catNum];
            double allRecall = 0;
            for (int i = 0; i < catNum; i++) {
                recall[i] = (double) confMatrix[i][i]
                        / ArrayUtil.rowSum(confMatrix, i);
                summary.append(Util.cutDouble(recall[i])).append("\t");
                allRecall += recall[i];
            }
            summary.append("\n\nUnweighted Average Recall (UAR): ");
            double uar = Util.cutDouble((allRecall / catNum) * 100);
            summary.append(uar).append("\n");
            double averageAccuracy = Util
                    .cutDouble(((double) correctNum / allNum) * 100);
            summary.append("\nWAR (weighted average recall: DIV(correct,all)): ");
            summary.append(averageAccuracy).append("\n");
            summary.append("\nNumber of samples (all/correct): ").append(allNum)
                    .append("/").append(correctNum).append("\n");

            summary.append("\nDistribution of classes (abs/rel):\n");
            summary.append(header);
            for (int i = 0; i < catNum; i++) {
                summary.append(ArrayUtil.rowSum(confMatrix, i)).append("\t");
            }
            summary.append("\n");
            for (int i = 0; i < catNum; i++) {
                summary.append(
                        ArrayUtil.percent(ArrayUtil.rowSum(confMatrix, i),
                                allNum)).append("\t");
            }

            summary.append("\nConfusion Matrix (abs/rel)");
            summary.append("\n").append(
                    ArrayUtil.toStringConfMatrix(confMatrix, catArray));
            summary.append("\n\n").append(
                    ArrayUtil.toStringRelativeConfMatrix(confMatrix, catArray));
            summary.append("\n");

            summary.append("\nPrecision\n");
            summary.append(header);
            // NOTE(review): a class that was never predicted yields NaN here.
            double[] precision = new double[catNum];
            double allPrecision = 0;
            for (int i = 0; i < catNum; i++) {
                precision[i] = (double) confMatrix[i][i]
                        / ArrayUtil.colSum(confMatrix, i);
                summary.append(Util.cutDouble(precision[i])).append("\t");
                allPrecision += precision[i];
            }

            summary.append("\nF1\n");
            summary.append(header);
            double allF1 = 0;
            for (int i = 0; i < catNum; i++) {
                // reuse the per-class values computed above
                double catF1 = StatsUtil.f1(recall[i], precision[i]);
                summary.append(Util.cutDouble(catF1)).append("\t");
                allF1 += catF1;
            }
            summary.append("\n\nUnweighted Average F1: ");
            summary.append(allF1 / catNum).append("\n");
            summary.append("\nUnweighted Average Precision: ");
            summary.append(allPrecision / catNum).append("\n");
            System.out.println(summary.toString());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Publish the text before lowering the flag so that a poller that
            // sees isRunning == false also sees the finished summary.
            _summary = summary.toString();
            isRunning = false;
        }
    }

    /**
     * @return true while the evaluation has been started and not yet finished.
     */
    public boolean isRunning() {
        return isRunning;
    }

    /**
     * @return The summary of the most recent run, or an empty string if no run
     *         has finished yet.
     */
    public String getSummary() {
        return _summary;
    }

    /**
     * Command line entry point: evaluates a sample file against a category
     * descriptor and prints the summary.
     *
     * @param args
     *            {@code <samplefile> <catdesc>}
     */
    public static void main(String[] args) {
        String usage = "usage: <progname> <samplefile> <catdesc>\n\tsamplefile format <id> <truth> <pred>\n\tcatdesc format <num_1>,<cat_1>;...;<num_n>,<cat_n> NOTE that a -1,NA field is expected for garbage samples!";
        if (args.length == 2) {
            try {
                EvaluatorThread et = new EvaluatorThread(args[0], args[1]);
                et.start();
                // Wait for completion; polling the flag could miss the start.
                et.join();
                System.out.println(et.getSummary());
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                e.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            }
        } else {
            System.out.println(usage);
        }
    }

    /**
     * One evaluated sample: the true category and the predicted category.
     * Kept as a (non-static) inner class for compatibility with existing
     * callers.
     */
    public class Evaluatable {
        public String _truth;
        public String _hypothesis;

        /**
         * @param truth
         *            True category name; surrounding whitespace is removed.
         * @param hypothesis
         *            Predicted category name; surrounding whitespace is
         *            removed.
         */
        public Evaluatable(String truth, String hypothesis) {
            super();
            _truth = truth.trim();
            _hypothesis = hypothesis.trim();
        }

        /**
         * Creates a sample without a prediction; the hypothesis stays null.
         *
         * @param truth
         *            True category name; surrounding whitespace is removed.
         */
        public Evaluatable(String truth) {
            super();
            _truth = truth.trim();
        }

        /**
         * @return true if the stored hypothesis equals the truth; false
         *         otherwise, including when no hypothesis was set.
         */
        public boolean isRight() {
            return _truth.equals(_hypothesis);
        }

        /**
         * @param hypothesis
         *            Predicted category name to compare with the truth.
         * @return true if the given hypothesis equals the truth; false
         *         otherwise, including for a null hypothesis.
         */
        public boolean isRight(String hypothesis) {
            return _truth.equals(hypothesis);
        }
    }
}