package edu.cmu.sphinx.decoder.search.stats;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.linguist.WordSequence;
import edu.cmu.sphinx.linguist.dictionary.Word;
/**
 * Keeps track of word histories for a single frame.
 *
 * <p>Every token passed to {@link #add(Token)} is reduced to the sequence of
 * words found along its predecessor chain. For each distinct sequence the
 * tracker accumulates the number of tokens seen together with the highest and
 * lowest token score, and can print the result to standard output.
 */
public class WordTracker {

    /** Statistics gathered so far, keyed by word history. */
    final Map<WordSequence, WordStats> statMap;

    /** The frame number supplied at construction; only used when dumping. */
    final int frameNumber;

    /** Number of tokens that have been passed to {@link #add(Token)}. */
    int stateCount;

    /**
     * Not used anywhere inside this class.
     * NOTE(review): package-visible, so it may be accessed by other classes
     * in this package - confirm before removing.
     */
    int maxWordHistories;

    /**
     * Creates a word tracker for the given frame number
     *
     * @param frameNumber the frame number
     */
    public WordTracker(int frameNumber) {
        statMap = new HashMap<WordSequence, WordStats>();
        this.frameNumber = frameNumber;
    }

    /**
     * Adds a word history for the given token to the word tracker. The
     * statistics entry for the token's word sequence is created on first use
     * and then updated with the token's score.
     *
     * @param t the token to add
     */
    public void add(Token t) {
        stateCount++;
        WordSequence ws = getWordSequence(t);
        WordStats stats = statMap.get(ws);
        if (stats == null) {
            stats = new WordStats(ws);
            statMap.put(ws, stats);
        }
        stats.update(t);
    }

    /**
     * Dumps the word histories in the tracker to standard output: first the
     * summary line, then one line per word history ordered from the highest
     * maximum score to the lowest.
     */
    public void dump() {
        dumpSummary();
        // Sort a copy so the map's own value view is left untouched.
        List<WordStats> stats = new ArrayList<WordStats>(statMap.values());
        Collections.sort(stats, WordStats.COMPARATOR);
        for (WordStats stat : stats) {
            System.out.println(" " + stat);
        }
    }

    /**
     * Dumps summary information in the tracker: the frame number, the number
     * of tokens added and the number of distinct word histories.
     */
    void dumpSummary() {
        System.out.println("Frame: " + frameNumber + " states: " + stateCount
                + " histories " + statMap.size());
    }

    /**
     * Given a token, gets the word sequence represented by the token. The
     * predecessor chain is followed until it ends, and the word of every
     * token for which {@code isWord()} is true is collected.
     *
     * @param token the token of interest
     * @return the word sequence for the token
     */
    private WordSequence getWordSequence(Token token) {
        List<Word> wordList = new LinkedList<Word>();
        while (token != null) {
            if (token.isWord()) {
                WordSearchState wordState = (WordSearchState) token
                        .getSearchState();
                Word word = wordState.getPronunciation().getWord();
                // The chain is walked backwards, so prepend to keep the words
                // nearest to the given token at the end of the list.
                wordList.add(0, word);
            }
            token = token.getPredecessor();
        }
        return new WordSequence(wordList);
    }

    /** Keeps track of statistics for a particular word sequence */
    static class WordStats {

        /**
         * Orders statistics by descending maximum score.
         *
         * <p>{@link Float#compare(float, float)} is used rather than the
         * {@code >} and {@code ==} operators so that the ordering stays total
         * even when a score is NaN; operator-based comparison reports "less
         * than" in both directions for NaN, which violates the
         * {@link Comparator} contract and can make sorting fail. With this
         * ordering NaN maximum scores sort first.
         */
        public final static Comparator<WordStats> COMPARATOR = new Comparator<WordStats>() {
            public int compare(WordStats ws1, WordStats ws2) {
                // Arguments are swapped to obtain a descending order.
                return Float.compare(ws2.maxScore, ws1.maxScore);
            }
        };

        /** Number of tokens folded into these statistics. */
        private int size;

        /** Highest token score seen so far. */
        private float maxScore;

        /** Lowest token score seen so far. */
        private float minScore;

        /** The word sequence these statistics belong to. */
        private final WordSequence ws;

        /**
         * Creates a word statistics for the given sequence. The score bounds
         * start at the extreme finite float values so that the first update
         * normally replaces both of them.
         *
         * @param ws the word sequence
         */
        WordStats(WordSequence ws) {
            size = 0;
            maxScore = -Float.MAX_VALUE;
            minScore = Float.MAX_VALUE;
            this.ws = ws;
        }

        /**
         * Updates the statistics based upon the scores for the given token:
         * increments the token count and widens the score range if needed.
         *
         * @param t the token
         */
        void update(Token t) {
            size++;
            // Read the score once instead of once per comparison/assignment.
            float score = t.getScore();
            if (score > maxScore) {
                maxScore = score;
            }
            if (score < minScore) {
                minScore = score;
            }
        }

        /**
         * Returns a string representation of the statistics
         *
         * @return a string representation
         */
        @Override
        public String toString() {
            return "states:" + size + " max:" + maxScore + " min:" + minScore + ' '
                    + ws;
        }
    }
}