/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */ package edu.cmu.sphinx.decoder.search; import edu.cmu.sphinx.decoder.scorer.Scoreable; import edu.cmu.sphinx.decoder.scorer.ScoreProvider; import edu.cmu.sphinx.frontend.Data; import edu.cmu.sphinx.frontend.FloatData; import edu.cmu.sphinx.linguist.HMMSearchState; import edu.cmu.sphinx.linguist.SearchState; import edu.cmu.sphinx.linguist.UnitSearchState; import edu.cmu.sphinx.linguist.WordSearchState; import edu.cmu.sphinx.linguist.acoustic.Unit; import edu.cmu.sphinx.linguist.dictionary.Pronunciation; import edu.cmu.sphinx.linguist.dictionary.Word; import java.text.DecimalFormat; import java.util.*; /** * Represents a single state in the recognition trellis. Subclasses of a token are used to represent the various * emitting state. * <p> * All scores are maintained in LogMath log base */ public class Token implements Scoreable { private static int curCount; private static int lastCount; private static final DecimalFormat scoreFmt = new DecimalFormat("0.0000000E00"); private static final DecimalFormat numFmt = new DecimalFormat("0000"); private Token predecessor; private float logLanguageScore; private float logTotalScore; private float logInsertionScore; private float logAcousticScore; private SearchState searchState; private long collectTime; private Data data; /** * Internal constructor for a token. Used by classes Token, CombineToken, ParallelToken * * @param predecessor the predecessor for this token * @param state the SentenceHMMState associated with this token * @param logTotalScore the total entry score for this token (in LogMath log base) * @param logInsertionScore the insertion score associated with this token (in LogMath log base) * @param logLanguageScore the language score associated with this token (in LogMath log base) * @param collectTime the frame collection time */ public Token(Token predecessor, SearchState state, float logTotalScore, float logInsertionScore, float logLanguageScore, long collectTime) { this.predecessor = predecessor; this.searchState = state; this.logTotalScore = logTotalScore; this.logInsertionScore = logInsertionScore; this.logLanguageScore = logLanguageScore; this.collectTime = collectTime; curCount++; } /** * Creates the initial token with the given word history depth * * @param state the SearchState associated with this token * @param collectTime collection time of this token */ public Token(SearchState state, long collectTime) { this(null, state, 0.0f, 0.0f, 0.0f, collectTime); } /** * Creates a Token with the given acoustic and language scores and predecessor. * * @param predecessor previous token * @param logTotalScore total score * @param logAcousticScore the log acoustic score * @param logInsertionScore the log insertion score * @param logLanguageScore the log language score */ public Token(Token predecessor, float logTotalScore, float logAcousticScore, float logInsertionScore, float logLanguageScore) { this(predecessor, null, logTotalScore, logInsertionScore, logLanguageScore, 0); this.logAcousticScore = logAcousticScore; } /** * Returns the predecessor for this token, or null if this token has no predecessors * * @return the predecessor */ public Token getPredecessor() { return predecessor; } /** * Collect time is different from frame number because some frames might be skipped in silence detector * * @return collection time in milliseconds */ public long getCollectTime() { return collectTime; } /** Sets the feature for this Token. * @param data features */ public void setData(Data data) { this.data = data; if (data instanceof FloatData) { collectTime = ((FloatData)data).getCollectTime(); } } /** * Returns the feature for this Token. * * @return the feature for this Token */ public Data getData() { return data; } /** * Returns the score for the token. The score is a combination of language and acoustic scores * * @return the score of this frame (in logMath log base) */ public float getScore() { return logTotalScore; } /** * Calculates a score against the given feature. The score can be retrieved * with get score. The token will keep a reference to the scored feature-vector. * * @param feature the feature to be scored * @return the score for the feature */ public float calculateScore(Data feature) { logAcousticScore = ((ScoreProvider) searchState).getScore(feature); logTotalScore += logAcousticScore; setData(feature); return logTotalScore; } public float[] calculateComponentScore(Data feature){ return ((ScoreProvider) searchState).getComponentScore(feature); } /** * Normalizes a previously calculated score * * @param maxLogScore the score to normalize this score with * @return the normalized score */ public float normalizeScore(float maxLogScore) { logTotalScore -= maxLogScore; logAcousticScore -= maxLogScore; return logTotalScore; } /** * Sets the score for this token * * @param logScore the new score for the token (in logMath log base) */ public void setScore(float logScore) { this.logTotalScore = logScore; } /** * Returns the language score associated with this token * * @return the language score (in logMath log base) */ public float getLanguageScore() { return logLanguageScore; } /** * Returns the insertion score associated with this token. * Insertion score is the score of the transition between * states. It might be transition score from the acoustic model, * phone insertion score or word insertion probability from * the linguist. * * @return the language score (in logMath log base) */ public float getInsertionScore() { return logInsertionScore; } /** * Returns the acoustic score for this token (in logMath log base). * Acoustic score is a sum of frame GMM. * * @return score */ public float getAcousticScore() { return logAcousticScore; } /** * Returns the SearchState associated with this token * * @return the searchState */ public SearchState getSearchState() { return searchState; } /** * Determines if this token is associated with an emitting state. An emitting state is a state that can be scored * acoustically. * * @return <code>true</code> if this token is associated with an emitting state */ public boolean isEmitting() { return searchState.isEmitting(); } /** * Determines if this token is associated with a final SentenceHMM state. * * @return <code>true</code> if this token is associated with a final state */ public boolean isFinal() { return searchState.isFinal(); } /** * Determines if this token marks the end of a word * * @return <code>true</code> if this token marks the end of a word */ public boolean isWord() { return searchState instanceof WordSearchState; } /** * Retrieves the string representation of this object * * @return the string representation of this object */ @Override public String toString() { return numFmt.format(getCollectTime()) + ' ' + scoreFmt.format(getScore()) + ' ' + scoreFmt.format(getAcousticScore()) + ' ' + scoreFmt.format(getLanguageScore()) + ' ' + getSearchState(); } /** dumps a branch of tokens */ public void dumpTokenPath() { dumpTokenPath(true); } /** * dumps a branch of tokens * * @param includeHMMStates if true include all sentence hmm states */ public void dumpTokenPath(boolean includeHMMStates) { Token token = this; List<Token> list = new ArrayList<Token>(); while (token != null) { list.add(token); token = token.getPredecessor(); } for (int i = list.size() - 1; i >= 0; i--) { token = list.get(i); if (includeHMMStates || (!(token.getSearchState() instanceof HMMSearchState))) { System.out.println(" " + token); } } System.out.println(); } /** * Returns the string of words leading up to this token. * * @param wantFiller if true, filler words are added * @param wantPronunciations if true append [ phoneme phoneme ... ] after each word * @return the word path */ public String getWordPath(boolean wantFiller, boolean wantPronunciations) { StringBuilder sb = new StringBuilder(); Token token = this; while (token != null) { if (token.isWord()) { WordSearchState wordState = (WordSearchState) token.getSearchState(); Pronunciation pron = wordState.getPronunciation(); Word word = wordState.getPronunciation().getWord(); // System.out.println(token.getFrameNumber() + " " + word + " " + token.logLanguageScore + " " + token.logAcousticScore); if (wantFiller || !word.isFiller()) { if (wantPronunciations) { sb.insert(0, ']'); Unit[] u = pron.getUnits(); for (int i = u.length - 1; i >= 0; i--) { if (i < u.length - 1) sb.insert(0, ','); sb.insert(0, u[i].getName()); } sb.insert(0, '['); } sb.insert(0, word.getSpelling()); sb.insert(0, ' '); } } token = token.getPredecessor(); } return sb.toString().trim(); } /** * Returns the string of words for this token, with no embedded filler words * * @return the string of words */ public String getWordPathNoFiller() { return getWordPath(false, false); } /** * Returns the string of words for this token, with embedded silences * * @return the string of words */ public String getWordPath() { return getWordPath(true, false); } /** * Returns the string of words and units for this token, with embedded silences. * * @return the string of words and units */ public String getWordUnitPath() { StringBuilder sb = new StringBuilder(); Token token = this; while (token != null) { SearchState searchState = token.getSearchState(); if (searchState instanceof WordSearchState) { WordSearchState wordState = (WordSearchState) searchState; Word word = wordState.getPronunciation().getWord(); sb.insert(0, ' ' + word.getSpelling()); } else if (searchState instanceof UnitSearchState) { UnitSearchState unitState = (UnitSearchState) searchState; Unit unit = unitState.getUnit(); sb.insert(0, ' ' + unit.getName()); } token = token.getPredecessor(); } return sb.toString().trim(); } /** * Returns the word of this Token, the search state is a WordSearchState. If the search state is not a * WordSearchState, return null. * * @return the word of this Token, or null if this is not a word token */ public Word getWord() { if (isWord()) { WordSearchState wordState = (WordSearchState) searchState; return wordState.getPronunciation().getWord(); } else { return null; } } /** Shows the token count */ public static void showCount() { System.out.println("Cur count: " + curCount + " new " + (curCount - lastCount)); lastCount = curCount; } /** * Determines if this branch is valid * * @return true if the token and its predecessors are valid */ public boolean validate() { return true; } /** * Return the DecimalFormat object for formatting the print out of scores. * * @return the DecimalFormat object for formatting score print outs */ protected static DecimalFormat getScoreFormat() { return scoreFmt; } /** * Return the DecimalFormat object for formatting the print out of numbers * * @return the DecimalFormat object for formatting number print outs */ protected static DecimalFormat getNumberFormat() { return numFmt; } public void update(Token predecessor, SearchState nextState, float logEntryScore, float insertionProbability, float languageProbability, long collectTime) { this.predecessor = predecessor; this.searchState = nextState; this.logTotalScore = logEntryScore; this.logInsertionScore = insertionProbability; this.logLanguageScore = languageProbability; this.collectTime = collectTime; } }