/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.result;
import static java.util.Collections.emptyList;
import static java.util.Collections.reverse;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import edu.cmu.sphinx.decoder.search.ActiveList;
import edu.cmu.sphinx.decoder.search.AlternateHypothesisManager;
import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.linguist.dictionary.Word;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.TimeFrame;
/**
* Provides recognition results. Results can be partial or final. A result
* should not be modified before it is a final result. Note that a result may
* not contain all possible information.
* <p>
* The following methods are not yet defined but should be:
*
* <pre>
* public Result getDAG(int compressionLevel);
* </pre>
*/
public class Result {

    /** Active (non-final) tokens of the search; may be {@code null}. */
    private final ActiveList activeList;
    /** Tokens that reached a final state. */
    private final List<Token> resultList;
    /** Manager passed at construction; {@code null} when the six-argument constructor is used. */
    private final AlternateHypothesisManager alternateHypothesisManager;
    private boolean isFinal;
    private final boolean wordTokenFirst;
    private final long currentCollectTime;
    private String reference;
    private final LogMath logMath;
    private final boolean toCreateLattice;

    /**
     * Creates a result.
     *
     * @param alternateHypothesisManager
     *            hypothesis manager, may be <code>null</code>
     * @param activeList
     *            the active list associated with this result
     * @param resultList
     *            the result list associated with this result
     * @param collectTime
     *            token collect time in a stream
     * @param isFinal
     *            if true, the result is a final result
     * @param wordTokenFirst
     *            if word token goes first
     * @param toCreateLattice
     *            create lattice or not
     */
    public Result(AlternateHypothesisManager alternateHypothesisManager, ActiveList activeList,
            List<Token> resultList, long collectTime, boolean isFinal, boolean wordTokenFirst,
            boolean toCreateLattice) {
        this.alternateHypothesisManager = alternateHypothesisManager;
        this.activeList = activeList;
        this.resultList = resultList;
        this.currentCollectTime = collectTime;
        this.isFinal = isFinal;
        this.toCreateLattice = toCreateLattice;
        this.wordTokenFirst = wordTokenFirst;
        this.logMath = LogMath.getLogMath();
    }

    /**
     * Creates a result without an alternate hypothesis manager.
     *
     * @param activeList
     *            the active list associated with this result
     * @param resultList
     *            the result list associated with this result
     * @param collectTime
     *            token collect time in a stream
     * @param isFinal
     *            if true, the result is a final result. This means that the
     *            last frame in the speech segment has been decoded.
     * @param wordTokenFirst
     *            if word token goes first
     * @param toCreateLattice
     *            create lattice or not
     */
    public Result(ActiveList activeList, List<Token> resultList, long collectTime, boolean isFinal,
            boolean wordTokenFirst, boolean toCreateLattice) {
        this(null, activeList, resultList, collectTime, isFinal, wordTokenFirst, toCreateLattice);
    }

    /**
     * Determines if the result is a final result. A final result is guaranteed
     * to no longer be modified by the SearchManager that generated it.
     * Non-final results can be modified by
     * <code>SearchManager.recognize</code> calls.
     *
     * @return true if the result is a final result
     */
    public boolean isFinal() {
        return isFinal;
    }

    /**
     * Checks if it is justified to build a lattice for this result.
     *
     * @return true if lattice created from this result can provide confidence
     *         scores and n-best list
     */
    public boolean toCreateLattice() {
        return toCreateLattice;
    }

    /**
     * Returns the log math used for this Result.
     *
     * @return the log math used
     */
    public LogMath getLogMath() {
        return logMath;
    }

    /**
     * Returns a list of active tokens for this result. The list contains zero
     * or more active <code>Token</code> objects that represent the leaf nodes
     * of all active branches in the result (sometimes referred to as the
     * 'lattice').
     * <p>
     * The lattice is live and may be modified by a SearchManager during a
     * recognition. Once the Result is final, the lattice is fixed and will no
     * longer be modified by the SearchManager. Applications can modify the
     * lattice (to prepare for a re-recognition, for example) only after
     * <code>isFinal</code> returns <code>true</code>
     *
     * @return the active list given at construction (may be
     *         <code>null</code>)
     * @see Token
     */
    public ActiveList getActiveTokens() {
        return activeList;
    }

    /**
     * Returns a list of result tokens for this result. The list contains zero
     * or more result <code>Token</code> objects that represent the leaf nodes
     * of all final branches in the result (sometimes referred to as the
     * 'lattice').
     * <p>
     * The lattice is live and may be modified by a SearchManager during a
     * recognition. Once the Result is final, the lattice is fixed and will no
     * longer be modified by the SearchManager. Applications can modify the
     * lattice (to prepare for a re-recognition, for example) only after
     * <code>isFinal</code> returns <code>true</code>
     *
     * @return a list containing the final result tokens for this result
     * @see Token
     */
    public List<Token> getResultTokens() {
        return resultList;
    }

    /**
     * Returns the AlternateHypothesisManager used to construct a Lattice.
     *
     * @return the AlternateHypothesisManager, or <code>null</code> if none
     *         was supplied at construction
     */
    public AlternateHypothesisManager getAlternateHypothesisManager() {
        return alternateHypothesisManager;
    }

    /**
     * Returns the collect time that was supplied when this result was
     * created.
     *
     * @return the collect time
     */
    public long getCollectTime() {
        return currentCollectTime;
    }

    /**
     * Returns the best scoring final token in the result. A final token is a
     * token that has reached a final state in the current frame.
     *
     * @return the best scoring final token or null
     */
    public Token getBestFinalToken() {
        return bestScoring(resultList);
    }

    /**
     * Returns the best scoring token in the result. First, the best final token
     * is retrieved. A final token is one that has reached the final state in
     * the search space. If no final tokens can be found, then the best,
     * non-final token is returned.
     *
     * @return the best scoring token or null
     */
    public Token getBestToken() {
        Token bestToken = getBestFinalToken();
        if (bestToken == null) {
            bestToken = getBestActiveToken();
        }
        return bestToken;
    }

    /**
     * Returns the best scoring token in the active set.
     *
     * @return the best scoring token, or null if there is no active list or
     *         it is empty
     */
    public Token getBestActiveToken() {
        if (activeList == null) {
            return null;
        }
        return bestScoring(activeList);
    }

    /**
     * Picks the token with the highest score. When several tokens share the
     * highest score, the one encountered first is kept.
     *
     * @param tokens
     *            the candidates, must not be null
     * @return the highest scoring token, or null if there are no candidates
     */
    private static Token bestScoring(Iterable<? extends Token> tokens) {
        Token best = null;
        for (Token candidate : tokens) {
            if (best == null || candidate.getScore() > best.getScore()) {
                best = candidate;
            }
        }
        return best;
    }

    /**
     * Searches through the result tokens to find the branch whose word path
     * (without fillers) equals the given string.
     *
     * @param text
     *            the string to search for; it is trimmed before comparison
     * @return the token at the head of the branch or null
     */
    public Token findToken(String text) {
        text = text.trim();
        for (Token token : resultList) {
            if (text.equals(token.getWordPathNoFiller())) {
                return token;
            }
        }
        return null;
    }

    /**
     * Searches through the active tokens to find the branches whose word path
     * (without fillers) is a prefix of the given string.
     *
     * @param text
     *            the string to search for; it is trimmed before comparison
     * @return the tokens at the head of the matching branches; empty if none
     *         match or if this result has no active list
     */
    public List<Token> findPartialMatchingTokens(String text) {
        List<Token> list = new ArrayList<Token>();
        // A result may be built without an active list (see
        // getBestActiveToken); in that case nothing can match.
        if (activeList == null) {
            return list;
        }
        text = text.trim();
        for (Token token : activeList) {
            if (text.startsWith(token.getWordPathNoFiller())) {
                list.add(token);
            }
        }
        return list;
    }

    /**
     * Returns the best scoring active token that matches the beginning of the
     * given text. (The misspelling in the method name is kept for
     * compatibility with existing callers.)
     *
     * @param text
     *            the text to match
     * @return best token, or null if no active token matches
     */
    public Token getBestActiveParitalMatchingToken(String text) {
        return bestScoring(findPartialMatchingTokens(text));
    }

    /**
     * Returns detailed frame statistics for this result. Not implemented yet:
     * currently always returns <code>null</code>.
     *
     * @return frame statistics for this result as an array, with one element
     *         per frame or <code>null</code> if no frame statistics are
     *         available.
     */
    public FrameStatistics[] getFrameStatistics() {
        return null; // [[[ TBD: write me ]]]
    }

    /**
     * Gets the starting frame number for the result. Note that this method is
     * currently not implemented, and always returns zero.
     *
     * @return the starting frame number for the result
     */
    public int getStartFrame() {
        return 0; // [[[ TBD: write me ]]]
    }

    /**
     * Gets the ending frame number for the result. Note that this method is
     * currently not implemented, and always returns zero.
     *
     * @return the ending frame number for the result
     */
    public int getEndFrame() {
        return 0; // [[[ TBD: write me ]]]
    }

    /**
     * Gets the feature frames associated with this result, in path order
     * (earliest token first).
     *
     * @return the feature frames found along the best token's path, or null
     *         if there is no best token
     */
    public List<Data> getDataFrames() {
        // find the best token, and then trace back for all the features
        Token best = getBestToken();
        if (best == null) {
            return null;
        }
        List<Data> featureList = new LinkedList<Data>();
        for (Token token = best; token != null; token = token.getPredecessor()) {
            Data feature = token.getData();
            if (feature != null) {
                // We walk from the last token backwards, so prepend.
                featureList.add(0, feature);
            }
        }
        return featureList;
    }

    /**
     * Returns the string of the best result, removing any filler words. This
     * method first attempts to return the best final result, that is, the
     * result that has reached the final state of the search space. If there are
     * no best final results, then the best non-final result, that is, the one
     * that did not reach the final state, is returned.
     *
     * @return the string of the best result, removing any filler words, or an
     *         empty string if there is no best token
     */
    public String getBestResultNoFiller() {
        Token token = getBestToken();
        if (token == null) {
            return "";
        }
        return token.getWordPathNoFiller();
    }

    /**
     * Returns the string of the best final result, removing any filler words. A
     * final result is a path that has reached the final state. A Result object
     * can also contain paths that did not reach the final state, and those
     * paths are not returned by this method.
     *
     * @return the string of the best final result, removing any filler words,
     *         or an empty string if there is no final token
     */
    public String getBestFinalResultNoFiller() {
        Token token = getBestFinalToken();
        if (token == null) {
            return "";
        }
        return token.getWordPathNoFiller();
    }

    /**
     * The method is used when the application wants the phonemes on the best
     * final path. Note that words may have more than one pronunciation, so this
     * is not equivalent to the word path e.g. one[HH,W,AH,N] to[T,UW]
     * three[TH,R,IY]
     *
     * @return the String of words and associated phonemes on the best final
     *         path, or an empty string if there is no final token
     */
    public String getBestPronunciationResult() {
        Token token = getBestFinalToken();
        if (token == null) {
            return "";
        }
        return token.getWordPath(false, true);
    }

    /**
     * Returns the words (with time frames) on the path of the best token.
     * Which traversal is used depends on the <code>wordTokenFirst</code> flag
     * given at construction.
     *
     * @param withFillers
     *            true if we want filler words included, false otherwise
     * @return the list of timed words; empty if there is no best token
     */
    public List<WordResult> getTimedBestResult(boolean withFillers) {
        Token token = getBestToken();
        if (token == null) {
            return emptyList();
        }
        if (wordTokenFirst) {
            return getTimedWordPath(token, withFillers);
        }
        return getTimedWordTokenLastPath(token, withFillers);
    }

    /**
     * Returns the words (with time frames) on the path ending at the given
     * token. This method assumes that the word tokens come before other types
     * of token.
     *
     * @param token
     *            the last token of the path, may be null
     * @param withFillers
     *            true if we want filler words, false otherwise
     * @return list of words with time frames, earliest word first
     */
    private List<WordResult> getTimedWordPath(Token token, boolean withFillers) {
        long prevWordEnd = -1;
        List<WordResult> result = new ArrayList<WordResult>();

        while (token != null) {
            // The very first token visited (end of the path) seeds the end
            // time of the last word.
            if (prevWordEnd < 0) {
                prevWordEnd = token.getCollectTime();
            }

            if (token.isWord()) {
                Word word = token.getWord();
                if (withFillers || !word.isFiller()) {
                    // The word spans from its own token's collect time up to
                    // the collect time of the word token seen before it in
                    // this backward walk.
                    TimeFrame timeFrame = new TimeFrame(token.getCollectTime(), prevWordEnd);
                    result.add(new WordResult(word, timeFrame, token.getScore(), 1.));
                }
                prevWordEnd = token.getCollectTime();
            }
            token = token.getPredecessor();
        }

        // Words were collected back to front.
        reverse(result);
        return result;
    }

    /**
     * Returns the words (with time frames) on the path ending at the given
     * token. This method assumes that the word tokens come after the unit and
     * HMM tokens.
     *
     * @param token
     *            the last token of the path, may be null
     * @param withFillers
     *            true if we want filler words, false otherwise
     * @return list of words with time frames, earliest word first
     */
    private List<WordResult> getTimedWordTokenLastPath(Token token, boolean withFillers) {
        long lastWordEnd = -1;
        long lastWordStart = -1;
        Word word = null;
        List<WordResult> result = new ArrayList<WordResult>();

        while (token != null) {
            if (token.isWord()) {
                // Reaching an earlier word token closes the word remembered
                // from the previous word token; its start is the collect time
                // of the token visited just before this one.
                // NOTE(review): the pending word is only emitted here, so the
                // earliest word on the path is never added unless another
                // word token precedes it - confirm this is intended.
                if (word != null && lastWordEnd >= 0) {
                    if (withFillers || !word.isFiller()) {
                        TimeFrame timeFrame = new TimeFrame(lastWordStart, lastWordEnd);
                        // NOTE(review): the score recorded is that of the
                        // current (earlier) word token, not of the token that
                        // carried 'word' - confirm this is intended.
                        result.add(new WordResult(word, timeFrame, token.getScore(), 1.));
                    }
                }
                lastWordEnd = token.getCollectTime();
                word = token.getWord();
            }
            lastWordStart = token.getCollectTime();
            token = token.getPredecessor();
        }

        // Words were collected back to front.
        reverse(result);
        return result;
    }

    /**
     * Returns the word path of the best token, or an empty string if there is
     * no best token.
     */
    @Override
    public String toString() {
        Token token = getBestToken();
        if (token == null) {
            return "";
        }
        return token.getWordPath();
    }

    /**
     * Sets the results as a final result
     *
     * @param finalResult
     *            if true, the result should be made final
     */
    void setFinal(boolean finalResult) {
        this.isFinal = finalResult;
    }

    /**
     * Determines if the Result is valid. This is used for testing and
     * debugging. Every active token is validated; the path of each invalid
     * token is dumped.
     *
     * @return true if the result is properly formed; a result without an
     *         active list has nothing to validate and is reported as valid
     */
    public boolean validate() {
        if (activeList == null) {
            return true;
        }
        boolean valid = true;
        for (Token token : activeList) {
            if (!token.validate()) {
                valid = false;
                token.dumpTokenPath();
            }
        }
        return valid;
    }

    /**
     * Sets the reference text
     *
     * @param ref
     *            the reference text
     */
    public void setReferenceText(String ref) {
        reference = ref;
    }

    /**
     * Retrieves the reference text. The reference text is a transcript of the
     * text that was spoken.
     *
     * @return the reference text or null if no reference text exists.
     */
    public String getReferenceText() {
        return reference;
    }

    /**
     * Getter for wordTokenFirst flag
     *
     * @return true if word tokens go first, before data tokens
     */
    public boolean getWordTokenFirst() {
        return wordTokenFirst;
    }
}