/*
*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electronic Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.decoder.scorer.Scoreable;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.linguist.dictionary.Word;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Integer;
import java.util.*;
/**
* A factory for WordActiveList. The word active list is an active list designed to hold word tokens only. In addition
* to the usual active list properties such as absolute and relative beams, the word active list allows restricting
* the number of copies of any particular word in the word beam. The word active list can also restrict the number of
* fillers in the beam.
*/
public class WordActiveListFactory extends ActiveListFactory {

    /** Property that sets the max paths for a single word (zero disables this feature). */
    @S4Integer(defaultValue = 0)
    public final static String PROP_MAX_PATHS_PER_WORD = "maxPathsPerWord";

    /** Property that sets the max filler words allowed in the beam (zero disables this feature). */
    @S4Integer(defaultValue = 1)
    public final static String PROP_MAX_FILLER_WORDS = "maxFillerWords";

    /** Maximum number of tokens kept per word during a purge; values &lt;= 0 disable the limit. */
    private int maxPathsPerWord;

    /** Maximum number of filler-word tokens kept during a purge; values &lt;= 0 disable the limit. */
    private int maxFiller;

    /**
     * Creates a factory for word active lists.
     *
     * @param absoluteBeamWidth beam for absolute pruning
     * @param relativeBeamWidth beam for relative pruning
     * @param maxPathsPerWord   maximum number of paths to keep per word (zero disables this limit)
     * @param maxFiller         maximum number of fillers to keep (zero disables this limit)
     */
    public WordActiveListFactory(int absoluteBeamWidth,
                                 double relativeBeamWidth, int maxPathsPerWord, int maxFiller) {
        super(absoluteBeamWidth, relativeBeamWidth);
        this.maxPathsPerWord = maxPathsPerWord;
        this.maxFiller = maxFiller;
    }

    /**
     * Creates an unconfigured factory; the limits are read later in
     * {@link #newProperties(PropertySheet)}.
     */
    public WordActiveListFactory() {
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        maxPathsPerWord = ps.getInt(PROP_MAX_PATHS_PER_WORD);
        maxFiller = ps.getInt(PROP_MAX_FILLER_WORDS);
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.decoder.search.ActiveListFactory#newInstance()
     */
    @Override
    public ActiveList newInstance() {
        return new WordActiveList();
    }

    /**
     * An active list that manages word tokens. When purged it keeps at most {@code maxPathsPerWord}
     * tokens for any single word and at most {@code maxFiller} filler tokens (each limit applies
     * only when it is positive), and then applies the absolute beam.
     * <p>
     * Note that all scores are maintained in the LogMath log domain.
     */
    class WordActiveList implements ActiveList {

        /** Highest-scoring token seen by {@link #add(Token)}, or set via {@link #setBestToken(Token)}. */
        private Token bestToken;

        /** Backing storage; a linked list keeps the iterator removals done in {@link #purge()} cheap. */
        private final List<Token> tokenList = new LinkedList<Token>();

        /**
         * Adds the given token to the list and tracks it as the best token if its score is
         * strictly higher than the current best.
         *
         * @param token the token to add
         */
        public void add(Token token) {
            tokenList.add(token);
            if (bestToken == null || token.getScore() > bestToken.getScore()) {
                bestToken = token;
            }
        }

        /**
         * Replaces an old token with a new token.
         *
         * @param oldToken the token to replace (or null in which case, replace works like add).
         * @param newToken the new token to be placed in the list.
         */
        public void replace(Token oldToken, Token newToken) {
            add(newToken);
            if (oldToken != null) {
                tokenList.remove(oldToken);
            }
        }

        /**
         * Purges excess members. The tokens are sorted with {@link Scoreable#COMPARATOR}, then
         * walked in that order: filler tokens beyond {@code maxFiller} and tokens beyond
         * {@code maxPathsPerWord} for the same word are removed. Finally the list is truncated to
         * {@code absoluteBeamWidth} entries.
         *
         * @return this active list
         */
        public ActiveList purge() {
            int fillerCount = 0;
            Map<Word, Integer> countMap = new HashMap<Word, Integer>();

            // Presumably orders best-scoring tokens first, so the head of the list is what we keep.
            Collections.sort(tokenList, Scoreable.COMPARATOR);

            for (Iterator<Token> i = tokenList.iterator(); i.hasNext();) {
                Token token = i.next();
                WordSearchState wordState = (WordSearchState) token.getSearchState();
                Word word = wordState.getPronunciation().getWord();

                // Only allow maxFiller filler words in total.
                if (maxFiller > 0 && word.isFiller()) {
                    if (fillerCount >= maxFiller) {
                        i.remove();
                        continue;
                    }
                    fillerCount++;
                }

                if (maxPathsPerWord > 0) {
                    Integer count = countMap.get(word);
                    int c = count == null ? 0 : count;

                    // The tokens are sorted, so the first maxPathsPerWord tokens seen for a word
                    // are the ones to keep. (Comparing against maxPathsPerWord - 1 kept one token
                    // too few and emptied the list when maxPathsPerWord was 1.)
                    if (c < maxPathsPerWord) {
                        countMap.put(word, c + 1);
                    } else {
                        i.remove();
                    }
                }
            }

            // NOTE(review): a non-positive absoluteBeamWidth is treated as "no absolute pruning",
            // matching the "zero disables" convention of the properties above; previously a
            // negative width made subList throw. Confirm against ActiveListFactory.
            if (absoluteBeamWidth > 0 && tokenList.size() > absoluteBeamWidth) {
                // Truncate in place rather than keeping a subList view, which would keep the
                // pruned tokens reachable through its backing list.
                tokenList.subList(absoluteBeamWidth, tokenList.size()).clear();
            }
            return this;
        }

        /**
         * Retrieves an iterator over the tokens of this list.
         *
         * @return the iterator for this token list
         */
        public Iterator<Token> iterator() {
            return tokenList.iterator();
        }

        /**
         * Gets the list of all tokens. The returned list is the live backing list, not a copy.
         *
         * @return the list of tokens
         */
        public List<Token> getTokens() {
            return tokenList;
        }

        /**
         * Returns the number of tokens on this active list.
         *
         * @return the size of the active list
         */
        public final int size() {
            return tokenList.size();
        }

        /**
         * Gets the beam threshold based upon the best scoring token: the best score plus the
         * factory's log relative beam width.
         *
         * @return the beam threshold
         */
        public float getBeamThreshold() {
            return getBestScore() + logRelativeBeamWidth;
        }

        /**
         * Gets the best score in the list.
         *
         * @return the score of the best token, or {@code -Float.MAX_VALUE} if there is no best token
         */
        public float getBestScore() {
            float bestScore = -Float.MAX_VALUE;
            if (bestToken != null) {
                bestScore = bestToken.getScore();
            }
            return bestScore;
        }

        /**
         * Sets the best scoring token for this active list.
         *
         * @param token the best scoring token
         */
        public void setBestToken(Token token) {
            bestToken = token;
        }

        /**
         * Gets the best scoring token for this active list.
         *
         * @return the best scoring token, or null if none has been recorded
         */
        public Token getBestToken() {
            return bestToken;
        }

        /* (non-Javadoc)
         * @see edu.cmu.sphinx.decoder.search.ActiveList#createNew()
         */
        public ActiveList newInstance() {
            return WordActiveListFactory.this.newInstance();
        }
    }
}