/** * Created on Jan 19, 2005 */ package edu.cmu.sphinx.linguist.language.classes; import java.io.IOException; import java.util.HashSet; import java.util.Set; import edu.cmu.sphinx.linguist.WordSequence; import edu.cmu.sphinx.linguist.dictionary.Word; import edu.cmu.sphinx.linguist.language.ngram.LanguageModel; import edu.cmu.sphinx.util.props.PropertyException; import edu.cmu.sphinx.util.props.PropertySheet; import edu.cmu.sphinx.util.props.S4Component; /** * An LM that computes a probability of a word sequence by converting words to * classes and asking the class-based probability from a delegate LM. * * @author Tanel Alumae */ public class ClassBasedLanguageModel implements LanguageModel { /** * The property that defines the classLanguageModel component. */ @S4Component(type = LanguageModel.class) public final static String PROP_CLASS_LANGUAGE_MODEL = "classLanguageModel"; /** * The property that defines the classMap component. */ @S4Component(type = ClassMap.class) public final static String PROP_CLASS_MAP = "classMap"; // ---------------------------- // Configuration data // ---------------------------- private LanguageModel classLM; private Set<String> vocabulary; private boolean allocated = false; private ClassMap classMap; public ClassBasedLanguageModel(ClassMap classMap, LanguageModel classLM) { this.classMap = classMap; this.classLM = classLM; } public ClassBasedLanguageModel() { } /* * (non-Javadoc) * @see * edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx. * util.props.PropertySheet) */ public void newProperties(PropertySheet ps) throws PropertyException { if (allocated) { throw new PropertyException( ClassBasedLanguageModel.class.getName(), null, "Can't change properties after allocation"); } classMap = (ClassMap) ps.getComponent(PROP_CLASS_MAP); classLM = (LanguageModel) ps.getComponent(PROP_CLASS_LANGUAGE_MODEL); } /* * (non-Javadoc) * @see edu.cmu.sphinx.linguist.language.ngram.LanguageModel#allocate() */ public void allocate() throws IOException { if (!allocated) { allocated = true; classMap.allocate(); classLM.allocate(); makeVocabulary(); } } /* * (non-Javadoc) * @see edu.cmu.sphinx.linguist.language.ngram.LanguageModel#deallocate() */ public void deallocate() throws IOException { allocated = false; classLM.deallocate(); classMap.deallocate(); vocabulary = null; } /** * Actual implementation of the class-based LM: P=P(W|C1)*P(C1|C2,C3..) * @see * edu.cmu.sphinx.linguist.language.ngram.LanguageModel#getProbability( * edu.cmu.sphinx.linguist.WordSequence) */ public float getProbability(WordSequence wordSequence) { Word[] classes = new Word[wordSequence.size()]; float wordToClassProb = 0; for (int i = 0; i < classes.length; i++) { Word sourceWord = wordSequence.getWord(i); ClassProbability classProbability = classMap.getClassProbability(sourceWord.getSpelling()); classes[i] = (classProbability == null ? sourceWord : classMap .getClassAsWord(classProbability.getClassName())); if (i == classes.length - 1) { if (classProbability != null) { // the first word of the word sequence is a class wordToClassProb = classProbability.getLogProbability(); } } } float classBasedProbability = classLM.getProbability(new WordSequence(classes)); return classBasedProbability + wordToClassProb; } /** * Gets the smear term for the given wordSequence * * @param wordSequence the word sequence * @return the smear term associated with this word sequence */ public float getSmear(WordSequence wordSequence) { return 0.0f; // TODO not implemented } /* * (non-Javadoc) * @see * edu.cmu.sphinx.linguist.language.ngram.LanguageModel#getVocabulary() */ public Set<String> getVocabulary() { return vocabulary; } /** * Returns the maximum depth of the language model * * @return the maximum depth of the language mdoel */ public int getMaxDepth() { return classLM.getMaxDepth(); } @Override public void onUtteranceEnd() { //TODO not implemented } /** * Converts a vocabulary of the class LM to a word vocabulary. */ private void makeVocabulary() { vocabulary = new HashSet<String>(); for (String name : classLM.getVocabulary()) { Set<String> wordsInClass = classMap.getWordsInClass(name); if (wordsInClass == null) { // 'name' not a class vocabulary.add(name); } else { vocabulary.addAll(wordsInClass); } } } }