/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.linguist.language.grammar;
import edu.cmu.sphinx.linguist.WordSequence;
import edu.cmu.sphinx.linguist.dictionary.Word;
import edu.cmu.sphinx.linguist.dictionary.Dictionary;
import edu.cmu.sphinx.linguist.language.ngram.LanguageModel;
import edu.cmu.sphinx.util.TimerPool;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Component;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
/**
 * Defines a simple grammar based upon a language model. It generates one {@link GrammarNode grammar node} per word.
 * This grammar can deal with unigram and bigram grammars of up to 1000 or so words. Note that all probabilities are
 * in the log math domain.
 */
public class LMGrammar extends Grammar {

    /** The property for the language model to be used by this grammar */
    @S4Component(type = LanguageModel.class)
    public final static String PROP_LANGUAGE_MODEL = "languageModel";

    // ------------------------
    // Configuration data
    // ------------------------

    /** The language model that supplies the vocabulary and the word-pair probabilities. */
    private LanguageModel languageModel;

    /**
     * Creates a grammar backed by the given language model. Every argument other than {@code languageModel} is
     * forwarded unchanged to the {@link Grammar} constructor.
     *
     * @param languageModel   the language model the grammar is built from
     * @param showGrammar     forwarded to {@link Grammar}
     * @param optimizeGrammar forwarded to {@link Grammar}
     * @param addSilenceWords forwarded to {@link Grammar}
     * @param addFillerWords  forwarded to {@link Grammar}
     * @param dictionary      forwarded to {@link Grammar}
     */
    public LMGrammar(LanguageModel languageModel, boolean showGrammar, boolean optimizeGrammar,
                     boolean addSilenceWords, boolean addFillerWords, Dictionary dictionary) {
        super(showGrammar, optimizeGrammar, addSilenceWords, addFillerWords, dictionary);
        this.languageModel = languageModel;
    }

    /**
     * Creates an unconfigured grammar; the language model is supplied later through
     * {@link #newProperties(PropertySheet)}.
     */
    public LMGrammar() {
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        languageModel = (LanguageModel) ps.getComponent(PROP_LANGUAGE_MODEL);
    }

    /**
     * Creates the grammar from the language model. This Grammar contains one word per grammar node. Each word (and
     * grammar node) is connected to all other words with the given probability. The node for the dictionary's
     * sentence start word becomes the initial node; the node for the sentence end word is marked final and gets no
     * outgoing branches.
     *
     * <p>The language model is allocated for the duration of the call and is deallocated (and the
     * "LMGrammar.create" timer stopped) even when grammar construction fails.
     *
     * @return the initial grammar node
     * @throws IOException propagated from the language model
     * @throws Error       if the language model vocabulary yields no node for the sentence start word
     */
    @Override
    protected GrammarNode createGrammar() throws IOException {
        languageModel.allocate();
        TimerPool.getTimer(this, "LMGrammar.create").start();
        try {
            if (languageModel.getMaxDepth() > 2) {
                System.out.println("Warning: LMGrammar limited to bigrams");
            }

            // The start/end markers do not change while we iterate, so fetch them once.
            Dictionary dictionary = getDictionary();
            Word sentenceStart = dictionary.getSentenceStartWord();
            Word sentenceEnd = dictionary.getSentenceEndWord();

            GrammarNode firstNode = null;
            List<GrammarNode> nodes = new ArrayList<GrammarNode>();
            Set<String> words = languageModel.getVocabulary();

            // create all of the word nodes
            for (String word : words) {
                GrammarNode node = createGrammarNode(word);
                if (node != null && !node.isEmpty()) {
                    if (node.getWord().equals(sentenceStart)) {
                        firstNode = node;
                    } else if (node.getWord().equals(sentenceEnd)) {
                        node.setFinalNode(true);
                    }
                    nodes.add(node);
                }
            }

            if (firstNode == null) {
                throw new Error("No sentence start found in language model");
            }

            connectNodes(nodes);
            return firstNode;
        } finally {
            // Always release the timer and the model, including on the error paths above.
            TimerPool.getTimer(this, "LMGrammar.create").stop();
            languageModel.deallocate();
        }
    }

    /**
     * Adds an arc from every non-final node to every node (itself included), weighted with the probability the
     * language model reports for that two-word sequence.
     *
     * @param nodes all word nodes of the grammar
     */
    private void connectNodes(List<GrammarNode> nodes) {
        Dictionary dictionary = getDictionary();
        for (GrammarNode prevNode : nodes) {
            // don't add any branches out of the final node
            if (prevNode.isFinalNode()) {
                continue;
            }
            // The predecessor word is the same for every successor, so look it up once per outer iteration.
            Word prevWord = dictionary.getWord(prevNode.getWord().getSpelling());
            for (GrammarNode nextNode : nodes) {
                Word nextWord = dictionary.getWord(nextNode.getWord().getSpelling());
                Word[] wordArray = {prevWord, nextWord};
                float logProbability = languageModel.getProbability(new WordSequence(wordArray));
                prevNode.add(nextNode, logProbability);
            }
        }
    }
}