/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.linguist.language.grammar;

import edu.cmu.sphinx.util.ExtendedStreamTokenizer;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.props.*;

import java.io.IOException;
import java.util.*;

/**
 * Defines a grammar based upon a list of words in a file. The format of the file is just one word per line. For
 * example, for an isolated digits grammar the file will simply look like:
 * <pre>
 * zero
 * one
 * two
 * three
 * four
 * five
 * six
 * seven
 * eight
 * nine
 * </pre>
 * The path to the file is defined by the {@link #PROP_PATH PROP_PATH} property. If the {@link #PROP_LOOP PROP_LOOP}
 * property is true, the grammar created will be a looping grammar. Using the above digits grammar example, setting
 * PROP_LOOP to true will make it a connected-digits grammar.
 * <p>
 * All probabilities are maintained in LogMath log base.
 */
public class SimpleWordListGrammar extends Grammar implements Configurable {

    /** The property that defines the location of the word list grammar */
    @S4String(defaultValue = "spelling.gram")
    public final static String PROP_PATH = "path";

    /** The property that if true, indicates that this is a looping grammar */
    @S4Boolean(defaultValue = true)
    public final static String PROP_LOOP = "isLooping";

    // ---------------------
    // Configurable data
    // ---------------------

    /** Location of the word list file, as handed to the tokenizer. */
    private String path;

    /** When true, every word node gets an arc back to the branch node. */
    private boolean isLooping;

    /** Converts the linear branch probability into the log domain. */
    private LogMath logMath;

    /**
     * Creates a word list grammar that is configured directly rather than through a property sheet.
     *
     * @param path             location of the word list file
     * @param isLooping        if true, the grammar created will be a looping grammar
     * @param showGrammar      passed through to the {@link Grammar} constructor
     * @param optimizeGrammar  passed through to the {@link Grammar} constructor
     * @param addSilenceWords  passed through to the {@link Grammar} constructor
     * @param addFillerWords   passed through to the {@link Grammar} constructor
     * @param dictionary       passed through to the {@link Grammar} constructor
     */
    public SimpleWordListGrammar(String path, boolean isLooping, boolean showGrammar, boolean optimizeGrammar,
                                 boolean addSilenceWords, boolean addFillerWords,
                                 edu.cmu.sphinx.linguist.dictionary.Dictionary dictionary) {
        super(showGrammar, optimizeGrammar, addSilenceWords, addFillerWords, dictionary);
        this.path = path;
        this.isLooping = isLooping;
        logMath = LogMath.getLogMath();
    }

    /**
     * Creates an unconfigured grammar; {@link #newProperties(PropertySheet)} supplies the path, the looping flag
     * and the LogMath instance.
     */
    public SimpleWordListGrammar() {
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);

        path = ps.getString(PROP_PATH);
        isLooping = ps.getBoolean(PROP_LOOP);
        logMath = LogMath.getLogMath();
    }

    /**
     * Create class from reference text (not implemented).
     *
     * @param bogusText dummy variable
     * @throws NoSuchMethodException always, since this grammar cannot be built from reference text
     */
    @Override
    protected GrammarNode createGrammar(String bogusText) throws NoSuchMethodException {
        throw new NoSuchMethodException("Does not create "
                + "grammar with reference text");
    }

    /**
     * Creates the grammar from the word list file at {@code path}.
     * <p>
     * The resulting graph is: an initial {@code <sil>} node, an arc to a branch node, one arc from the branch
     * node to each word with probability {@code 1 / numberOfWords}, and an arc from each word to a final
     * {@code <sil>} node. If the grammar is looping, each word additionally has an arc back to the branch node.
     *
     * @return the initial node of the grammar
     * @throws IOException if the word list cannot be opened, read or closed
     */
    @Override
    protected GrammarNode createGrammar() throws IOException {
        ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(path, true);
        try {
            // Node creation order is kept as initial, branch, final, then words.
            GrammarNode initialNode = createGrammarNode("<sil>");
            GrammarNode branchNode = createGrammarNode(false);
            GrammarNode finalNode = createGrammarNode("<sil>");
            finalNode.setFinalNode(true);

            List<GrammarNode> wordGrammarNodes = new ArrayList<GrammarNode>();
            // NOTE(review): presumably getString() returns null at each end of line as well as at end of
            // file (the tokenizer is constructed with 'true'), which would be why the read is re-armed by
            // the outer isEOF() loop -- confirm against ExtendedStreamTokenizer.
            while (!tok.isEOF()) {
                String word;
                while ((word = tok.getString()) != null) {
                    wordGrammarNodes.add(createGrammarNode(word));
                }
            }

            // now connect all the GrammarNodes together
            initialNode.add(branchNode, LogMath.LOG_ONE);

            // Every word is equally likely. For an empty word list the division yields infinity, but the
            // resulting score is never attached to an arc because the loop below does not execute.
            float branchScore = logMath.linearToLog(1.0 / wordGrammarNodes.size());
            for (GrammarNode wordNode : wordGrammarNodes) {
                branchNode.add(wordNode, branchScore);
                wordNode.add(finalNode, LogMath.LOG_ONE);
                if (isLooping) {
                    wordNode.add(branchNode, LogMath.LOG_ONE);
                }
            }
            return initialNode;
        } finally {
            // Release the underlying file whether or not the grammar was built successfully.
            tok.close();
        }
    }
}