/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */ package edu.cmu.sphinx.linguist.language.grammar; import edu.cmu.sphinx.linguist.dictionary.Dictionary; import edu.cmu.sphinx.linguist.dictionary.Word; import edu.cmu.sphinx.util.ExtendedStreamTokenizer; import edu.cmu.sphinx.util.LogMath; import edu.cmu.sphinx.util.props.*; import java.io.IOException; import java.util.*; /** * Loads a grammar from a file representing a finite-state transducer (FST) in the 'ARPA' grammar format. The ARPA FST * format is like so (the explanation of the format is below): <br> * <pre> * I 2 * F 0 2.30259 * T 0 1 <unknown> <unknown> 2.30259 * T 0 4 wood wood 1.60951 * T 0 5 cindy cindy 1.60951 * T 0 6 pittsburgh pittsburgh 1.60951 * T 0 7 jean jean 1.60951 * F 1 2.89031 * T 1 0 , , 0.587725 * T 1 4 wood wood 0.58785 * F 2 3.00808 * T 2 0 , , 0.705491 * T 2 1 <unknown> <unknown> 0.58785 * F 3 2.30259 * T 3 0 * F 4 2.89031 * T 4 0 , , 0.587725 * T 4 6 pittsburgh pittsburgh 0.58785 * F 5 2.89031 * T 5 0 , , 0.587725 * T 5 7 jean jean 0.58785 * F 6 2.89031 * T 6 0 , , 0.587725 * T 6 5 cindy cindy 0.58785 * F 7 1.28093 * T 7 0 , , 0.454282 * T 7 4 wood wood 1.28093 * </pre> * <b>Key: </b> * <pre> * I - initial node, so "I 2" means node 2 is the initial node * F - final node, e.g., "F 0 2.30259" means that node 0 is a final node, * and the probability of finishing at node 0 is 2.30259 (in -ln) * T - transition, "T 0 4 wood wood 1.60951" means "transitioning from * node 0 to node 4, the output is wood and the machine is now * in the node wood, and the probability associated with the * transition is 1.60951 (in -ln)". "T 6 0 , , 0.587725" is * a backoff transition, and the output is null (epsilon in * the picture), and the machine is now in the null node. * </pre> * Probabilities read in from the FST file are in negative natural log format and are converted to the internal logMath * log base. * <p> * As the FST file is read in, a Grammar object that is structurally equivalent to the FST is created. The steps of * converting the FST file to a Grammar object are: * <ol> * <li><b>Create all the Grammar nodes </b> <br> Go through the entire FST file and for each word transition, take the * destination node ID and create a grammar node using that ID. These nodes are kept in a hashmap to make sure they * are created once for each ID. Therefore, we get one word per grammar node.</li> * <li><b>Create an end node for each Grammar node </b> <br> This is end node is used for backoff transitions into the * Grammar node, so that it will not go through the word itself, but instead go directly to the end of the word. * Moreover, we also add an <b>optional </b> silence node between the grammar node and its end node. The result of this * step on each grammar node (show in Figure 1 below as the circle with "word") is as follows. The end node is the empty * circle at the far right: <br> <img alt="Fst end node" src="doc-files/fst-end-node.jpg"> <br> <b>Figure 1: Addition of end node and the * <i>optional </i> silence. </b> </li> * <li><b>Create the transitions </b> <br> Read through the entire FST file, and for each line indicating a transition, * connect up the corresponding Grammar nodes. Backoff transitions and null transitions (i.e., the ones that do not * output a word) will be linked to the end node of a grammar node.</li> * </ol> */ public class FSTGrammar extends Grammar { /** The property for the location of the FST n-gram file. */ @S4String(defaultValue = "default.arpa_gram") public final static String PROP_PATH = "path"; // TODO: If this property turns out to be worthwhile, turn this // into a full fledged property private boolean addInitialSilenceNode; // TODO: If this property turns out to be worthwhile, turn this // into a full fledged property // ------------------------------ // Configuration data // ------------------------------- private boolean addOptionalSilence; private final boolean ignoreUnknownTransitions = true; private String path; private LogMath logMath; private final Map<String, GrammarNode> nodes = new HashMap<String, GrammarNode>(); private final Set<GrammarNode> expandedNodes = new HashSet<GrammarNode>(); /** * Create class from reference text (not implemented). * * @param bogusText dummy variable */ @Override protected GrammarNode createGrammar(String bogusText) throws NoSuchMethodException { throw new NoSuchMethodException("Does not create " + "grammar with reference text"); } public FSTGrammar(String path, boolean showGrammar, boolean optimizeGrammar, boolean addSilenceWords, boolean addFillerWords, Dictionary dictionary) { super(showGrammar,optimizeGrammar,addSilenceWords,addFillerWords,dictionary); logMath = LogMath.getLogMath(); this.path = path; } public FSTGrammar() { } /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet) */ @Override public void newProperties(PropertySheet ps) throws PropertyException { super.newProperties(ps); logMath = LogMath.getLogMath(); path = ps.getString(PROP_PATH); } /** * Creates the grammar. * * @return the initial node for the grammar. */ @Override protected GrammarNode createGrammar() throws IOException { GrammarNode initialNode = null; GrammarNode finalNode = null; // first pass create the FST nodes int maxNodeId = createNodes(path); // create the final node: finalNode = createGrammarNode(++maxNodeId, Dictionary.SILENCE_SPELLING); finalNode.setFinalNode(true); // replace each word node with a pair of nodes, which // consists of the word node and a new dummy end node, which is // for adding null or backoff transitions maxNodeId = expandWordNodes(maxNodeId); ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(path, true); // Second pass, add all of the arcs while (!tok.isEOF()) { String token; tok.skipwhite(); token = tok.getString(); // System.out.println(token); if (token == null) { break; } else if (token.equals("I")) { assert initialNode == null; int initialID = tok.getInt("initial ID"); String nodeName = "G" + initialID; // TODO: FlatLinguist requires the initial grammar node // to contain a single silence. We'll do that for now, // but once the FlatLinguist is fixed, this should be // returned to its former method of creating an empty // initial grammar node // initialNode = createGrammarNode(initialID, false); initialNode = createGrammarNode(initialID, Dictionary.SILENCE_SPELLING); nodes.put(nodeName, initialNode); // optionally add a silence node if (addInitialSilenceNode) { GrammarNode silenceNode = createGrammarNode(++maxNodeId, Dictionary.SILENCE_SPELLING); initialNode.add(silenceNode, LogMath.LOG_ONE); silenceNode.add(initialNode, LogMath.LOG_ONE); } } else if (token.equals("T")) { int thisID = tok.getInt("this id"); int nextID = tok.getInt("next id"); GrammarNode thisNode = get(thisID); GrammarNode nextNode = get(nextID); // if the source node is an FSTGrammarNode, we want // to join the endNode to the destination node if (hasEndNode(thisNode)) { thisNode = getEndNode(thisNode); } float lnProb = 0f; // negative natural log String output = tok.getString(); if (output == null || output.equals(",")) { // these are epsilon (meaning backoff) transitions if (output != null && output.equals(",")) { tok.getString(); // skip the word lnProb = tok.getFloat("probability"); } // if the destination node has been expanded // we actually want to add the backoff transition // to the endNode if (hasEndNode(nextNode)) { nextNode = getEndNode(nextNode); } } else { String word = tok.getString(); // skip words lnProb = tok.getFloat("probability"); if (ignoreUnknownTransitions && word.equals("<unknown>")) { continue; } /* * System.out.println(nextNode + ": " + output); */ assert hasWord(nextNode); } thisNode.add(nextNode, convertProbability(lnProb)); } else if (token.equals("F")) { int thisID = tok.getInt("this id"); float lnProb = tok.getFloat("probability"); GrammarNode thisNode = get(thisID); GrammarNode nextNode = finalNode; if (hasEndNode(thisNode)) { thisNode = getEndNode(thisNode); } thisNode.add(nextNode, convertProbability(lnProb)); } } tok.close(); assert initialNode != null; return initialNode; } /** * Reads the FST file in the given path, and creates the nodes in the FST file. * * @param path the path of the FST file to read * @return the highest ID of all nodes * @throws java.io.IOException */ private int createNodes(String path) throws IOException { ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(path, true); int maxNodeId = 0; while (!tok.isEOF()) { tok.skipwhite(); String token = tok.getString(); if (token == null) { break; } else if (token.equals("T")) { tok.getInt("src id"); // toss source node int id = tok.getInt("dest id"); // dest node numb if (id > maxNodeId) { maxNodeId = id; } String word1 = tok.getString(); // get word if (word1 == null) { continue; } String word2 = tok.getString(); // get word tok.getString(); // toss probability String nodeName = "G" + id; GrammarNode node = nodes.get(nodeName); if (node == null) { if (word2.equals(",")) { node = createGrammarNode(id, false); } else { node = createGrammarNode(id, word2); } nodes.put(nodeName, node); } else { if (!word2.equals(",")) { /* * if (!word2.equals(getWord(node))) { * System.out.println(node + ": " + word2 + ' ' + getWord(node)); } */ assert (word2.equals(getWord(node))); } } } } tok.close(); return maxNodeId; } /** * Expand each of the word nodes into a pair of nodes, as well as adding an optional silence node between the * grammar node and its end node. * * @param maxNodeID the node ID to start with for the new nodes * @return the last (or maximum) node ID */ private int expandWordNodes(int maxNodeID) { Collection<GrammarNode> allNodes = nodes.values(); String[][] silence = {{Dictionary.SILENCE_SPELLING}}; for (GrammarNode node :allNodes) { // if it has at least one word, then expand the node if (node.getNumAlternatives() > 0) { GrammarNode endNode = createGrammarNode(++maxNodeID, false); node.add(endNode, LogMath.LOG_ONE); // add an optional silence if (addOptionalSilence) { GrammarNode silenceNode = createGrammarNode(++maxNodeID, silence); node.add(silenceNode, LogMath.LOG_ONE); silenceNode.add(endNode, LogMath.LOG_ONE); } expandedNodes.add(node); } } return maxNodeID; } /** * Converts the probability from -ln to logmath * * @param lnProb the probability to convert. Probabilities in the arpa format in negative natural log format. We * convert them to logmath. * @return the converted probability in logMath log base */ private float convertProbability(float lnProb) { return logMath.lnToLog(-lnProb); } /** * Given an id returns the associated grammar node * * @param id the id of interest * @return the grammar node or null if none could be found with the proper id */ private GrammarNode get(int id) { String name = "G" + id; GrammarNode grammarNode = nodes.get(name); if (grammarNode == null) { grammarNode = createGrammarNode(id, false); nodes.put(name, grammarNode); } return grammarNode; } /** * Determines if the node has a word * * @param node the grammar node of interest * @return true if the node has a word */ private boolean hasWord(GrammarNode node) { return (node.getNumAlternatives() > 0); } /** * Gets the word from the given grammar ndoe * * @param node the node of interest * @return the word (or null if the node has no word) */ private String getWord(GrammarNode node) { String word = null; if (node.getNumAlternatives() > 0) { Word[][] alternatives = node.getAlternatives(); word = alternatives[0][0].getSpelling(); } return word; } /** * Determines if the given node has an end node associated with it. * * @param node the node of interest * @return <code>true</code> if the given node has an end node. */ private boolean hasEndNode(GrammarNode node) { return (expandedNodes.contains(node)); } /** * Retrieves the end node associated with the given node * * @param node the node of interest * @return the ending node or null if no end node is available */ private GrammarNode getEndNode(GrammarNode node) { GrammarArc[] arcs = node.getSuccessors(); assert arcs != null && arcs.length > 0; return arcs[0].getGrammarNode(); } }