/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */ package edu.cmu.sphinx.linguist.language.grammar; import edu.cmu.sphinx.linguist.dictionary.Word; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.util.*; /** * Represents a grammar node in a grammar. A {@link Grammar grammar} is represented as a graph of grammar nodes and * {@link GrammarArc arcs}. A grammar node usually represents a word or words, but it can also be a transition point or * simply silence. * <p> * Note that all probabilities are maintained in the LogMath log base */ public class GrammarNode { private final int identity; // the node id private boolean isFinal; // is this the final node? private Word[][] alternatives; // ordered words at this node private List<GrammarArc> arcList = new ArrayList<GrammarArc>(); // arcs to successors /** * Creates a GrammarNode with the given ID, Words. A GrammarNode with words is, by default, neither a silence nor a * final node. * * @param id the identity of this GrammarNode * @param alternatives the set of Words in this GrammarNode. This is a two dimensional array, the first index * corresponds to the set of alternative choices, the second index corresponds to a particular * word for the alternative */ public GrammarNode(int id, Word[][] alternatives) { this(id, false); this.alternatives = alternatives; } /** * Creates a GrammarNode with the given ID and silence or final attributes. A silence or final node does not have * any words by default. * * @param id the identity of this GrammarNode * @param isFinal if true this is a final node */ protected GrammarNode(int id, boolean isFinal) { this.identity = id; this.isFinal = isFinal; this.alternatives = new Word[0][0]; } /** * Returns the ID of this GrammarNode. * * @return the ID of this GrammarNode */ public int getID() { return identity; } /** * Retrieves the words associated with this grammar node * * @return the words associated with this grammar node */ public Word[][] getAlternatives() { return alternatives; } /** Optimize this grammar node. */ void optimize() { for (int i = 0; i < arcList.size(); i++) { GrammarArc arc = arcList.get(i); arcList.set(i, optimizeArc(arc)); } // now remove all self-looping empty arcs if (this.isEmpty()) { for (ListIterator<GrammarArc> i = arcList.listIterator(); i.hasNext();) { GrammarArc arc = i.next(); if (this == arc.getGrammarNode()) { i.remove(); } } } } /** * Optimize the given arc. If an arc branches to an empty node that has only one exit, the node can be bypassed by * making a new arc that skips the nodes. This can happen multiple times. * * @param arc the arc to optimize * @return the optimized arc */ GrammarArc optimizeArc(GrammarArc arc) { GrammarNode nextNode = arc.getGrammarNode(); while (nextNode.isEmpty() && nextNode.arcList.size() == 1) { GrammarArc nextArc = nextNode.arcList.get(0); arc = new GrammarArc(nextArc.getGrammarNode(), arc.getProbability() + nextArc.getProbability()); nextNode = arc.getGrammarNode(); } return arc; } /** * Retrieves the words associated with a specific alternative * * @param alternative the index of the alternative * @return the words associated with this grammar node */ public Word[] getWords(int alternative) { return alternatives[alternative]; } /** * Retrieve the single word associated with this grammar * * @return the word associated with this grammar node */ public Word getWord() { return alternatives[0][0]; } /** * Gets the number of alternatives * * @return the number of alternatives */ public int getNumAlternatives() { return alternatives.length; } /** * Determines if this grammar node is empty (that is, has no words). * * @return <code>true</code> if the node is empty, otherwise <code>false</code>. */ public boolean isEmpty() { return getNumAlternatives() == 0; } /** * Retrieves the set of transitions out of this node * * @return the transitions to the successors for this node. */ public GrammarArc[] getSuccessors() { return arcList.toArray(new GrammarArc[arcList.size()]); } /** * Determines if this grammar node is a final node in the grammar * * @return true if the node is a final node in the grammar */ public boolean isFinalNode() { return isFinal; } /** * Sets the 'final' state of the grammar node. A 'final' state grammar marks the end of a grammar * * @param isFinal if <code>true</code> the grammar node is a final node. */ public void setFinalNode(boolean isFinal) { this.isFinal = isFinal; } /** * Adds an arc to the given node * * @param node the node that this new arc goes to * @param logProbability the log probability of the transition occuring */ public void add(GrammarNode node, float logProbability) { // if we are an empty node, a loopback makes no sense. // this construct can be generated when dealing with recursive // grammars, so we check for them and toss them out. // if (isEmpty() && this == node) { return; } arcList.add(new GrammarArc(node, logProbability)); } /** Returns the string representation of this object */ @Override public String toString() { return "G" + getID(); } /** * Dumps this GrammarNode as a String. * * @param level the indent level * @param visitedNodes the set of visited nodes * @param logProb the probability of the transition (in logMath log domain) */ private String traverse(int level, Set<GrammarNode> visitedNodes, float logProb) { StringBuilder dump = new StringBuilder(); for (int i = 0; i < level; i++) { dump.append(" "); } dump.append("N(").append(getID()).append("):"); dump.append("p:").append(logProb); if (isFinalNode()) { dump.append(" !"); } Word[][] alternatives = getAlternatives(); for (int i = 0; i < alternatives.length; i++) { for (int j = 0; j < alternatives[i].length; j++) { dump.append(' ').append(alternatives[i][j].getSpelling()); } if (i < alternatives.length - 1) { dump.append('|'); } } System.out.println(dump); // Visit the children nodes if this node has never been visited. if (!isFinalNode() && !(visitedNodes.contains(this))) { visitedNodes.add(this); GrammarArc[] arcs = getSuccessors(); for (GrammarArc arc : arcs) { GrammarNode child = arc.getGrammarNode(); child.traverse(level + 1, visitedNodes, arc.getProbability()); } } else if (isFinalNode()) { // this node has no children, so just add it to the visitedNodes visitedNodes.add(this); } return dump.toString(); } /** * Traverse the grammar and dump out the nodes and arcs in GDL * * @param out print the gdl to this file * @param visitedNodes the set of visited nodes * @throws IOException if an error occurs while writing the file */ private void traverseGDL(PrintWriter out, Set<GrammarNode> visitedNodes) throws IOException { // Visit the children nodes if this node has never been visited. if (!(visitedNodes.contains(this))) { visitedNodes.add(this); out.println(" node: { title: " + getGDLID(this) + " label: " + getGDLLabel(this) + " shape: " + getGDLShape(this) + " color: " + getGDLColor(this) + '}'); GrammarArc[] arcs = getSuccessors(); for (GrammarArc arc : arcs) { GrammarNode child = arc.getGrammarNode(); float prob = arc.getProbability(); out.println(" edge: { source: " + getGDLID(this) + " target: " + getGDLID(child) + " label: \"" + prob + "\"}"); child.traverseGDL(out, visitedNodes); } } } /** * Gvien a node, return a GDL ID for the node * * @param node the node * @return the GDL id */ String getGDLID(GrammarNode node) { return "\"" + node.getID() + '\"'; } /** * Given a node, returns a GDL Label for the node * * @param node the node * @return a gdl label for the node */ String getGDLLabel(GrammarNode node) { String label = node.isEmpty() ? "" : node.getWord().getSpelling(); return '\"' + label + '\"'; } /** * Given a node, returns a GDL shape for the node * * @param node the node * @return a gdl shape for the node */ String getGDLShape(GrammarNode node) { return node.isEmpty() ? "circle" : "box"; } /** * Gets the color for the grammar node * * @param node the node of interest * @return the gdl label for the color */ String getGDLColor(GrammarNode node) { String color = "grey"; if (node.isFinalNode()) { color = "red"; } else if (!node.isEmpty()) { color = "green"; } return color; } /** * Dumps the grammar in GDL form * * @param path the path to write the gdl file to */ public void dumpGDL(String path) { try { PrintWriter out = new PrintWriter(new FileOutputStream(path)); out.println("graph: {"); out.println(" orientation: left_to_right"); out.println(" layout_algorithm: dfs"); traverseGDL(out, new HashSet<GrammarNode>()); out.println("}"); out.close(); } catch (FileNotFoundException fnfe) { System.out.println("Can't write to " + path + ' ' + fnfe); } catch (IOException ioe) { System.out.println("Trouble writing to " + path + ' ' + ioe); } } /** Dumps the grammar */ public void dump() { System.out.println(traverse(0, new HashSet<GrammarNode>(), 1.0f)); } /** * Splits this node into a pair of nodes. The first node in the pair retains the word info, and a single branch to * the new second node. The second node retains all of the original successor branches. * * @param id the id of the new node * @return the newly created second node. */ GrammarNode splitNode(int id) { GrammarNode branchNode = new GrammarNode(id, false); branchNode.arcList = arcList; arcList = new ArrayList<GrammarArc>(); add(branchNode, 0.0f); return branchNode; } public void dumpDot(String path) { try { PrintWriter out = new PrintWriter(new FileOutputStream(path)); out.println("digraph \"" + path + "\" {"); out.println("rankdir = LR\n"); traverseDot(out, new HashSet<GrammarNode>()); out.println("}"); out.close(); } catch (FileNotFoundException fnfe) { System.out.println("Can't write to " + path + ' ' + fnfe); } } private void traverseDot(PrintWriter out, Set<GrammarNode> visitedNodes) { if (!(visitedNodes.contains(this))) { visitedNodes.add(this); out.println("\tnode" + this.getID() + " [ label=" + getGDLLabel(this) + ", color=" + getGDLColor(this) + ", shape=" + getGDLShape(this) + " ]\n"); GrammarArc[] arcs = getSuccessors(); for (GrammarArc arc : arcs) { GrammarNode child = arc.getGrammarNode(); float prob = arc.getProbability(); out.write("\tnode" + this.getID() + " -> node" + child.getID() + " [ label=" + prob + " ]\n"); child.traverseDot(out, visitedNodes); } } } }