/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */

package edu.cmu.sphinx.linguist.lextree;

import edu.cmu.sphinx.linguist.WordSequence;
import edu.cmu.sphinx.linguist.acoustic.HMM;
import edu.cmu.sphinx.linguist.acoustic.HMMPool;
import edu.cmu.sphinx.linguist.acoustic.HMMPosition;
import edu.cmu.sphinx.linguist.acoustic.Unit;
import edu.cmu.sphinx.linguist.dictionary.Dictionary;
import edu.cmu.sphinx.linguist.dictionary.Pronunciation;
import edu.cmu.sphinx.linguist.dictionary.Word;
import edu.cmu.sphinx.linguist.language.ngram.LanguageModel;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.Utilities;

import java.util.*;
import java.util.logging.Logger;

/** Represents a node in the HMM tree. */
// For large vocabularies we may create millions of these objects,
// therefore they are extremely space sensitive, and we want to make
// them as small as possible. The requirements for these objects while
// building the tree are very different from the requirements once the
// tree is built. When building, we need to easily add successor nodes
// and quickly identify duplicate children. After the tree is built we
// just need to quickly identify successors. We want the flexibility of
// a map to manage successors at startup, but we don't want the space
// penalty (at least five 32-bit fields per map); we'd rather have an
// array. To support this dual mode, we manage the successors in an
// Object which is either a Map or an array, depending upon whether the
// node has been frozen.
class Node {

    private static int nodeCount;
    private static int successorCount;

    /**
     * Either a Map during tree construction or a Node[] after the tree
     * has been frozen. Converting to an array saves memory.
     */
    private Object successors;
    private float logUnigramProbability;

    /**
     * Creates a node.
     *
     * @param probability the unigram probability for the node
     */
    Node(float probability) {
        logUnigramProbability = probability;
        nodeCount++;
    }

    /**
     * Returns the unigram probability.
     *
     * @return the unigram probability
     */
    public float getUnigramProbability() {
        return logUnigramProbability;
    }

    /**
     * Sets the unigram probability.
     *
     * @param probability the unigram probability
     */
    public void setUnigramProbability(float probability) {
        logUnigramProbability = probability;
    }

    /**
     * Gets the successor node for the given key.
     *
     * @param key the object key
     * @return the successor node for the key, or null if there is none
     */
    private Node getSuccessor(Object key) {
        Map<Object, Node> successors = getSuccessorMap();
        return successors.get(key);
    }

    /**
     * Adds the child to the set of successors.
     *
     * @param key the object key
     * @param child the child to add
     */
    void putSuccessor(Object key, Node child) {
        Map<Object, Node> successors = getSuccessorMap();
        successors.put(key, child);
    }

    /**
     * Gets the successor map for this node.
     *
     * @return the successor map
     */
    @SuppressWarnings({"unchecked"})
    public Map<Object, Node> getSuccessorMap() {
        if (successors == null) {
            successors = new HashMap<Object, Node>(4);
        }
        assert successors instanceof Map;
        return (Map<Object, Node>) successors;
    }
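
    // Editor's illustrative sketch (not part of the original API): the life
    // cycle of the dual-mode successor storage described above -- a HashMap
    // while the tree is being built, a plain Node[] after freeze(). The key
    // and probabilities are arbitrary.
    static void successorLifeCycleSketch() {
        Node parent = new Node(LogMath.LOG_ONE);
        Node child = new Node(LogMath.LOG_ONE);
        parent.putSuccessor("key", child);  // backed by a HashMap while building
        parent.freeze();                    // converts the map into a Node[]
        assert parent.getSuccessors().length == 1; // cheap array access afterwards
    }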

    /** Freezes the node: converts the successor map into an array. */
    void freeze() {
        if (successors instanceof Map<?, ?>) {
            Map<Object, Node> map = getSuccessorMap();
            successors = map.values().toArray(new Node[map.size()]);
            for (Node node : map.values()) {
                node.freeze();
            }
            successorCount += map.size();
        }
    }

    static void dumpNodeInfo() {
        System.out.println("Nodes: " + nodeCount + " successors " +
                successorCount + " avg " + (successorCount / nodeCount));
    }

    /**
     * Adds a child node holding an HMM to the successors. If a matching child has already been added, the previously
     * added node is reused, and its probability is raised to the given probability if that is higher.
     *
     * @param hmm the HMM to add
     * @param probability the unigram probability for the node
     * @return the node that holds the HMM (new or old)
     */
    Node addSuccessor(HMM hmm, float probability) {
        Node child = null;
        Node matchingChild = getSuccessor(hmm);
        if (matchingChild == null) {
            child = new HMMNode(hmm, probability);
            putSuccessor(hmm, child);
        } else {
            if (matchingChild.getUnigramProbability() < probability) {
                matchingChild.setUnigramProbability(probability);
            }
            child = matchingChild;
        }
        return child;
    }

    /**
     * Adds a child node holding a pronunciation to the successors. If a matching child has already been added, the
     * previously added node is reused, and its probability is raised to the given probability if that is higher.
     *
     * @param pronunciation the pronunciation to add
     * @param probability the unigram probability for the node
     * @param wordNodeMap a map from pronunciations to word nodes, used to share word nodes across the tree
     * @return the node that holds the pronunciation (new or old)
     */
    WordNode addSuccessor(Pronunciation pronunciation, float probability,
                          Map<Pronunciation, WordNode> wordNodeMap) {
        WordNode child = null;
        WordNode matchingChild = (WordNode) getSuccessor(pronunciation);
        if (matchingChild == null) {
            child = wordNodeMap.get(pronunciation);
            if (child == null) {
                child = new WordNode(pronunciation, probability);
                wordNodeMap.put(pronunciation, child);
            }
            putSuccessor(pronunciation, child);
        } else {
            if (matchingChild.getUnigramProbability() < probability) {
                matchingChild.setUnigramProbability(probability);
            }
            child = matchingChild;
        }
        return child;
    }

    /** Adds the given word node to the successors, keyed by the node itself. */
    void addSuccessor(WordNode wordNode) {
        putSuccessor(wordNode, wordNode);
    }

    /**
     * Adds an EndNode to the set of successors for this node. If a matching child has already been added, the
     * previously added node is reused, and its probability is raised to the given probability if that is higher.
     *
     * @param child the EndNode to add
     * @param probability the probability for this transition
     * @return the node that holds the EndNode (new or old)
     */
    EndNode addSuccessor(EndNode child, float probability) {
        Unit baseUnit = child.getBaseUnit();
        EndNode matchingChild = (EndNode) getSuccessor(baseUnit);
        if (matchingChild == null) {
            putSuccessor(baseUnit, child);
        } else {
            if (matchingChild.getUnigramProbability() < probability) {
                matchingChild.setUnigramProbability(probability);
            }
            child = matchingChild;
        }
        return child;
    }
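
    // Editor's illustrative sketch (not part of the original API): the common
    // shape shared by the addSuccessor variants above -- look up a duplicate
    // child by key, create a child only if none exists, otherwise merge by
    // keeping the higher log unigram probability.
    static Node addOrMergeSketch(Node parent, Object key, float logProbability) {
        Node match = parent.getSuccessor(key);
        if (match == null) {
            match = new Node(logProbability);
            parent.putSuccessor(key, match);
        } else if (match.getUnigramProbability() < logProbability) {
            match.setUnigramProbability(logProbability);
        }
        return match;
    }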

    /**
     * Adds a child node to the successors. If a matching child has already been added, the previously added node is
     * reused.
     *
     * @param child the child to add
     * @return the node (may differ from child if a node holding the same key was already attached)
     */
    UnitNode addSuccessor(UnitNode child) {
        UnitNode matchingChild = (UnitNode) getSuccessor(child.getKey());
        if (matchingChild == null) {
            putSuccessor(child.getKey(), child);
        } else {
            child = matchingChild;
        }
        return child;
    }

    /**
     * Returns the successors for this node.
     *
     * @return the set of successor nodes
     */
    Node[] getSuccessors() {
        if (successors instanceof Map<?, ?>) {
            freeze();
        }
        return (Node[]) successors;
    }

    /**
     * Returns the string representation of this object.
     *
     * @return the string representation of the object
     */
    @Override
    public String toString() {
        return "Node ";
    }
}

/** A node representing a word in the HMM tree. */
class WordNode extends Node {

    private final Pronunciation pronunciation;
    private final boolean isFinal;

    /**
     * Creates a word node.
     *
     * @param pronunciation the pronunciation to wrap in this node
     * @param probability the word unigram probability
     */
    WordNode(Pronunciation pronunciation, float probability) {
        super(probability);
        this.pronunciation = pronunciation;
        this.isFinal = pronunciation.getWord().isSentenceEndWord();
    }

    /**
     * Gets the word associated with this node.
     *
     * @return the word
     */
    Word getWord() {
        return pronunciation.getWord();
    }

    /**
     * Gets the pronunciation associated with this node.
     *
     * @return the pronunciation
     */
    Pronunciation getPronunciation() {
        return pronunciation;
    }

    /**
     * Gets the last unit of this word.
     *
     * @return the last unit
     */
    Unit getLastUnit() {
        Unit[] units = pronunciation.getUnits();
        return units[units.length - 1];
    }

    /**
     * Returns the successors for this node. Word nodes are leaves, so this operation is not supported.
     *
     * @return never returns normally; always throws
     */
    @Override
    Node[] getSuccessors() {
        throw new Error("Not supported");
    }

    /**
     * Returns a string representation of this object.
     *
     * @return a string representation
     */
    @Override
    public String toString() {
        return "WordNode " + pronunciation + " p " + getUnigramProbability();
    }

    public boolean isFinal() {
        return isFinal;
    }
}
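
// Editor's illustrative sketch (not part of the original file): WordNodes are
// leaves, and calling getSuccessors() on one throws an Error, so traversals
// check the node type before descending -- the same guard HMMTree.dumpTree
// uses further below.
class LeafSafeTraversalSketch {

    static void visit(Node node, Set<Node> seen) {
        if (!seen.add(node)) {
            return;                   // shared nodes are visited only once
        }
        if (node instanceof WordNode) {
            return;                   // leaf: never ask a WordNode for successors
        }
        for (Node next : node.getSuccessors()) {
            visit(next, seen);
        }
    }
}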

/**
 * A class that represents the initial word in the search space. It is treated specially because we need to keep track
 * of the context as well. The context is embodied in the parent node.
 */
class InitialWordNode extends WordNode {

    final HMMNode parent;

    /**
     * Creates an InitialWordNode.
     *
     * @param pronunciation the pronunciation
     * @param parent the parent node
     */
    InitialWordNode(Pronunciation pronunciation, HMMNode parent) {
        super(pronunciation, LogMath.LOG_ONE);
        this.parent = parent;
    }

    /**
     * Gets the parent for this word node.
     *
     * @return the parent
     */
    HMMNode getParent() {
        return parent;
    }
}

abstract class UnitNode extends Node {

    public static final int SIMPLE_UNIT = 1;
    public static final int WORD_BEGINNING_UNIT = 2;
    public static final int SILENCE_UNIT = 3;
    public static final int FILLER_UNIT = 4;

    private int type;

    /**
     * Creates the UnitNode.
     *
     * @param probability the probability for the node
     */
    UnitNode(float probability) {
        super(probability);
    }

    /**
     * Returns the base unit for this node.
     *
     * @return the base unit
     */
    abstract Unit getBaseUnit();

    abstract Object getKey();

    abstract HMMPosition getPosition();

    /**
     * Gets the unit type (one of SIMPLE_UNIT, WORD_BEGINNING_UNIT, SILENCE_UNIT or FILLER_UNIT).
     *
     * @return the unit type
     */
    int getType() {
        return type;
    }

    /**
     * Sets the unit type.
     *
     * @param type the unit type
     */
    void setType(int type) {
        this.type = type;
    }
}

/** A node that represents an HMM in the HMM tree. */
class HMMNode extends UnitNode {

    private final HMM hmm;

    // There can potentially be a large number of nodes (millions),
    // therefore it is important to conserve space as much as possible.
    // While building the HMMNodes, we keep right contexts in a set to
    // allow easy pruning of duplicates. Once the tree is entirely
    // built, we no longer need to manage the right contexts as a set; a
    // simple array will do. The freeze method converts the set to the
    // array of units. This rcSet object holds the set during
    // construction and the array after the freeze.
    private Object rcSet;

    /**
     * Creates the node, wrapping the given HMM.
     *
     * @param hmm the HMM to hold
     * @param probability the probability for the node
     */
    HMMNode(HMM hmm, float probability) {
        super(probability);
        this.hmm = hmm;

        Unit base = getBaseUnit();

        int type = SIMPLE_UNIT;
        if (base.isSilence()) {
            type = SILENCE_UNIT;
        } else if (base.isFiller()) {
            type = FILLER_UNIT;
        } else if (hmm.getPosition().isWordBeginning()) {
            type = WORD_BEGINNING_UNIT;
        }
        setType(type);
    }

    /**
     * Returns the base unit for this HMM node.
     *
     * @return the base unit
     */
    @Override
    Unit getBaseUnit() {
        return hmm.getBaseUnit();
    }

    /**
     * Returns the HMM for this node.
     *
     * @return the HMM
     */
    HMM getHMM() {
        return hmm;
    }

    @Override
    HMMPosition getPosition() {
        return hmm.getPosition();
    }

    @Override
    HMM getKey() {
        return getHMM();
    }

    /**
     * Returns a string representation of this object.
     *
     * @return a string representation
     */
    @Override
    public String toString() {
        return "HMMNode " + hmm + " p " + getUnigramProbability();
    }

    /**
     * Adds a right context to the set of possible right contexts for this node. This is typically only needed for
     * HMMs at the ends of words.
     *
     * @param rc the right context
     */
    void addRC(Unit rc) {
        getRCSet().add(rc);
    }

    /** Freezes this node: converts the right-context set into an array to reduce memory overhead. */
    @Override
    @SuppressWarnings({"unchecked"})
    void freeze() {
        super.freeze();
        if (rcSet instanceof Set) {
            Set<Unit> set = (Set<Unit>) rcSet;
            rcSet = set.toArray(new Unit[set.size()]);
        }
    }
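
    // Editor's illustrative sketch (not part of the original API): the right
    // contexts follow the same build-then-freeze pattern as Node's successors.
    // The units passed in are assumed to come from an existing unit inventory.
    static Unit[] rcLifeCycleSketch(HMMNode node, Unit rc1, Unit rc2) {
        node.addRC(rc1);     // backed by a HashSet while building
        node.addRC(rc2);
        return node.getRC(); // freezes on first read, then returns the Unit[]
    }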

    /**
     * Gets the right-context set. It is an error to call this after the node has been frozen.
     *
     * @return the set of right contexts
     */
    @SuppressWarnings({"unchecked"})
    private Set<Unit> getRCSet() {
        if (rcSet == null) {
            rcSet = new HashSet<Unit>();
        }
        assert rcSet instanceof HashSet;
        return (Set<Unit>) rcSet;
    }

    /**
     * Returns the set of right contexts for this node.
     *
     * @return the set of right contexts
     */
    Unit[] getRC() {
        if (rcSet instanceof HashSet<?>) {
            freeze();
        }
        return (Unit[]) rcSet;
    }
}

class EndNode extends UnitNode {

    final Unit baseUnit;
    final Unit leftContext;
    final Integer key;

    /**
     * Creates the node.
     *
     * @param baseUnit the base unit for this node
     * @param lc the left context
     * @param probability the probability for the transition to this node
     */
    EndNode(Unit baseUnit, Unit lc, float probability) {
        super(probability);
        this.baseUnit = baseUnit;
        this.leftContext = lc;
        key = baseUnit.getBaseID() * 121 + leftContext.getBaseID();
    }

    /**
     * Returns the base unit for this node.
     *
     * @return the base unit
     */
    @Override
    Unit getBaseUnit() {
        return baseUnit;
    }

    /**
     * Returns the left context for this node.
     *
     * @return the left context
     */
    Unit getLeftContext() {
        return leftContext;
    }

    @Override
    Integer getKey() {
        return key;
    }

    @Override
    HMMPosition getPosition() {
        return HMMPosition.END;
    }

    /**
     * Returns a string representation of this object.
     *
     * @return a string representation
     */
    @Override
    public String toString() {
        return "EndNode base:" + baseUnit + " lc " + leftContext + ' ' + key;
    }

    /** Freezes this node. An EndNode keeps no extra build-time state, so this just defers to the superclass. */
    @Override
    void freeze() {
        super.freeze();
    }
}
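
// Editor's illustrative sketch (not part of the original file): EndNode packs
// its (base unit, left context) pair into a single Integer key as
// baseID * 121 + lcID. The packing is collision-free as long as every
// left-context base ID is below 121, which holds for typical phone sets of a
// few dozen units.
class EndNodeKeySketch {

    static int pack(int baseUnitId, int leftContextId) {
        assert 0 <= leftContextId && leftContextId < 121
                : "collision-free packing assumes base IDs below 121";
        return baseUnitId * 121 + leftContextId;
    }
}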

/**
 * Represents the vocabulary as a lex tree, with nodes in the tree representing either words (WordNode) or units
 * (HMMNode). HMMNodes may be shared.
 */
class HMMTree {

    private final HMMPool hmmPool;
    private InitialWordNode initialNode;
    private Dictionary dictionary;

    private LanguageModel lm;
    private final boolean addFillerWords;
    private final boolean addSilenceWord = true;
    private final Set<Unit> entryPoints = new HashSet<Unit>();
    private Set<Unit> exitPoints = new HashSet<Unit>();
    private Set<Word> allWords;
    private EntryPointTable entryPointTable;
    private boolean debug;
    private final float languageWeight;
    private final Map<Object, HMMNode[]> endNodeMap;
    private final Map<Pronunciation, WordNode> wordNodeMap;
    private WordNode sentenceEndWordNode;
    private Logger logger;

    /**
     * Creates the HMMTree.
     *
     * @param pool the pool of HMMs and units
     * @param dictionary the dictionary containing the pronunciations
     * @param lm the source of the set of words to add to the lex tree
     * @param addFillerWords if <code>true</code>, filler words are added
     * @param languageWeight the language weight
     */
    HMMTree(HMMPool pool, Dictionary dictionary, LanguageModel lm,
            boolean addFillerWords, float languageWeight) {
        this.hmmPool = pool;
        this.dictionary = dictionary;
        this.lm = lm;
        this.endNodeMap = new HashMap<Object, HMMNode[]>();
        this.wordNodeMap = new HashMap<Pronunciation, WordNode>();
        this.addFillerWords = addFillerWords;
        this.languageWeight = languageWeight;

        logger = Logger.getLogger(HMMTree.class.getSimpleName());
        compile();
    }

    /**
     * Given a base unit and a left context, returns the set of entry points into the lex tree.
     *
     * @param lc the left context
     * @param base the center unit
     * @return the set of entry points
     */
    public Node[] getEntryPoint(Unit lc, Unit base) {
        EntryPoint ep = entryPointTable.getEntryPoint(base);
        return ep.getEntryPointsFromLeftContext(lc).getSuccessors();
    }

    /**
     * Gets the set of HMM nodes associated with the given end node.
     *
     * @param endNode the end node
     * @return an array of associated HMM nodes
     */
    public HMMNode[] getHMMNodes(EndNode endNode) {
        HMMNode[] results = endNodeMap.get(endNode.getKey());
        if (results == null) {
            Map<HMM, HMMNode> resultMap = new HashMap<HMM, HMMNode>();
            Unit baseUnit = endNode.getBaseUnit();
            Unit lc = endNode.getLeftContext();
            for (Unit rc : entryPoints) {
                HMM hmm = hmmPool.getHMM(baseUnit, lc, rc, HMMPosition.END);
                HMMNode hmmNode = resultMap.get(hmm);
                if (hmmNode == null) {
                    hmmNode = new HMMNode(hmm, LogMath.LOG_ONE);
                    resultMap.put(hmm, hmmNode);
                }
                hmmNode.addRC(rc);
                for (Node node : endNode.getSuccessors()) {
                    WordNode wordNode = (WordNode) node;
                    hmmNode.addSuccessor(wordNode);
                }
            }

            // cache the result
            results = resultMap.values().toArray(new HMMNode[resultMap.size()]);
            endNodeMap.put(endNode.getKey(), results);
        }

        return results;
    }
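
    // Editor's illustrative sketch (not part of the original API): getHMMNodes
    // above is a cache-on-first-use lookup. A repeated call with an end node
    // carrying the same key returns the identical cached array rather than
    // recomputing it.
    void hmmNodeCacheSketch(EndNode endNode) {
        HMMNode[] first = getHMMNodes(endNode);
        HMMNode[] second = getHMMNodes(endNode);
        assert first == second; // served from endNodeMap on the second call
    }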

    /**
     * Returns the word node associated with the sentence end word.
     *
     * @return the sentence end word node
     */
    public WordNode getSentenceEndWordNode() {
        assert sentenceEndWordNode != null;
        return sentenceEndWordNode;
    }

    /** Compiles the vocabulary into an HMM tree. */
    private void compile() {
        collectEntryAndExitUnits();
        entryPointTable = new EntryPointTable(entryPoints);
        addWords();
        entryPointTable.createEntryPointMaps();
        freeze();
    }

    /** Dumps the tree. */
    void dumpTree() {
        System.out.println("Dumping Tree ...");
        Map<Node, Node> dupNode = new HashMap<Node, Node>();
        dumpTree(0, getInitialNode(), dupNode);
        System.out.println("... done Dumping Tree");
    }

    /**
     * Dumps the tree.
     *
     * @param level the level of the dump
     * @param node the root of the tree to dump
     * @param dupNode map of visited nodes
     */
    private void dumpTree(int level, Node node, Map<Node, Node> dupNode) {
        if (dupNode.get(node) == null) {
            dupNode.put(node, node);
            System.out.println(Utilities.pad(level) + node);
            if (!(node instanceof WordNode)) {
                for (Node nextNode : node.getSuccessors()) {
                    dumpTree(level + 1, nextNode, dupNode);
                }
            }
        }
    }

    /** Collects all of the entry and exit points for the vocabulary. */
    private void collectEntryAndExitUnits() {
        Collection<Word> words = getAllWords();
        for (Word word : words) {
            for (Pronunciation p : word.getPronunciations()) {
                Unit first = p.getUnits()[0];
                Unit last = p.getUnits()[p.getUnits().length - 1];
                entryPoints.add(first);
                exitPoints.add(last);
            }
        }

        if (debug) {
            System.out.println("Entry Points: " + entryPoints.size());
            System.out.println("Exit Points: " + exitPoints.size());
        }
    }

    /**
     * Called after the lex tree is built. Frees all temporary structures. After this is called, no more words can be
     * added to the lex tree.
     */
    private void freeze() {
        entryPointTable.freeze();
        dictionary = null;
        lm = null;
        exitPoints = null;
        allWords = null;
        wordNodeMap.clear();
        endNodeMap.clear();
    }

    /** Adds all words in the vocabulary to the lex tree. */
    private void addWords() {
        Set<Word> words = getAllWords();
        for (Word word : words) {
            addWord(word);
        }
    }

    /**
     * Adds a single word to the lex tree.
     *
     * @param word the word to add
     */
    private void addWord(Word word) {
        float prob = getWordUnigramProbability(word);
        Pronunciation[] pronunciations = word.getPronunciations();
        for (Pronunciation pronunciation : pronunciations) {
            addPronunciation(pronunciation, prob);
        }
    }

    /**
     * Adds the given pronunciation to the lex tree.
     *
     * @param pronunciation the pronunciation
     * @param probability the unigram probability
     */
    private void addPronunciation(Pronunciation pronunciation, float probability) {
        Unit baseUnit;
        Unit lc;
        Unit rc;
        Node curNode;
        WordNode wordNode;

        Unit[] units = pronunciation.getUnits();
        baseUnit = units[0];
        EntryPoint ep = entryPointTable.getEntryPoint(baseUnit);

        ep.addProbability(probability);

        if (units.length > 1) {
            curNode = ep.getNode();
            lc = baseUnit;
            for (int i = 1; i < units.length - 1; i++) {
                baseUnit = units[i];
                rc = units[i + 1];
                HMM hmm = hmmPool.getHMM(baseUnit, lc, rc, HMMPosition.INTERNAL);
                if (hmm == null) {
                    logger.severe("Missing HMM for unit " + baseUnit.getName()
                            + " with lc=" + lc.getName() + " rc=" + rc.getName());
                } else {
                    curNode = curNode.addSuccessor(hmm, probability);
                }
                lc = baseUnit;          // next lc is this base unit
            }

            // now add the last unit as an end unit
            baseUnit = units[units.length - 1];
            EndNode endNode = new EndNode(baseUnit, lc, probability);
            curNode = curNode.addSuccessor(endNode, probability);
            wordNode = curNode.addSuccessor(pronunciation, probability, wordNodeMap);
            if (wordNode.getWord().isSentenceEndWord()) {
                sentenceEndWordNode = wordNode;
            }
        } else {
            ep.addSingleUnitWord(pronunciation);
        }
    }

    /**
     * Gets the unigram probability for the given word.
     *
     * @param word the word
     * @return the unigram probability for the word
     */
    private float getWordUnigramProbability(Word word) {
        float prob = LogMath.LOG_ONE;
        if (!word.isFiller()) {
            Word[] wordArray = new Word[1];
            wordArray[0] = word;
            prob = lm.getProbability(new WordSequence(wordArray));
            // scale the log probability by the language weight
            prob *= languageWeight;
        }
        return prob;
    }

    /**
     * Returns the entire set of words, including filler words.
     *
     * @return the set of all words (as Word objects)
     */
    private Set<Word> getAllWords() {
        if (allWords == null) {
            allWords = new HashSet<Word>();
            for (String spelling : lm.getVocabulary()) {
                Word word = dictionary.getWord(spelling);
                if (word != null) {
                    allWords.add(word);
                }
            }

            if (addFillerWords) {
                allWords.addAll(Arrays.asList(dictionary.getFillerWords()));
            } else if (addSilenceWord) {
                allWords.add(dictionary.getSilenceWord());
            }
        }
        return allWords;
    }

    /**
     * Returns the initial node for this lex tree.
     *
     * @return the initial lex node
     */
    InitialWordNode getInitialNode() {
        return initialNode;
    }
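
    // Editor's illustrative usage sketch (hypothetical setup; the pool,
    // dictionary and language model are assumed to be configured elsewhere):
    // the tree is compiled in the constructor, after which lookups run against
    // the frozen structure.
    static void usageSketch(HMMPool pool, Dictionary dictionary, LanguageModel lm) {
        HMMTree tree = new HMMTree(pool, dictionary, lm, false, 1.0f);
        InitialWordNode start = tree.getInitialNode(); // the search starts here
        WordNode end = tree.getSentenceEndWordNode();  // and finishes here
        System.out.println("search space: " + start + " ... " + end);
    }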

    /** The EntryPointTable manages the set of entry points into the lex tree. */
    class EntryPointTable {

        private final Map<Unit, EntryPoint> entryPoints;

        /**
         * Creates the entry point table, given the set of all possible entry point units.
         *
         * @param entryPointCollection the set of possible entry points
         */
        EntryPointTable(Collection<Unit> entryPointCollection) {
            entryPoints = new HashMap<Unit, EntryPoint>();
            for (Unit unit : entryPointCollection) {
                entryPoints.put(unit, new EntryPoint(unit));
            }
        }

        /**
         * Given a CI unit, returns the EntryPoint object that manages the entry point for the unit.
         *
         * @param baseUnit the unit of interest (a CI unit)
         * @return the object that manages the entry point for the unit
         */
        EntryPoint getEntryPoint(Unit baseUnit) {
            return entryPoints.get(baseUnit);
        }

        /** Creates the entry point maps for all entry points. */
        void createEntryPointMaps() {
            for (EntryPoint ep : entryPoints.values()) {
                ep.createEntryPointMap();
            }
        }

        /** Freezes the entry point table. */
        void freeze() {
            for (EntryPoint ep : entryPoints.values()) {
                ep.freeze();
            }
        }

        /** Dumps the entry point table. */
        void dump() {
            for (EntryPoint ep : entryPoints.values()) {
                ep.dump();
            }
        }
    }

    /** Manages a single entry point. */
    class EntryPoint {

        final Unit baseUnit;
        final Node baseNode;        // second units and beyond start here
        final Map<Unit, Node> unitToEntryPointMap;
        List<Pronunciation> singleUnitWords;
        int nodeCount;
        Set<Unit> rcSet;
        float totalProbability;

        /**
         * Creates an entry point for the given unit.
         *
         * @param baseUnit the EntryPoint is created for this unit
         */
        EntryPoint(Unit baseUnit) {
            this.baseUnit = baseUnit;
            this.baseNode = new Node(LogMath.LOG_ZERO);
            this.unitToEntryPointMap = new HashMap<Unit, Node>();
            this.singleUnitWords = new ArrayList<Pronunciation>();
            this.totalProbability = LogMath.LOG_ZERO;
        }

        /**
         * Given a left context, gets the node that represents the set of entry points into this unit.
         *
         * @param leftContext the left context of interest
         * @return the node representing the entry point
         */
        Node getEntryPointsFromLeftContext(Unit leftContext) {
            return unitToEntryPointMap.get(leftContext);
        }

        /**
         * Records the probability for this entry point, keeping the maximum seen so far.
         *
         * @param probability a new probability
         */
        void addProbability(float probability) {
            if (probability > totalProbability) {
                totalProbability = probability;
            }
        }

        /**
         * Returns the probability for all words reachable from this node.
         *
         * @return the log probability
         */
        float getProbability() {
            return totalProbability;
        }
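
        // Editor's illustrative sketch (not part of the original API): despite
        // its name, addProbability keeps a running maximum rather than a sum,
        // so an entry point carries the best log unigram score of any word
        // that begins with its unit. The values here are arbitrary.
        void probabilityMergeSketch() {
            addProbability(-1.0f);
            addProbability(-3.0f);              // lower score, ignored
            assert getProbability() == -1.0f;   // the maximum wins
        }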

        /** Once we have built the full entry point we can eliminate some fields. */
        void freeze() {
            for (Node node : unitToEntryPointMap.values()) {
                node.freeze();
            }
            singleUnitWords = null;
            rcSet = null;
        }

        /**
         * Gets the base node for this entry point.
         *
         * @return the base node
         */
        Node getNode() {
            return baseNode;
        }

        /**
         * Adds a one-unit word to this entry point. Such single-unit words need to be dealt with specially.
         *
         * @param p the pronunciation of the single-unit word
         */
        void addSingleUnitWord(Pronunciation p) {
            singleUnitWords.add(p);
        }

        /**
         * Gets the set of possible right contexts that we can transition to from this entry point.
         *
         * @return the set of possible transition points
         */
        private Collection<Unit> getEntryPointRC() {
            if (rcSet == null) {
                rcSet = new HashSet<Unit>();
                for (Node node : baseNode.getSuccessorMap().values()) {
                    UnitNode unitNode = (UnitNode) node;
                    rcSet.add(unitNode.getBaseUnit());
                }
            }
            return rcSet;
        }

        /** Creates the entry point map, sharing common HMMs across all entry points. */
        void createEntryPointMap() {
            HashMap<HMM, Node> map = new HashMap<HMM, Node>();
            HashMap<HMM, HMMNode> singleUnitMap = new HashMap<HMM, HMMNode>();

            for (Unit lc : exitPoints) {
                Node epNode = new Node(LogMath.LOG_ZERO);
                for (Unit rc : getEntryPointRC()) {
                    HMM hmm = hmmPool.getHMM(baseUnit, lc, rc, HMMPosition.BEGIN);
                    Node addedNode;

                    if ((addedNode = map.get(hmm)) == null) {
                        addedNode = epNode.addSuccessor(hmm, getProbability());
                        map.put(hmm, addedNode);
                    } else {
                        epNode.putSuccessor(hmm, addedNode);
                    }

                    nodeCount++;
                    connectEntryPointNode(addedNode, rc);
                }

                connectSingleUnitWords(lc, epNode, singleUnitMap);
                unitToEntryPointMap.put(lc, epNode);
            }
        }

        /**
         * Connects the single-unit words associated with this entry point. The singleUnitWords list contains all
         * single-unit pronunciations that have, as their sole unit, the unit associated with this entry point. Entry
         * points for these words are added to the epNode for all possible left (exit) and right (entry) contexts.
         *
         * @param lc the left context
         * @param epNode the entry point node
         * @param map a cache of single-unit-word HMM nodes, shared across left contexts
         */
        private void connectSingleUnitWords(Unit lc, Node epNode, HashMap<HMM, HMMNode> map) {
            if (!singleUnitWords.isEmpty()) {
                for (Unit rc : entryPoints) {
                    HMM hmm = hmmPool.getHMM(baseUnit, lc, rc, HMMPosition.SINGLE);

                    HMMNode tailNode;
                    if ((tailNode = map.get(hmm)) == null) {
                        tailNode = (HMMNode) epNode.addSuccessor(hmm, getProbability());
                        map.put(hmm, tailNode);
                    } else {
                        epNode.putSuccessor(hmm, tailNode);
                    }

                    WordNode wordNode;
                    tailNode.addRC(rc);
                    nodeCount++;

                    for (Pronunciation p : singleUnitWords) {
                        if (p.getWord() == dictionary.getSentenceStartWord()) {
                            initialNode = new InitialWordNode(p, tailNode);
                        } else {
                            float prob = getWordUnigramProbability(p.getWord());
                            wordNode = tailNode.addSuccessor(p, prob, wordNodeMap);
                            if (p.getWord() == dictionary.getSentenceEndWord()) {
                                sentenceEndWordNode = wordNode;
                            }
                        }
                        nodeCount++;
                    }
                }
            }
        }

        /**
         * Connects the entry points that match the given rc to the given epNode.
         *
         * @param epNode add matching successors here
         * @param rc the next unit
         */
        private void connectEntryPointNode(Node epNode, Unit rc) {
            for (Node node : baseNode.getSuccessors()) {
                UnitNode successor = (UnitNode) node;
                if (successor.getBaseUnit() == rc) {
                    epNode.addSuccessor(successor);
                }
            }
        }

        /** Dumps the entry point. */
        void dump() {
            System.out.println("EntryPoint " + baseUnit + " RC Followers: "
                    + getEntryPointRC().size());
            int count = 0;
            Collection<Unit> rcs = getEntryPointRC();
            System.out.print("    ");
            for (Unit rc : rcs) {
                System.out.print(Utilities.pad(rc.getName(), 4));
                if (count++ >= 12) {
                    count = 0;
                    System.out.println();
                    System.out.print("    ");
                }
            }
            System.out.println();
        }
    }
}
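
// Editor's illustrative sketch (not part of the original file): how a decoder
// asks the tree for entry points when crossing a word boundary. The last unit
// of the previous word becomes the left context for the first HMMs of the
// next word.
class EntryPointLookupSketch {

    static Node[] lookup(HMMTree tree, Unit lastUnitOfPreviousWord,
                         Unit firstUnitOfNextWord) {
        return tree.getEntryPoint(lastUnitOfPreviousWord, firstUnitOfNextWord);
    }
}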