/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */ package edu.cmu.sphinx.trainer; import edu.cmu.sphinx.linguist.acoustic.AcousticModel; import edu.cmu.sphinx.linguist.acoustic.HMM; import edu.cmu.sphinx.linguist.acoustic.HMMPosition; import edu.cmu.sphinx.linguist.acoustic.Unit; import edu.cmu.sphinx.linguist.acoustic.UnitManager; import edu.cmu.sphinx.linguist.acoustic.tiedstate.SenoneHMM; import edu.cmu.sphinx.linguist.dictionary.Dictionary; import edu.cmu.sphinx.linguist.dictionary.Pronunciation; import edu.cmu.sphinx.util.LogMath; /** This class builds an HMM from a transcript, at increasing levels of details. */ public class BuildTranscriptHMM { private Graph wordGraph; private Graph phonemeGraph; private Graph contextDependentPhoneGraph; private Graph hmmGraph; private TrainerDictionary dictionary; private AcousticModel acousticModel; private UnitManager unitManager; /** * Constructor for class BuildTranscriptHMM. When called, this method creates graphs for the transcript at several * levels of detail, subsequently mapping from a word graph to a phone graph, to a state graph. * * @param context this object's context * @param transcript the transcript to be converted to HMM * @param acousticModel the acoustic model to be used * @param unitManager the manager for units */ public BuildTranscriptHMM(String context, Transcript transcript, AcousticModel acousticModel, UnitManager unitManager) { this.acousticModel = acousticModel; this.unitManager = unitManager; wordGraph = buildWordGraph(transcript); assert wordGraph.validate() : "Word graph not validated"; phonemeGraph = buildPhonemeGraph(wordGraph); assert phonemeGraph.validate() : "Phone graph not validated"; contextDependentPhoneGraph = buildContextDependentPhonemeGraph(phonemeGraph); assert contextDependentPhoneGraph.validate() : "Context dependent graph not validated"; hmmGraph = buildHMMGraph(contextDependentPhoneGraph); assert hmmGraph.validate() : "HMM graph not validated"; // hmmGraph.printGraph(); } /** * Returns the graph. * * @return the graph. */ public Graph getGraph() { return hmmGraph; } /* * Build a word graph from this transcript */ private Graph buildWordGraph(Transcript transcript) { Graph graph; Dictionary transcriptDict = transcript.getDictionary(); // Make sure the dictionary is a TrainerDictionary before we cast assert transcriptDict.getClass().getName().endsWith("TrainerDictionary"); dictionary = (TrainerDictionary) transcriptDict; transcript.startWordIterator(); /* Shouldn't node and edge be part of the graph class? */ /* The wordgraph must always begin with the <s> */ graph = new Graph(); Node initialNode = new Node(NodeType.UTTERANCE_BEGIN); graph.addNode(initialNode); graph.setInitialNode(initialNode); if (transcript.isExact()) { Node prevNode = initialNode; for (transcript.startWordIterator(); transcript.hasMoreWords();) { /* create a new node for the next word */ Node wordNode = new Node(NodeType.WORD, transcript.nextWord()); /* Link the new node into the graph */ graph.linkNodes(prevNode, wordNode); prevNode = wordNode; } /* All words are done. Just add the </s> */ Node wordNode = new Node(NodeType.UTTERANCE_END); graph.linkNodes(prevNode, wordNode); graph.setFinalNode(wordNode); } else { /* Begin the utterance with a loopy silence */ Node silLoopBack = new Node(NodeType.SILENCE_WITH_LOOPBACK); graph.linkNodes(initialNode, silLoopBack); // Create links with words from the transcript for (transcript.startWordIterator(); transcript.hasMoreWords();) { String word = transcript.nextWord(); Pronunciation[] pronunciations = dictionary.getWord(word).getPronunciations(); int numberOfPronunciations = pronunciations.length; Node[] pronNode = new Node[numberOfPronunciations]; // Create node at the beginning of the word Node dummyWordBeginNode = new Node(NodeType.DUMMY); // Allow the silence to be skipped // TODO: don't link this, for debugging. // graph.linkNodes(prevNode, dummyWordBeginNode); // Link the latest silence to the dummy too graph.linkNodes(silLoopBack, dummyWordBeginNode); // Add word ending dummy node Node dummyWordEndNode = new Node(NodeType.DUMMY); for (int i = 0; i < numberOfPronunciations; i++) { String wordAlternate = pronunciations[i].getWord().getSpelling(); if (i > 0) { wordAlternate += "(" + i + ')'; } pronNode[i] = new Node(NodeType.WORD, wordAlternate); graph.linkNodes(dummyWordBeginNode, pronNode[i]); graph.linkNodes(pronNode[i], dummyWordEndNode); } /* Add silence */ silLoopBack = new Node(NodeType.SILENCE_WITH_LOOPBACK); graph.linkNodes(dummyWordEndNode, silLoopBack); } Node wordNode = new Node(NodeType.UTTERANCE_END); // Link previous node, a dummy word end node // TODO: disable this link for now. // graph.linkNodes(prevNode, wordNode); // Link also the previous silence node graph.linkNodes(silLoopBack, wordNode); graph.setFinalNode(wordNode); } return graph; } /** Convert word graph to phoneme graph */ private Graph buildPhonemeGraph(Graph wordGraph) { Graph phonemeGraph = new Graph(); phonemeGraph.copyGraph(wordGraph); for (Node node : phonemeGraph.nodeToArray()) { if (node.getType().equals(NodeType.WORD)) { String word = node.getID(); // "false" means graph won't have additional dummy // nodes surrounding the word Graph pronunciationGraph = dictionary.getWordGraph(word, false); phonemeGraph.insertGraph(pronunciationGraph, node); } } return phonemeGraph; } /** * Convert phoneme graph to a context sensitive phoneme graph. This graph expands paths out to have separate phoneme * nodes for phonemes in different contexts. * * @param phonemeGraph the phoneme graph * @return a context dependendent phoneme graph */ public Graph buildContextDependentPhonemeGraph(Graph phonemeGraph) { // TODO: Dummy stub for now - return a copy of the original graph Graph cdGraph = new Graph(); cdGraph.copyGraph(phonemeGraph); return cdGraph; } /** * Convert the phoneme graph to an HMM. * * @param cdGraph a context dependent phoneme graph * @return an HMM graph for a context dependent phoneme graph */ public Graph buildHMMGraph(Graph cdGraph) { Graph hmmGraph = new Graph(); hmmGraph.copyGraph(cdGraph); for (Node node : hmmGraph.nodeToArray()) { Unit unit = null; if (node.getType().equals(NodeType.PHONE)) { unit = unitManager.getUnit(node.getID()); } else if (node.getType().equals(NodeType.SILENCE_WITH_LOOPBACK)) { unit = unitManager.getUnit("SIL"); } else { // if it's not a phone, and it's not silence, it's a // dummy node, and we don't care. continue; } HMM hmm = acousticModel.lookupNearestHMM(unit, HMMPosition.UNDEFINED, false); Graph modelGraph = buildModelGraph((SenoneHMM)hmm); modelGraph.validate(); hmmGraph.insertGraph(modelGraph, node); } return hmmGraph; } /** * Build a graph given an HMM. The graph will not be surrounded by dummy nodes. The number of nodes in the graph is * the number of emitting states in the hmm plus one, to account for a final, non-emitting state. * * @param hmm the HMM * @return the graph */ private Graph buildModelGraph(SenoneHMM hmm) { Graph graph = new Graph(); Node prevNode; Node stateNode = null; float[][] tmat = hmm.getTransitionMatrix(); prevNode = new Node(NodeType.DUMMY); graph.addNode(prevNode); graph.setInitialNode(prevNode); // 'hmm.getOrder() + 1' to account for final, non-emitting state. for (int i = 0; i < hmm.getOrder() + 1; i++) { /* create a new node for the next hmmState */ stateNode = new Node(NodeType.STATE, hmm.getUnit().getName()); stateNode.setObject(hmm.getState(i)); graph.addNode(stateNode); /* Link the new node into the graph */ if (i == 0) { graph.linkNodes(prevNode, stateNode); } for (int j = 0; j <= i; j++) { // System.out.println("TMAT: " + j + " " + i + " " + // tmat[j][i]); if (tmat[j][i] != LogMath.LOG_ZERO) { // 'j + 1' to account for the initial dummy node graph.linkNodes(graph.getNode(j + 1), stateNode); } } prevNode = stateNode; } /* All words are done. Just add the final dummy */ // stateNode = new Node(NodeType.DUMMY); // graph.linkNodes(prevNode, stateNode); graph.setFinalNode(stateNode); return graph; } }