/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */ package edu.cmu.sphinx.linguist.flat; import edu.cmu.sphinx.linguist.acoustic.Unit; import edu.cmu.sphinx.linguist.acoustic.UnitManager; import edu.cmu.sphinx.linguist.dictionary.Pronunciation; import edu.cmu.sphinx.linguist.dictionary.Word; import edu.cmu.sphinx.linguist.language.grammar.GrammarArc; import edu.cmu.sphinx.linguist.language.grammar.GrammarNode; import java.util.ArrayList; import java.util.List; /** * Manages a particular point in a grammar. The GrammarPoint is used to manage the look-ahead for generating * right-contexts. Since we haven't built the HMM tree yet, looking ahead can be difficult. The GrammarPoint class * points to a particular unit within a pronunciation/word/grammar. From a particular grammar point, it is possible to * get the set of next grammar points. */ public class GrammarPoint { private GrammarNode node; // the grammar node private int alternativeIndex; // which alternative in the grammar private int wordIndex; // which word in the alternative private int pronunciationIndex; // which pronunciation in the word private int unitIndex; // which unit in the pronunciation private static boolean bounded; /** * Creates a grammar point that points to the given unit of the given pronunciation state. * * @param state the pronunciation of interest */ public GrammarPoint(SentenceHMMState state) { while (state != null) { if (state instanceof UnitState) { unitIndex = state.getWhich(); } else if (state instanceof PronunciationState) { pronunciationIndex = state.getWhich(); } else if (state instanceof WordState) { wordIndex = state.getWhich(); } else if (state instanceof AlternativeState) { alternativeIndex = state.getWhich(); } else if (state instanceof GrammarState) { node = ((GrammarState) state).getGrammarNode(); } state = state.getParent(); } assert node != null; } /** * Creates a grammar node that points to the first unit of the first pronunciation of the first word of the given * grammar node * * @param node the grammar node of interest */ public GrammarPoint(GrammarNode node) { this(node, -1, 0, 0, 0); } /** * Creates a GrammarPoint that corresponds to the given unit of the given pronunciation * * @param state the pronunciation state * @param which the index of the unit */ public GrammarPoint(PronunciationState state, int which) { this(state); unitIndex = which; } /** * Creates a GrammarPoint that points to a fully specified unit * * @param node the grammar node * @param alternativeIndex the index of alternative * @param wordIndex the index of the word in the node * @param pronunciationIndex the index of the pronunciation in the word. * @param unitIndex the index of the unit in the pronunciation */ public GrammarPoint(GrammarNode node, int alternativeIndex, int wordIndex, int pronunciationIndex, int unitIndex) { assert node != null; this.node = node; this.alternativeIndex = alternativeIndex; this.wordIndex = wordIndex; this.pronunciationIndex = pronunciationIndex; this.unitIndex = unitIndex; } /** * Gets the unit associated with this point in the grammar * * @return the unit, or null if there is no unit associated with this point in the grammar */ private Unit getUnit() { Unit unit = null; Word[][] alternatives = node.getAlternatives(); if (alternativeIndex != -1 && alternativeIndex < alternatives.length) { Word[] words = alternatives[alternativeIndex]; if (wordIndex < words.length) { Pronunciation[] pronunciations = words[wordIndex].getPronunciations(); if (pronunciationIndex < pronunciations.length) { Unit[] units = pronunciations[pronunciationIndex].getUnits(); if (unitIndex < units.length) { unit = units[unitIndex]; } } } } return unit; } /** * Gets the unit associated with this point in the grammar. If there is no unit, return filler * * @return the unit for this grammar node or a filler unit */ private Unit getUnitOrFill() { Unit unit = getUnit(); if (unit == null) { unit = UnitManager.SILENCE; } return unit; } /** * Gets all of the right contexts for this grammar point. The contexts returned are guaranteed to be 'size' units * in length, The number of contexts returned depends upon the perplexity of the grammar downstream from this * GrammarPoint * * @param size the size of each context returned * @param startWithCurrent include the current state in the context * @param maxContexts the maxium number of right contexts to return * @return a list of containing Unit[] contexts. */ public List<Unit[]> getRightContexts(int size, boolean startWithCurrent, int maxContexts) { List<Unit[]> contexts = new ArrayList<Unit[]>(); List<GrammarPoint> nextPoints = getNextGrammarPoints(startWithCurrent); if (nextPoints.isEmpty()) { Unit[] units = Unit.getEmptyContext(size); addContext(contexts, units); } else { for (GrammarPoint gp : nextPoints) { if (size == 1) { Unit[] units = new Unit[size]; units[0] = gp.getUnitOrFill(); addContext(contexts, units); } else { List<Unit[]> rc = gp.getRightContexts(size - 1, false, maxContexts - contexts.size()); for (Unit[] rcUnits : rc) { Unit[] units = Unit.getEmptyContext(rcUnits.length + 1); units[0] = gp.getUnitOrFill(); System.arraycopy(rcUnits, 0, units, 1, rcUnits.length); addContext(contexts, units); } } if (contexts.size() >= maxContexts) { break; } } } return contexts; } /** * Add a context to a list of contexts after ensuring that no identical contexts exist on the list. When a right * context is collected it may contain duplicates in certain cases (when this unit is the last unit in a grammar * node, and there is a branch to multiple words in subsequent nodes, for instance) * * @param contexts the list of contexts to add the new units to * @param units the units to add to the context */ private void addContext(List<Unit[]> contexts, Unit[] units) { for (Unit[] onList : contexts) { if (Unit.isContextMatch(onList, units)) { return; // found on list so bailout } } contexts.add(units); } /** * Returns a list of next GrammarPoints for this GrammarPoint. If there are no more downstream grammar points with * words, an empty list is returned. * * @param startWithCurrent include the current state in the context * @return the (possibly empty) list of next GrammarPoint objects */ private List<GrammarPoint> getNextGrammarPoints(boolean startWithCurrent) { List<GrammarPoint> nextPoints = new ArrayList<GrammarPoint>(); int unitsLength = 0; // if this GrammarPoint is associated with a grammar node // and the grannar node has alternatives, add points for each // alternative if (alternativeIndex == -1 && node.getAlternatives().length > 0) { for (int i = 0; i < node.getAlternatives().length; i++) { GrammarPoint gp = new GrammarPoint(node, i, 0, 0, 0); nextPoints.add(gp); } } // If we don't have any alternatives, (i.e. this grammar node // has no words at all associated with it, then just go and // find the set of next grammar nodes with words, collect // them up, expand them and return that set. else if (node.getAlternatives().length == 0) { addNextGrammarPointsWithWords(node, nextPoints); } else { // At this point we are at a node with a set of alternatives GrammarPoint next; if (startWithCurrent) { next = this; } else { next = new GrammarPoint(node, alternativeIndex, wordIndex, pronunciationIndex, unitIndex + 1); } Pronunciation[] pronunciations = node. getAlternatives()[alternativeIndex][wordIndex]. getPronunciations(); unitsLength = pronunciations[pronunciationIndex].getUnits().length; if (next.unitIndex < unitsLength) { nextPoints.add(next); } else { next.unitIndex = 0; Word[] alternative = next.node.getAlternatives()[alternativeIndex]; if (++next.wordIndex < alternative.length) { Word word = alternative[next.wordIndex]; for (int i = 0; i < word.getPronunciations().length; i++) { GrammarPoint newGP = new GrammarPoint(next.node, next.alternativeIndex, next.wordIndex, i, 0); nextPoints.add(newGP); } } else if (!bounded) { addNextGrammarPointsWithWords(next.node, nextPoints); } } } return nextPoints; } /** * Given a GrammarNode return a list of successors GrammarNodes that contain words * * @param node successors are gathered from this node * @return list the list of grammar nodes */ private static List<GrammarNode> getNextGrammarNodesWithWords(GrammarNode node) { List<GrammarNode> list = new ArrayList<GrammarNode>(); for (GrammarArc arc : node.getSuccessors()) { GrammarNode gnode = arc.getGrammarNode(); if (gnode.getAlternatives().length == 0) { if (gnode.isFinalNode()) { list.add(gnode); } else { list.addAll(getNextGrammarNodesWithWords(gnode)); } } else { list.add(gnode); } } return list; } /** * Adds the next set of grammar points that contain words to the given list * * @param node the grammar node * @param nextPoints where the grammar points should be added */ private static void addNextGrammarPointsWithWords(GrammarNode node, List<GrammarPoint> nextPoints) { for (GrammarNode nextNode : getNextGrammarNodesWithWords(node)) { for (int j = 0; j < nextNode.getAlternatives().length; j++) { GrammarPoint gp = new GrammarPoint(nextNode, j, 0, 0, 0); nextPoints.add(gp); } } } /** * Sets the state of the bounded configuration flag * * @param state if true searches for context will not cross grammar nodes. */ static void setBounded(boolean state) { bounded = state; } }