/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */ package edu.cmu.sphinx.linguist.acoustic; import edu.cmu.sphinx.util.TimerPool; import java.util.Iterator; import java.util.Map; import java.util.EnumMap; import java.util.logging.Level; import java.util.logging.Logger; /** * The HMMPool provides the ability to manage units via small integer IDs. Context Independent units and context * dependent units can be converted to an ID. IDs can be used to quickly retrieve a unit or an hmm associated with the * unit. This class operates under the constraint that context sizes are exactly one, which is generally only valid for * large vocabulary tasks. */ public class HMMPool { private AcousticModel model; private Unit[] unitTable; private Map<HMMPosition, HMM[]> hmmTable; private int numCIUnits; private Logger logger; private UnitManager unitManager; protected HMMPool(){ } /** * Constructs a HMMPool object. * * @param model the model to use for the pool * @param logger the logger for messages * @param unitManager manager for units */ public HMMPool(AcousticModel model, Logger logger, UnitManager unitManager) { this.logger = logger; int maxCIUnits = 0; this.model = model; this.unitManager = unitManager; if (model.getLeftContextSize() != 1) throw new Error("LexTreeLinguist: Unsupported left context size"); if (model.getRightContextSize() != 1) throw new Error("LexTreeLinguist: Unsupported right context size"); // count CI units: for (Iterator<Unit> i = model.getContextIndependentUnitIterator(); i.hasNext();) { Unit unit = i.next(); logger.fine("CI unit " + unit); if (unit.getBaseID() > maxCIUnits) { maxCIUnits = unit.getBaseID(); } } numCIUnits = maxCIUnits + 1; unitTable = new Unit[numCIUnits * numCIUnits * numCIUnits]; for (Iterator<HMM> i = model.getHMMIterator(); i.hasNext();) { HMM hmm = i.next(); Unit unit = hmm.getUnit(); int id = getID(unit); unitTable[id] = unit; if (logger.isLoggable(Level.FINER)) { logger.finer("Unit " + unit + " id " + id); } } // build up the hmm table to allow quick access to the hmms hmmTable = new EnumMap<HMMPosition, HMM[]>(HMMPosition.class); for (HMMPosition position : HMMPosition.values()) { HMM[] hmms = new HMM[unitTable.length]; hmmTable.put(position, hmms); for (int j = 1; j < unitTable.length; j++) { Unit unit = unitTable[j]; if (unit == null) { unit = synthesizeUnit(j); } if (unit != null) { hmms[j] = model.lookupNearestHMM(unit, position, false); assert hmms[j] != null; } } } } public AcousticModel getModel() { return model; } /** * Given a unit ID, generate a full context dependent unit that will allow us to look for a suitable hmm * * @param id the unit id * @return a context dependent unit for the ID */ private Unit synthesizeUnit(int id) { int centralID = getCentralUnitID(id); int leftID = getLeftUnitID(id); int rightID = getRightUnitID(id); if (centralID == 0 || leftID == 0 || rightID == 0) { return null; } Unit centralUnit = unitTable[centralID]; Unit leftUnit = unitTable[leftID]; Unit rightUnit = unitTable[rightID]; assert centralUnit != null; assert leftUnit != null; assert rightUnit != null; Unit[] lc = new Unit[1]; Unit[] rc = new Unit[1]; lc[0] = leftUnit; rc[0] = rightUnit; LeftRightContext context = LeftRightContext.get(lc, rc); Unit unit = unitManager.getUnit( centralUnit.getName(), centralUnit.isFiller(), context); if (logger.isLoggable(Level.FINER)) { logger.finer("Missing " + getUnitNameFromID(id) + " returning " + unit); } return unit; } /** * Returns the number of CI units * * @return the number of CI Units */ public int getNumCIUnits() { return numCIUnits; } /** * Gets the unit for the given id * * @param unitID the id for the unit * @return the unit associated with the ID */ public Unit getUnit(int unitID) { return unitTable[unitID]; } /** * Given a unit id and a position, return the HMM associated with the * unit/position. * * @param unitID the id of the unit * @param position the position within the word * @return the hmm associated with the unit/position */ public HMM getHMM(int unitID, HMMPosition position) { return hmmTable.get(position)[unitID]; } /** * given a unit return its ID * * @param unit the unit * @return an ID */ public int getID(Unit unit) { if (unit.isContextDependent()) { LeftRightContext context = (LeftRightContext) unit.getContext(); assert context.getLeftContext().length == 1; assert context.getRightContext().length == 1; return buildID(getSimpleUnitID(unit), getSimpleUnitID(context.getLeftContext()[0]), getSimpleUnitID(context.getRightContext()[0])); } else { return getSimpleUnitID(unit); } } /** * Returns a context independent ID * * @param unit the unit of interest * @return the ID of the central unit (ignoring any context) */ private int getSimpleUnitID(Unit unit) { return unit.getBaseID(); } public boolean isValidID(int unitID) { return unitID >= 0 && unitID < unitTable.length && unitTable[unitID] != null; } /** * Builds an id from the given unit and its left and right unit ids * * @param unitID the id of the central unit * @param leftID the id of the left context unit * @param rightID the id of the right context unit * @return the id for the context dependent unit */ public int buildID(int unitID, int leftID, int rightID) { // special case ... if the unitID is associated with // filler than we have no context ... so use the CI // form if (unitTable[unitID] == null) return -1; int id; if (unitTable[unitID].isFiller()) { id = unitID; } else { id = unitID * (numCIUnits * numCIUnits) + (leftID * numCIUnits) + rightID; } assert id < unitTable.length; return id; } /** * Given a unit id extract the left context unit id * * @param id the unit id * @return the unit id of the left context (0 means no left context) */ private int getLeftUnitID(int id) { return (id / numCIUnits) % numCIUnits; } /** * Given a unit id extract the right context unit id * * @param id the unit id * @return the unit id of the right context (0 means no right context) */ private int getRightUnitID(int id) { return id % numCIUnits; } /** * Given a unit id extract the central unit id * * @param id the unit id * @return the central unit id */ private int getCentralUnitID(int id) { return id / (numCIUnits * numCIUnits); } /** * Given an ID, build up a name for display * * @return the name baed on the ID */ private String getUnitNameFromID(int id) { int centralID = getCentralUnitID(id); int leftID = getLeftUnitID(id); int rightID = getRightUnitID(id); String cs = unitTable[centralID] == null ? "(" + centralID + ')' : unitTable[centralID].toString(); String ls = unitTable[leftID] == null ? ("(" + leftID + ')') : unitTable[leftID].toString(); String rs = unitTable[rightID] == null ? "(" + rightID + ')' : unitTable[rightID].toString(); return cs + '[' + ls + ',' + rs + ']'; } /** * Retrieves an HMM for a unit in context. If there is no direct match, the * nearest match will be used. Note that we are currently only dealing with, * at most, single unit left and right contexts. * * @param base * the base CI unit * @param lc * the left context * @param rc * the right context * @param pos * the position of the base unit within the word * @return the HMM. (This should never return null) */ public HMM getHMM(Unit base, Unit lc, Unit rc, HMMPosition pos) { int id = -1; int bid = getID(base); int lid = getID(lc); int rid = getID(rc); if (!isValidID(bid)) { logger.severe("Bad HMM Unit: " + base.getName()); return null; } if (!isValidID(lid)) { logger.severe("Bad HMM Unit: " + lc.getName()); return null; } if (!isValidID(rid)) { logger.severe("Bad HMM Unit: " + rc.getName()); return null; } id = buildID(bid, lid, rid); if (id < 0) { logger.severe("Unable to build HMM Unit ID for " + base.getName() + " lc=" + lc.getName() + " rc=" + rc.getName()); return null; } HMM hmm = getHMM(id, pos); if (hmm == null) { logger.severe("Missing HMM Unit for " + base.getName() + " lc=" + lc.getName() + " rc=" + rc.getName()); } return hmm; } /** Dumps out info about this pool */ public void dumpInfo() { logger.info("Max CI Units " + numCIUnits); logger.info("Unit table size " + unitTable.length); if (logger.isLoggable(Level.FINER)) { for (int i = 0; i < unitTable.length; i++) { logger.finer(String.valueOf(i) + ' ' + unitTable[i]); } } } /** * A quick and dirty benchmark to get an idea how long the HMM lookups will take. This experiment shows that on a * 1GHZ sparc system, the lookup takes a little less than 1uSec. This is probably fast enough. */ static final HMMPosition[] pos = { HMMPosition.BEGIN, HMMPosition.END, HMMPosition.SINGLE, HMMPosition.INTERNAL}; static final int[] ids = {9206, 9320, 9620, 9865, 14831, 15836}; void benchmark() { int nullCount = 0; System.out.println("benchmarking ..."); TimerPool.getTimer(this,"hmmPoolBenchmark").start(); for (int i = 0; i < 1000000; i++) { int id = ids[i % ids.length]; HMMPosition position = pos[i % pos.length]; HMM hmm = getHMM(id, position); if (hmm == null) { nullCount++; } } TimerPool.getTimer(this,"hmmPoolBenchmark").stop(); System.out.println("null count " + nullCount); } }