/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.linguist.acoustic.tiedstate;
// Placeholder for a package import
import edu.cmu.sphinx.linguist.acoustic.*;
import edu.cmu.sphinx.util.props.*;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Loads a tied-state acoustic model generated by the Sphinx-3 trainer.
* <p>
* It is not the goal of this documentation to provide an explanation about the concept of HMMs. The explanation below
* is superficial, and provided only in a way that the files in the acoustic model package make sense.
* <p>
* An HMM models a process using a sequence of states. Associated with each state, there is a probability density
* function. A popular choice for this function is a Gaussian mixture, that is, a summation of Gaussians. As you may
* recall, a single Gaussian is defined by a mean and a variance, or, in the case of a multidimensional Gaussian, by a
* mean vector and a covariance matrix, or, under some simplifying assumptions, a variance vector. The "means" and
* "variances" files in the "continuous" directory contain exactly this: a table in which each line contains a mean
* vector or a variance vector respectively. The dimension of these vectors is the same as the incoming data, the
* encoded speech signal. The Gaussian mixture is a summation of Gaussians, with different weights for different
* Gaussians. The "mixture_weights" file contains this: each line contains the weights for a combination of Gaussians.
* <p>
* The HMM is a model with a set of states. The transitions between states have an associated probability. These
* probabilities make up the transition matrices stored in the "transition_matrices" file.
* <p>
* The files in the "continuous" directory are, therefore, tables, or pools, of means, variances, mixture weights, and
* transition probabilities.
* <p>
* The dictionary is a file that maps words to their phonetic transcriptions, that is, it maps words to sequences of
* phonemes.
* <p>
* The language model contains information about probabilities of words in a language. These probabilities could be for
* individual words or for sequences of two or three words.
* <p>
* The model definition file in a way ties everything together. If the recognition system models phonemes, there is an
* HMM for each phoneme. The model definition file has one line for each phoneme. The phoneme could be context
* dependent or context independent. Each line, therefore, identifies a unique HMM. This line has the phoneme
* identification, the optional left or right context, the index of a transition matrix, and, for each state, the index of a mean
* vector, a variance vector, and a set of mixture weights.
*/
public class TiedStateAcousticModel implements AcousticModel {
/** The property that defines the component used to load the acoustic model */
@S4Component(type = Loader.class)
public final static String PROP_LOADER = "loader";
/** The property that defines the unit manager */
@S4Component(type = UnitManager.class)
public final static String PROP_UNIT_MANAGER = "unitManager";
/** Controls whether we generate composites or CI units when no context is given during a lookup. */
@S4Boolean(defaultValue = true)
public final static String PROP_USE_COMPOSITES = "useComposites";
// -----------------------------
// Configured variables
// -----------------------------
/** Value returned by getName(); nothing in this class assigns it, so it is null unless a subclass sets it. */
protected String name;
/** Logger for diagnostics; set by the three-argument constructor or by newProperties(). */
protected Logger logger;
/** Source of the HMM manager, the context-independent units, the senone pool and the context sizes. */
protected Loader loader;
/** Used to obtain Unit instances when building context-independent or silence-context units. */
protected UnitManager unitManager;
/** When true, lookupNearestHMM() may build a composite HMM for a unit with a missing left or right context. */
private boolean useComposites;
/** Lazily created by getProperties() from the "model.props" resource. */
private Properties properties;
// ----------------------------
// internal variables
// -----------------------------
/** Cache of composite senone sequences, keyed by the string form (toString()) of the unit. */
final transient private Map<String, SenoneSequence> compositeSenoneSequenceCache = new HashMap<String, SenoneSequence>();
/** Set to true by allocate() once the loader has loaded the model, so the load happens only once. */
private boolean allocated;
/**
 * Creates an acoustic model from explicitly supplied collaborators instead of
 * a property sheet. The logger is obtained from the runtime class name.
 *
 * @param loader        the loader that provides the model data
 * @param unitManager   the unit manager used to obtain units
 * @param useComposites whether composite HMMs may be generated during lookups
 */
public TiedStateAcousticModel(Loader loader, UnitManager unitManager, boolean useComposites) {
    this.logger = Logger.getLogger(getClass().getName());
    this.useComposites = useComposites;
    this.unitManager = unitManager;
    this.loader = loader;
}

/**
 * Creates an unconfigured acoustic model; the loader, unit manager, composite
 * flag and logger are expected to be supplied through
 * {@link #newProperties(PropertySheet)}.
 */
public TiedStateAcousticModel() {
}
/**
 * Configures this model from the given property sheet: reads the loader and
 * unit manager components, the composite flag, and the logger.
 *
 * @param ps the property sheet to read the configuration from
 * @throws PropertyException if a property cannot be resolved
 */
public void newProperties(PropertySheet ps) throws PropertyException {
    this.loader = (Loader) ps.getComponent(PROP_LOADER);
    this.unitManager = (UnitManager) ps.getComponent(PROP_UNIT_MANAGER);
    this.useComposites = ps.getBoolean(PROP_USE_COMPOSITES);
    this.logger = ps.getLogger();
}
/**
 * Loads the acoustic model through the configured loader and logs summary
 * information. Calls made after a successful allocation do nothing.
 *
 * @throws IOException if the model could not be loaded
 */
public void allocate() throws IOException {
    if (allocated) {
        return;
    }
    loader.load();
    logInfo();
    // only mark as allocated once loading and logging completed without an exception
    allocated = true;
}
/**
 * Deallocates this acoustic model. This implementation performs no work; in
 * particular the allocated flag set by {@link #allocate()} is not reset.
 *
 * @see edu.cmu.sphinx.linguist.acoustic.AcousticModel#deallocate()
 */
public void deallocate() {
    // no-op
}
/**
 * Returns the name of this acoustic model.
 *
 * @return the model name, or null when no name has been assigned
 */
public String getName() {
    return this.name;
}
/**
 * Builds a composite HMM for the given unit. The senones come from the
 * composite senone sequence of the unit, while the transition matrix is
 * borrowed from the context-independent HMM of the same base unit.
 *
 * @param unit     the unit for the hmm
 * @param position the position of the unit within the word
 * @return a composite HMM
 */
private HMM getCompositeHMM(Unit unit, HMMPosition position) {
    String baseName = unit.getName();
    boolean filler = unit.isFiller();
    Unit baseUnit = unitManager.getUnit(baseName, filler, Context.EMPTY_CONTEXT);

    SenoneSequence senones = getCompositeSenoneSequence(unit, position);

    // exact lookup of the context-independent HMM at the UNDEFINED position
    HMM baseHMM = lookupNearestHMM(baseUnit, HMMPosition.UNDEFINED, true);
    float[][] transitions = ((SenoneHMM) baseHMM).getTransitionMatrix();

    return new SenoneHMM(unit, senones, transitions, position);
}
/**
 * Given a unit, returns the HMM that best matches the given unit.
 * <p>
 * When exactMatch is true only the exact lookup is performed. Otherwise the
 * following fallbacks are tried in order until one yields an HMM: the exact
 * entry in the HMM manager, a composite HMM (only if composites are enabled
 * and the unit is missing a left or right context), the same unit at any
 * other word position, the unit with non-silence filler contexts replaced by
 * silence, and finally the context-independent base unit.
 *
 * @param unit       the unit of interest
 * @param position   the position of the unit of interest
 * @param exactMatch if true, only an exact match is acceptable.
 * @return the HMM that best matches, or null if no match could be found.
 */
public HMM lookupNearestHMM(Unit unit, HMMPosition position,
                            boolean exactMatch) {
    if (exactMatch) {
        return lookupHMM(unit, position);
    }

    HMMManager manager = loader.getHMMManager();
    HMM exact = manager.get(position, unit);
    if (exact != null) {
        return exact;
    }

    // no match, try a composite and remember it in the manager
    if (useComposites && isComposite(unit)) {
        HMM composite = getCompositeHMM(unit, position);
        if (composite != null) {
            manager.put(composite);
            return composite;
        }
    }

    // no match, try at other positions
    HMM fallback = getHMMAtAnyPosition(unit);

    // still no match, try different filler
    if (fallback == null) {
        fallback = getHMMInSilenceContext(unit, position);
    }

    // still no match, backoff to base phone
    if (fallback == null) {
        Unit baseUnit = lookupUnit(unit.getName());
        assert unit.isContextDependent();
        if (baseUnit == null) {
            logger.severe("Can't find HMM for " + unit.getName());
        }
        assert baseUnit != null;
        assert !baseUnit.isContextDependent();
        fallback = manager.get(HMMPosition.UNDEFINED, baseUnit);
    }

    assert fallback != null;
    return fallback;
}
/**
 * Determines if a unit is a composite unit. Fillers are never composite; any
 * other unit is composite when it carries a LeftRightContext whose right or
 * left context is null.
 *
 * @param unit the unit to test
 * @return true if the unit is missing a right or a left context
 */
private boolean isComposite(Unit unit) {
    if (unit.isFiller()) {
        return false;
    }
    Context context = unit.getContext();
    if (!(context instanceof LeftRightContext)) {
        return false;
    }
    LeftRightContext lr = (LeftRightContext) context;
    return lr.getRightContext() == null || lr.getLeftContext() == null;
}
/**
 * Looks up a context-independent unit by name among the units provided by
 * the loader.
 *
 * @param name the name of the unit
 * @return the unit or null if the unit was not found
 */
private Unit lookupUnit(String name) {
    Map<String, Unit> ciUnits = loader.getContextIndependentUnits();
    return ciUnits.get(name);
}
/**
 * Returns an iterator over all the HMMs held by the loader's HMM manager.
 *
 * @return an iterator that can be used to iterate through all HMMs in the model. The iterator returns objects of
 *         type <code>HMM</code>.
 */
public Iterator<HMM> getHMMIterator() {
    HMMManager manager = loader.getHMMManager();
    return manager.iterator();
}
/**
 * Returns an iterator over all the context-independent (CI) units provided
 * by the loader.
 *
 * @return an iterator that can be used to iterate through all CI units. The iterator returns objects of type
 *         <code>Unit</code>
 */
public Iterator<Unit> getContextIndependentUnitIterator() {
    Collection<Unit> ciUnits = loader.getContextIndependentUnits().values();
    return ciUnits.iterator();
}
/**
 * Get a composite senone sequence given the unit.
 *
 * The unit should have a LeftRightContext, where one or two of 'left' or
 * 'right' may be null to indicate that the match should succeed on any
 * context.
 * <p>
 * Results are cached by the string form of the unit, so the scan over all
 * HMMs happens once per distinct unit string.
 *
 * @param unit     the unit
 * @param position position in HMM
 * @return senone sequence
 */
public SenoneSequence getCompositeSenoneSequence(Unit unit,
                                                 HMMPosition position) {
    String unitStr = unit.toString();
    SenoneSequence compositeSenoneSequence = compositeSenoneSequenceCache.get(unitStr);
    if (logger.isLoggable(Level.FINE)) {
        // The parentheses are required: '+' binds tighter than '==', so without
        // them the whole concatenation is compared against null and only
        // "Cached" is ever logged.
        logger.fine("getCompositeSenoneSequence: "
                + unit
                + (compositeSenoneSequence == null ? "" : "Cached"));
    }
    if (compositeSenoneSequence != null) {
        return compositeSenoneSequence;
    }

    // Iterate through all HMMs looking for
    // a) An hmm with a unit that has the proper base
    // b) matches the non-null context
    Context context = unit.getContext();
    List<SenoneSequence> senoneSequenceList = new ArrayList<SenoneSequence>();

    // collect all senone sequences that match the pattern
    for (Iterator<HMM> i = getHMMIterator(); i.hasNext();) {
        SenoneHMM hmm = (SenoneHMM) i.next();
        if (hmm.getPosition() == position) {
            Unit hmmUnit = hmm.getUnit();
            if (hmmUnit.isPartialMatch(unit.getName(), context)) {
                if (logger.isLoggable(Level.FINE)) {
                    logger.fine("collected: " + hmm.getUnit());
                }
                senoneSequenceList.add(hmm.getSenoneSequence());
            }
        }
    }

    // couldn't find any matches, so at least include the CI unit
    if (senoneSequenceList.isEmpty()) {
        Unit ciUnit = unitManager.getUnit(unit.getName(), unit.isFiller());
        SenoneHMM baseHMM = lookupHMM(ciUnit, HMMPosition.UNDEFINED);
        senoneSequenceList.add(baseHMM.getSenoneSequence());
    }

    // At this point we have all of the senone sequences that
    // match the base/context pattern collected into the list.
    // Next we build a CompositeSenone consisting of all of the
    // senones in each position of the list.

    // First find the longest senone sequence
    int longestSequence = 0;
    for (SenoneSequence ss : senoneSequenceList) {
        int length = ss.getSenones().length;
        if (length > longestSequence) {
            longestSequence = length;
        }
    }

    // now collect all of the senones at each position into
    // sets so we can create CompositeSenones from them
    // QUESTION: is it possible to have different size senone
    // sequences? For now let's assume the worst case.
    List<CompositeSenone> compositeSenones = new ArrayList<CompositeSenone>();
    float logWeight = 0.0f;
    for (int i = 0; i < longestSequence; i++) {
        Set<Senone> compositeSenoneSet = new HashSet<Senone>();
        for (SenoneSequence senoneSequence : senoneSequenceList) {
            // shorter sequences simply contribute nothing at this index
            if (i < senoneSequence.getSenones().length) {
                Senone senone = senoneSequence.getSenones()[i];
                compositeSenoneSet.add(senone);
            }
        }
        compositeSenones.add(CompositeSenone.create(
                compositeSenoneSet, logWeight));
    }

    compositeSenoneSequence = SenoneSequence.create(compositeSenones);
    // store under the same key that was used for the cache lookup above
    compositeSenoneSequenceCache.put(unitStr, compositeSenoneSequence);

    if (logger.isLoggable(Level.FINE)) {
        logger.fine(unit + " consists of " + compositeSenones.size() + " composite senones");
        if (logger.isLoggable(Level.FINEST)) {
            compositeSenoneSequence.dump("am");
        }
    }
    return compositeSenoneSequence;
}
/**
 * Returns the size of the left context for context dependent units, as
 * reported by the loader.
 *
 * @return the left context size
 */
public int getLeftContextSize() {
    int leftSize = loader.getLeftContextSize();
    return leftSize;
}
/**
 * Returns the size of the right context for context dependent units, as
 * reported by the loader.
 *
 * @return the right context size
 */
public int getRightContextSize() {
    int rightSize = loader.getRightContextSize();
    return rightSize;
}
/**
 * Given a unit, returns the HMM registered in the HMM manager for exactly
 * that unit and position.
 *
 * @param unit     the unit of interest
 * @param position the position of the unit of interest
 * @return the HMM that exactly matches, or null if no match could be found.
 */
private SenoneHMM lookupHMM(Unit unit, HMMPosition position) {
    HMMManager manager = loader.getHMMManager();
    return (SenoneHMM) manager.get(position, unit);
}
/**
 * Returns the senone with the given id from the loader's senone pool.
 *
 * @param id the senone id; it is narrowed to an int before the pool lookup
 * @return the senone stored in the pool under that id
 */
public Senone getSenone(long id) {
    int poolIndex = (int) id;
    return loader.getSenonePool().get(poolIndex);
}
/**
 * Dumps information about this model to the logger: the loader's own
 * summary (when a loader is set) followed by the number of cached composite
 * senone sequences.
 */
protected void logInfo() {
    if (loader != null) {
        loader.logInfo();
    }
    int cachedSequences = compositeSenoneSequenceCache.size();
    logger.info("CompositeSenoneSequences: " + cachedSequences);
}
/**
 * Searches for an hmm of the given unit at any position, trying the
 * positions in the order returned by HMMPosition.values().
 *
 * @param unit the unit to search for
 * @return the first hmm found, or null if there is none at any position
 */
private SenoneHMM getHMMAtAnyPosition(Unit unit) {
    HMMManager manager = loader.getHMMManager();
    HMMPosition[] positions = HMMPosition.values();
    for (int i = 0; i < positions.length; i++) {
        HMM candidate = manager.get(positions[i], unit);
        if (candidate != null) {
            return (SenoneHMM) candidate;
        }
    }
    return null;
}
/**
 * Given a unit, search for the HMM associated with this unit by replacing all non-silence filler contexts with the
 * silence filler context. The lookup is only attempted when the unit has a
 * LeftRightContext and at least one side actually contains a non-silence
 * filler; the requested position is tried first, then any position.
 *
 * @param unit     the unit of interest
 * @param position the preferred position of the unit
 * @return the associated hmm or null
 */
private SenoneHMM getHMMInSilenceContext(Unit unit, HMMPosition position) {
    HMMManager manager = loader.getHMMManager();
    Context context = unit.getContext();
    if (!(context instanceof LeftRightContext)) {
        return null;
    }

    LeftRightContext lrContext = (LeftRightContext) context;
    Unit[] left = lrContext.getLeftContext();
    Unit[] right = lrContext.getRightContext();

    boolean leftChanged = hasNonSilenceFiller(left);
    Unit[] newLeft = leftChanged ? replaceNonSilenceFillerWithSilence(left) : left;
    boolean rightChanged = hasNonSilenceFiller(right);
    Unit[] newRight = rightChanged ? replaceNonSilenceFillerWithSilence(right) : right;

    // nothing was substituted, so the lookup would just repeat the original one
    if (!leftChanged && !rightChanged) {
        return null;
    }

    Context silenceContext = LeftRightContext.get(newLeft, newRight);
    Unit silenceUnit = unitManager.getUnit(unit.getName(),
            unit.isFiller(), silenceContext);
    SenoneHMM found = (SenoneHMM) manager.get(position, silenceUnit);
    if (found != null) {
        return found;
    }
    return getHMMAtAnyPosition(silenceUnit);
}
/**
 * Returns true if the array of units contains a non-silence filler
 *
 * @param units the units to check; may be null, which yields false
 * @return true if the array contains a filler that is not the silence filler
 */
private boolean hasNonSilenceFiller(Unit[] units) {
    if (units != null) {
        for (int i = 0; i < units.length; i++) {
            Unit candidate = units[i];
            if (!candidate.isFiller()) {
                continue;
            }
            if (!candidate.equals(UnitManager.SILENCE)) {
                return true;
            }
        }
    }
    return false;
}
/**
 * Returns a copy of the given unit array in which every non-silence filler
 * unit is replaced with the silence filler. The input array is not modified.
 *
 * @param context the context units to copy
 * @return a new array of the same length with non-silence fillers replaced by silence
 */
private Unit[] replaceNonSilenceFillerWithSilence(Unit[] context) {
    Unit[] result = new Unit[context.length];
    int index = 0;
    for (Unit original : context) {
        boolean replace = original.isFiller()
                && !original.equals(UnitManager.SILENCE);
        result[index++] = replace ? UnitManager.SILENCE : original;
    }
    return result;
}
/**
 * Returns the properties of this acoustic model. They are read once, on the
 * first call, from the "model.props" resource located relative to this
 * class, and cached afterwards.
 * <p>
 * If the resource is missing or cannot be read, a warning is logged and the
 * (empty or partially filled) properties object is still returned and cached.
 *
 * @return the properties of this acoustic model
 */
public Properties getProperties() {
    if (properties == null) {
        properties = new Properties();
        // the logger field is not set when the no-arg constructor was used and
        // newProperties() has not been called yet
        Logger log = (logger != null)
                ? logger
                : Logger.getLogger(TiedStateAcousticModel.class.getName());

        URL resource = TiedStateAcousticModel.class.getResource("model.props");
        if (resource == null) {
            // getResource returns null for a missing resource; avoid the NPE
            log.warning("Resource model.props not found, using empty properties");
            return properties;
        }

        InputStream stream = null;
        try {
            stream = resource.openStream();
            properties.load(stream);
        } catch (IOException ioe) {
            log.log(Level.WARNING, "Could not load model.props", ioe);
        } finally {
            // Properties.load leaves the stream open, so close it here
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if closing fails
                }
            }
        }
    }
    return properties;
}
}