/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */ package edu.cmu.sphinx.linguist.language.ngram; import java.io.IOException; import java.util.Set; import edu.cmu.sphinx.linguist.WordSequence; import edu.cmu.sphinx.linguist.dictionary.Dictionary; import edu.cmu.sphinx.linguist.lextree.LexTreeLinguist; import edu.cmu.sphinx.util.props.*; /** * Represents the generic interface to an N-Gram language model. * <p> * Note that all probabilities are in LogMath log base, except as otherwise * noted. */ public interface LanguageModel extends Configurable { /** The property specifying the location of the language model. */ @S4String(defaultValue = ".") public final static String PROP_LOCATION = "location"; /** The property specifying the unigram weight */ @S4Double(defaultValue = 1.0) public final static String PROP_UNIGRAM_WEIGHT = "unigramWeight"; /** * The property specifying the maximum depth reported by the language model * (from a getMaxDepth()) call. If this property is set to (-1) (the * default) the language model reports the implicit depth of the model. * This property allows a deeper language model to be used. For instance, a * trigram language model could be used as a bigram model by setting this * property to 2. Note if this property is set to a value greater than the * implicit depth, the implicit depth is used. Legal values for this * property are 1..N and -1. */ @S4Integer(defaultValue = -1) public final static String PROP_MAX_DEPTH = "maxDepth"; /** The property specifying the dictionary to use */ @S4Component(type = Dictionary.class) public final static String PROP_DICTIONARY = "dictionary"; /** * Create the language model * * @throws java.io.IOException if error occurrs */ public void allocate() throws IOException; /** * Deallocate resources allocated to this language model * * @throws IOException if error occurrs */ public void deallocate() throws IOException; /** * Gets the n-gram probability of the word sequence represented by the word * list * * @param wordSequence the wordSequence * @return the probability of the word sequence in log base */ public float getProbability(WordSequence wordSequence); /** * Gets the smear term for the given wordSequence. Used in * {@link LexTreeLinguist}. See * {@link LexTreeLinguist#PROP_WANT_UNIGRAM_SMEAR} for details. * * @param wordSequence the word sequence * @return the smear term associated with this word sequence */ public float getSmear(WordSequence wordSequence); /** * Returns the set of words in the language model. The set is unmodifiable. * * @return the unmodifiable set of words */ public Set<String> getVocabulary(); /** * Returns the maximum depth of the language model * * @return the maximum depth of the language model */ public int getMaxDepth(); /** * Called on utterance end to clear cache if needed */ public void onUtteranceEnd(); }