/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.decoder.ff.lm;
import joshua.decoder.Support;
import joshua.corpus.vocab.SymbolTable;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* This class provides a default implementation for the Equivalent
* LM State optimization (namely, don't back off anywhere). It also
* provides some default implementations for more general functions
* on the interface to fall back to more specific ones (e.g. from
* List<Integer> to int[]) and a default implementation for
* sentenceLogProbability which enumerates the n-grams and
* calls ngramLogProbability for each of them.
*
* @author Zhifei Li, <zhifei.work@gmail.com>
* @author wren ng thornton <wren@users.sourceforge.net>
* @version $LastChangedDate: 2009-12-30 10:10:38 -0600 (Wed, 30 Dec 2009) $
*/
public abstract class DefaultNGramLanguageModel implements NGramLanguageModel {

    /** Logger for this class. */
    private static final Logger logger =
        Logger.getLogger(DefaultNGramLanguageModel.class.getName());

    /** Symbol table used to render n-grams as words in FINE-level log output. */
    protected final SymbolTable symbolTable;

    /** The n-gram order handed to the constructor; reported by {@link #getOrder()}. */
    protected final int ngramOrder;

    //===============================================================
    // Constructors
    //===============================================================

    /**
     * Stores the symbol table and the n-gram order for later use.
     *
     * @param symbolTable symbol table used when logging n-grams
     * @param order       the n-gram order of this language model
     */
    public DefaultNGramLanguageModel(SymbolTable symbolTable, int order) {
        this.symbolTable = symbolTable;
        this.ngramOrder = order;
    }

    //===============================================================
    // Attributes
    //===============================================================

    /**
     * @return the n-gram order this model was constructed with
     */
    public final int getOrder() {
        return ngramOrder;
    }

    //===============================================================
    // NGramLanguageModel Methods
    //===============================================================

    /**
     * Sums the log probabilities of the n-grams of a sentence.
     * <p>
     * Two groups of n-grams are scored, in this sequence:
     * <ol>
     * <li>the sentence prefixes ending at position <code>j</code>, for
     *     every <code>j</code> starting at <code>startIndex</code> while
     *     <code>j</code> is below <code>order</code> and does not exceed
     *     the sentence length;</li>
     * <li>every window of <code>order</code> consecutive words.</li>
     * </ol>
     * Each n-gram is scored with
     * {@link #ngramLogProbability(int[], int)} and, when FINE logging
     * is enabled, written to the log together with its score.
     *
     * @param sentence   word ids of the sentence; may be null
     * @param order      n-gram order to score with
     * @param startIndex length of the first sentence prefix to score
     * @return the summed log probability, or 0.0 for a null or empty
     *         sentence
     */
    public double sentenceLogProbability(
        List<Integer> sentence, int order, int startIndex
    ) {
        // A missing sentence is treated exactly like an empty one.
        final int length = (sentence == null) ? 0 : sentence.size();
        if (length <= 0) {
            return 0.0;
        }

        double total = 0.0;

        // Partial n-grams at the beginning of the sentence.
        // TODO: startIndex depends on the order, e.g., this.ngramOrder-1
        // (in SRILM, for a 3-gram LM, start_index=2; other cases need
        // to be checked).
        int prefixEnd = startIndex;
        while (prefixEnd <= length && prefixEnd < order) {
            // Presumably subIntArray copies the half-open range [0, prefixEnd).
            int[] prefix = Support.subIntArray(sentence, 0, prefixEnd);
            double score = ngramLogProbability(prefix, order);
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("\tlogp ( " + symbolTable.getWords(prefix) + " ) = " + score);
            }
            total += score;
            prefixEnd++;
        }

        // Regular-order n-grams: slide a window of `order` words along
        // the sentence. Nothing is scored here when the sentence is
        // shorter than `order`.
        final int lastStart = length - order;
        for (int start = 0; start <= lastStart; start++) {
            int[] window = Support.subIntArray(sentence, start, start + order);
            double score = ngramLogProbability(window, order);
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("\tlogp ( " + symbolTable.getWords(window) + " ) = " + score);
            }
            total += score;
        }

        return total;
    }

    /**
     * Converts the list to an <code>int[]</code> and delegates to
     * {@link #ngramLogProbability(int[], int)}.
     *
     * @param ngram word ids of the n-gram
     * @param order n-gram order to score with
     * @return the log probability of the n-gram
     * @deprecated this function is much slower than the int[] version
     */
    @Deprecated
    public double ngramLogProbability(List<Integer> ngram, int order) {
        int[] asArray = Support.subIntArray(ngram, 0, ngram.size());
        return ngramLogProbability(asArray, order);
    }

    /**
     * Scores an n-gram using this model's own order.
     *
     * @param ngram word ids of the n-gram
     * @return the log probability of the n-gram
     */
    public double ngramLogProbability(int[] ngram) {
        return ngramLogProbability(ngram, ngramOrder);
    }

    /**
     * Scores a single n-gram; supplied by concrete subclasses.
     *
     * @param ngram word ids of the n-gram
     * @param order n-gram order to score with
     * @return the log probability of the n-gram
     */
    public abstract double ngramLogProbability(int[] ngram, int order);

    /**
     * Will never be called, because BACKOFF_LEFT_LM_STATE_SYM_ID
     * token will never exist. However, were it to be called,
     * it should return a probability of 1 (logprob of 0).
     *
     * @return always 0, i.e. log(1)
     */
    public double logProbOfBackoffState(List<Integer> ngram, int order, int qtyAdditionalBackoffWeight) {
        return 0.0; // log(1) == 0
    }

    /**
     * Will never be called, because BACKOFF_LEFT_LM_STATE_SYM_ID
     * token will never exist. However, were it to be called,
     * it should return a probability of 1 (logprob of 0).
     *
     * @return always 0, i.e. log(1)
     */
    public double logProbabilityOfBackoffState(int[] ngram, int order, int qtyAdditionalBackoffWeight) {
        return 0.0; // log(1) == 0
    }

    /**
     * Default left-state implementation: hands back the given state
     * as-is. The <code>cost</code> array is neither read nor written.
     *
     * @return <code>originalState</code>, the very same array
     */
    public int[] leftEquivalentState(int[] originalState, int order, double[] cost) {
        return originalState;
    }

    /**
     * Default right-state implementation: hands back the given state
     * as-is.
     *
     * @return <code>originalState</code>, the very same array
     */
    public int[] rightEquivalentState(int[] originalState, int order) {
        return originalState;
    }
}