/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.corpus.vocab;
import joshua.decoder.ff.lm.srilm.SWIGTYPE_p_Ngram;
import joshua.decoder.ff.lm.srilm.srilm;
import joshua.decoder.ff.tm.hiero.HieroFormatReader;
import java.io.IOException;
import java.util.Collection;
import java.util.logging.Level;
import java.util.logging.Logger;
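/**
* Symbol table whose terminal symbols are stored in, and looked up
* through, the native SRILM library.
*
* @author Zhifei Li, <zhifei.work@gmail.com>
* @version $LastChangedDate: 2010-01-20 10:47:45 -0600 (Wed, 20 Jan 2010) $
*/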
public class SrilmSymbol extends DefaultSymbol {

    /** Handle returned by {@code srilm.initLM} when this table was constructed. */
    private final SWIGTYPE_p_Ngram p_srilm;

    /** Logger for this class. */
    private static final Logger logger =
        Logger.getLogger(SrilmSymbol.class.getName());

    /**
     * Construct an empty SRILM symbol table and register the
     * nonterminals X, X1, X2, S and S1 (via the inherited
     * {@code *_STRING} constants).
     *
     * @param lmOrder Language model n-gram order
     */
    public SrilmSymbol(int lmOrder) {
        this.p_srilm = loadAndInitSrilm(lmOrder, lmStartSymID, lmEndSymID);

        logger.info("Construct the symbol table on the fly");
        addNonterminal(X_STRING);
        addNonterminal(X1_STRING);
        addNonterminal(X2_STRING);
        addNonterminal(S_STRING);
        addNonterminal(S1_STRING);
    }

    /**
     * Construct an SRILM symbol table using the provided file.
     * <p>
     * When {@code fname} is {@code null} no file is read and the table
     * is left to be filled on the fly; unlike {@link #SrilmSymbol(int)},
     * no nonterminals are registered in that case.
     *
     * @param fname File name, or {@code null} to skip loading from a file
     * @param lmOrder Language model n-gram order
     * @throws IOException declared for {@code initializeSymTblFromFile}
     */
    public SrilmSymbol(String fname, int lmOrder) throws IOException {
        // SRILM must be loaded and initialized before any symbol is added to
        // its table. This is unfortunate, as it forces us to provide the LM
        // order, which seems unrelated to the vocabulary.
        this.p_srilm = loadAndInitSrilm(lmOrder, lmStartSymID, lmEndSymID);

        // Now we can begin to add symbols.
        if (fname != null) {
            logger.info("Construct the symbol table from a file " +fname);
            initializeSymTblFromFile(fname);
        } else {
            logger.info("Construct the symbol table on the fly");
        }
    }

    /**
     * Construct an SRILM symbol table using the symbol mapping
     * from the provided symbol table.
     * <p>
     * Every non-null symbol of {@code vocab} is re-added to this table,
     * positive IDs first (ascending) and then negative IDs (from -1
     * downwards). Each symbol must receive the same ID it had in
     * {@code vocab}.
     *
     * @param vocab Existing symbol table
     * @param lmOrder Language model n-gram order
     * @throws RuntimeException if a copied symbol ends up with a different
     *         ID, or reads back as a different string, than in {@code vocab}
     */
    public SrilmSymbol(SymbolTable vocab, int lmOrder) {
        int vocabLow = vocab.getLowestID();
        int vocabHigh = vocab.getHighestID();

        if (logger.isLoggable(Level.FINEST)) {
            logger.finest("In existing symbol table, lowestID=="+vocabLow+ " and highestID=="+vocabHigh);
        }

        // NOTE(review): the other constructors pass lmStartSymID and
        // lmEndSymID straight through; this one passes 1 and the width of
        // that range instead. Kept as-is -- confirm this is intended.
        int start = 1;
        int end = lmEndSymID - lmStartSymID;
        this.p_srilm = loadAndInitSrilm(lmOrder, start, end);

        // Non-negative IDs, in ascending order. When the source table also
        // contains negative IDs the positive range starts at 1.
        int lowestNonNegative = (vocabLow < 0) ? 1 : vocabLow;
        for (int i = lowestNonNegative; i <= vocabHigh; i++) {
            copySymbol(vocab, i);
        }

        // Negative IDs, from -1 down to the lowest ID (no-op if vocabLow >= 0).
        for (int i = -1; i >= vocabLow; i--) {
            copySymbol(vocab, i);
        }

        // NOTE(review): the dump is only produced when FINEST is enabled but
        // its records are emitted at FINE; left unchanged to preserve the
        // existing log output.
        if (logger.isLoggable(Level.FINEST)) {
            logSymbolDump(vocabLow, vocabHigh);
        }
    }

    /**
     * Loads the native SRILM library and initializes the language model.
     *
     * @param lmOrder Language model n-gram order
     * @param startId first argument after the order passed to {@code srilm.initLM}
     * @param endId second argument after the order passed to {@code srilm.initLM}
     * @return the handle returned by {@code srilm.initLM}
     */
    private static SWIGTYPE_p_Ngram loadAndInitSrilm(int lmOrder, int startId, int endId) {
        // Safe to call from every constructor: the JVM ignores repeated
        // requests to load a library that is already loaded.
        System.loadLibrary("srilm");
        return srilm.initLM(lmOrder, startId, endId);
    }

    /**
     * Copies the symbol with the given ID from {@code vocab} into this table
     * and verifies that it kept both its ID and its string form. IDs for
     * which {@code vocab} has no word are skipped.
     *
     * @param vocab source symbol table
     * @param originalId ID of the symbol in {@code vocab}
     * @throws RuntimeException if the ID or the string read back from this
     *         table differs from the original
     */
    private void copySymbol(SymbolTable vocab, int originalId) {
        String symbol = vocab.getWord(originalId);
        if (symbol == null) {
            return;
        }

        boolean nonterminal = vocab.isNonterminal(originalId);
        int id = nonterminal ? this.addNonterminal(symbol) : this.addTerminal(symbol);

        // Read the symbol back once; the value is used both for the log
        // message and for the round-trip check below.
        String stored = this.getWord(id);

        if (logger.isLoggable(Level.FINE)) {
            logger.fine("Added symbol " + symbol + " with id " + id + "; original id was " + originalId + " " + stored);
        }

        if (id != originalId || !symbol.equals(stored)) {
            String kind = nonterminal ? "nonterminal" : "terminal";
            throw new RuntimeException("Symbol mismatch between " + id + " and " + originalId + " for " + kind + " symbol " + symbol);
        }
    }

    /**
     * Logs the ID-to-word mapping of this table for the negative and the
     * positive ID ranges.
     *
     * @param vocabLow lowest ID of the table that was copied
     * @param vocabHigh highest ID of the table that was copied
     */
    private void logSymbolDump(int vocabLow, int vocabHigh) {
        // NOTE(review): the negative range starts at vocabLow+1, so the
        // lowest ID itself is not dumped -- confirm whether that is intended.
        for (int i = vocabLow + 1; i < 0; i++) {
            logger.fine("ID " + i + " => " + this.getWord(i));
        }
        for (int i = 1; i <= vocabHigh; i++) {
            logger.fine("ID " + i + " => " + this.getWord(i));
        }
    }

    /**
     * Returns the SRILM handle obtained when this table was constructed.
     *
     * @return the handle returned by {@code srilm.initLM}
     */
    public SWIGTYPE_p_Ngram getSrilmPointer() {
        return this.p_srilm;
    }

    /**
     * Looks up the SRILM index for a string. This will automatically add
     * {@code str} into the SRILM table if it is not there.
     * <p>
     * No check is made that {@code str} is not a nonterminal.
     *
     * @param str the word to look up or add
     * @return the index SRILM reports for {@code str}
     */
    public int addTerminal(String str) {
        return (int) srilm.getIndexForWord(str);
    }

    /**
     * Returns the string SRILM holds for the given index.
     *
     * @param id SRILM word index
     * @return the word, or {@code null} (after logging a warning) if SRILM
     *         returns no string for {@code id}
     */
    public String getTerminal(int id) {
        String res = (String) srilm.getWordForIndex(id);
        if (res == null) {
            // Deliberately lenient: warn instead of throwing.
            logger.warning("null string for id="+id);
        }
        return res;
    }

    /**
     * Not implemented for SRILM-backed tables.
     *
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public Collection<Integer> getAllIDs() {
        //TODO Implement this method
        throw new UnsupportedOperationException("Method not yet implemented");
    }

    /**
     * Returns the ID for a word, dispatching on whether
     * {@code HieroFormatReader} classifies it as a nonterminal.
     *
     * @param wordString the word to look up
     * @return the ID returned by {@code addNonterminal} or {@code addTerminal}
     */
    public int getID(String wordString) {
        // TODO: this lookup goes through the add* methods, so it is not a
        // pure query; the original author flagged this as wrong.
        if (HieroFormatReader.isNonTerminal(wordString)) {
            return addNonterminal(wordString);
        }
        return addTerminal(wordString);
    }
}