/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.corpus.vocab;
import java.util.HashMap;
/**
* Partial basic implementation of a symbol table.
*
* @author Lane Schwartz
* @author Zhifei Li
* @version $LastChangedDate: 2009-11-30 16:47:19 -0600 (Mon, 30 Nov 2009) $
*/
public abstract class AbstractSymbolTable implements SymbolTable {

    /**
     * Speed-up cache: maps a nonterminal ID to the value previously computed
     * for it by {@link #getTargetNonterminalIndex(int)}, so the string lookup
     * and parsing are not repeated for IDs that were already seen.
     * <p>
     * The field name (including the "Intex" spelling) and its package-level
     * visibility are kept unchanged because the field is reachable from
     * outside this class.
     * NOTE(review): this is a plain HashMap with no synchronization -
     * confirm that a symbol table is never queried from several threads.
     */
    HashMap<Integer, Integer> targetNonterminalIntexCache = new HashMap<Integer, Integer>();

    /**
     * Splits a sentence on runs of whitespace and adds every resulting token
     * as a terminal.
     * <p>
     * The sentence is not trimmed first, so leading whitespace (or an empty
     * sentence) yields an empty-string token, which is handed to
     * {@code addTerminal} like any other word.
     *
     * @param sentence whitespace-delimited words; must not be null
     * @return the values returned by {@code addTerminal}, in token order
     */
    public final int[] addTerminals(String sentence) {
        return addTerminals(sentence.split("\\s+"));
    }

    /**
     * Adds each word as a terminal by calling {@code addTerminal} on it.
     *
     * @param words the words to add; must not be null
     * @return an array of the same length as {@code words} where element
     *         {@code t} is the value {@code addTerminal} returned for
     *         {@code words[t]}
     */
    public final int[] addTerminals(String[] words) {
        int[] res = new int[words.length];
        for (int t = 0; t < words.length; t++) {
            res[t] = addTerminal(words[t]);
        }
        return res;
    }

    /**
     * Returns the zero-based target-side index encoded in the nonterminal
     * with the given ID.
     * <p>
     * The first request for an ID converts it to its string form with
     * {@code getWord} and parses that string via
     * {@link #getTargetNonterminalIndex(String)}; the result is then stored
     * in {@link #targetNonterminalIntexCache} and served from there on
     * later calls.
     *
     * @param id symbol ID
     * @return {@code -1} if {@code isNonterminal(id)} is false, otherwise
     *         the parsed index
     */
    public final int getTargetNonterminalIndex(int id) {
        if (!isNonterminal(id)) {
            return -1;
        }

        Integer index = targetNonterminalIntexCache.get(id);
        if (index == null) {
            // Cache miss: convert to string, then extract the index from it.
            index = getTargetNonterminalIndex(getWord(id));
            targetNonterminalIntexCache.put(id, index);
        }
        return index;
    }

    /**
     * Extracts the zero-based index from the string form of a nonterminal.
     * <p>
     * Exactly one character is read: the one immediately before the last
     * character of {@code wrd} (for a string ending in {@code "2]"} that is
     * {@code '2'}). It is parsed as a decimal digit and one is subtracted,
     * so a string ending in {@code "1]"} gives 0 and one ending in
     * {@code "2]"} gives 1. Because only a single character is examined,
     * an index written with more than one digit is not read in full.
     *
     * @param wrd string form of a nonterminal; must not be null
     * @return the parsed digit minus one
     * @throws StringIndexOutOfBoundsException if {@code wrd} has fewer than
     *         two characters
     * @throws NumberFormatException if the second-to-last character is not
     *         a decimal digit
     */
    public final int getTargetNonterminalIndex(String wrd) {
        // Deliberately no up-front validation of the digit: an earlier version
        // checked it with FormatUtil.isNumber before parsing and was measured
        // to make decoding about 7 times slower.
        //TODO: this function is called exponentially many times, we should speed it up further
        return Integer.parseInt(wrd.substring(wrd.length() - 2, wrd.length() - 1)) - 1;
    }

    /**
     * Returns the string used to represent an unknown word.
     *
     * @return {@code SymbolTable.UNKNOWN_WORD_STRING}
     */
    public String getUnknownWord() {
        return SymbolTable.UNKNOWN_WORD_STRING;
    }

    /**
     * Returns the ID used to represent an unknown word.
     *
     * @return {@code SymbolTable.UNKNOWN_WORD}
     */
    public int getUnknownWordID() {
        return SymbolTable.UNKNOWN_WORD;
    }

    /**
     * Renders a sequence of symbol IDs as a single space-separated string.
     * <p>
     * When {@code ntIndexIncrements} is true, every negative ID is written
     * as {@code [X,k]}, where {@code k} starts at 1 and grows by one with
     * each negative ID encountered, left to right. When it is false,
     * negative IDs are converted with {@code getWord} exactly like all
     * other IDs.
     *
     * @param wordIDs the symbol IDs to render; must not be null
     * @param ntIndexIncrements whether negative IDs are replaced by
     *        sequentially numbered {@code [X,k]} placeholders
     * @return the rendered string; empty if {@code wordIDs} is empty
     */
    public String getWords(int[] wordIDs, boolean ntIndexIncrements) {
        StringBuilder s = new StringBuilder();
        int nextNTIndex = 1;

        for (int t = 0; t < wordIDs.length; t++) {
            if (t > 0) {
                s.append(' ');
            }

            int wordID = wordIDs[t];
            if (wordID < 0 && ntIndexIncrements) {
                // This branch is only entered when ntIndexIncrements is true,
                // so the running counter is always what gets printed here.
                s.append("[X,"); //XXX This should NOT be hardcoded here!
                s.append(nextNTIndex++);
                s.append(']');
            } else {
                s.append(getWord(wordID));
            }
        }

        return s.toString();
    }

    /**
     * Tells whether an ID denotes a nonterminal.
     *
     * @param id symbol ID
     * @return true exactly when {@code id} is negative
     */
    public boolean isNonterminal(int id) {
        return (id < 0);
    }
}