/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.decoder.ff.lm.distributed_lm;
import joshua.decoder.ff.lm.AbstractLM;
import joshua.corpus.vocab.SymbolTable;
import joshua.util.io.LineReader;
import joshua.util.Regex;
import java.io.IOException;
import java.util.Hashtable;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* This class implements a language model that is served remotely:
* (1) get the list of lm servers
* (2) set up the network connection
* (3) get the lm probability for an n-gram remotely
*
* @author Zhifei Li, <zhifei.work@gmail.com>
* @version $LastChangedDate: 2009-05-19 19:58:48 -0500 (Tue, 19 May 2009) $
*/
//PATH: this => LMClient => network => LMServer => LMGrammar => LMGrammar_JAVA/SRILM; and then reverse the path
public class LMGrammarRemote extends AbstractLM {

    private static final Logger logger = Logger.getLogger(LMGrammarRemote.class.getName());

    /** Client through which every probability lookup is sent; assigned once by the constructor. */
    private final LMClient p_lm_client;

    /**
     * Reads the list of LM servers and creates the matching client: an
     * {@link LMClientSingle} when exactly one server is requested, an
     * {@link LMClientMultiServer} otherwise.
     * <p>
     * We assume both suffix and lm are remoted, if one is remoted.
     *
     * @param psymbolTable   symbol table handed to the superclass
     * @param order          n-gram order handed to the superclass
     * @param f_server_lists file listing one per-server configuration file name per line
     * @param num_servers    number of servers the list is expected to contain
     * @throws IOException              if the server list or a server configuration cannot be read
     * @throws IllegalArgumentException if the number of listed servers differs from
     *                                  <code>num_servers</code>, or a configuration is malformed
     */
    public LMGrammarRemote(SymbolTable psymbolTable, int order, String f_server_lists, int num_servers)
    throws IOException {
        super(psymbolTable, order);
        logger.info("use remote suffix and lm server");

        String[] hosts   = new String[num_servers];
        int[]    ports   = new int[num_servers];
        double[] weights = new double[num_servers];
        read_lm_server_lists(f_server_lists, num_servers, hosts, ports, weights);

        if (1 == num_servers) {
            p_lm_client = new LMClientSingle(hosts[0], ports[0]);
        } else {
            p_lm_client = new LMClientMultiServer(hosts, ports, weights, num_servers);
        }
    }


    //TODO This method is never used. Perhaps it should be removed.
    /** Closes the underlying LM client. */
    @SuppressWarnings("unused")
    private void end_lm_grammar() {
        p_lm_client.close_client();
    }


    /**
     * Fills the three output arrays from the server list file. Each non-blank
     * line of the file names a configuration file (format: lm_file, host,
     * port, weight) that is parsed with {@link #read_config_file(String)}.
     *
     * @param f_server_lists      file with one configuration file name per line
     * @param num_servers         expected number of servers (the capacity of the output arrays)
     * @param l_lm_server_hosts   output: host name of each server
     * @param l_lm_server_ports   output: port of each server
     * @param l_lm_server_weights output: interpolation weight of each server
     * @throws IOException              if a file cannot be read
     * @throws IllegalArgumentException if the number of servers listed is not
     *                                  <code>num_servers</code>, or a configuration lacks
     *                                  its port or weight
     */
    private void read_lm_server_lists(String f_server_lists, int num_servers, String[] l_lm_server_hosts, int[] l_lm_server_ports, double[] l_lm_server_weights) throws IOException {
        int count = 0;
        LineReader reader = new LineReader(f_server_lists);
        try {
            for (String line : reader) {
                String fname = line.trim();
                if (fname.length() == 0) {
                    // A blank line (e.g. a trailing newline) names no configuration file.
                    continue;
                }
                if (count >= num_servers) {
                    // Report the mismatch before indexing past the end of the output arrays.
                    throw new IllegalArgumentException("num of lm servers does not match");
                }

                Hashtable<String,?> res_conf = read_config_file(fname);
                String  lm_file = (String)  res_conf.get("lm_file");
                String  host    = (String)  res_conf.get("hostname");
                Integer port    = (Integer) res_conf.get("port");
                Double  weight  = (Double)  res_conf.get("weight");

                // Unboxing a missing entry would fail with a bare NullPointerException;
                // name the offending file and key instead.
                if (null == port) {
                    throw new IllegalArgumentException(
                        "Missing remote_lm_server_port in lm server config: " + fname);
                }
                if (null == weight) {
                    throw new IllegalArgumentException(
                        "Missing interpolation_weight in lm server config: " + fname);
                }

                l_lm_server_hosts[count]   = host;
                l_lm_server_ports[count]   = port.intValue();
                l_lm_server_weights[count] = weight.doubleValue();
                count++;

                if (logger.isLoggable(Level.FINE)) {
                    logger.fine("lm server: "
                        + "lm_file: " + lm_file
                        + "; host: " + host
                        + "; port: " + port
                        + "; weight: " + weight);
                }
            }
        } finally {
            reader.close();
        }

        if (count != num_servers) {
            throw new IllegalArgumentException("num of lm servers does not match");
        }
    }


    // BUG: this is duplicating code in JoshuaConfiguration, needs unifying
    /**
     * Parses one LM server configuration file made of <code>key = value</code> lines.
     * Comment and empty lines, and lines without an equals sign, are skipped.
     * Recognised keys and the entries they produce in the result:
     * <ul>
     * <li><code>lm_file</code> =&gt; "lm_file" (String)</li>
     * <li><code>remote_lm_server_port</code> =&gt; "port" (Integer)</li>
     * <li><code>hostname</code> =&gt; "hostname" (String)</li>
     * <li><code>interpolation_weight</code> =&gt; "weight" (Double)</li>
     * </ul>
     * Any other key is reported with a warning and otherwise ignored.
     *
     * @param config_file path of the configuration file
     * @return table holding the entries found; keys absent from the file are absent from the table
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a parameter line does not split into exactly
     *                                  one key and one value, or a number cannot be parsed
     */
    private static Hashtable<String,?> read_config_file(String config_file)
    throws IOException {
        Hashtable<String,Object> res = new Hashtable<String,Object>();
        LineReader configReader = new LineReader(config_file);
        try {
            for (String line : configReader) {
                line = line.trim();
                if (Regex.commentOrEmptyLine.matches(line)) {
                    continue;
                }
                if (-1 == line.indexOf("=")) {
                    continue; // not a parameter line
                }

                String[] fds = Regex.equalsWithSpaces.split(line);
                if (fds.length != 2) {
                    throw new IllegalArgumentException(
                        "Wrong config line: " + line);
                }

                String key = fds[0];
                if ("lm_file".equals(key)) {
                    String lm_file = fds[1].trim();
                    res.put("lm_file", lm_file);
                    if (logger.isLoggable(Level.FINE)) {
                        logger.fine(String.format("lm file: %s", lm_file));
                    }

                } else if ("remote_lm_server_port".equals(key)) {
                    int port = Integer.parseInt(fds[1]);
                    res.put("port", Integer.valueOf(port));
                    if (logger.isLoggable(Level.FINE)) {
                        logger.fine(String.format("remote_lm_server_port: %s", port));
                    }

                } else if ("hostname".equals(key)) {
                    String host_name = fds[1].trim();
                    res.put("hostname", host_name);
                    if (logger.isLoggable(Level.FINE)) {
                        logger.fine(String.format("host name is: %s", host_name));
                    }

                } else if ("interpolation_weight".equals(key)) {
                    double interpolation_weight = Double.parseDouble(fds[1]);
                    res.put("weight", Double.valueOf(interpolation_weight));
                    if (logger.isLoggable(Level.FINE)) {
                        logger.fine(String.format("interpolation_weightt: %s", interpolation_weight));
                    }

                } else {
                    logger.warning("LMGrammarRemote doesn't use config line: " + line);
                }
            }
        } finally {
            configReader.close();
        }
        return res;
    }


    /**
     * Asks the remote client for the probability of <code>ngram</code>.
     * This should be called by the decoder only.
     *
     * @param ngram word ids of the n-gram
     * @param order not used here; the full length of <code>ngram</code> is sent to the client
     * @return the value returned by the client's <code>get_prob</code>
     */
    protected double ngramLogProbability_helper(int[] ngram, int order) {
        return p_lm_client.get_prob(ngram, ngram.length);
    }


    /**
     * Not available for the distributed LM.
     *
     * @throws UnsupportedOperationException always
     */
    protected double logProbabilityOfBackoffState_helper(
        int[] ngram, int order, int qtyAdditionalBackoffWeight
    ) {
        throw new UnsupportedOperationException("probabilityOfBackoffState_helper undefined for distributed_lm");
    }


    /**
     * Not available for the remote LM.
     *
     * @param fname ignored
     * @throws RuntimeException always
     */
    public void write_vocab_map_srilm(String fname) {
        throw new RuntimeException("call write_vocab_map_srilm in remote, must exit");
    }
}