package edu.cmu.sphinx.linguist.language.ngram; import java.io.*; import java.net.Socket; import java.net.URL; import java.util.HashSet; import java.util.Set; import edu.cmu.sphinx.linguist.WordSequence; import edu.cmu.sphinx.linguist.dictionary.Word; import edu.cmu.sphinx.linguist.util.LRUCache; import edu.cmu.sphinx.util.LogMath; import edu.cmu.sphinx.util.props.*; /* * The client of the SRILM language model server. It needs to read the * vocabulary from a vocabulary file though. */ public class NetworkLanguageModel implements LanguageModel { /** The property specifying the host of the language model server. */ @S4String(defaultValue = "localhost") public final static String PROP_HOST = "host"; /** The property specifying the port of the language model server. */ @S4Integer(defaultValue = 2525) public final static String PROP_PORT = "port"; LogMath logMath; private String host; private int port; private URL location; int maxDepth; Socket socket; private BufferedReader inReader; private PrintWriter outWriter; LRUCache<WordSequence, Float> cache; private boolean allocated; /** * Creates network language model client * * @param host server host * @param port server port * @param location URL of the file with vocabulary (only needed for 1-stage * model) * @param maxDepth depth of the model */ public NetworkLanguageModel(String host, int port, URL location, int maxDepth) { this.host = host; this.port = port; this.maxDepth = maxDepth; this.location = location; logMath = LogMath.getLogMath(); } public NetworkLanguageModel() { } /* * (non-Javadoc) * @see * edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util * .props.PropertySheet) */ public void newProperties(PropertySheet ps) throws PropertyException { if (allocated) { throw new RuntimeException("Can't change properties after allocation"); } host = ps.getString(PROP_HOST); port = ps.getInt(PROP_PORT); location = ConfigurationManagerUtils.getResource(PROP_LOCATION, ps); maxDepth = ps.getInt(PROP_MAX_DEPTH); if (maxDepth == -1) maxDepth = 3; } public void allocate() throws IOException { allocated = true; socket = new Socket(host, port); inReader = new BufferedReader(new InputStreamReader(socket.getInputStream())); outWriter = new PrintWriter(socket.getOutputStream(), true); String greeting = inReader.readLine(); if (!greeting.equals("probserver ready")) { throw new IOException("Incorrect input"); } cache = new LRUCache<WordSequence, Float>(1000); } public void deallocate() { allocated = false; try { socket.close(); } catch (IOException e) { e.printStackTrace(); } } public int getMaxDepth() { return maxDepth; } public float getProbability(WordSequence wordSequence) { Float probability = cache.get(wordSequence); if (probability != null) { return probability.floatValue(); } StringBuilder builder = new StringBuilder(); if (wordSequence.size() == 0) return 0.0f; for (Word w : wordSequence.getWords()) { builder.append(w.toString()); builder.append(' '); } outWriter.println(builder.toString()); String result = "0"; try { result = inReader.readLine(); if (result.charAt(0) == 0) result = result.substring(1); } catch (IOException e) { e.printStackTrace(); } if (!result.equals("-inf")) probability = logMath.log10ToLog(Float.parseFloat(result)); else probability = LogMath.LOG_ZERO; cache.put(wordSequence, probability); return probability.floatValue(); } public float getSmear(WordSequence wordSequence) { return 0.0f; } public Set<String> getVocabulary() { Set<String> result = new HashSet<String>(); try { BufferedReader reader = new BufferedReader(new InputStreamReader(location.openStream())); String line; while (true) { line = reader.readLine(); if (line == null) break; if (line.length() == 0) continue; result.add(line.trim()); } reader.close(); } catch (IOException e) { e.printStackTrace(); } return result; } @Override public void onUtteranceEnd() { //TODO not implemented } }