package edu.berkeley.nlp.lm.io;

import edu.berkeley.nlp.lm.StupidBackoffLm;
import edu.berkeley.nlp.lm.util.Logger;

/**
 * Like {@link MakeLmBinaryFromGoogle}, except it only writes the NgramMap
 * portion of the LM, meaning the binary does not contain the vocabulary. We
 * have used this internally to build binaries that we provide for download.
 * Since these binaries are useless without the vocabulary provided with the
 * Google n-gram corpus, we can distribute them without incurring the wrath of
 * the LDC.
 * <p>
 * These binaries can be read back in using
 * {@link LmReaders#readGoogleLmBinary(String, edu.berkeley.nlp.lm.WordIndexer, String)}
 *
 * @author adampauls
 *
 */
public class MakeNgramMapBinaryFromGoogle
{

	/**
	 * Prints the expected command-line arguments to standard error and
	 * terminates the JVM with exit status 1.
	 */
	private static void usage() {
		System.err.println("Usage: <Google n-grams dir> <outputfile>");
		System.exit(1);
	}

	/**
	 * Command-line entry point: reads a language model from a Google n-grams
	 * directory and writes only its n-gram map to the output file.
	 *
	 * @param argv
	 *            exactly two arguments: {@code argv[0]} is the Google n-grams
	 *            directory to read, {@code argv[1]} is the file to write. Any
	 *            other argument count prints the usage message and exits.
	 */
	public static void main(final String[] argv) {
		if (argv.length != 2) {
			usage();
		}
		Logger.setGlobalLogger(new Logger.SystemLogger(System.out, System.err));

		// The input directory is the FIRST argument; the second one is the
		// output file and must not be handed to the reader.
		final String lmFile = argv[0];
		Logger.startTrack("Reading Lm File " + lmFile + " . . . ");
		// NOTE(review): the meaning of the two boolean flags is defined by
		// LmReaders.readLmFromGoogleNgramDir and is not visible from here.
		final StupidBackoffLm<String> lm = (StupidBackoffLm<String>) LmReaders.readLmFromGoogleNgramDir(lmFile, true, false);
		Logger.endTrack();

		final String outFile = argv[1];
		Logger.startTrack("Writing to file " + outFile + " . . . ");
		// Only the n-gram map is serialized, not the whole LM object.
		IOUtils.writeObjFileHard(outFile, lm.getNgramMap());
		Logger.endTrack();
	}
}