/** * * Copyright 1999-2012 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */ package edu.cmu.sphinx.fst; import java.io.BufferedReader; import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.util.HashMap; import edu.cmu.sphinx.fst.semiring.Semiring; import edu.cmu.sphinx.fst.utils.Utils; /** * Provides the required functionality in order to convert from/to openfst's * text format * * @author John Salatas */ public class Convert { /** * Default private Constructor. */ private Convert() { } /** * Exports an fst to the openfst text format Several files are created as * follows: - basename.input.syms - basename.output.syms - basename.fst.txt * See <a * href="http://www.openfst.org/twiki/bin/view/FST/FstQuickTour">OpenFst * Quick Tour</a> * * @param fst * the fst to export * @param basename * the files' base name * @throws IOException IO went wrong */ public static void export(Fst fst, String basename) throws IOException { exportSymbols(fst.getIsyms(), basename + ".input.syms"); exportSymbols(fst.getOsyms(), basename + ".output.syms"); exportFst(fst, basename + ".fst.txt"); } /** * Exports an fst to the openfst text format * * @param fst * the fst to export * @param filename * the openfst's fst.txt filename * @throws IOException IO went wrong */ private static void exportFst(Fst fst, String filename) throws IOException { FileWriter file; file = new FileWriter(filename); PrintWriter out = new PrintWriter(file); // print start first State start = fst.getStart(); out.println(start.getId() + "\t" + start.getFinalWeight()); // print all states int numStates = fst.getNumStates(); for (int i = 0; i < numStates; i++) { State s = fst.getState(i); if (s.getId() != fst.getStart().getId()) { out.println(s.getId() + "\t" + s.getFinalWeight()); } } String[] isyms = fst.getIsyms(); String[] osyms = fst.getOsyms(); numStates = fst.getNumStates(); for (int i = 0; i < numStates; i++) { State s = fst.getState(i); int numArcs = s.getNumArcs(); for (int j = 0; j < numArcs; j++) { Arc arc = s.getArc(j); String isym = (isyms != null) ? isyms[arc.getIlabel()] : Integer.toString(arc.getIlabel()); String osym = (osyms != null) ? osyms[arc.getOlabel()] : Integer.toString(arc.getOlabel()); out.println(s.getId() + "\t" + arc.getNextState().getId() + "\t" + isym + "\t" + osym + "\t" + arc.getWeight()); } } out.close(); } /** * Exports a symbols' map to the openfst text format * * @param syms * the symbols' map * @param filename * the the openfst's symbols filename * @throws IOException IO went wrong */ private static void exportSymbols(String[] syms, String filename) throws IOException { if (syms == null) return; FileWriter file = new FileWriter(filename); PrintWriter out = new PrintWriter(file); for (int i = 0; i < syms.length; i++) { String key = syms[i]; out.println(key + "\t" + i); } out.close(); } /** * Imports an openfst's symbols file * * @param filename * the symbols' filename * @return HashMap containing the imported string-to-id mapping * @throws IOException IO went wrong * @throws NumberFormatException import failed due to input data format */ private static HashMap<String, Integer> importSymbols(String filename) throws NumberFormatException, IOException { File symfile = new File(filename); if (!(symfile.exists() && symfile.isFile())) { return null; } FileInputStream fis = new FileInputStream(filename); DataInputStream dis = new DataInputStream(fis); BufferedReader br = new BufferedReader(new InputStreamReader(dis)); HashMap<String, Integer> syms = new HashMap<String, Integer>(); String strLine; while ((strLine = br.readLine()) != null) { String[] tokens = strLine.split("\\t"); String sym = tokens[0]; Integer index = Integer.parseInt(tokens[1]); syms.put(sym, index); } br.close(); return syms; } /** * Imports an openfst text format Several files are imported as follows: - * basename.input.syms - basename.output.syms - basename.fst.txt * * @param basename * the files' base name * @param semiring * the fst's semiring * @return imported FST * @throws IOException IO went wrong * @throws NumberFormatException load failed due to data format issues */ public static Fst importFst(String basename, Semiring semiring) throws NumberFormatException, IOException { Fst fst = new Fst(semiring); HashMap<String, Integer> isyms = importSymbols(basename + ".input.syms"); if (isyms == null) { isyms = new HashMap<String, Integer>(); isyms.put("<eps>", 0); } HashMap<String, Integer> osyms = importSymbols(basename + ".output.syms"); if (osyms == null) { osyms = new HashMap<String, Integer>(); osyms.put("<eps>", 0); } HashMap<String, Integer> ssyms = importSymbols(basename + ".states.syms"); // Parse input FileInputStream fis = new FileInputStream(basename + ".fst.txt"); DataInputStream dis = new DataInputStream(fis); BufferedReader br = new BufferedReader(new InputStreamReader(dis, "UTF-8")); boolean firstLine = true; String strLine; HashMap<Integer, State> stateMap = new HashMap<Integer, State>(); while ((strLine = br.readLine()) != null) { String[] tokens = strLine.split("\\t"); Integer inputStateId; if (ssyms == null) { inputStateId = Integer.parseInt(tokens[0]); } else { inputStateId = ssyms.get(tokens[0]); } State inputState = stateMap.get(inputStateId); if (inputState == null) { inputState = new State(semiring.zero()); fst.addState(inputState); stateMap.put(inputStateId, inputState); } if (firstLine) { firstLine = false; fst.setStart(inputState); } if (tokens.length > 2) { Integer nextStateId; if (ssyms == null) { nextStateId = Integer.parseInt(tokens[1]); } else { nextStateId = ssyms.get(tokens[1]); } State nextState = stateMap.get(nextStateId); if (nextState == null) { nextState = new State(semiring.zero()); fst.addState(nextState); stateMap.put(nextStateId, nextState); } // Adding arc if (isyms.get(tokens[2]) == null) { isyms.put(tokens[2], isyms.size()); } int iLabel = isyms.get(tokens[2]); if (osyms.get(tokens[3]) == null) { osyms.put(tokens[3], osyms.size()); } int oLabel = osyms.get(tokens[3]); float arcWeight; if (tokens.length > 4) { arcWeight = Float.parseFloat(tokens[4]); } else { arcWeight = 0; } Arc arc = new Arc(iLabel, oLabel, arcWeight, nextState); inputState.addArc(arc); } else { if (tokens.length > 1) { float finalWeight = Float.parseFloat(tokens[1]); inputState.setFinalWeight(finalWeight); } else { inputState.setFinalWeight(0.0f); } } } dis.close(); fst.setIsyms(Utils.toStringArray(isyms)); fst.setOsyms(Utils.toStringArray(osyms)); return fst; } }