package edu.cmu.sphinx.linguist.acoustic.tiedstate.kaldi;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.InputMismatchException;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
/**
 * Whitespace-delimited token reader over the text files of a model
 * directory.
 *
 * <p>The parser opens {@code final.mdl} followed by {@code tree} from the
 * given location and exposes them as one continuous token stream. Numbers
 * are always parsed with a fixed locale so that the result does not depend
 * on the JVM default locale.
 *
 * <p>Instances hold open streams; call {@link #close()} when done.
 */
public class KaldiTextParser implements Closeable {

    private final Scanner scanner;

    /**
     * Opens {@code final.mdl} and {@code tree} under the given location and
     * prepares to read them back to back.
     *
     * @param path location of the model directory; it is joined with each
     *             file name and the result is parsed as a URL, so it must
     *             be a URL string (for example {@code file:/models/en/})
     * @throws MalformedURLException if the joined location is not a valid URL
     * @throws IOException           if either file cannot be opened
     */
    public KaldiTextParser(String path)
        throws IOException, MalformedURLException
    {
        // TODO: rewrite with StreamTokenizer, see ExtendedStreamTokenizer.
        InputStream modelStream = openModelFile(path, "final.mdl");
        InputStream treeStream;
        boolean treeOpened = false;
        try {
            treeStream = openModelFile(path, "tree");
            treeOpened = true;
        } finally {
            // Do not leak the first stream when the second one fails to open.
            if (!treeOpened) {
                closeQuietly(modelStream);
            }
        }

        InputStream stream = new SequenceInputStream(modelStream, treeStream);
        // Explicit charset: decoding must not depend on the platform default.
        scanner = new Scanner(stream, "UTF-8");
        // Scanner.nextInt()/nextFloat() are locale-sensitive. Pin a locale
        // that uses '.' as the decimal separator, otherwise a token such as
        // "1.5" is rejected under e.g. a German default locale.
        scanner.useLocale(Locale.US);
    }

    /**
     * Returns the next whitespace-delimited token.
     *
     * @return next token
     * @throws java.util.NoSuchElementException if the input is exhausted
     */
    public String getToken() {
        return scanner.next();
    }

    /**
     * Reads the next token as an integer.
     *
     * @return parsed value
     * @throws InputMismatchException if the next token is not an integer
     */
    public int getInt() {
        return scanner.nextInt();
    }

    /**
     * Reads the next token as a float.
     *
     * @return parsed value
     * @throws InputMismatchException if the next token is not a float
     */
    public float parseFloat() {
        return scanner.nextFloat();
    }

    /**
     * Reads a bracketed list of integers of the form {@code [ 1 2 3 ]}.
     *
     * @return the integers between the brackets, possibly empty
     * @throws InputMismatchException if the next token is not {@code [}
     * @throws NumberFormatException  if an element is not an integer
     */
    public int[] getIntArray() {
        List<String> tokens = getTokenList("[", "]");
        int[] result = new int[tokens.size()];
        for (int i = 0; i < result.length; ++i) {
            result[i] = Integer.parseInt(tokens.get(i));
        }
        return result;
    }

    /**
     * Reads a bracketed list of floats of the form {@code [ 0.5 1.5 ]}.
     *
     * @return the floats between the brackets, possibly empty
     * @throws InputMismatchException if the next token is not {@code [}
     * @throws NumberFormatException  if an element is not a float
     */
    public float[] getFloatArray() {
        List<String> tokens = getTokenList("[", "]");
        float[] result = new float[tokens.size()];
        for (int i = 0; i < result.length; ++i) {
            result[i] = Float.parseFloat(tokens.get(i));
        }
        return result;
    }

    /**
     * Reads tokens enclosed between an opening and a closing token.
     *
     * @param openToken  token that must come next in the input
     * @param closeToken token that terminates the list; it is consumed but
     *                   not returned
     * @return tokens found strictly between the two delimiters
     * @throws InputMismatchException if the next token is not
     *                                {@code openToken}
     * @throws java.util.NoSuchElementException if the input ends before
     *                                {@code closeToken} is seen
     */
    public List<String> getTokenList(String openToken, String closeToken) {
        expectToken(openToken);
        List<String> tokens = new ArrayList<String>();
        String token;
        while (!closeToken.equals(token = scanner.next())) {
            tokens.add(token);
        }
        return tokens;
    }

    /**
     * Consumes the next token and verifies that it equals the given one.
     *
     * @param expected token that must come next
     * @throws InputMismatchException if a different token is read
     */
    public void expectToken(String expected) {
        String actual = scanner.next();
        assertToken(expected, actual);
    }

    /**
     * Verifies that two tokens are equal.
     *
     * @param expected token that was required
     * @param actual   token that was read
     * @throws InputMismatchException if the tokens differ
     */
    public void assertToken(String expected, String actual) {
        if (actual.equals(expected)) {
            return;
        }
        String msg;
        msg = String.format("'%s' expected, '%s' got", expected, actual);
        throw new InputMismatchException(msg);
    }

    /**
     * Closes the underlying scanner and, through it, the opened streams.
     * Reading from the parser afterwards fails with
     * {@link IllegalStateException}.
     */
    public void close() {
        scanner.close();
    }

    /** Joins the location with a file name and opens the result as a URL. */
    private static InputStream openModelFile(String path, String name)
        throws IOException
    {
        File file = new File(path, name);
        return new URL(file.getPath()).openStream();
    }

    /** Closes a stream on a failure path without masking the root cause. */
    private static void closeQuietly(InputStream stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // The exception that triggered the cleanup is the one to report.
        }
    }
}