/* * Copyright (c) 2004-2009, Jean-Marc François. All Rights Reserved. * Licensed under the New BSD license. See the LICENSE file. */ package be.ac.ulg.montefiore.run.jahmm.io; import java.io.*; import java.util.ArrayList; import java.util.List; import be.ac.ulg.montefiore.run.jahmm.*; /** * This class can read Hidden Markov Models represented as text files. * The file syntax is as follows. * <p> * A '#' character induces a comment ; the rest of the line is skipped. * Words must be separated with a white space (space, tab or new line). * The file is case-sensitive. * The file must begin with the words <tt>Hmm</tt> and <tt>v1.0</tt>. * <p> * The next word must be <tt>NbStates</tt> followed by a number. This * number is the HMM's number of states. * <p> * Then comes a description of each state. The n-th description is * related to the n-th state. A state description begins with the keywords * <tt>State</tt> and </tt>Pi</tt>, followed by the initial probability of * this state. Then comes the letter <tt>A</tt> followed by the state * transition probabilities separated by a space, in the right order. * Then comes a description of an observation distribution which depends on * the type of observation handled by the HMM. * <p> * The opdfs associated with all the states must have the same type. * A HMM description file thus looks like this: * <pre> * # A simple Hmm * Hmm * v1.0 * NbStates 2 * * State * Pi 0.7 * A 0.1 0.9 * IntegerOPDF [ .2 .3 .4 .1 ] * * State * Pi 0.3 * A 0.4 0.6 * IntegerOPDF [ .1 .1 .1 .7 ] * </pre> * The lines starting with 'IntegerOPDF' are distributions descriptions. */ public class HmmReader { /** * Reads a HMM from a text file. * * @param reader The reader to read the HMM description from. * @param opdfReader The {@link OpdfReader} used to read the observation * distributions. */ public static <O extends Observation> Hmm<O> read(Reader reader, OpdfReader<? extends Opdf<O>> opdfReader) throws IOException, FileFormatException { StreamTokenizer st = new StreamTokenizer(reader); initSyntaxTable(st); readWords(st, "Hmm", "v1.0", "NbStates"); int nbStates = (int) readNumber(st); double[] pi = new double[nbStates]; double[][] a = new double[nbStates][nbStates]; List<Opdf<O>> opdfs = new ArrayList<Opdf<O>>(nbStates); for (int i = 0; i < nbStates; i++) { // System.out.println("read state = " + i); readState(st, nbStates, i, pi, a, opdfs, opdfReader); } return new Hmm<O>(pi, a, opdfs); } static private <O extends Observation> void readState(StreamTokenizer st, int nbStates, int stateNb, double[] pi, double[][] a, List<Opdf<O>> opdfs, OpdfReader<? extends Opdf<O>> opdfReader) throws IOException, FileFormatException { readWords(st, "State", "Pi"); pi[stateNb] = readNumber(st); readWords(st, "A"); for (int i = 0; i < nbStates; i++) a[stateNb][i] = readNumber(st); opdfs.add(opdfReader.read(st)); } /** * Reads some keywords out of a {@link StreamTokenizer}. * * @param st A stream tokenizer. * @param words The words to read, in the right order. */ static void readWords(StreamTokenizer st, String... words) throws IOException, FileFormatException { for (String word : words) { st.nextToken(); if (st.ttype == StreamTokenizer.TT_WORD) if (st.sval.equals(word)) continue; else throw new FileFormatException(st.lineno(), "Syntax error: unexpected token '" + st.sval + "', ('" + word + "' expected)"); if (st.ttype > 0) // Single character token if (word.length() == 1 && st.ttype == (int) word.charAt(0)) continue; else throw new FileFormatException(st.lineno(), "Syntax error: unexpected token '" + (char) st.ttype + "' (" + word + "' expected)"); throw new FileFormatException(st.lineno(), "Syntax error: '" + word + "' expected"); } } static double readNumber(StreamTokenizer st) throws IOException, FileFormatException { st.nextToken(); if (st.ttype != StreamTokenizer.TT_NUMBER) throw new FileFormatException(st.lineno(), "Syntax error: number expected"); return st.nval; } /* Initialize the syntax table of a stream tokenizer */ static void initSyntaxTable(StreamTokenizer st) { st.resetSyntax(); st.parseNumbers(); st.wordChars('a', 'z'); st.wordChars('A', 'Z'); st.whitespaceChars(0, (int) ' '); st.whitespaceChars((int) '\t', (int) '\t'); st.eolIsSignificant(false); st.commentChar((int) '#'); } }