/** * Portions Copyright 2001 Sun Microsystems, Inc. * Portions Copyright 1999-2001 Language Technologies Institute, * Carnegie Mellon University. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. */ package edu.cmu.sphinx.alignment.tokenizer; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.util.StringTokenizer; /** * Implements a finite state machine that checks if a given string is * pronounceable. If it is pronounceable, the method <code>accept()</code> will * return true. */ public class PronounceableFSM { private static final String VOCAB_SIZE = "VOCAB_SIZE"; private static final String NUM_OF_TRANSITIONS = "NUM_OF_TRANSITIONS"; private static final String TRANSITIONS = "TRANSITIONS"; /** * The vocabulary size. */ protected int vocabularySize; /** * The transitions of this FSM */ protected int[] transitions; /** * Whether we should scan the input string from the front. */ protected boolean scanFromFront; /** * Constructs a PronounceableFSM with information in the given URL. * * @param url the URL that contains the FSM specification * @param scanFromFront indicates whether this FSM should scan the input * string from the front, or from the back * @throws IOException if something went wrong */ public PronounceableFSM(URL url, boolean scanFromFront) throws IOException { this.scanFromFront = scanFromFront; InputStream is = url.openStream(); loadText(is); is.close(); } /** * Constructs a PronounceableFSM with the given attributes. * * @param vocabularySize the vocabulary size of the FSM * @param transitions the transitions of the FSM * @param scanFromFront indicates whether this FSM should scan the input * string from the front, or from the back */ public PronounceableFSM(int vocabularySize, int[] transitions, boolean scanFromFront) { this.vocabularySize = vocabularySize; this.transitions = transitions; this.scanFromFront = scanFromFront; } /** * Loads the ASCII specification of this FSM from the given InputStream. * * @param is the input stream to load from * * @throws IOException if an error occurs on input. */ private void loadText(InputStream is) throws IOException { BufferedReader reader = new BufferedReader(new InputStreamReader(is)); String line = null; while ((line = reader.readLine()) != null) { if (!line.startsWith("***")) { if (line.startsWith(VOCAB_SIZE)) { vocabularySize = parseLastInt(line); } else if (line.startsWith(NUM_OF_TRANSITIONS)) { int transitionsSize = parseLastInt(line); transitions = new int[transitionsSize]; } else if (line.startsWith(TRANSITIONS)) { StringTokenizer st = new StringTokenizer(line); String transition = st.nextToken(); int i = 0; while (st.hasMoreTokens() && i < transitions.length) { transition = st.nextToken().trim(); transitions[i++] = Integer.parseInt(transition); } } } } reader.close(); } /** * Returns the integer value of the last integer in the given string. * * @param line the line to parse the integer from * * @return an integer */ private int parseLastInt(String line) { String lastInt = line.trim().substring(line.lastIndexOf(" ")); return Integer.parseInt(lastInt.trim()); } /** * Causes this FSM to transition to the next state given the current state * and input symbol. * * @param state the current state * @param symbol the input symbol */ private int transition(int state, int symbol) { for (int i = state; i < transitions.length; i++) { if ((transitions[i] % vocabularySize) == symbol) { return (transitions[i] / vocabularySize); } } return -1; } /** * Checks to see if this finite state machine accepts the given input * string. * * @param inputString the input string to be tested * * @return true if this FSM accepts, false if it rejects */ public boolean accept(String inputString) { int symbol; int state = transition(0, '#'); int leftEnd = inputString.length() - 1; int start = (scanFromFront) ? 0 : leftEnd; for (int i = start; 0 <= i && i <= leftEnd;) { char c = inputString.charAt(i); if (c == 'n' || c == 'm') { symbol = 'N'; } else if ("aeiouy".indexOf(c) != -1) { symbol = 'V'; } else { symbol = c; } state = transition(state, symbol); if (state == -1) { return false; } else if (symbol == 'V') { return true; } if (scanFromFront) { i++; } else { i--; } } return false; } }