/**
* Copyright 2003-2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.fst;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* An implementation of a finite state transducer lookup.
*
* @author Andreas Eisele
*/
public class FSTLookup {
// ///////////////////// Static FST repository ////////////////////
/**
* Map "filename encoding" or "filename" to FST.
*/
private static Map<String, FST> knownFSTs = new HashMap<String, FST>();
// //////////////////// An individual FSTLookup class //////////////
private FST fst;
/**
* Initialise the finite state transducer lookup. This constructor will assume that the file contains a header indicating the
* proper encoding.
*
* @param fileName
* the name of the file from which to load the FST.
* @throws IOException
* if the FST cannot be loaded from the given file.
*/
public FSTLookup(String fileName) throws IOException {
InputStream inStream = new FileInputStream(fileName);
try {
init(inStream, fileName);
} finally {
inStream.close();
}
}
/**
* Initialise the finite state transducer lookup. This constructor will assume that the stream contains a header indicating
* the proper encoding.
*
* @param inStream
* the stream from which to load the FST.
* @param identifier
* an identifier by which the FST lookup can be retrieved.
* @throws IOException
* if the FST cannot be loaded from the given file.
*/
public FSTLookup(InputStream inStream, String identifier) throws IOException {
init(inStream, identifier);
}
private void init(InputStream inStream, String identifier) throws IOException {
fst = knownFSTs.get(identifier);
if (fst == null) {
fst = new FST(inStream);
knownFSTs.put(identifier, fst);
}
}
/**
* Initialise the finite state transducer lookup. This is a constructor for legacy headerless FST files.
*
* @param fileName
* the name of the file from which to load the FST.
* @param encoding
* the name of the encoding used in the file (e.g., UTF-8 or ISO-8859-1).
* @throws IOException
* if the FST cannot be loaded from the given file.
* @throws UnsupportedEncodingException
* if the encoding is not supported.
*/
public FSTLookup(String fileName, String encoding) throws IOException, UnsupportedEncodingException {
InputStream inStream = new FileInputStream(fileName);
try {
init(inStream, fileName, encoding);
} finally {
inStream.close();
}
}
/**
* Initialise the finite state transducer lookup. This is a constructor for legacy headerless FST files.
*
* @param inStream
* the stream from which to load the FST.
* @param identifier
* an identifier by which the FST lookup can be retrieved.
* @param encoding
* the name of the encoding used in the file (e.g., UTF-8 or ISO-8859-1).
* @throws IOException
* if the FST cannot be loaded from the given file.
* @throws UnsupportedEncodingException
* if the encoding is not supported.
*/
public FSTLookup(InputStream inStream, String identifier, String encoding) throws IOException, UnsupportedEncodingException {
init(inStream, identifier, encoding);
}
private void init(InputStream inStream, String identifier, String encoding) throws IOException, UnsupportedEncodingException {
String key = identifier + " " + encoding;
fst = knownFSTs.get(key);
if (fst == null) {
fst = new FST(inStream, encoding);
knownFSTs.put(key, fst);
}
}
/**
* Look up a word in the FST. The FST runs in normal mode, i.e. it generates the expanded forms from the original forms. This
* method is thread-safe.
*
* @param word
* the word to look up.
* @return a string array containing all expansions of word. If no expansion is found, an array of length 0 is returned.
*/
public String[] lookup(String word) {
return lookup(word, false);
}
/**
* Look up a word in the FST. This method is thread-safe.
*
* @param word
* the word to look up.
* @param generate
* whether the FST is to run in inverse direction, i.e. generating the original form from the expanded form.
* @return a string array containing all expansions of word. If no expansion is found, an array of length 0 is returned.
*/
public String[] lookup(String word, boolean generate) {
StringBuilder buffer2 = new StringBuilder();
List<String> results = new ArrayList<String>();
lookup(word, 0, 0, generate, buffer2, results);
String[] resultArray = new String[results.size()];
resultArray = (String[]) results.toArray(resultArray);
return resultArray;
}
private void lookup(String word, int offset1, int arc, boolean generate, StringBuilder buffer2, List<String> results) {
do {
int label = fst.labels[arc];
int offset2 = buffer2.length();
if (label == 0) {
if (offset1 == word.length()) {
results.add(buffer2.toString());
}
} else {
String s1;
if (generate)
s1 = (String) fst.strings.get(fst.mapping[fst.offsets[2 * label + 1]]);
else
s1 = (String) fst.strings.get(fst.mapping[fst.offsets[2 * label]]);
if (word.startsWith(s1, offset1)) {
String s2;
if (generate)
s2 = (String) fst.strings.get(fst.mapping[fst.offsets[2 * label]]);
else
s2 = (String) fst.strings.get(fst.mapping[fst.offsets[2 * label + 1]]);
buffer2.append(s2);
lookup(word, offset1 + s1.length(), fst.targets[arc], generate, buffer2, results);
if (offset2 < buffer2.length())
buffer2.delete(offset2, buffer2.length());
}
}
} while (!fst.isLast[arc++]);
}
/**
* A simple command-line frontend for the FST.
*
* @param args
* args
* @throws IOException
* IOException
*/
public static void main(String[] args) throws IOException {
long iBegin = System.currentTimeMillis();
if (args.length == 0) {
System.err.println("usage: java marytts.fst.FSTLookup FstFile [-g] [word ...]");
System.exit(-1);
}
FSTLookup fstLookup = new FSTLookup(args[0]);
if (args.length == 1 || (args.length == 2 && args[1].equals("-g"))) {
boolean generate = false;
if (args.length == 2 && args[1].equals("-g"))
generate = true;
String line;
try {
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
while ((line = in.readLine()) != null) {
showResults(line, fstLookup.lookup(line, generate));
}
} catch (Exception e) {
System.err.println("Invalid Input");
}
} else {
int i = 1;
boolean generate = false;
if (args[1].equals("-g")) {
generate = true;
i = 2;
}
for (; i < args.length; i++) {
showResults(args[i], fstLookup.lookup(args[i], generate));
}
}
long iEnd = System.currentTimeMillis();
System.err.println("processed in " + (iEnd - iBegin) + " ms.");
}
public static void showResults(String query, String[] args) {
System.out.println("---- " + args.length + " result(s) for " + query + ":");
int i;
for (i = 0; i < args.length; i++)
System.out.println(args[i]);
System.out.println();
}
}