/**
* Copyright 2010 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.util.data.text;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.LinkedHashMap;
import org.apache.oro.text.GlobCompiler;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.Perl5Matcher;
/**
* Reader for basename classification definition file, which allows mapping of basenames to classification strings in a lazy way,
* using glob expressions.
* <p>
* Definition file format:
* <ol>
* <li>lines with leading <tt>#</tt> are comments and are ignored
* <li>empty lines are ignored
* <li>every other line must conform to the format
*
* <pre>
* GLOB = class
* </pre>
*
* where <tt>GLOB</tt> is a glob expression and <tt>class</tt> is a classification string. Within the glob, the following
* characters have special meaning:
* <dl>
* <dt>*</dt>
* <dd>zero or more characters</dd>
* <dt>?</dt>
* <dd>any one character</dd>
* </dl>
* </ol>
* The idea is that a line like
*
* <pre>
* foo_* = bar
* </pre>
*
* in the classification definition file will cause all basenames starting with <tt>foo_</tt> to be handled as belonging to the
* "bar" class. One scenario where this is useful is the classification of prompts by speaking style in a multi-style voice
* database.
*
* @author steiner
*
*/
public class BasenameClassificationDefinitionFileReader {
protected BufferedReader reader;
public boolean fileOK = true;
protected LinkedHashMap<Pattern, String> styleDefinitions = new LinkedHashMap<Pattern, String>();
/**
* constructor to call main constructor with a filename String
*
* @param filename
* as a String
* @throws IOException
* IOException
*/
public BasenameClassificationDefinitionFileReader(String filename) throws IOException {
this(new FileReader(filename));
}
/**
* main constructor
*
* @param reader
* as a Reader
* @throws IOException
* IOException
*/
public BasenameClassificationDefinitionFileReader(Reader reader) throws IOException {
this.reader = new BufferedReader(reader);
parseDefinitionFile();
}
/**
* parse style definition file (see class documentation above for format), putting <glob expression, style string> pairs
* in styleDefinitions
*
* @throws IOException
* IOException
*/
private void parseDefinitionFile() throws IOException {
String line;
String globString;
String styleString;
GlobCompiler glob = new GlobCompiler();
Pattern globPattern;
// read lines...
while ((line = reader.readLine()) != null) {
// ...trimming whitespace:
line = line.trim();
// ignore lines that are empty or start with #:
if (line.equals("") || line.startsWith("#")) {
continue;
} else {
// split lines into fields
String[] fields = line.split("=");
try {
globString = fields[0].trim();
styleString = fields[1].trim();
} catch (IndexOutOfBoundsException iob) {
System.err.println("Warning: could not parse line: " + line);
fileOK = false;
continue;
}
// create GlobCompiler for glob expression:
try {
globPattern = glob.compile(globString);
} catch (MalformedPatternException mpe) {
System.err.println("Warning: could not parse line: ");
fileOK = false;
continue;
}
// put (glob expression, style string) pair in styleDefinions:
styleDefinitions.put(globPattern, styleString);
}
}
if (styleDefinitions.isEmpty()) {
System.err.println("Warning: no style definitions were found!");
}
}
/**
* match basename against the glob expressions in styleDefinitions
*
* @param basename
* basename
* @return style String of first matching glob expression, or empty String if no glob matches
*/
public String getValue(String basename) {
Perl5Matcher globMatcher = new Perl5Matcher();
String style = "";
for (Pattern globPattern : styleDefinitions.keySet()) {
if (globMatcher.matches(basename, globPattern)) {
style = styleDefinitions.get(globPattern);
break; // enable this line to change behavior to return style of *first* matching glob expr
// return style; // enable this line to change behavior to return style of *last* matching glob expr
}
}
// no globPattern in styleDefinitions matched... return empty string:
return style;
}
}