/**
* Copyright 2002 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.language.de.postlex;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
* The rules for the postlexical phonological processes module.
*
* @author Marc Schröder
*/
public class PhonologicalRules {
// Rules as regular expressions with substitution patterns:
// The first string is a regular expression pattern, the three others are
// substitution patterns for PRECISE, NORMAL and SLOPPY pronunciation
// respectively. They may contain bracket references $1, $2, ...
// as in the example below:
// {"([bdg])@(n)", "$1@$2", "$1@$2", "$1@$2"}
private static final String[][] _rules = {
// @-Elision mit -en, -el, -em
{ "([dlrszSt])@n", "$1@n", "$1@n", "$1@n" },
// warning: mbrola de1/2 don't have Z-n diphone
// @Elision mit -en, -el, -em; Assimilation
{ "f@n", "f@n", "f@n", "f@n" },
{ "g@n", "g@n", "g@n", "g@n" }, // warning: mbrola de1 doesn't have g-N diphone
{ "k@n", "k@n", "k@n", "k@n" },// warning: mbrola de1 doesn't have k-N diphone
{ "p@n", "p@n", "p@n", "p@n" },
{ "x@n", "x@n", "x@n", "x@n" },// warning: mbrola de1/2 don't have x-N diphone
// @-Elision mit -en, -el, -em; Assimilation und Geminatenreduktion
{ "b@n", "b@n", "b@n", "b@n" },// warning: mbrola de1 doesn't have b-m diphone
{ "m@n", "m@n", "m@n", "m@n" },
{ "n@n", "n@n", "n@n", "n@n" },
// bei Geminatenreduktion wird der uebrigbleibende Laut eigentlich gelaengt.
// Da es jedoch noch kein Symbol und keine Semantik fuer Laengung gibt,
// soll an dieser Stelle nur darauf hingewiesen werden.
// Assimilation der Artikulationsart
{ "g-n", "g-n", "g-n", "g-n" },
// Assimilation und Geminatenreduktion
{ "m-b", "m-b", "m-b", "m-b" },
{ "t-t", "t-t", "t-t", "t-t" },
// bei Geminatenreduktion wird der uebrigbleibende Laut eigentlich gelaengt.
// Da es jedoch noch kein Symbol und keine Semantik fuer Laengung gibt,
// soll an dieser Stelle nur darauf hingewiesen werden.
// glottal stop removal:
{ "\\?(aI|OY|aU|[iIyYe\\{E29uUoOaA])", "?$1", "?$1", "?$1" },
// Reduce E6 -> 6 in unstressed syllables only:
// {"^([^'-]*)E6", "$16", "$16", "$16"},
// {"-([^'-]*)E6", "-$16", "-$16", "-$16"},
// be more specific: reduce fE6 -> f6 in unstressed syllables only
{ "^([^'-]*)fE6", "$1f6", "$1f6", "$1f6" },
{ "-([^'-]*)fE6", "-$1f6", "-$1f6", "-$1f6" },
// Replace ?6 with ?E6 wordinitial
{ "\\?6", "\\?E6", "\\?E6", "\\?E6" },
// !! Translate the old MARY SAMPA to the new MARY SAMPA:
{ "O~:", "a~", "a~", "a~" }, { "o~:", "o~", "o~", "o~" }, { "9~:", "9~", "9~", "9~" }, { "E~:", "e~", "e~", "e~" },
{ "O~", "a~", "a~", "a~" }, { "o~", "o~", "o~", "o~" }, { "9~", "9~", "9~", "9~" }, { "E~", "e~", "e~", "e~" },
{ "\\{", "E", "E", "E" },
// {"r", "R", "R", "R"}
};
private static final List rules = initialiseRules();
private static List initialiseRules() {
List r = new ArrayList();
for (int i = 0; i < _rules.length; i++) {
r.add(new PhonologicalRules(_rules[i]));
}
return r;
}
public static List getRules() {
return rules;
}
public static final int PRECISE = 1;
public static final int NORMAL = 2;
public static final int SLOPPY = 3;
private Pattern key;
private String precise;
private String normal;
private String sloppy;
public PhonologicalRules(String[] data) {
try {
key = Pattern.compile(data[0]);
} catch (PatternSyntaxException e) {
System.err.println("Cannot compile regular expression `" + data[0] + "':");
e.printStackTrace();
}
precise = data[1];
normal = data[2];
sloppy = data[3];
}
public boolean matches(String input) {
return key.matcher(input).find();
}
public String apply(String input, int precision) {
String repl = normal;
if (precision == PRECISE)
repl = precise;
else if (precision == SLOPPY)
repl = sloppy;
return key.matcher(input).replaceAll(repl);
}
}