/**
* Copyright 2002 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.language.de;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryXML;
import marytts.language.de.postlex.PhonologicalRules;
import marytts.modules.PronunciationModel;
import marytts.util.dom.MaryDomUtils;
import marytts.util.dom.NameNodeFilter;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.TreeWalker;
/**
* The postlexical phonological processes module.
*
* @author Marc Schröder
*/
public class Postlex extends PronunciationModel {

    /** Creates the module, passing {@link Locale#GERMAN} to the superclass constructor. */
    public Postlex() {
        super(Locale.GERMAN);
    }

    /**
     * Runs both postlexical passes on the document held by {@code d} and then hands the data to
     * the superclass implementation.
     *
     * @param d the data whose document is modified in place
     * @return whatever the superclass {@code process} returns for {@code d}
     * @throws Exception if one of the passes or the superclass processing fails
     */
    public MaryData process(MaryData d) throws Exception {
        Document document = d.getDocument();
        mtuPostlex(document);
        phonologicalRules(document);
        return super.process(d);
    }

    /**
     * For every highest-level {@code MaryXML.MTU} element, keeps the "accent" attribute on a
     * single token below it and removes that attribute from all other tokens below it.
     *
     * @param doc the document to modify in place
     * @throws DOMException if a DOM operation fails
     */
    private void mtuPostlex(Document doc) throws DOMException {
        DocumentTraversal traversal = (DocumentTraversal) doc;
        TreeWalker walker = traversal.createTreeWalker(doc, NodeFilter.SHOW_ELEMENT,
                new NameNodeFilter(MaryXML.MTU), false);
        for (Element mtu = (Element) walker.nextNode(); mtu != null; mtu = (Element) walker.nextNode()) {
            // Only outermost mtu elements are handled; nested ones are covered via their ancestor.
            if (!MaryDomUtils.hasAncestor(mtu, MaryXML.MTU)) {
                Element accentKeeper = findAccentKeeper(mtu);
                stripAccentsExcept(mtu, accentKeeper);
            }
        }
    }

    /**
     * Descends from {@code mtu} until a token element is reached. At each level the first child
     * element is taken if the current element's "accent" attribute equals "first", otherwise
     * the last child element is taken.
     *
     * @param mtu the element at which the descent starts
     * @return the token reached, or {@code null} if the descent ran out of child elements
     */
    private Element findAccentKeeper(Element mtu) {
        Element current = mtu;
        while (current != null && !current.getTagName().equals(MaryXML.TOKEN)) {
            boolean accentOnFirst = "first".equals(current.getAttribute("accent"));
            current = accentOnFirst
                    ? MaryDomUtils.getFirstChildElement(current)
                    : MaryDomUtils.getLastChildElement(current);
        }
        return current;
    }

    /**
     * Removes the "accent" attribute from every token element below {@code mtu}, except for
     * {@code keeper}.
     *
     * @param mtu the element whose descendant tokens are visited
     * @param keeper the token that retains its accent; may be {@code null}, in which case every
     *        token loses its accent
     */
    private void stripAccentsExcept(Element mtu, Element keeper) {
        NodeList tokensBelow = mtu.getElementsByTagName(MaryXML.TOKEN);
        for (int idx = 0; idx < tokensBelow.getLength(); idx++) {
            Element token = (Element) tokensBelow.item(idx);
            // Identity comparison on purpose: only the very same node object is spared.
            if (token != keeper) {
                token.removeAttribute("accent");
            }
        }
    }

    /**
     * Rewrites the "ph" attribute of every token that has one by applying, in list order, each
     * rule from {@link PhonologicalRules#getRules()} that matches the current transcription.
     * Later rules see the output of earlier ones.
     *
     * @param doc the document to modify in place
     */
    private void phonologicalRules(Document doc) {
        NodeList tokens = doc.getElementsByTagName(MaryXML.TOKEN);
        for (int idx = 0; idx < tokens.getLength(); idx++) {
            Element token = (Element) tokens.item(idx);
            if (!token.hasAttribute("ph")) {
                // Nothing to rewrite without a transcription.
                continue;
            }
            String ph = token.getAttribute("ph");

            // The precision depends on (a) the closest enclosing phonology element and
            // (b) whether the token carries an accent.
            int precision = requestedPrecision(token);
            boolean accented = token.hasAttribute("accent") && !"none".equals(token.getAttribute("accent"));
            if (accented) {
                precision = oneStepMorePrecise(precision);
            }

            List rules = PhonologicalRules.getRules();
            Iterator ruleIterator = rules.iterator();
            while (ruleIterator.hasNext()) {
                PhonologicalRules rule = (PhonologicalRules) ruleIterator.next();
                if (rule.matches(ph)) {
                    // Keep the result so that subsequent rules work on the rewritten form.
                    ph = rule.apply(ph, precision);
                }
            }
            token.setAttribute("ph", ph);
        }
    }

    /**
     * Reads the precision requested by the closest {@code MaryXML.PHONOLOGY} ancestor of
     * {@code token}.
     *
     * @param token the token whose ancestors are inspected
     * @return {@code PhonologicalRules.PRECISE} or {@code PhonologicalRules.SLOPPY} if the
     *         ancestor's "precision" attribute is "precise" or "sloppy" respectively;
     *         {@code PhonologicalRules.NORMAL} if there is no such ancestor or the attribute
     *         has any other value
     */
    private int requestedPrecision(Element token) {
        int level = PhonologicalRules.NORMAL;
        Element phonology = (Element) MaryDomUtils.getAncestor(token, MaryXML.PHONOLOGY);
        if (phonology == null) {
            return level;
        }
        String requested = phonology.getAttribute("precision");
        if ("precise".equals(requested)) {
            level = PhonologicalRules.PRECISE;
        } else if ("sloppy".equals(requested)) {
            level = PhonologicalRules.SLOPPY;
        }
        return level;
    }

    /**
     * Steps a precision level up by one: normal becomes precise and sloppy becomes normal.
     * Any other value is returned unchanged.
     *
     * @param precision the precision level to raise
     * @return the raised precision level
     */
    private int oneStepMorePrecise(int precision) {
        if (precision == PhonologicalRules.NORMAL) {
            return PhonologicalRules.PRECISE;
        }
        if (precision == PhonologicalRules.SLOPPY) {
            return PhonologicalRules.NORMAL;
        }
        return precision;
    }
}