/** * Copyright 2000-2006 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.language.en; import java.util.Locale; import marytts.datatypes.MaryData; import marytts.datatypes.MaryDataType; import marytts.datatypes.MaryXML; import marytts.util.MaryUtils; import marytts.util.dom.MaryDomUtils; import marytts.util.dom.NameNodeFilter; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.traversal.DocumentTraversal; import org.w3c.dom.traversal.NodeFilter; import org.w3c.dom.traversal.NodeIterator; /** * * @author Marc Schröder */ public class JTokeniser extends marytts.modules.JTokeniser { /** * */ public JTokeniser() { super(MaryDataType.RAWMARYXML, MaryDataType.TOKENS, Locale.ENGLISH); } public MaryData process(MaryData d) throws Exception { MaryData result = super.process(d); normaliseToAscii(result); return result; } protected void normaliseToAscii(MaryData d) { Document doc = d.getDocument(); NodeIterator ni = ((DocumentTraversal) doc).createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, new NameNodeFilter( MaryXML.TOKEN), false); Element t = null; while ((t = (Element) ni.nextNode()) != null) { String s = MaryDomUtils.tokenText(t); String normalised = MaryUtils.normaliseUnicodeLetters(s, Locale.ENGLISH); if (!s.equals(normalised)) { MaryDomUtils.setTokenText(t, normalised); } } } /** * In current FreeTTS code, prosody elements get lost. So remember at least the force-accent element on individual tokens: * * @param d * d * @deprecated FreeTTS is no longer used, so this method no longer serves a purpose. */ @Deprecated protected void propagateForceAccent(MaryData d) { Document doc = d.getDocument(); NodeIterator prosodyNI = ((DocumentTraversal) doc).createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, new NameNodeFilter( MaryXML.PROSODY), false); Element prosody = null; while ((prosody = (Element) prosodyNI.nextNode()) != null) { if (prosody.hasAttribute("force-accent")) { String forceAccent = prosody.getAttribute("force-accent"); String accent = null; if (forceAccent.equals("none")) { accent = "none"; } else { accent = "unknown"; } NodeIterator tNI = ((DocumentTraversal) doc).createNodeIterator(prosody, NodeFilter.SHOW_ELEMENT, new NameNodeFilter(MaryXML.TOKEN), false); Element t = null; while ((t = (Element) tNI.nextNode()) != null) { if (!t.hasAttribute("accent")) { t.setAttribute("accent", accent); } } // while t } } // while prosody } }