/** * Copyright 2002 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.language.it; import java.util.Iterator; import java.util.List; import java.util.Locale; import marytts.datatypes.MaryData; import marytts.datatypes.MaryDataType; import marytts.datatypes.MaryXML; import marytts.modules.InternalModule; import marytts.modules.PronunciationModel; import marytts.modules.phonemiser.AllophoneSet; import marytts.util.dom.MaryDomUtils; import marytts.util.dom.NameNodeFilter; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.w3c.dom.traversal.DocumentTraversal; import org.w3c.dom.traversal.NodeFilter; import org.w3c.dom.traversal.TreeWalker; /** * The postlexical phonological processes module. Used as first option to solve proclitics tokens and sillabification merging * * @author Fabio Tesser */ public class Postlex extends PronunciationModel { public Postlex() { super(Locale.ITALIAN); } public MaryData process(MaryData d) throws Exception { // System.err.println("Italian Postlex START"); Document doc = d.getDocument(); mtuMergeTokenPostlex(doc); return super.process(d); } /* * Return Quote space if quote present */ String returnQuoteIfStress(String lPhones) { if (lPhones.indexOf("'") != -1) { // System.out.println("there is ' in temp string"); return "' "; } else { // System.out.println("there is no ' in temp string"); return ""; } } /* * This method is used when proclitics are found in mtu proclitics is c'X (if there is in it_clitics.xml) */ private void mtuMergeTokenPostlex(Document doc) throws DOMException { TreeWalker tw = ((DocumentTraversal) doc).createTreeWalker(doc, NodeFilter.SHOW_ELEMENT, new NameNodeFilter(MaryXML.MTU), false); Element m = null; while ((m = (Element) tw.nextNode()) != null) { if (MaryDomUtils.hasAncestor(m, MaryXML.MTU)) // not highest-level continue; // Now m is a highest-level mtu element // Search for the token whose accent is retained; // all other accents will be deleted. Element c = m; while (c != null && !c.getTagName().equals(MaryXML.TOKEN)) { String whatToAccent = c.getAttribute("accent"); // check for last-proclitics (c' t' d' X) if (whatToAccent != null && whatToAccent.equals("last-proclitics")) { // System.err.println("token to join!!! in " + c.getNodeName()); Element c1 = MaryDomUtils.getFirstChildElement(c); // get the anchor as reference in order to delete the children after Element c_anchor = c; // set c as last (second) child element in this case // WARNING we treat the case with 2 token only!!! c = MaryDomUtils.getLastChildElement(c); // merge ph and POS? c.setAttribute("merged-token", "yes"); c.setAttribute("g2p_method", c1.getAttribute("g2p_method") + "+" + c.getAttribute("g2p_method")); // TODO: accent= to merge? take the first or the second? // c.setAttribute("accent", c1.getAttribute("accent")); // + "+" + c.getAttribute("accent")); // c.removeAttribute("accent"); c.setTextContent(c1.getTextContent() + "+" + c.getTextContent()); // Merge the ph and write the quote if necessary c.setAttribute("ph", returnQuoteIfStress(c.getAttribute("ph")) + c1.getAttribute("ph") + " " + c.getAttribute("ph")); // TODO: POS are not merged if you want to merge the POS: // c.setAttribute("pos", c1.getAttribute("pos") + "+" +c.getAttribute("pos")); // c.setAttribute("pos",c1.getAttribute("pos"); // remove child token c_anchor.removeChild(c1); } else c = MaryDomUtils.getLastChildElement(c); } /* * Element retainAccentToken = c; * * // Now all token below m except retainAccentToken get // their accent deleted. System.err.println("the olio" + * m.getNodeName()); NodeList tokens = m.getElementsByTagName(MaryXML.TOKEN); System.err.println("OK number" + * tokens.getLength()); for (int i=0; i<tokens.getLength(); i++) { Element t = (Element) tokens.item(i); * System.err.println("OK" ); if (t == retainAccentToken) // not the same *Object*! { * * System.err.println("VAI!!!!:" + t.getNodeName()); t.setNodeValue("aaaa"); //System.err.println("VAI!!!!:" + t.get); * //Element syl = MaryDomUtils.getFirstChildElement(t); // System.err.println("VAI!!!!: " + syl.getLocalName()); } } */ } // for all highest-level mtu elements } }