/** * Copyright 2000-2006 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.modules; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Locale; import javax.sound.sampled.AudioFormat; import javax.sound.sampled.AudioInputStream; import javax.sound.sampled.AudioSystem; import javax.sound.sampled.UnsupportedAudioFileException; import marytts.datatypes.MaryData; import marytts.datatypes.MaryDataType; import marytts.datatypes.MaryXML; import marytts.exceptions.SynthesisException; import marytts.modules.synthesis.Voice; import marytts.modules.synthesis.WaveformSynthesizer; import marytts.server.MaryProperties; import marytts.signalproc.effects.EffectsApplier; import marytts.util.data.audio.AppendableSequenceAudioInputStream; import marytts.util.dom.MaryDomUtils; import marytts.util.dom.NameNodeFilter; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.traversal.DocumentTraversal; import org.w3c.dom.traversal.NodeFilter; import org.w3c.dom.traversal.NodeIterator; /** * The synthesis module. * * @author Marc Schröder */ public class Synthesis extends InternalModule { private List<WaveformSynthesizer> waveformSynthesizers; private EffectsApplier effects; public Synthesis() { super("Synthesis", MaryDataType.ACOUSTPARAMS, MaryDataType.AUDIO, null); } public void startup() throws Exception { startupSynthesizers(); super.startup(); } private void startupSynthesizers() throws ClassNotFoundException, InstantiationException, Exception { waveformSynthesizers = new ArrayList<WaveformSynthesizer>(); for (String synthClassName : MaryProperties.synthesizerClasses()) { WaveformSynthesizer ws = (WaveformSynthesizer) Class.forName(synthClassName).newInstance(); ws.startup(); waveformSynthesizers.add(ws); } } /** * Perform a power-on self test by processing some example input data. * * @throws Error * if the module does not work properly. */ public synchronized void powerOnSelfTest() throws Error { for (Iterator<WaveformSynthesizer> it = waveformSynthesizers.iterator(); it.hasNext();) { WaveformSynthesizer ws = it.next(); ws.powerOnSelfTest(); } } public MaryData process(MaryData d) throws Exception { // We produce audio data, so we expect some helpers in our input: assert d.getAudioFileFormat() != null : "Audio file format is not set!"; Document doc = d.getDocument(); // As the input may contain multipe voice sections, // the challenge in this method is to join the audio data // resulting from individual synthesis calls with the respective // voice into one audio stream of the specified type. // Overall strategy: // * In input, identify the sections to be spoken by different voices // * For each of these sections, // - synthesise the section in the voice's native audio format // - convert to the common audio format if necessary / possible // * Join the audio input streams by appending each part to the output MaryData audio. // * Return a MaryData structure containing a single audio input stream // from which the audio data in the desired format can be read. AudioFormat targetFormat = d.getAudioFileFormat().getFormat(); Voice defaultVoice = d.getDefaultVoice(); String defaultStyle = d.getDefaultStyle(); String defaultEffects = d.getDefaultEffects(); Locale locale = d.getLocale(); String outputParams = d.getOutputParams(); if (defaultVoice == null) { defaultVoice = Voice.getDefaultVoice(locale); if (defaultVoice == null) { throw new SynthesisException("No voice available for locale '" + locale + "'"); } logger.info("No default voice associated with data. Assuming global default " + defaultVoice.getName()); } MaryData result = new MaryData(outputType(), d.getLocale()); // Also remember XML document in "AUDIO" output data, to keep track of phone durations: result.setDocument(doc); result.setAudioFileFormat(d.getAudioFileFormat()); if (d.getAudio() != null) { // This (empty) AppendableSequenceAudioInputStream object allows a // thread reading the audio data on the other "end" to get to our data as we are producing it. assert d.getAudio() instanceof AppendableSequenceAudioInputStream; result.setAudio(d.getAudio()); } NodeIterator it = ((DocumentTraversal) doc).createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, new NameNodeFilter( new String[] { MaryXML.TOKEN, MaryXML.BOUNDARY, MaryXML.NONVERBAL }), false); List<Element> elements = new ArrayList<Element>(); Element element = null; Voice currentVoice = defaultVoice; String currentStyle = defaultStyle; String currentEffect = defaultEffects; Element currentVoiceElement = null; Element currentSentence = null; while ((element = (Element) it.nextNode()) != null) { Element v = (Element) MaryDomUtils.getAncestor(element, MaryXML.VOICE); Element s = (Element) MaryDomUtils.getAncestor(element, MaryXML.SENTENCE); // Check for non-verbal elements if (element.getNodeName().equals(MaryXML.NONVERBAL)) { if (v != null) { Voice newvoice = Voice.getVoice(v); if (newvoice != null && newvoice.hasVocalizationSupport()) { AudioInputStream ais = newvoice.getVocalizationSynthesizer().synthesize(newvoice, d.getAudioFileFormat(), element); result.appendAudio(ais); } } continue; } // Chunk at boundaries between voice sections if (v == null) { if (currentVoiceElement != null) { // We have just left a voice section if (!elements.isEmpty()) { AudioInputStream ais = synthesizeOneSection(elements, currentVoice, currentStyle, currentEffect, targetFormat, outputParams); if (ais != null) { result.appendAudio(ais); } elements.clear(); } currentVoice = defaultVoice; currentStyle = defaultStyle; currentEffect = defaultEffects; currentVoiceElement = null; } } else if (v != currentVoiceElement || (v.getAttribute("style") != null && v.getAttribute("style") != "" && !v.getAttribute("style").equals( currentStyle)) || (v.getAttribute("effect") != null && v.getAttribute("effect") != "" && !v.getAttribute("effect").equals( currentEffect))) { // We have just entered a new voice section if (!elements.isEmpty()) { AudioInputStream ais = synthesizeOneSection(elements, currentVoice, currentStyle, currentEffect, targetFormat, outputParams); if (ais != null) { result.appendAudio(ais); } elements.clear(); } // Override with new voice, style, and/or effect Voice newVoice = Voice.getVoice(v); if (newVoice != null) { currentVoice = newVoice; } if (v.getAttribute("style") != null && v.getAttribute("style") != "") currentStyle = v.getAttribute("style"); if (v.getAttribute("effect") != null && v.getAttribute("effect") != "") currentEffect = v.getAttribute("effect"); currentVoiceElement = v; } // Chunk at sentence boundaries if (s != currentSentence) { if (!elements.isEmpty()) { AudioInputStream ais = synthesizeOneSection(elements, currentVoice, currentStyle, currentEffect, targetFormat, outputParams); if (ais != null) { result.appendAudio(ais); } elements.clear(); } currentSentence = s; } elements.add(element); } if (!elements.isEmpty()) { AudioInputStream ais = synthesizeOneSection(elements, currentVoice, currentStyle, currentEffect, targetFormat, outputParams); if (ais != null) { result.appendAudio(ais); } } return result; } /** * Synthesize one section, consisting of tokens and boundaries, with a given voice, to the given target audio format. */ private AudioInputStream synthesizeOneSection(List<Element> tokensAndBoundaries, Voice voice, String currentStyle, String currentEffect, AudioFormat targetFormat, String outputParams) throws SynthesisException, UnsupportedAudioFileException { // sanity check: are there any tokens containing phone descendants? if (!containsPhoneDescendants(tokensAndBoundaries)) { logger.warn("No PHONE segments found in this section; will not attempt to synthesize it!"); return null; } EffectsApplier ef = new EffectsApplier(); // HMM-only effects need to get their parameters prior to synthesis ef.setHMMEffectParameters(voice, currentEffect); // AudioInputStream ais = null; ais = voice.synthesize(tokensAndBoundaries, outputParams); if (ais == null) return null; // Conversion to targetFormat required? if (!ais.getFormat().matches(targetFormat)) { // Attempt conversion; if not supported, log a warning // and provide the non-converted stream. logger.info("Audio format conversion required for voice " + voice.getName()); try { AudioInputStream intermedStream = AudioSystem.getAudioInputStream(targetFormat, ais); ais = intermedStream; } catch (IllegalArgumentException iae) { // conversion not supported boolean solved = false; // try again with intermediate sample rate conversion if (!targetFormat.getEncoding().equals(ais.getFormat()) && targetFormat.getSampleRate() != ais.getFormat().getSampleRate()) { AudioFormat sampleRateConvFormat = new AudioFormat(ais.getFormat().getEncoding(), targetFormat.getSampleRate(), ais.getFormat().getSampleSizeInBits(), ais.getFormat().getChannels(), ais.getFormat().getFrameSize(), ais.getFormat().getFrameRate(), ais.getFormat().isBigEndian()); try { AudioInputStream intermedStream = AudioSystem.getAudioInputStream(sampleRateConvFormat, ais); ais = AudioSystem.getAudioInputStream(targetFormat, intermedStream); // No exception thrown, i.e. success solved = true; } catch (IllegalArgumentException iae1) { } } if (!solved) throw new UnsupportedAudioFileException("Conversion from audio format " + ais.getFormat() + " to requested audio format " + targetFormat + " not supported.\n" + iae.getMessage()); } } // Apply effect if present if (currentEffect != null && !currentEffect.equals("")) { ais = ef.apply(ais, currentEffect); } return ais; } /** * Check if the List of Elements contains any TOKENS that have PHONE descendants * * @param tokensAndBoundaries * the List of Elements to check for PHONE elements * @return true once a PHONE has been found within a TOKEN, false if this never happens */ private boolean containsPhoneDescendants(List<Element> tokensAndBoundaries) { for (Element element : tokensAndBoundaries) { if (element.getTagName().equals(MaryXML.TOKEN) && element.getElementsByTagName(MaryXML.PHONE).getLength() > 0) { return true; } } return false; } }