VocalizationSynthesizer.java example

Explorer
marytts-master
/**
 * Copyright 2000-2006 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.vocalizations;

import java.io.IOException;

import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;

import marytts.datatypes.MaryXML;
import marytts.exceptions.MaryConfigurationException;
import marytts.exceptions.SynthesisException;
import marytts.features.FeatureDefinition;
import marytts.modules.synthesis.Voice;
import marytts.server.MaryProperties;
import marytts.unitselection.data.Unit;
import marytts.util.MaryUtils;

import org.apache.log4j.Logger;
import org.w3c.dom.Element;

/**
 * The vocalization synthesis module.
 * 
 * @author Sathish Pammi
 */

public class VocalizationSynthesizer {

	protected VocalizationSynthesisTechnology vSynthesizer;
	protected VocalizationSelector vSelector;
	protected VocalizationUnitFileReader unitFileReader;
	protected boolean f0ContourImposeSupport;

	protected Logger logger = MaryUtils.getLogger("Vocalization Synthesizer");

	public VocalizationSynthesizer(Voice voice) throws MaryConfigurationException {

		if (!voice.hasVocalizationSupport()) {
			throw new MaryConfigurationException("This voice " + voice.toString() + " doesn't support synthesis of vocalizations");
		}

		String unitFileName = MaryProperties.getFilename("voice." + voice.getName() + ".vocalization.unitfile");

		try {
			this.unitFileReader = new VocalizationUnitFileReader(unitFileName);
		} catch (IOException e) {
			throw new MaryConfigurationException("can't read unit file");
		}

		String intonationFile = MaryProperties.getFilename("voice." + voice.getName() + ".vocalization.intonationfile");
		String technology = MaryProperties.getProperty("voice." + voice.getName() + ".vocalization.synthesisTechnology",
				"fdpsola");
		f0ContourImposeSupport = MaryProperties.getBoolean("voice." + voice.getName() + ".f0ContourImposeSupport", false);

		if ("fdpsola".equals(technology)) {
			String timelineFile = MaryProperties.getFilename("voice." + voice.getName() + ".vocalization.timeline");
			vSynthesizer = new FDPSOLASynthesisTechnology(timelineFile, unitFileName, intonationFile, f0ContourImposeSupport);
		} else if ("mlsa".equals(technology)) {
			boolean imposePolynomialContour = MaryProperties.getBoolean("voice." + voice.getName()
					+ ".vocalization.imposePolynomialContour", true);
			String mlsaFeatureFile = MaryProperties.getFilename("voice." + voice.getName() + ".vocalization.mlsafeaturefile");
			String mixedExcitationFilter = MaryProperties.getFilename("voice." + voice.getName()
					+ ".vocalization.mixedexcitationfilter");
			vSynthesizer = new MLSASynthesisTechnology(mlsaFeatureFile, intonationFile, mixedExcitationFilter,
					imposePolynomialContour);
		} else if ("hnm".equals(technology)) {
			String timelineFile = MaryProperties.getFilename("voice." + voice.getName() + ".vocalization.timeline");
			String hnmFeatureFile = MaryProperties.getFilename("voice." + voice.getName() + ".vocalization.hnmfeaturefile");
			vSynthesizer = new HNMSynthesisTechnology(timelineFile, unitFileName, hnmFeatureFile, intonationFile,
					f0ContourImposeSupport);
		} else {
			throw new MaryConfigurationException("the property 'voice." + voice.getName()
					+ ".vocalization.synthesisTechnology' should be one among 'hnm', 'mlsa' and 'fdpsola'");
		}

		this.vSelector = new VocalizationSelector(voice);
	}

	/**
	 * Handle a request for synthesis of vocalization
	 * 
	 * @param voice
	 *            the selected voice
	 * @param aft
	 *            AudioFileFormat of the output AudioInputStream
	 * @param domElement
	 *            target xml element ('vocalization' element)
	 * @return AudioInputStream of requested vocalization it returns null if the voice doesn't support synthesis of vocalizations
	 * @throws Exception
	 *             if domElement contains 'variant' attribute value is greater than available number of vocalizations
	 */
	public AudioInputStream synthesize(Voice voice, AudioFileFormat aft, Element domElement) throws Exception {

		if (!voice.hasVocalizationSupport())
			return null;

		if (domElement.hasAttribute("variant")) {
			return synthesizeVariant(aft, domElement);
		}

		if (f0ContourImposeSupport) {
			return synthesizeImposedIntonation(aft, domElement);
		}

		return synthesizeVocalization(aft, domElement);
	}

	/**
	 * Synthesize a "variant" vocalization
	 * 
	 * @param aft
	 *            AudioFileFormat of the output AudioInputStream
	 * @param domElement
	 *            target 'vocalization' xml element
	 * @return AudioInputStream of requested vocalization
	 * @throws SynthesisException
	 *             if it can't synthesize vocalization
	 * @throws IllegalArgumentException
	 *             if domElement contains 'variant' attribute value is greater than available number of vocalizations
	 */
	private AudioInputStream synthesizeVariant(AudioFileFormat aft, Element domElement) throws SynthesisException {

		int numberOfBackChannels = unitFileReader.getNumberOfUnits();
		int backchannelNumber = 0;

		if (domElement.hasAttribute("variant")) {
			backchannelNumber = Integer.parseInt(domElement.getAttribute("variant"));
		}

		if (backchannelNumber >= numberOfBackChannels) {
			throw new IllegalArgumentException("This voice has " + numberOfBackChannels
					+ " backchannels only. so it doesn't support unit number " + backchannelNumber);
		}

		return synthesizeSelectedVocalization(backchannelNumber, aft, domElement);
	}

	/**
	 * Synthesize a vocalization which fits better for given target
	 * 
	 * @param aft
	 *            AudioFileFormat of the output AudioInputStream
	 * @param domElement
	 *            target 'vocalization' xml element
	 * @return AudioInputStream output audio
	 * @throws SynthesisException
	 *             if it can't synthesize vocalization
	 */
	private AudioInputStream synthesizeVocalization(AudioFileFormat aft, Element domElement) throws SynthesisException {

		int numberOfBackChannels = unitFileReader.getNumberOfUnits();
		int backchannelNumber = vSelector.getBestMatchingCandidate(domElement);
		// here it is a bug, if getBestMatchingCandidate select a backchannelNumber greater than numberOfBackChannels
		assert backchannelNumber < numberOfBackChannels : "This voice has " + numberOfBackChannels
				+ " backchannels only. so it doesn't support unit number " + backchannelNumber;

		return synthesizeSelectedVocalization(backchannelNumber, aft, domElement);
	}

	/**
	 * Synthesize a vocalization which fits better for given target, in addition, impose intonation from closest best vocalization
	 * according to given feature definition for intonation selection
	 * 
	 * @param aft
	 *            AudioFileFormat of the output AudioInputStream
	 * @param domElement
	 *            target 'vocalization' xml element
	 * @return AudioInputStream output audio
	 * @throws SynthesisException
	 *             if it can't synthesize vocalization
	 */
	private AudioInputStream synthesizeImposedIntonation(AudioFileFormat aft, Element domElement) throws SynthesisException {

		SourceTargetPair imposeF0Data = vSelector.getBestCandidatePairtoImposeF0(domElement);
		int targetIndex = imposeF0Data.getTargetUnitIndex();
		int sourceIndex = imposeF0Data.getSourceUnitIndex();

		logger.debug("Synthesizing candidate " + sourceIndex + " with intonation contour " + targetIndex);

		if (targetIndex == sourceIndex) {
			return synthesizeSelectedVocalization(sourceIndex, aft, domElement);
		}

		return imposeF0ContourOnVocalization(sourceIndex, targetIndex, aft, domElement);
	}

	/**
	 * Impose a target f0 contour onto a (source) unit
	 * 
	 * @param sourceIndex
	 *            unit index of segmentalform unit
	 * @param targetIndex
	 *            unit index of target f0 contour
	 * @param aft
	 *            AudioFileFormat of the output AudioInputStream
	 * @param domElement
	 *            target 'vocalization' xml element
	 * @return AudioInputStream of requested vocalization
	 * @throws SynthesisException
	 *             if no data can be read at the given target time or if audio processing fails
	 */
	private AudioInputStream imposeF0ContourOnVocalization(int sourceIndex, int targetIndex, AudioFileFormat aft,
			Element domElement) throws SynthesisException {

		int numberOfBackChannels = unitFileReader.getNumberOfUnits();

		if (targetIndex >= numberOfBackChannels) {
			throw new IllegalArgumentException("This voice has " + numberOfBackChannels
					+ " backchannels only. so it doesn't support unit number " + targetIndex);
		}

		if (sourceIndex >= numberOfBackChannels) {
			throw new IllegalArgumentException("This voice has " + numberOfBackChannels
					+ " backchannels only. so it doesn't support unit number " + sourceIndex);
		}

		VocalizationUnit bUnit = unitFileReader.getUnit(sourceIndex);
		Unit[] units = bUnit.getUnits();
		String[] unitNames = bUnit.getUnitNames();
		long endTime = 0l;
		for (int i = 0; i < units.length; i++) {
			int unitDuration = units[i].duration * 1000 / unitFileReader.getSampleRate();
			endTime += unitDuration;
			Element element = MaryXML.createElement(domElement.getOwnerDocument(), MaryXML.PHONE);
			element.setAttribute("d", Integer.toString(unitDuration));
			element.setAttribute("end", Long.toString(endTime));
			element.setAttribute("p", unitNames[i]);
			domElement.appendChild(element);
		}

		return this.vSynthesizer.synthesizeUsingImposedF0(sourceIndex, targetIndex, aft);
	}

	/**
	 * Synthesize a selected vocalization
	 * 
	 * @param backchannelNumber
	 *            unit index number
	 * @param aft
	 *            AudioFileFormat of the output AudioInputStream
	 * @param domElement
	 *            target 'vocalization' xml element
	 * @return AudioInputStream output audio
	 * @throws SynthesisException
	 *             if it can't synthesize vocalization
	 * @throws IllegalArgumentException
	 *             if given backchannelNumber > no. of available vocalizations
	 */
	private AudioInputStream synthesizeSelectedVocalization(int backchannelNumber, AudioFileFormat aft, Element domElement)
			throws SynthesisException {

		int numberOfBackChannels = unitFileReader.getNumberOfUnits();
		if (backchannelNumber >= numberOfBackChannels) {
			throw new IllegalArgumentException("This voice has " + numberOfBackChannels
					+ " backchannels only. so it doesn't support unit number " + backchannelNumber);
		}

		VocalizationUnit bUnit = unitFileReader.getUnit(backchannelNumber);
		Unit[] units = bUnit.getUnits();
		String[] unitNames = bUnit.getUnitNames();
		long endTime = 0l;
		for (int i = 0; i < units.length; i++) {
			int unitDuration = units[i].duration * 1000 / unitFileReader.getSampleRate();
			endTime += unitDuration;
			Element element = MaryXML.createElement(domElement.getOwnerDocument(), MaryXML.PHONE);
			element.setAttribute("d", Integer.toString(unitDuration));
			element.setAttribute("end", Long.toString(endTime));
			element.setAttribute("p", unitNames[i]);
			domElement.appendChild(element);
		}

		return this.vSynthesizer.synthesize(backchannelNumber, aft);
	}

	/**
	 * List the possible vocalization names that are available for the given voice. These values can be used in the "name"
	 * attribute of the vocalization tag.
	 * 
	 * @return an array of Strings, each string containing one unique vocalization name.
	 */
	public String[] listAvailableVocalizations() {
		FeatureDefinition featureDefinition = vSelector.getFeatureDefinition();
		assert featureDefinition.hasFeature("name");
		int nameIndex = featureDefinition.getFeatureIndex("name");
		return featureDefinition.getPossibleValues(nameIndex);
	}
}