/**
 * Copyright 2000-2006 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.vocalizations;

import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedList;

import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;

import marytts.exceptions.MaryConfigurationException;
import marytts.exceptions.SynthesisException;
import marytts.signalproc.process.FDPSOLAProcessor;
import marytts.unitselection.data.TimelineReader;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.Datagram;
import marytts.util.data.DatagramDoubleDataSource;
import marytts.util.data.DoubleDataSource;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.math.MathUtils;
import marytts.util.math.Polynomial;

/**
 * FDPSOLA Synthesis technology to synthesize vocalizations
 *
 * @author Sathish Pammi
 */
public class FDPSOLASynthesisTechnology extends VocalizationSynthesisTechnology {

    /** Peak absolute amplitude of the signal returned by the F0-modification path. */
    private static final double NORMALISED_PEAK = 0.3;

    protected VocalizationIntonationReader vIntonationReader;
    protected TimelineReader audioTimeline;
    protected VocalizationUnitFileReader unitFileReader;
    protected boolean f0ContourImposeSupport;

    /**
     * Create the synthesis technology by opening the data files it needs.
     *
     * @param waveTimeLineFile
     *            file name of the audio timeline
     * @param unitFile
     *            file name of the vocalization unit file
     * @param intonationFeatureFile
     *            file name of the intonation feature file; only opened when {@code imposeF0Support} is true
     * @param imposeF0Support
     *            whether F0 contour imposition (and re-synthesis) is enabled for this voice
     * @throws MaryConfigurationException
     *             if one of the files cannot be read
     */
    public FDPSOLASynthesisTechnology(String waveTimeLineFile, String unitFile, String intonationFeatureFile,
            boolean imposeF0Support) throws MaryConfigurationException {
        try {
            this.audioTimeline = new TimelineReader(waveTimeLineFile);
            this.unitFileReader = new VocalizationUnitFileReader(unitFile);
            this.f0ContourImposeSupport = imposeF0Support;
            if (f0ContourImposeSupport) {
                this.vIntonationReader = new VocalizationIntonationReader(intonationFeatureFile);
            } else {
                this.vIntonationReader = null;
            }
        } catch (IOException e) {
            // Message text is kept as before; the cause is attached so the stack trace is not lost.
            throw new MaryConfigurationException("Can not read data from files " + e, e);
        }
    }

    /**
     * Create the synthesis technology from already opened readers.
     *
     * @param audioTimeline
     *            audio timeline reader
     * @param unitFileReader
     *            vocalization unit file reader
     * @param vHNMFeaturesReader
     *            not used by this class
     * @param vIntonationReader
     *            intonation reader
     * @param imposeF0Support
     *            whether F0 contour imposition (and re-synthesis) is enabled for this voice
     */
    public FDPSOLASynthesisTechnology(TimelineReader audioTimeline, VocalizationUnitFileReader unitFileReader,
            HNMFeatureFileReader vHNMFeaturesReader, VocalizationIntonationReader vIntonationReader,
            boolean imposeF0Support) {
        this.audioTimeline = audioTimeline;
        this.unitFileReader = unitFileReader;
        this.vIntonationReader = vIntonationReader;
        this.f0ContourImposeSupport = imposeF0Support;
    }

    /**
     * Synthesize given vocalization (i.e. unit-selection)
     *
     * @param backchannelNumber
     *            unit index
     * @param aft
     *            audio file format
     * @return AudioInputStream of synthesized vocalization
     * @throws SynthesisException
     *             if failed to synthesize vocalization
     * @throws IllegalArgumentException
     *             if the unit index is negative or not smaller than the number of units
     */
    @Override
    public AudioInputStream synthesize(int backchannelNumber, AudioFileFormat aft) throws SynthesisException {
        int numberOfBackChannels = unitFileReader.getNumberOfUnits();
        if (backchannelNumber < 0 || backchannelNumber >= numberOfBackChannels) {
            throw new IllegalArgumentException("This voice has " + numberOfBackChannels
                    + " backchannels only. so it doesn't support unit number " + backchannelNumber);
        }

        VocalizationUnit bUnit = unitFileReader.getUnit(backchannelNumber);
        long start = bUnit.startTime;
        int duration = bUnit.duration;
        Datagram[] frames;
        try {
            frames = audioTimeline.getDatagrams(start, duration);
        } catch (IOException e) {
            throw new SynthesisException("Can not read data from timeline file " + e, e);
        }

        // Generate audio from frames
        LinkedList<Datagram> datagrams = new LinkedList<Datagram>(Arrays.asList(frames));
        DoubleDataSource audioSource = new DatagramDoubleDataSource(datagrams);
        return new DDSAudioInputStream(new BufferedDoubleDataSource(audioSource), aft.getFormat());
    }

    /**
     * Re-synthesize given vocalization using FDPSOLA technology, with pitch and time scales of 1.0
     *
     * @param backchannelNumber
     *            unit index
     * @param aft
     *            audio file format
     * @return AudioInputStream of synthesized vocalization
     * @throws SynthesisException
     *             if failed to synthesize vocalization
     */
    public AudioInputStream reSynthesize(int backchannelNumber, AudioFileFormat aft) throws SynthesisException {
        double[] pScalesArray = { 1.0 };
        double[] tScalesArray = { 1.0 };
        return synthesizeUsingF0Modification(backchannelNumber, pScalesArray, tScalesArray, aft);
    }

    /**
     * Impose target intonation contour on given vocalization using FDPSOLA technology.
     * <p>
     * Falls back to {@link #reSynthesize(int, AudioFileFormat)} when source and target are the same unit, or when
     * the source contour or either set of intonation coefficients is missing or empty.
     *
     * @param sourceIndex
     *            unit index of vocalization
     * @param targetIndex
     *            unit index of target intonation
     * @param aft
     *            audio file format
     * @return AudioInputStream of synthesized vocalization
     * @throws SynthesisException
     *             if failed to synthesize vocalization, or if this voice does not support contour imposition
     * @throws IllegalArgumentException
     *             if either index is negative or not smaller than the number of units
     */
    @Override
    public AudioInputStream synthesizeUsingImposedF0(int sourceIndex, int targetIndex, AudioFileFormat aft)
            throws SynthesisException {

        if (!f0ContourImposeSupport) {
            throw new SynthesisException("Mary configuration of this voice doesn't support intonation contour imposition");
        }

        int numberOfUnits = unitFileReader.getNumberOfUnits();
        if (sourceIndex < 0 || targetIndex < 0) {
            throw new IllegalArgumentException("sourceIndex(" + sourceIndex + ") and targetIndex(" + targetIndex
                    + ") must not be negative");
        }
        if (sourceIndex >= numberOfUnits || targetIndex >= numberOfUnits) {
            throw new IllegalArgumentException("sourceIndex(" + sourceIndex + ") and targetIndex(" + targetIndex
                    + ") are should be less than number of available units (" + numberOfUnits + ")");
        }

        if (sourceIndex == targetIndex) {
            return reSynthesize(sourceIndex, aft);
        }

        double[] sourceF0 = this.vIntonationReader.getContour(sourceIndex);
        double[] targetF0coeffs = this.vIntonationReader.getIntonationCoeffs(targetIndex);
        double[] sourceF0coeffs = this.vIntonationReader.getIntonationCoeffs(sourceIndex);

        // Without a source contour (it fixes the number of points) or without both
        // coefficient sets, no pitch-scale contour can be computed.
        if (sourceF0 == null || sourceF0.length == 0) {
            return reSynthesize(sourceIndex, aft);
        }
        if (targetF0coeffs == null || sourceF0coeffs == null) {
            return reSynthesize(sourceIndex, aft);
        }
        if (targetF0coeffs.length == 0 || sourceF0coeffs.length == 0) {
            return reSynthesize(sourceIndex, aft);
        }

        // Both contours are regenerated from their polynomial coefficients over the same number of points.
        int numberOfPoints = sourceF0.length;
        double[] targetF0 = Polynomial.generatePolynomialValues(targetF0coeffs, numberOfPoints, 0, 1);
        sourceF0 = Polynomial.generatePolynomialValues(sourceF0coeffs, numberOfPoints, 0, 1);

        assert targetF0.length == sourceF0.length;
        double[] tScalesArray = new double[sourceF0.length];
        double[] pScalesArray = new double[sourceF0.length];
        Arrays.fill(tScalesArray, 1.0);

        for (int i = 0; i < targetF0.length; i++) {
            // The ratio is rounded to float precision before being stored.
            double ratio = (float) (targetF0[i] / sourceF0[i]);
            // A zero source value gives Infinity or NaN; leave the pitch of such a frame unchanged.
            if (Double.isNaN(ratio) || Double.isInfinite(ratio)) {
                ratio = 1.0;
            }
            pScalesArray[i] = ratio;
        }

        return synthesizeUsingF0Modification(sourceIndex, pScalesArray, tScalesArray, aft);
    }

    /**
     * modify intonation contour using FDPSOLA technology
     *
     * @param backchannelNumber
     *            unit index of vocalization
     * @param pScalesArray
     *            pitch scales array; resized to the number of audio frames of the unit
     * @param tScalesArray
     *            time scales array; resized to the number of audio frames of the unit
     * @param aft
     *            audio file format; when null, a default format is used (16 kHz, 16 bit, mono, signed,
     *            little-endian)
     * @return AudioInputStream of synthesized vocalization, scaled so that its peak absolute amplitude is 0.3
     * @throws SynthesisException
     *             if failed to synthesize vocalization, or if this voice does not support contour imposition
     * @throws IllegalArgumentException
     *             if the unit index is negative or not smaller than the number of units
     */
    private AudioInputStream synthesizeUsingF0Modification(int backchannelNumber, double[] pScalesArray,
            double[] tScalesArray, AudioFileFormat aft) throws SynthesisException {

        // Valid unit indices are 0 .. numberOfUnits - 1.
        if (backchannelNumber < 0 || backchannelNumber >= unitFileReader.getNumberOfUnits()) {
            throw new IllegalArgumentException("requesting unit should not be more than number of units");
        }

        if (!f0ContourImposeSupport) {
            throw new SynthesisException("Mary configuration of this voice doesn't support intonation contour imposition");
        }

        VocalizationUnit bUnit = unitFileReader.getUnit(backchannelNumber);
        long start = bUnit.startTime;
        int duration = bUnit.duration;
        Datagram[] frames;
        try {
            frames = audioTimeline.getDatagrams(start, duration);
        } catch (IOException e) {
            throw new SynthesisException("cannot get audio frames from timeline file " + e, e);
        }
        assert frames != null : "Cannot generate audio from null frames";

        // One pitch scale and one time scale per audio frame.
        pScalesArray = MathUtils.arrayResize(pScalesArray, frames.length);
        tScalesArray = MathUtils.arrayResize(tScalesArray, frames.length);

        assert tScalesArray.length == pScalesArray.length;
        assert frames.length == tScalesArray.length;

        AudioFormat af = (aft == null) ? defaultAudioFormat() : aft.getFormat();

        double[] audioSamples = (new FDPSOLAProcessor()).processDatagram(frames, null, af, null, pScalesArray,
                tScalesArray, false);

        // Scale the signal so that its peak absolute amplitude becomes NORMALISED_PEAK.
        // An all-zero signal is left untouched: dividing by a zero peak would turn every sample into NaN.
        double maxSample = MathUtils.getAbsMax(audioSamples);
        if (maxSample != 0.0) {
            for (int i = 0; i < audioSamples.length; i++) {
                audioSamples[i] = NORMALISED_PEAK * (audioSamples[i] / maxSample);
            }
        }

        return new DDSAudioInputStream(new BufferedDoubleDataSource(audioSamples), af);
    }

    /**
     * Audio format used when the caller does not supply one.
     *
     * @return 16 kHz, 16 bit, mono, signed, little-endian PCM format
     */
    private static AudioFormat defaultAudioFormat() {
        float sampleRate = 16000.0F; // 8000,11025,16000,22050,44100
        int sampleSizeInBits = 16; // 8,16
        int channels = 1; // 1,2
        boolean signed = true; // true,false
        boolean bigEndian = false; // true,false
        return new AudioFormat(sampleRate, sampleSizeInBits, channels, signed, bigEndian);
    }
}