/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.sinusoidal;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;
import marytts.signalproc.analysis.PitchMarks;
import marytts.signalproc.analysis.PitchReaderWriter;
import marytts.signalproc.window.Window;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;
/**
* Sinusoidal Modeling Synthesis Module. Given tracks of sinusoids estimated during analysis (and after any modifications),
* the output speech signal is synthesized.
*
* References: Quatieri, T. F. Discrete-Time Speech Signal Processing: Principles and Practice. Prentice-Hall Inc. 2001. (Chapter
* 9 – Sinusoidal Analysis/Synthesis)
*
* R.J. McAulay and T.F. Quatieri, "Speech Analysis/Synthesis Based on a Sinusoidal Representation," IEEE Transactions on
* Acoustics, Speech and Signal Processing, vol. ASSP-34, no. 4, August 1986.
*
* @author Oytun Türk
*/
public class PeakMatchedSinusoidalSynthesizer extends BaseSinusoidalSynthesizer {
    /**
     * Creates a synthesizer for the given sampling rate.
     *
     * @param samplingRate
     *            sampling rate in Hz, forwarded to the base class constructor
     */
    public PeakMatchedSinusoidalSynthesizer(int samplingRate) {
        super(samplingRate);
    }

    /**
     * Synthesizes a single set of sinusoidal tracks, printing progress messages to standard output.
     *
     * @param st
     *            the sinusoidal tracks to synthesize
     * @return the synthesized signal
     */
    public double[] synthesize(SinusoidalTracks st) {
        SinusoidalTracks[] sts = new SinusoidalTracks[1];
        sts[0] = st;
        return synthesize(sts, false);
    }

    /**
     * Synthesizes several sets of sinusoidal tracks and sums the results, printing progress messages to standard output.
     *
     * @param sts
     *            the track sets to synthesize and mix
     * @return the sample-wise sum of all synthesized signals, or <code>null</code> if <code>sts</code> is empty
     */
    public double[] synthesize(SinusoidalTracks[] sts) {
        return synthesize(sts, false);
    }

    /**
     * Synthesizes every track set in <code>sts</code> independently and adds the resulting signals sample by sample. The
     * output is as long as the longest individual signal; shorter signals contribute nothing beyond their own length.
     *
     * @param sts
     *            the track sets to synthesize and mix
     * @param isSilentSynthesis
     *            if <code>true</code>, per-track progress messages are suppressed
     * @return the sample-wise sum of all synthesized signals, or <code>null</code> if <code>sts</code> is empty
     */
    public double[] synthesize(SinusoidalTracks[] sts, boolean isSilentSynthesis) {
        double[] y = null;
        for (int i = 0; i < sts.length; i++) {
            double[] current = synthesize(sts[i], isSilentSynthesis);
            if (y == null) {
                y = current;
                continue;
            }
            // Grow the accumulator (zero padded) when the new signal is longer than what we have so far
            if (current.length > y.length) {
                double[] grown = new double[current.length];
                System.arraycopy(y, 0, grown, 0, y.length);
                y = grown;
            }
            // Always mix the new signal in, regardless of which of the two signals is longer
            for (int j = 0; j < current.length; j++) {
                y[j] += current[j];
            }
        }
        return y;
    }

    /**
     * Synthesizes one set of sinusoidal tracks. Between each pair of consecutive track points the amplitude is interpolated
     * linearly and the phase is interpolated with a cubic polynomial (Quatieri / McAulay-Quatieri). Segments whose starting
     * state is <code>SinusoidalTrack.TURNED_OFF</code> are skipped. For the first sample of a segment whose starting state is
     * <code>SinusoidalTrack.TURNED_ON</code>, the amplitude is forced to zero and the phase is extrapolated backwards from
     * the next track point.
     * <p>
     * The summed signal is finally scaled so that its absolute maximum equals <code>st.absMaxOriginal</code>. If the summed
     * signal is all zeros, it is returned unscaled, which avoids a division by zero.
     *
     * @param st
     *            the sinusoidal tracks to synthesize
     * @param isSilentSynthesis
     *            if <code>true</code>, per-track progress messages are suppressed
     * @return the synthesized signal
     */
    public double[] synthesize(SinusoidalTracks st, boolean isSilentSynthesis) {
        float tFinal = st.getOriginalDuration();
        int nFinal = (int) (Math.floor(tFinal * st.fs + 0.5));
        double[] y = new double[nFinal + 1]; // Java zero-initializes new arrays
        double oneOverTwoPi = 1.0 / MathUtils.TWOPI;

        for (int i = 0; i < st.totalTracks; i++) {
            SinusoidalTrack track = st.tracks[i];
            for (int j = 0; j < track.totalSins - 1; j++) {
                if (track.states[j] == SinusoidalTrack.TURNED_OFF)
                    continue;

                // Segment boundaries in samples, before (p*) and after (n*) clamping to the output buffer
                int pStart = (int) Math.floor(track.times[j] * st.fs + 0.5);
                int pEnd = (int) Math.floor(track.times[j + 1] * st.fs + 0.5);
                int nStart = Math.min(y.length - 1, Math.max(0, pStart));
                int nEnd = Math.min(y.length - 1, Math.max(0, pEnd));
                if (nEnd <= nStart)
                    continue; // nothing to render; this also guarantees pEnd > pStart below

                // Number of samples between consecutive frames (the pitch period in pitch synchronous processing)
                float T = (pEnd - pStart);
                float T2 = T * T;
                float T3 = T * T2;

                // Cubic phase interpolation coefficients (Quatieri); they depend only on the segment, not on n
                int M = (int) (Math.floor(oneOverTwoPi
                        * ((track.phases[j] + T * track.freqs[j] - track.phases[j + 1]) + (track.freqs[j + 1] - track.freqs[j])
                                * 0.5 * T) + 0.5));
                double term1 = track.phases[j + 1] - track.phases[j] - T * track.freqs[j] + M * MathUtils.TWOPI;
                double term2 = track.freqs[j + 1] - track.freqs[j];
                double alpha = 3.0 * term1 / T2 - term2 / T;
                double beta = -2 * term1 / T3 + term2 / T2;

                boolean isTurningOn = track.states[j] == SinusoidalTrack.TURNED_ON;

                for (int n = nStart; n < nEnd; n++) {
                    float currentAmp;
                    float currentTheta;
                    if (n == nStart && isTurningOn) {
                        // Turning on a track (Quatieri): start from zero amplitude with a back-extrapolated phase
                        currentTheta = track.phases[j + 1] - T * track.freqs[j + 1];
                        currentAmp = 0.0f;
                    } else {
                        // Linear amplitude interpolation
                        currentAmp = track.amps[j] + (track.amps[j + 1] - track.amps[j]) * ((float) n - pStart)
                                / (pEnd - pStart + 1);
                        // Cubic phase interpolation (Quatieri)
                        float t = ((float) n - nStart);
                        float t2 = t * t;
                        float t3 = t * t2;
                        currentTheta = (float) (track.phases[j] + track.freqs[j] * t + alpha * t2 + beta * t3);
                    }
                    y[n] += currentAmp * Math.cos(currentTheta);
                }
            }
            if (!isSilentSynthesis)
                System.out.println("Synthesized track " + String.valueOf(i + 1) + " of " + String.valueOf(st.totalTracks));
        }

        // Rescale to the original absolute maximum; skip when the output is silent to avoid dividing by zero
        double absMax = MathUtils.getAbsMax(y);
        if (absMax > 0.0)
            y = MathUtils.multiply(y, st.absMaxOriginal / absMax);
        return y;
    }

    /**
     * Command line demo: analyzes the audio file given as <code>args[0]</code> with a sinusoidal model, resynthesizes it and
     * writes the result next to the input file with the suffix <code>_sinResynthFullbandPitchSynch.wav</code>. The pitch
     * file is expected at the input path with its last four characters replaced by <code>.ptc</code>.
     *
     * @param args
     *            <code>args[0]</code> is the path of the input audio file
     * @throws UnsupportedAudioFileException
     *             if the input file is not a supported audio file
     * @throws IOException
     *             if reading or writing a file fails
     */
    public static void main(String[] args) throws UnsupportedAudioFileException, IOException {
        if (args == null || args.length < 1) {
            System.err.println("Usage: PeakMatchedSinusoidalSynthesizer <inputAudioFile>");
            return;
        }

        // File input
        AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(args[0]));
        try {
            int samplingRate = (int) inputAudio.getFormat().getSampleRate();
            AudioDoubleDataSource signal = new AudioDoubleDataSource(inputAudio);
            double[] x = signal.getAllData();

            // Input path without its last four characters (i.e. without an extension such as ".wav")
            String baseName = args[0].substring(0, args[0].length() - 4);
            String strPitchFile = baseName + ".ptc";

            // Analysis
            float deltaInHz = SinusoidalAnalysisParams.DEFAULT_DELTA_IN_HZ;
            float numPeriods = PitchSynchronousSinusoidalAnalyzer.DEFAULT_ANALYSIS_PERIODS;
            boolean isSilentSynthesis = false;
            int windowType = Window.HANNING;
            boolean bRefinePeakEstimatesParabola = false;
            boolean bRefinePeakEstimatesBias = false;
            boolean bSpectralReassignment = false;
            boolean bAdjustNeighFreqDependent = false;
            // int spectralEnvelopeType = SinusoidalAnalysisParams.LP_SPEC;
            int spectralEnvelopeType = SinusoidalAnalysisParams.SEEVOC_SPEC;
            float[] initialPeakLocationsInHz = new float[] { 350.0f };
            boolean isFixedRateAnalysis = false;
            double startFreqInHz = 0.0;
            double endFreqInHz = 0.5 * samplingRate;

            SinusoidalAnalysisParams params = new SinusoidalAnalysisParams(samplingRate, startFreqInHz, endFreqInHz,
                    windowType, bRefinePeakEstimatesParabola, bRefinePeakEstimatesBias, bSpectralReassignment,
                    bAdjustNeighFreqDependent);

            SinusoidalTracks st;
            if (isFixedRateAnalysis) {
                // Fixed window size and skip rate analysis
                double[] f0s = null;
                float ws_f0 = -1.0f;
                float ss_f0 = -1.0f;
                SinusoidalAnalyzer sa = new SinusoidalAnalyzer(params);
                if (spectralEnvelopeType == SinusoidalAnalysisParams.SEEVOC_SPEC) // Pitch info needed
                {
                    PitchReaderWriter f0 = new PitchReaderWriter(strPitchFile);
                    f0s = f0.contour;
                    ws_f0 = (float) f0.header.windowSizeInSeconds;
                    ss_f0 = (float) f0.header.skipSizeInSeconds;
                }
                st = sa.analyzeFixedRate(x, 0.020f, 0.010f, deltaInHz, spectralEnvelopeType, f0s, ws_f0, ss_f0);
            } else {
                // Pitch synchronous analysis
                PitchReaderWriter f0 = new PitchReaderWriter(strPitchFile);
                int pitchMarkOffset = 0;
                PitchMarks pm = SignalProcUtils.pitchContour2pitchMarks(f0.contour, samplingRate, x.length,
                        f0.header.windowSizeInSeconds, f0.header.skipSizeInSeconds, true, pitchMarkOffset);
                PitchSynchronousSinusoidalAnalyzer pa = new PitchSynchronousSinusoidalAnalyzer(params);
                st = pa.analyzePitchSynchronous(x, pm, numPeriods, -1.0f, deltaInHz, spectralEnvelopeType,
                        initialPeakLocationsInHz);
            }

            // Resynthesis
            PeakMatchedSinusoidalSynthesizer ss = new PeakMatchedSinusoidalSynthesizer(samplingRate);
            x = ss.synthesize(st, isSilentSynthesis);

            // File output
            DDSAudioInputStream outputAudio = new DDSAudioInputStream(new BufferedDoubleDataSource(x),
                    inputAudio.getFormat());
            String outFileName = baseName + "_sinResynthFullbandPitchSynch.wav";
            AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outFileName));
        } finally {
            // Release the input file handle even if analysis or synthesis fails
            inputAudio.close();
        }
    }
}