/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* Permission is hereby granted, free of charge, to use and distribute
* this software and its documentation without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of this work, and to
* permit persons to whom this work is furnished to do so, subject to
* the following conditions:
*
* 1. The code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* 2. Any modifications must be clearly marked as such.
* 3. Original authors' names are not deleted.
* 4. The authors' names are not used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
* CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
* THIS SOFTWARE.
*/
/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* Permission is hereby granted, free of charge, to use and distribute
* this software and its documentation without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of this work, and to
* permit persons to whom this work is furnished to do so, subject to
* the following conditions:
*
* 1. The code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* 2. Any modifications must be clearly marked as such.
* 3. Original authors' names are not deleted.
* 4. The authors' names are not used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
* CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
* THIS SOFTWARE.
*/
package marytts.signalproc.sinusoidal.hntm.synthesis;
import java.util.Arrays;
import marytts.signalproc.sinusoidal.hntm.analysis.FrameNoisePartWaveform;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmAnalyzerParams;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechFrame;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechSignal;
import marytts.signalproc.window.HammingWindow;
import marytts.signalproc.window.Window;
import marytts.util.math.ArrayUtils;
import marytts.util.signal.SignalProcUtils;
/**
* Synthesizes the noise part waveform from non-overlapping chunks of waveform data. This model is the most natural one
* since it does not involve any noise model.
*
* @author oytun.turk
*
*/
public class NoisePartWaveformSynthesizer {

    /** Accumulated window weights at or below this value are treated as zero and not used for normalization. */
    private static final double MIN_NORMALIZATION_WEIGHT = 1.0e-10;

    // TO DO: This should use overlap add since the noise waveform will not be a continuous waveform in TTS
    /**
     * Synthesizes the noise part of an HNM signal by windowing each frame's stored noise waveform (extended by a left
     * and a right context) and accumulating it into the output, then normalizing by the accumulated window weights.
     *
     * @param hnmSignal
     *            the analyzed signal; if it is null or has no frames, null is returned
     * @param leftContexts
     *            per-frame left contexts, indexed like {@code hnmSignal.frames}; if null, the previous frame of the
     *            signal is used as left context (i.e. the signal is treated as continuous, not concatenated)
     * @param rightContexts
     *            per-frame right contexts, indexed like {@code hnmSignal.frames}; if null, the next frame of the signal
     *            is used as right context
     * @param analysisParams
     *            analysis parameters; currently not used by this method
     * @return the synthesized noise waveform whose length is derived from the original duration and sampling rate of
     *         the signal, or null if there is nothing to synthesize
     */
    public static double[] synthesize(HntmSpeechSignal hnmSignal, HntmSpeechFrame[] leftContexts,
            HntmSpeechFrame[] rightContexts, HntmAnalyzerParams analysisParams) {
        if (hnmSignal == null || hnmSignal.frames == null) {
            return null;
        }

        int outputLen = SignalProcUtils.time2sample(hnmSignal.originalDurationInSeconds, hnmSignal.samplingRateInHz);
        // Newly allocated double arrays are already zero-filled, so no explicit fill is needed.
        double[] noisePartWaveform = new double[outputLen];
        double[] wgts = new double[outputLen];

        boolean existsLeftContexts = (leftContexts != null);
        boolean existsRightContexts = (rightContexts != null);
        int lastFrameIndex = hnmSignal.frames.length - 1;

        // TO DO: Overlap waveform noise case! See analysis code!
        for (int i = 0; i <= lastFrameIndex; i++) {
            HntmSpeechFrame prevFrame = (i > 0) ? hnmSignal.frames[i - 1] : null;
            HntmSpeechFrame nextFrame = (i < lastFrameIndex) ? hnmSignal.frames[i + 1] : null;
            HntmSpeechFrame currentLeftContext = existsLeftContexts ? leftContexts[i] : null;
            HntmSpeechFrame currentRightContext = existsRightContexts ? rightContexts[i] : null;

            processFrame(prevFrame, hnmSignal.frames[i], nextFrame, hnmSignal.samplingRateInHz, i == 0,
                    i == lastFrameIndex, noisePartWaveform, wgts, existsLeftContexts, currentLeftContext,
                    existsRightContexts, currentRightContext);
        }

        // Undo the window weighting wherever at least one frame contributed a non-negligible weight.
        for (int i = 0; i < outputLen; i++) {
            if (wgts[i] > MIN_NORMALIZATION_WEIGHT) {
                noisePartWaveform[i] /= wgts[i];
            }
        }

        return noisePartWaveform;
    }

    /**
     * Adds the Hamming-windowed noise waveform of one frame, surrounded by its left and right context waveforms, into
     * {@code noisePartWaveform} and adds the corresponding window coefficients into {@code wgts}. Frames whose noise
     * part is not a {@link FrameNoisePartWaveform} are skipped.
     *
     * @param prevFrame
     *            previous frame of the signal; used as left context when {@code existsLeftContexts} is false and this is
     *            not the first synthesis frame
     * @param currentFrame
     *            frame to synthesize
     * @param nextFrame
     *            next frame of the signal; used as right context when {@code existsRightContexts} is false and this is
     *            not the last synthesis frame
     * @param samplingRateInHz
     *            sampling rate used to convert the frame's analysis time into a sample index
     * @param isFirstSynthesisFrame
     *            true if this is the first frame, in which case a zero left context is used instead of
     *            {@code prevFrame}
     * @param isLastSynthesisFrame
     *            true if this is the last frame, in which case a zero right context is used instead of
     *            {@code nextFrame}
     * @param noisePartWaveform
     *            output buffer the windowed samples are added to; modified in place
     * @param wgts
     *            buffer of accumulated window weights, same indexing as {@code noisePartWaveform}; modified in place
     * @param existsLeftContexts
     *            true if {@code currentLeftContext} should be used as the left context
     * @param currentLeftContext
     *            explicit left context frame; may be null, which yields a zero left context
     * @param existsRightContexts
     *            true if {@code currentRightContext} should be used as the right context
     * @param currentRightContext
     *            explicit right context frame; may be null, which yields a zero right context
     */
    public static void processFrame(HntmSpeechFrame prevFrame, HntmSpeechFrame currentFrame, HntmSpeechFrame nextFrame,
            int samplingRateInHz, boolean isFirstSynthesisFrame, boolean isLastSynthesisFrame, double[] noisePartWaveform,
            double[] wgts, boolean existsLeftContexts, HntmSpeechFrame currentLeftContext, boolean existsRightContexts,
            HntmSpeechFrame currentRightContext) {
        // instanceof is false for null, so this also covers a missing noise part.
        if (currentFrame == null || !(currentFrame.n instanceof FrameNoisePartWaveform)) {
            return;
        }

        double[] frameWaveform = ((FrameNoisePartWaveform) currentFrame.n).waveform2Doubles();
        if (frameWaveform == null) {
            // Defensive: nothing to synthesize for this frame.
            return;
        }

        // Without explicit contexts the neighbouring frames of the signal act as context (i.e. the HNM signal is a
        // continuous one, not a concatenated one); at the signal boundaries a zero context is used instead.
        HntmSpeechFrame leftSource;
        if (existsLeftContexts) {
            leftSource = currentLeftContext;
        } else {
            leftSource = isFirstSynthesisFrame ? null : prevFrame;
        }

        HntmSpeechFrame rightSource;
        if (existsRightContexts) {
            rightSource = currentRightContext;
        } else {
            rightSource = isLastSynthesisFrame ? null : nextFrame;
        }

        double[] leftContextWaveform = noiseWaveformOrSilence(leftSource, frameWaveform.length);
        double[] rightContextWaveform = noiseWaveformOrSilence(rightSource, frameWaveform.length);

        // The extended frame starts one left-context length before the frame's own analysis instant;
        // this index may be negative for frames close to the beginning of the signal.
        int waveformNoiseStartInd = SignalProcUtils.time2sample(currentFrame.tAnalysisInSeconds, samplingRateInHz)
                - leftContextWaveform.length;

        double[] extendedWaveform = ArrayUtils.combine(ArrayUtils.combine(leftContextWaveform, frameWaveform),
                rightContextWaveform);
        if (extendedWaveform == null) {
            return;
        }

        Window w = new HammingWindow(extendedWaveform.length);
        double[] wgt = w.getCoeffs();

        // Clip the extended frame to the valid output range. Only output indices below zero are dropped; the part of
        // a frame that starts before the output buffer but reaches into it is still accumulated.
        int firstOutputIndex = Math.max(0, waveformNoiseStartInd);
        int endOutputIndex = Math.min(waveformNoiseStartInd + extendedWaveform.length, noisePartWaveform.length);
        for (int j = firstOutputIndex; j < endOutputIndex; j++) {
            int k = j - waveformNoiseStartInd; // index into the extended frame and its window
            noisePartWaveform[j] += extendedWaveform[k] * wgt[k];
            wgts[j] += wgt[k];
        }
    }

    /**
     * Returns the noise waveform stored in the given frame, or a zero-filled array of the given length when the frame is
     * null or does not carry a {@link FrameNoisePartWaveform} with waveform data.
     */
    private static double[] noiseWaveformOrSilence(HntmSpeechFrame frame, int silenceLength) {
        if (frame != null && frame.n instanceof FrameNoisePartWaveform) {
            double[] waveform = ((FrameNoisePartWaveform) frame.n).waveform2Doubles();
            if (waveform != null) {
                return waveform;
            }
        }
        return new double[silenceLength];
    }
}