/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* Permission is hereby granted, free of charge, to use and distribute
* this software and its documentation without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of this work, and to
* permit persons to whom this work is furnished to do so, subject to
* the following conditions:
*
* 1. The code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* 2. Any modifications must be clearly marked as such.
* 3. Original authors' names are not deleted.
* 4. The authors' names are not used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
* CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
* THIS SOFTWARE.
*/
package marytts.signalproc.sinusoidal.hntm.synthesis;
import java.util.Arrays;
import marytts.signalproc.sinusoidal.hntm.analysis.FrameNoisePartLpc;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmAnalyzerParams;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechSignal;
import marytts.signalproc.window.Window;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;
/**
*
* LPC based noise model for HNM using the overlap-add approach, gain normalization according to generated harmonic part gain, and
* optional triangular energy envelope weighting.
*
* Reference: Stylianou, Y., 1996, "Harmonic plus Noise Models for Speech, combined with Statistical Methods, for Speech and
* Speaker Modification", Ph.D. thesis, Ecole Nationale Supérieure des Télécommunications. (Chapter 3, A Harmonic plus Noise
* Model, HNM)
*
* @author oytun.turk
*
*/
public class NoisePartWindowedOverlapAddLpcSynthesizer {
public static double[] synthesize(HntmSpeechSignal hnmSignal, HntmAnalyzerParams analysisParams,
HntmSynthesizerParams synthesisParams) {
double[] noisePart = null;
int i;
boolean isPrevNoised, isNoised, isNextNoised;
boolean isVoiced, isNextVoiced;
int lpOrder = 0;
float t;
float tsi = 0.0f;
float tsiNext; // Time in seconds
int startIndex = 0;
int startIndexNext;
int outputLen = SignalProcUtils.time2sample(hnmSignal.originalDurationInSeconds, hnmSignal.samplingRateInHz);
for (i = 0; i < hnmSignal.frames.length; i++) {
isNoised = ((hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz) ? true : false);
if (isNoised && hnmSignal.frames[i].n != null && (hnmSignal.frames[i].n instanceof FrameNoisePartLpc)
&& ((FrameNoisePartLpc) hnmSignal.frames[i].n).lpCoeffs != null) {
lpOrder = ((FrameNoisePartLpc) hnmSignal.frames[i].n).lpCoeffs.length;
break;
}
}
float noiseWindowDurationInSeconds;
if (lpOrder > 0) // At least one noisy frame with LP coefficients exist
{
noisePart = new double[outputLen]; // In fact, this should be prosody scaled length when you implement prosody
// modifications
Arrays.fill(noisePart, 0.0);
double[] winWgtSum = new double[outputLen]; // In fact, this should be prosody scaled length when you implement
// prosody modifications
Arrays.fill(winWgtSum, 0.0);
Window winNoise;
int windowType = Window.HAMMING;
double[] x;
double[] xWindowed;
double[] y;
double[] yWindowed;
double[] yFiltered;
double[] wgt;
double[] yInitial = new double[lpOrder];
Arrays.fill(yInitial, 0.0); // Start with zero initial conditions
int n;
int fftSizeNoise = SignalProcUtils.getDFTSize(hnmSignal.samplingRateInHz);
int wsNoise = 0;
boolean isDisplay = false;
// Noise source of full length
double[] noiseSourceHpf = null;
// noiseSource = SignalProcUtils.getNoise(HnmAnalyzer.FIXED_MAX_FREQ_OF_VOICING_FOR_QUICK_TEST,
// 0.5f*hnmSignal.samplingRateInHz, HnmAnalyzer.HPF_TRANSITION_BANDWIDTH_IN_HZ, hnmSignal.samplingRateInHz,
// (int)(1.1*outputLen)); //Pink noise full signal length, works OK
/*
* if (HnmAnalyzer.FIXED_MAX_FREQ_OF_VOICING_FOR_QUICK_TEST<0.5*hnmSignal.samplingRateInHz) noiseSourceHpf =
* SignalProcUtils.getNoise(HnmAnalyzer.FIXED_MAX_FREQ_OF_VOICING_FOR_QUICK_TEST,
* HnmAnalyzer.FIXED_MAX_FREQ_OF_NOISE_FOR_QUICK_TEST, HnmAnalyzer.HPF_TRANSITION_BANDWIDTH_IN_HZ,
* hnmSignal.samplingRateInHz, (int)(1.1*outputLen)); //Pink noise full signal length, works OK if
* (noiseSourceHpf!=null) MathUtils.adjustMeanVariance(noiseSourceHpf, 0.0, 1.0); double[] noiseSourceFull =
* SignalProcUtils.getWhiteNoise((int)(1.1*outputLen), 1.0); //White noise full signal length, works OK
* MathUtils.adjustMeanVariance(noiseSourceFull, 0.0, 1.0);
*/
/*
* //Write the noise source to a wav file for checking AudioInputStream inputAudio = null; try { inputAudio =
* AudioSystem.getAudioInputStream(new File("d:\\hn.wav")); } catch (UnsupportedAudioFileException e) { // TODO
* Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block
* e.printStackTrace(); } DDSAudioInputStream outputAudio = new DDSAudioInputStream(new
* BufferedDoubleDataSource(noiseSource), inputAudio.getFormat()); try { AudioSystem.write(outputAudio,
* AudioFileFormat.Type.WAVE, new File("d:\\noiseSource.wav")); } catch (IOException e) { // TODO Auto-generated catch
* block e.printStackTrace(); }
*/
//
int transitionOverlapLen = SignalProcUtils.time2sample(synthesisParams.noiseSynthesisTransitionOverlapInSeconds,
hnmSignal.samplingRateInHz);
for (i = 0; i < hnmSignal.frames.length; i++) {
if (hnmSignal.frames[i].h != null && hnmSignal.frames[i].maximumFrequencyOfVoicingInHz > 0.0f)
isVoiced = true;
else
isVoiced = false;
if (i < hnmSignal.frames.length - 1 && hnmSignal.frames[i + 1].h != null
&& hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz > 0.0f)
isNextVoiced = true;
else
isNextVoiced = false;
if (hnmSignal.frames[i].n != null
&& hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz)
isNoised = true;
else
isNoised = false;
if (i < hnmSignal.frames.length - 1
&& hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz
&& hnmSignal.frames[i + 1].n != null)
isNextNoised = true;
else
isNextNoised = false;
if (i > 0 && hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz
&& hnmSignal.frames[i - 1].n != null)
isPrevNoised = true;
else
isPrevNoised = false;
if (i < hnmSignal.frames.length - 1 && isNextNoised)
noiseWindowDurationInSeconds = Math.max(synthesisParams.noiseSynthesisWindowDurationInSeconds,
2 * (hnmSignal.frames[i + 1].tAnalysisInSeconds - hnmSignal.frames[i].tAnalysisInSeconds));
else
noiseWindowDurationInSeconds = synthesisParams.noiseSynthesisWindowDurationInSeconds;
wsNoise = SignalProcUtils.time2sample(noiseWindowDurationInSeconds, hnmSignal.samplingRateInHz);
if (!isNextNoised)
wsNoise += transitionOverlapLen;
if (!isPrevNoised)
wsNoise += transitionOverlapLen;
if (wsNoise % 2 == 0) // Always use an odd window size to have a zero-phase analysis window
wsNoise++;
if (i == 0)
tsi = 0.0f;
else
tsi = Math.max(0.0f, hnmSignal.frames[i].tAnalysisInSeconds - 0.5f * noiseWindowDurationInSeconds);
// if (tsi>1.8 && tsi<1.82)
// System.out.println("Time=" + String.valueOf(tsi) + " " + (isPrevNoised?"+":"-") + (isNoised?"+":"-") +
// (isNextNoised?"+":"-"));
startIndex = SignalProcUtils.time2sample(tsi, hnmSignal.samplingRateInHz);
if (i < hnmSignal.frames.length - 1) {
tsiNext = Math.max(0.0f, hnmSignal.frames[i].tAnalysisInSeconds + 0.5f * noiseWindowDurationInSeconds);
startIndexNext = SignalProcUtils.time2sample(tsiNext, hnmSignal.samplingRateInHz);
} else {
startIndexNext = outputLen - 1;
tsiNext = SignalProcUtils.sample2time(startIndexNext, hnmSignal.samplingRateInHz);
}
if (isNoised && hnmSignal.frames[i].n != null && (hnmSignal.frames[i].n instanceof FrameNoisePartLpc)) {
// Compute window
winNoise = Window.get(windowType, wsNoise);
winNoise.normalizePeakValue(1.0f);
wgt = winNoise.getCoeffs();
//
x = SignalProcUtils.getWhiteNoiseOfVariance(wsNoise, 1.0); // Variance specified white noise
// x = SignalProcUtils.getWhiteNoise(wsNoise, 0.5); //Absolute value limited white noise
// double[] tmpNoise = SignalProcUtils.getNoise(hnmSignal.frames[i].maximumFrequencyOfVoicingInHz,
// 0.5f*hnmSignal.samplingRateInHz, 50.0, hnmSignal.samplingRateInHz, 5*wsNoise); //Pink noise
// x = new double[wsNoise];
// System.arraycopy(tmpNoise, 2*wsNoise, x, 0, wsNoise);
// x = SignalProcUtils.getNoise(hnmSignal.frames[i].maximumFrequencyOfVoicingInHz,
// 0.5*hnmSignal.samplingRateInHz, 100.0, hnmSignal.samplingRateInHz, wsNoise); //Pink noise
// x = SignalProcUtils.getWhiteNoise(wsNoise, 1.0f);
if (isNoised && hnmSignal.frames[i].n != null && (hnmSignal.frames[i].n instanceof FrameNoisePartLpc)
&& ((FrameNoisePartLpc) hnmSignal.frames[i].n).lpCoeffs != null) {
winNoise.apply(x, 0);
// y = SignalProcUtils.arFilter(x, ((FrameNoisePartLpc)hnmSignal.frames[i].n).lpCoeffs,
// ((FrameNoisePartLpc)hnmSignal.frames[i].n).gain);
y = SignalProcUtils.arFilter(x, ((FrameNoisePartLpc) hnmSignal.frames[i].n).lpCoeffs, 1.0);
if (synthesisParams.hpfAfterNoiseSynthesis)
y = SignalProcUtils.fdFilter(y, hnmSignal.frames[i].maximumFrequencyOfVoicingInHz,
0.5f * hnmSignal.samplingRateInHz, hnmSignal.samplingRateInHz, fftSizeNoise);
MathUtils.adjustStandardDeviation(y, ((FrameNoisePartLpc) hnmSignal.frames[i].n).origNoiseStd);
// Overlap-add
for (n = startIndex; n < Math.min(startIndex + wsNoise, noisePart.length); n++) {
noisePart[n] += y[n - startIndex] * wgt[n - startIndex] * wgt[n - startIndex];
winWgtSum[n] += wgt[n - startIndex] * wgt[n - startIndex] * wgt[n - startIndex];
}
}
}
System.out.println("LPC noise synthesis complete at " + String.valueOf(hnmSignal.frames[i].tAnalysisInSeconds)
+ "s. for frame " + String.valueOf(i + 1) + " of " + String.valueOf(hnmSignal.frames.length) + "..."
+ String.valueOf(startIndex) + "-" + String.valueOf(startIndex + wsNoise));
}
for (i = 0; i < winWgtSum.length; i++) {
if (winWgtSum[i] > 0.0) {
noisePart[i] /= winWgtSum[i];
if (Double.isNaN(noisePart[i]))
noisePart[i] = 0.0;
}
}
}
// Now, apply the triangular noise envelope for voiced parts
if (synthesisParams.applyTriangularNoiseEnvelopeForVoicedParts) {
double[] enEnv;
int enEnvLen;
tsiNext = 0;
int l1, lMid, l2;
for (i = 0; i < hnmSignal.frames.length; i++) {
isVoiced = ((hnmSignal.frames[i].maximumFrequencyOfVoicingInHz > 0.0f) ? true : false);
if (isVoiced) {
if (i == 0)
tsi = 0.0f;
else
tsi = hnmSignal.frames[i].tAnalysisInSeconds;
startIndex = SignalProcUtils.time2sample(tsi, hnmSignal.samplingRateInHz);
if (i < hnmSignal.frames.length - 1) {
tsiNext = Math.max(0.0f, hnmSignal.frames[i + 1].tAnalysisInSeconds);
startIndexNext = SignalProcUtils.time2sample(tsiNext, hnmSignal.samplingRateInHz);
} else {
startIndexNext = outputLen - 1;
tsiNext = SignalProcUtils.sample2time(startIndexNext, hnmSignal.samplingRateInHz);
}
enEnvLen = startIndexNext - startIndex + 1;
if (enEnvLen > 0) {
enEnv = new double[enEnvLen];
int n;
l1 = SignalProcUtils.time2sample(0.15 * (tsiNext - tsi), hnmSignal.samplingRateInHz);
l2 = SignalProcUtils.time2sample(0.85 * (tsiNext - tsi), hnmSignal.samplingRateInHz);
lMid = (int) Math.floor(0.5 * (l1 + l2) + 0.5);
for (n = 0; n < l1; n++)
enEnv[n] = synthesisParams.energyTriangleLowerValue;
for (n = l1; n < lMid; n++)
enEnv[n] = (n - l1)
* (synthesisParams.energyTriangleUpperValue - synthesisParams.energyTriangleLowerValue)
/ (lMid - l1) + synthesisParams.energyTriangleLowerValue;
for (n = lMid; n < l2; n++)
enEnv[n] = (n - lMid)
* (synthesisParams.energyTriangleLowerValue - synthesisParams.energyTriangleUpperValue)
/ (l2 - lMid) + synthesisParams.energyTriangleUpperValue;
for (n = l2; n < enEnvLen; n++)
enEnv[n] = synthesisParams.energyTriangleLowerValue;
for (n = startIndex; n <= Math.min(noisePart.length - 1, startIndexNext); n++)
noisePart[n] *= enEnv[n - startIndex];
}
}
}
}
if (analysisParams.preemphasisCoefNoise > 0.0f)
noisePart = SignalProcUtils.removePreemphasis(noisePart, analysisParams.preemphasisCoefNoise);
MathUtils.adjustMean(noisePart, 0.0);
return noisePart;
}
}