/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* Permission is hereby granted, free of charge, to use and distribute
* this software and its documentation without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of this work, and to
* permit persons to whom this work is furnished to do so, subject to
* the following conditions:
*
* 1. The code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* 2. Any modifications must be clearly marked as such.
* 3. Original authors' names are not deleted.
* 4. The authors' names are not used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
* CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
* THIS SOFTWARE.
*/
package marytts.signalproc.sinusoidal.hntm.synthesis;
import java.util.Arrays;
import marytts.signalproc.sinusoidal.hntm.analysis.FrameNoisePartLpc;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmAnalyzerParams;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechSignal;
import marytts.signalproc.window.Window;
import marytts.util.math.ArrayUtils;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;
/**
* A time-domain LP synthesis filter based version of the HNM noise part synthesis algorithm described in:
*
* Reference: Stylianou, Y., 1996, "Harmonic plus Noise Models for Speech, combined with Statistical Methods, for Speech and
* Speaker Modification", Ph.D. thesis, Ecole Nationale Supérieure des Télécommunications. (Chapter 3, A Harmonic plus Noise
* Model, HNM)
*
* Supports optional triangular energy envelope weighting.
*
* @author oytun.turk
*
*/
public class NoisePartLpFilterPostHpfLpcSynthesizer {
// LPC based noise model + OLA approach + Gain normalization according to generated harmonic part gain
public static double[] synthesize(HntmSpeechSignal hnmSignal, HntmAnalyzerParams analysisParams,
HntmSynthesizerParams synthesisParams) {
double[] noisePart = null;
double[] noisePart2 = null;
double[] weights = null;
boolean isPrevNoised, isNoised, isNextNoised;
boolean isVoiced, isNextVoiced;
float tsi = 0.0f;
float tsiNext; // Time in seconds
int i, n, j;
int startIndex = 0;
int startIndexNext;
int outputLen = SignalProcUtils.time2sample(hnmSignal.originalDurationInSeconds, hnmSignal.samplingRateInHz);
int lpOrder = 0;
double[] excitation = MathUtils.random(outputLen, -0.5, 0.5);
int fftSizeNoise = SignalProcUtils.getDFTSize(hnmSignal.samplingRateInHz);
for (i = 0; i < hnmSignal.frames.length; i++) {
isNoised = ((hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz) ? true : false);
if (isNoised && hnmSignal.frames[i].n != null && (hnmSignal.frames[i].n instanceof FrameNoisePartLpc)
&& ((FrameNoisePartLpc) hnmSignal.frames[i].n).lpCoeffs != null) {
lpOrder = ((FrameNoisePartLpc) hnmSignal.frames[i].n).lpCoeffs.length;
break;
}
}
if (lpOrder > 0) // At least one noisy frame with LP coefficients exist
{
noisePart = new double[outputLen]; // In fact, this should be prosody scaled length when you implement prosody
// modifications
Arrays.fill(noisePart, 0.0);
noisePart2 = new double[outputLen]; // In fact, this should be prosody scaled length when you implement prosody
// modifications
Arrays.fill(noisePart2, 0.0);
weights = new double[outputLen]; // In fact, this should be prosody scaled length when you implement prosody
// modifications
Arrays.fill(weights, 0.0);
boolean bFirst = true;
int pmInd = 0;
int pmIndNext;
int start = 0;
double[] tmpy = null;
double[] tmpalpha = null;
double tmp;
int count;
for (i = 0; i < hnmSignal.frames.length; i++) {
pmInd = SignalProcUtils.time2sample(hnmSignal.frames[i].tAnalysisInSeconds, hnmSignal.samplingRateInHz);
if (i < hnmSignal.frames.length - 1)
pmIndNext = SignalProcUtils.time2sample(hnmSignal.frames[i + 1].tAnalysisInSeconds,
hnmSignal.samplingRateInHz);
else
pmIndNext = outputLen - 1;
start = pmInd;
isNoised = ((hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz) ? true
: false);
if (isNoised && hnmSignal.frames[i].n != null && (hnmSignal.frames[i].n instanceof FrameNoisePartLpc)
&& ((FrameNoisePartLpc) hnmSignal.frames[i].n).lpCoeffs != null) {
if (i < hnmSignal.frames.length - 1) {
for (n = 0; n <= pmIndNext - pmInd; n++) {
tmpy = new double[Math.max(start - 1, 0) - Math.max(start - lpOrder, 0) + 1];
count = 0;
for (j = Math.max(start - 1, 0); j >= Math.max(start - lpOrder, 0); j--)
tmpy[count++] = noisePart[j];
tmpalpha = new double[tmpy.length];
for (j = 0; j < tmpy.length; j++)
tmpalpha[j] = ((FrameNoisePartLpc) hnmSignal.frames[i].n).lpCoeffs[j];
tmp = 0.0;
for (j = 0; j < tmpalpha.length; j++)
tmp += tmpalpha[j] * tmpy[j];
if (start >= outputLen)
break;
noisePart[start] = ((FrameNoisePartLpc) hnmSignal.frames[i].n).lpGain * excitation[start] + tmp;
start++;
}
} else // for the last frame
{
for (n = 0; n < outputLen - pmInd; n++) {
tmpy = new double[Math.max(start - 1, 0) - Math.max(start - lpOrder, 0) + 1];
count = 0;
for (j = Math.max(start - 1, 0); j >= Math.max(start - lpOrder, 0); j--)
tmpy[count++] = noisePart[j];
tmpalpha = new double[tmpy.length];
for (j = 0; j < tmpy.length; j++)
tmpalpha[j] = ((FrameNoisePartLpc) hnmSignal.frames[i].n).lpCoeffs[j];
tmp = 0.0;
for (j = 0; j < tmpalpha.length; j++)
tmp += tmpalpha[j] * tmpy[j];
if (start >= outputLen)
break;
noisePart[start] = ((FrameNoisePartLpc) hnmSignal.frames[i].n).lpGain * excitation[start] + tmp;
start++;
}
}
}
pmInd = pmIndNext;
}
if (analysisParams.preemphasisCoefNoise > 0.0f)
noisePart = SignalProcUtils.removePreemphasis(noisePart, analysisParams.preemphasisCoefNoise);
MathUtils.adjustMean(noisePart, 0.0);
int startInd = 0;
int endInd;
for (i = 0; i < hnmSignal.frames.length - 2; i++) {
pmInd = SignalProcUtils.time2sample(hnmSignal.frames[i].tAnalysisInSeconds, hnmSignal.samplingRateInHz);
if (i <= hnmSignal.frames.length - 3)
pmIndNext = SignalProcUtils.time2sample(hnmSignal.frames[i + 2].tAnalysisInSeconds,
hnmSignal.samplingRateInHz);
else
pmIndNext = outputLen - 1;
start = pmInd;
startInd = start;
isPrevNoised = false;
if (i > 0)
isPrevNoised = ((hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz) ? true
: false);
isNoised = ((hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz) ? true
: false);
isNextNoised = false;
if (i < hnmSignal.frames.length - 1)
isNextNoised = ((hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz) ? true
: false);
if (isNoised && hnmSignal.frames[i].n != null && (hnmSignal.frames[i].n instanceof FrameNoisePartLpc)
&& ((FrameNoisePartLpc) hnmSignal.frames[i].n).lpCoeffs != null) {
endInd = Math.min(pmIndNext, outputLen - 1);
double[] tmpFrm = ArrayUtils.subarray(noisePart, startInd, endInd - startInd + 1);
if (synthesisParams.hpfAfterNoiseSynthesis
&& hnmSignal.frames[i].maximumFrequencyOfVoicingInHz
- analysisParams.overlapBetweenHarmonicAndNoiseRegionsInHz > 0.0f)
tmpFrm = SignalProcUtils.fdFilter(tmpFrm, hnmSignal.frames[i].maximumFrequencyOfVoicingInHz
- analysisParams.overlapBetweenHarmonicAndNoiseRegionsInHz, 0.5f * hnmSignal.samplingRateInHz,
hnmSignal.samplingRateInHz, fftSizeNoise);
tmpFrm = SignalProcUtils.normalizeAverageSampleEnergy(tmpFrm,
((FrameNoisePartLpc) hnmSignal.frames[i].n).origAverageSampleEnergy);
Window winNoise = Window.get(analysisParams.noiseAnalysisWindowType, endInd - startInd + 1);
winNoise.normalizePeakValue(1.0f);
double[] wgtNoise = winNoise.getCoeffs();
if (!isPrevNoised) {
int halfLen = (int) Math.floor(0.5 * tmpFrm.length + 0.5);
for (j = 0; j < halfLen; j++)
noisePart2[startInd + j] += tmpFrm[j] * wgtNoise[j];
for (j = halfLen + 1; j < tmpFrm.length; j++) {
noisePart2[startInd + j] += tmpFrm[j] * wgtNoise[j];
weights[startInd + j] += wgtNoise[j];
}
} else if (!isNextNoised) {
int halfLen = (int) Math.floor(0.5 * tmpFrm.length + 0.5);
for (j = 0; j < halfLen; j++) {
noisePart2[startInd + j] += tmpFrm[j] * wgtNoise[j];
weights[startInd + j] += wgtNoise[j];
}
for (j = halfLen + 1; j < tmpFrm.length; j++)
noisePart2[startInd + j] += tmpFrm[j] * wgtNoise[j];
} else {
for (j = 0; j < tmpFrm.length; j++) {
noisePart2[startInd + j] += tmpFrm[j] * wgtNoise[j];
weights[startInd + j] += wgtNoise[j];
}
}
}
pmInd = pmIndNext;
}
for (i = 0; i < outputLen; i++) {
if (weights[i] > 1e-20)
noisePart2[i] /= weights[i];
}
System.arraycopy(noisePart2, 0, noisePart, 0, outputLen);
// Now, apply the triangular noise envelope for voiced parts
if (synthesisParams.applyTriangularNoiseEnvelopeForVoicedParts) {
double[] enEnv;
int enEnvLen;
tsiNext = 0;
int l1, lMid, l2;
for (i = 0; i < hnmSignal.frames.length; i++) {
isVoiced = ((hnmSignal.frames[i].maximumFrequencyOfVoicingInHz > 0.0f) ? true : false);
if (isVoiced) {
if (i == 0)
tsi = 0.0f;
else
tsi = hnmSignal.frames[i].tAnalysisInSeconds;
startIndex = SignalProcUtils.time2sample(tsi, hnmSignal.samplingRateInHz);
if (i < hnmSignal.frames.length - 1) {
tsiNext = Math.max(0.0f, hnmSignal.frames[i + 1].tAnalysisInSeconds);
startIndexNext = SignalProcUtils.time2sample(tsiNext, hnmSignal.samplingRateInHz);
} else {
startIndexNext = outputLen - 1;
tsiNext = SignalProcUtils.sample2time(startIndexNext, hnmSignal.samplingRateInHz);
}
enEnvLen = startIndexNext - startIndex + 1;
if (enEnvLen > 0) {
enEnv = new double[enEnvLen];
l1 = SignalProcUtils.time2sample(0.15 * (tsiNext - tsi), hnmSignal.samplingRateInHz);
l2 = SignalProcUtils.time2sample(0.85 * (tsiNext - tsi), hnmSignal.samplingRateInHz);
lMid = (int) Math.floor(0.5 * (l1 + l2) + 0.5);
for (n = 0; n < l1; n++)
enEnv[n] = synthesisParams.energyTriangleLowerValue;
for (n = l1; n < lMid; n++)
enEnv[n] = (n - l1)
* (synthesisParams.energyTriangleUpperValue - synthesisParams.energyTriangleLowerValue)
/ (lMid - l1) + synthesisParams.energyTriangleLowerValue;
for (n = lMid; n < l2; n++)
enEnv[n] = (n - lMid)
* (synthesisParams.energyTriangleLowerValue - synthesisParams.energyTriangleUpperValue)
/ (l2 - lMid) + synthesisParams.energyTriangleUpperValue;
for (n = l2; n < enEnvLen; n++)
enEnv[n] = synthesisParams.energyTriangleLowerValue;
for (n = startIndex; n <= Math.min(noisePart.length - 1, startIndexNext); n++)
noisePart[n] *= enEnv[n - startIndex];
}
}
}
}
}
// MaryUtils.plot(noisePart);
return noisePart;
}
}