/** * Copyright 2007 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * Permission is hereby granted, free of charge, to use and distribute * this software and its documentation without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of this work, and to * permit persons to whom this work is furnished to do so, subject to * the following conditions: * * 1. The code must retain the above copyright notice, this list of * conditions and the following disclaimer. * 2. Any modifications must be clearly marked as such. * 3. Original authors' names are not deleted. * 4. The authors' names are not used to endorse or promote products * derived from this software without specific prior written * permission. * * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF * THIS SOFTWARE. */ package marytts.signalproc.sinusoidal.hntm.analysis; import java.util.Arrays; import marytts.signalproc.sinusoidal.hntm.synthesis.HntmSynthesizerParams; import marytts.signalproc.window.Window; import marytts.util.math.ArrayUtils; import marytts.util.signal.SignalProcUtils; /** * @author oytun.turk * */ public class HntmAnalyzerNoisePartWaveformSynthesizer { // LPC based noise model + OLA approach + Gain normalization according to generated harmonic part gain public static double[] synthesize(HntmSpeechSignal hnmSignal, double[][] frameWaveforms, HntmAnalyzerParams analysisParams, HntmSynthesizerParams synthesisParams) { double[] noisePart = null; int i; boolean isPrevNoised, isNoised, isNextNoised; boolean isVoiced, isNextVoiced; float t; float tsi = 0.0f; int startIndex = 0; int outputLen = SignalProcUtils.time2sample(hnmSignal.originalDurationInSeconds, hnmSignal.samplingRateInHz); isNoised = false; for (i = 0; i < hnmSignal.frames.length; i++) { if (hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz && frameWaveforms[i] != null) { isNoised = true; break; } } if (isNoised) // At least one noisy frame with LP coefficients exist { noisePart = new double[outputLen]; // In fact, this should be prosody scaled length when you implement prosody // modifications Arrays.fill(noisePart, 0.0); double[] winWgtSum = new double[outputLen]; // In fact, this should be prosody scaled length when you implement // prosody modifications Arrays.fill(winWgtSum, 0.0); Window winNoise; int windowType = Window.HAMMING; double[] x; double[] xWindowed; double[] y; double[] yWindowed; double[] yFiltered; double[] wgt; int n; int fftSizeNoise = SignalProcUtils.getDFTSize(hnmSignal.samplingRateInHz); boolean isDisplay = false; // Noise source of full length double[] noiseSourceHpf = null; int transitionOverlapLen = SignalProcUtils.time2sample(synthesisParams.noiseSynthesisTransitionOverlapInSeconds, hnmSignal.samplingRateInHz); int wsNoise = SignalProcUtils.time2sample(analysisParams.noiseAnalysisWindowDurationInSeconds, hnmSignal.samplingRateInHz); if (wsNoise % 2 == 1) wsNoise++; int halfWsNoise = wsNoise / 2; y = new double[wsNoise]; // Compute window winNoise = Window.get(windowType, wsNoise); winNoise.normalizePeakValue(1.0f); wgt = winNoise.getCoeffs(); // for (i = 0; i < hnmSignal.frames.length; i++) { if (hnmSignal.frames[i].h != null && hnmSignal.frames[i].maximumFrequencyOfVoicingInHz > 0.0f) isVoiced = true; else isVoiced = false; if (i < hnmSignal.frames.length - 1 && hnmSignal.frames[i + 1].h != null && hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz > 0.0f) isNextVoiced = true; else isNextVoiced = false; if (i > 0 && hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz && frameWaveforms[i - 1] != null) isPrevNoised = true; else isPrevNoised = false; if (frameWaveforms[i] != null && hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz) isNoised = true; else isNoised = false; if (i < hnmSignal.frames.length - 1 && hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz < 0.5f * hnmSignal.samplingRateInHz && frameWaveforms[i + 1] != null) isNextNoised = true; else isNextNoised = false; if (i == 0) tsi = 0.0f; else tsi = Math.max(0.0f, hnmSignal.frames[i].tAnalysisInSeconds - 0.5f * analysisParams.noiseAnalysisWindowDurationInSeconds); startIndex = SignalProcUtils.time2sample(tsi, hnmSignal.samplingRateInHz); if (isNoised) { if (frameWaveforms[i] != null) { if (analysisParams.overlapNoiseWaveformModel) { y = ArrayUtils.copy(frameWaveforms[i]); if (isVoiced && analysisParams.hpfBeforeNoiseAnalysis && analysisParams.decimateNoiseWaveform) y = SignalProcUtils.interpolate(y, (0.5 * hnmSignal.samplingRateInHz) / (0.5 * hnmSignal.samplingRateInHz - hnmSignal.frames[i].maximumFrequencyOfVoicingInHz)); } else { // Fill left half boolean isTmpVoiced; int currentFrameInd = i; int count = 0; Arrays.fill(y, 0.0); double[] temp = ArrayUtils.copy(frameWaveforms[currentFrameInd]); if (isVoiced && analysisParams.hpfBeforeNoiseAnalysis && analysisParams.decimateNoiseWaveform) temp = SignalProcUtils .interpolate( temp, (0.5 * hnmSignal.samplingRateInHz) / (0.5 * hnmSignal.samplingRateInHz - hnmSignal.frames[currentFrameInd].maximumFrequencyOfVoicingInHz)); int count2 = temp.length - 1; while (count < halfWsNoise) { if (count2 < 0) { currentFrameInd--; if (currentFrameInd < 0 || hnmSignal.frames[currentFrameInd].n == null) break; temp = ArrayUtils.copy(frameWaveforms[currentFrameInd]); isTmpVoiced = false; if (hnmSignal.frames[currentFrameInd].h != null && hnmSignal.frames[currentFrameInd].maximumFrequencyOfVoicingInHz > 0.0f) isTmpVoiced = true; if (isTmpVoiced && analysisParams.hpfBeforeNoiseAnalysis && analysisParams.decimateNoiseWaveform) temp = SignalProcUtils .interpolate( temp, (0.5 * hnmSignal.samplingRateInHz) / (0.5 * hnmSignal.samplingRateInHz - hnmSignal.frames[currentFrameInd].maximumFrequencyOfVoicingInHz)); count2 = temp.length - 1; } y[halfWsNoise - count - 1] = temp[count2]; count++; count2--; } // // Fill right half currentFrameInd = i + 1; count = halfWsNoise; if (currentFrameInd < hnmSignal.frames.length && hnmSignal.frames[currentFrameInd].n != null) { temp = ((FrameNoisePartWaveform) hnmSignal.frames[currentFrameInd].n).waveform2Doubles(); isTmpVoiced = false; if (hnmSignal.frames[currentFrameInd].h != null && hnmSignal.frames[currentFrameInd].maximumFrequencyOfVoicingInHz > 0.0f) isTmpVoiced = true; if (isTmpVoiced && analysisParams.hpfBeforeNoiseAnalysis && analysisParams.decimateNoiseWaveform) temp = SignalProcUtils .interpolate( temp, (0.5 * hnmSignal.samplingRateInHz) / (0.5 * hnmSignal.samplingRateInHz - hnmSignal.frames[currentFrameInd].maximumFrequencyOfVoicingInHz)); count2 = 0; while (count < wsNoise) { if (count2 >= temp.length) { currentFrameInd++; if (currentFrameInd > hnmSignal.frames.length - 1 || hnmSignal.frames[currentFrameInd].n == null) break; temp = ((FrameNoisePartWaveform) hnmSignal.frames[currentFrameInd].n).waveform2Doubles(); isTmpVoiced = false; if (hnmSignal.frames[currentFrameInd].h != null && hnmSignal.frames[currentFrameInd].maximumFrequencyOfVoicingInHz > 0.0f) isTmpVoiced = true; if (isTmpVoiced && analysisParams.hpfBeforeNoiseAnalysis && analysisParams.decimateNoiseWaveform) temp = SignalProcUtils .interpolate( temp, (0.5 * hnmSignal.samplingRateInHz) / (0.5 * hnmSignal.samplingRateInHz - hnmSignal.frames[currentFrameInd].maximumFrequencyOfVoicingInHz)); count2 = 0; } y[count] = temp[count2]; count++; count2++; } } } // if (!synthesisParams.hpfAfterNoiseSynthesis) y = SignalProcUtils.fdFilter(y, hnmSignal.frames[i].maximumFrequencyOfVoicingInHz, 0.5f * hnmSignal.samplingRateInHz, hnmSignal.samplingRateInHz, fftSizeNoise); winNoise = Window.get(windowType, y.length); winNoise.normalizePeakValue(1.0f); wgt = winNoise.getCoeffs(); // Overlap-add for (n = startIndex; n < Math.min(startIndex + y.length, noisePart.length); n++) { noisePart[n] += y[n - startIndex] * wgt[n - startIndex]; winWgtSum[n] += wgt[n - startIndex]; } } // } if (!analysisParams.isSilentAnalysis) System.out.println("Waveform noise synthesis for analysis complete at " + String.valueOf(hnmSignal.frames[i].tAnalysisInSeconds) + "s. for frame " + String.valueOf(i + 1) + " of " + String.valueOf(hnmSignal.frames.length) + "..."); } for (i = 0; i < winWgtSum.length; i++) { if (winWgtSum[i] > 0.0) noisePart[i] /= winWgtSum[i]; } } if (analysisParams.preemphasisCoefNoise > 0.0f) noisePart = SignalProcUtils.removePreemphasis(noisePart, analysisParams.preemphasisCoefNoise); // MathUtils.adjustMean(noisePart, 0.0); return noisePart; } }