// NoisePartWaveformSynthesizer.java example
//
// Explorer
// marytts-master
/**
 * Copyright 2007 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 * 
 * Permission is hereby granted, free of charge, to use and distribute
 * this software and its documentation without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of this work, and to
 * permit persons to whom this work is furnished to do so, subject to
 * the following conditions:
 * 
 * 1. The code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 * 2. Any modifications must be clearly marked as such.
 * 3. Original authors' names are not deleted.
 * 4. The authors' names are not used to endorse or promote products
 *    derived from this software without specific prior written
 *    permission.
 *
 * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
 * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 * THIS SOFTWARE.
 */

/**
 * Copyright 2007 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 * 
 * Permission is hereby granted, free of charge, to use and distribute
 * this software and its documentation without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of this work, and to
 * permit persons to whom this work is furnished to do so, subject to
 * the following conditions:
 * 
 * 1. The code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 * 2. Any modifications must be clearly marked as such.
 * 3. Original authors' names are not deleted.
 * 4. The authors' names are not used to endorse or promote products
 *    derived from this software without specific prior written
 *    permission.
 *
 * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
 * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 * THIS SOFTWARE.
 */

package marytts.signalproc.sinusoidal.hntm.synthesis;

import java.util.Arrays;

import marytts.signalproc.sinusoidal.hntm.analysis.FrameNoisePartWaveform;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmAnalyzerParams;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechFrame;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechSignal;
import marytts.signalproc.window.HammingWindow;
import marytts.signalproc.window.Window;
import marytts.util.math.ArrayUtils;
import marytts.util.signal.SignalProcUtils;

/**
 * Synthesizes noise part waveform from non-overlapping chunks of data. This model is the most natural one since it involves no
 * noise models.
 * 
 * @author oytun.turk
 * 
 */
public class NoisePartWaveformSynthesizer {
	// TO DO: This should use overlap add since the noise waveform will not be a continuous waveform in TTS
	/**
	 * Builds the noise part of an HNM signal by accumulating the windowed noise waveform of every frame (together with its left
	 * and right context) into one output buffer and then dividing each sample by the sum of window weights it received.
	 * 
	 * @param hnmSignal
	 *            the analysed signal; its frames, duration and sampling rate are read
	 * @param leftContexts
	 *            per-frame left contexts, indexed like <code>hnmSignal.frames</code>; <code>null</code> means the previous
	 *            frame of the signal is used as left context (continuous, not concatenated signal)
	 * @param rightContexts
	 *            per-frame right contexts, indexed like <code>hnmSignal.frames</code>; <code>null</code> means the next frame
	 *            of the signal is used as right context
	 * @param analysisParams
	 *            not used by this implementation
	 * @return the synthesized noise waveform, or <code>null</code> if <code>hnmSignal</code> or its frames are
	 *         <code>null</code>
	 */
	public static double[] synthesize(HntmSpeechSignal hnmSignal, HntmSpeechFrame[] leftContexts,
			HntmSpeechFrame[] rightContexts, HntmAnalyzerParams analysisParams) {
		if (hnmSignal == null || hnmSignal.frames == null)
			return null;

		int outputLen = SignalProcUtils.time2sample(hnmSignal.originalDurationInSeconds, hnmSignal.samplingRateInHz);

		// Freshly allocated double arrays are already zero-filled
		double[] noisePartWaveform = new double[outputLen];
		double[] wgts = new double[outputLen];

		// These do not change from frame to frame, so decide once
		boolean existsLeftContexts = (leftContexts != null);
		boolean existsRightContexts = (rightContexts != null);

		int numFrames = hnmSignal.frames.length;
		int i;

		// TO DO: Overlap waveform noise case! See analysis code!
		for (i = 0; i < numFrames; i++) {
			boolean isFirstSynthesisFrame = (i == 0);
			boolean isLastSynthesisFrame = (i == numFrames - 1);

			HntmSpeechFrame prevFrame = isFirstSynthesisFrame ? null : hnmSignal.frames[i - 1];
			HntmSpeechFrame nextFrame = isLastSynthesisFrame ? null : hnmSignal.frames[i + 1];

			HntmSpeechFrame currentLeftContext = existsLeftContexts ? leftContexts[i] : null;
			HntmSpeechFrame currentRightContext = existsRightContexts ? rightContexts[i] : null;

			processFrame(prevFrame, hnmSignal.frames[i], nextFrame, hnmSignal.samplingRateInHz, isFirstSynthesisFrame,
					isLastSynthesisFrame, noisePartWaveform, wgts, existsLeftContexts, currentLeftContext,
					existsRightContexts, currentRightContext);
		}

		// Normalize by the accumulated window weights; samples that received (almost) no weight are left untouched
		for (i = 0; i < outputLen; i++) {
			if (wgts[i] > 1.0e-10)
				noisePartWaveform[i] /= wgts[i];
		}

		return noisePartWaveform;
	}

	/**
	 * Adds one frame's contribution to the output buffers. The frame's noise waveform is extended with a left and a right
	 * context waveform, multiplied by a Hamming window of the combined length, and accumulated into
	 * <code>noisePartWaveform</code>; the window coefficients themselves are accumulated into <code>wgts</code>. The combined
	 * block is positioned so that the frame's own samples start at the sample index obtained from
	 * <code>currentFrame.tAnalysisInSeconds</code>. Parts of the block that fall before index 0 or past the end of the output
	 * buffers are dropped.
	 * <p>
	 * Nothing is done if <code>currentFrame</code> is <code>null</code> or its noise part is not a
	 * {@link FrameNoisePartWaveform}. A context that is missing, or whose noise part is not a {@link FrameNoisePartWaveform},
	 * is replaced by a block of zeros as long as the current frame's waveform.
	 * 
	 * @param prevFrame
	 *            frame preceding <code>currentFrame</code>; used as left context when <code>existsLeftContexts</code> is
	 *            false and this is not the first synthesis frame
	 * @param currentFrame
	 *            frame to synthesize
	 * @param nextFrame
	 *            frame following <code>currentFrame</code>; used as right context when <code>existsRightContexts</code> is
	 *            false and this is not the last synthesis frame
	 * @param samplingRateInHz
	 *            sampling rate used to convert the frame's analysis time into a sample index
	 * @param isFirstSynthesisFrame
	 *            true if there is no previous frame to borrow a left context from
	 * @param isLastSynthesisFrame
	 *            true if there is no next frame to borrow a right context from
	 * @param noisePartWaveform
	 *            output buffer receiving the windowed samples (modified in place)
	 * @param wgts
	 *            output buffer receiving the window weights (modified in place)
	 * @param existsLeftContexts
	 *            true if explicit left contexts were supplied, in which case <code>currentLeftContext</code> is used
	 * @param currentLeftContext
	 *            explicit left context of this frame, may be <code>null</code>
	 * @param existsRightContexts
	 *            true if explicit right contexts were supplied, in which case <code>currentRightContext</code> is used
	 * @param currentRightContext
	 *            explicit right context of this frame, may be <code>null</code>
	 */
	public static void processFrame(HntmSpeechFrame prevFrame, HntmSpeechFrame currentFrame, HntmSpeechFrame nextFrame,
			int samplingRateInHz, boolean isFirstSynthesisFrame, boolean isLastSynthesisFrame, double[] noisePartWaveform,
			double[] wgts, boolean existsLeftContexts, HntmSpeechFrame currentLeftContext, boolean existsRightContexts,
			HntmSpeechFrame currentRightContext) {
		// instanceof is false for null, so this also covers a missing noise part
		if (currentFrame == null || !(currentFrame.n instanceof FrameNoisePartWaveform))
			return;

		double[] frameWaveform = ((FrameNoisePartWaveform) currentFrame.n).waveform2Doubles();
		int silenceLen = (frameWaveform != null) ? frameWaveform.length : 0;

		// Without explicit contexts the HNM signal is a continuous one (not a concatenated one), so the neighbouring
		// frames of the signal itself serve as contexts
		HntmSpeechFrame leftSource;
		if (existsLeftContexts)
			leftSource = currentLeftContext;
		else
			leftSource = isFirstSynthesisFrame ? null : prevFrame;

		HntmSpeechFrame rightSource;
		if (existsRightContexts)
			rightSource = currentRightContext;
		else
			rightSource = isLastSynthesisFrame ? null : nextFrame;

		double[] leftContextWaveform = contextWaveformOrSilence(leftSource, silenceLen);
		double[] rightContextWaveform = contextWaveformOrSilence(rightSource, silenceLen);

		// The combined block starts one left-context length before the frame's own position
		int waveformNoiseStartInd = SignalProcUtils.time2sample(currentFrame.tAnalysisInSeconds, samplingRateInHz);
		waveformNoiseStartInd -= leftContextWaveform.length;

		frameWaveform = ArrayUtils.combine(leftContextWaveform, frameWaveform);
		frameWaveform = ArrayUtils.combine(frameWaveform, rightContextWaveform);

		// NOTE(review): combine presumably may return null when there is nothing to combine - keep the guard
		if (frameWaveform == null)
			return;

		Window w = new HammingWindow(frameWaveform.length);
		double[] wgt = w.getCoeffs();

		// Clip the block to the valid range of the output buffers. The lower bound must be tested on the output index
		// itself: a block starting before sample 0 still contributes to samples 0, 1, ...
		int from = Math.max(waveformNoiseStartInd, 0);
		int to = Math.min(waveformNoiseStartInd + frameWaveform.length, Math.min(noisePartWaveform.length, wgts.length));
		for (int j = from; j < to; j++) {
			int k = j - waveformNoiseStartInd; // position inside the combined block
			noisePartWaveform[j] += frameWaveform[k] * wgt[k];
			wgts[j] += wgt[k];
		}
	}

	// Returns the noise waveform carried by the given context frame, or silenceLen zeros if the frame is null, carries no
	// waveform-type noise part, or yields no waveform.
	private static double[] contextWaveformOrSilence(HntmSpeechFrame contextFrame, int silenceLen) {
		double[] contextWaveform = null;
		if (contextFrame != null && contextFrame.n instanceof FrameNoisePartWaveform)
			contextWaveform = ((FrameNoisePartWaveform) contextFrame.n).waveform2Doubles();

		if (contextWaveform == null)
			contextWaveform = new double[silenceLen];

		return contextWaveform;
	}
}