/** * Copyright 2007 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * Permission is hereby granted, free of charge, to use and distribute * this software and its documentation without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of this work, and to * permit persons to whom this work is furnished to do so, subject to * the following conditions: * * 1. The code must retain the above copyright notice, this list of * conditions and the following disclaimer. * 2. Any modifications must be clearly marked as such. * 3. Original authors' names are not deleted. * 4. The authors' names are not used to endorse or promote products * derived from this software without specific prior written * permission. * * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF * THIS SOFTWARE. 
*/ package marytts.signalproc.sinusoidal.hntm.synthesis.hybrid; import marytts.signalproc.analysis.RegularizedCepstrumEstimator; import marytts.signalproc.analysis.RegularizedPostWarpedCepstrumEstimator; import marytts.signalproc.analysis.RegularizedPreWarpedCepstrumEstimator; import marytts.signalproc.sinusoidal.Sinusoid; import marytts.signalproc.sinusoidal.SinusoidalAnalysisParams; import marytts.signalproc.sinusoidal.SinusoidalTrack; import marytts.signalproc.sinusoidal.SinusoidalTracks; import marytts.signalproc.sinusoidal.TrackGenerator; import marytts.signalproc.sinusoidal.hntm.analysis.HntmAnalyzerParams; import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechSignal; import marytts.util.math.MathUtils; import marytts.util.signal.SignalProcUtils; /** * This class converts harmonics as obtained by HNTM analysis to sinusoidal tracks that can be used by a pure sinusoidal * synthesizer. The aim of this approach is to find out the reason for the quality decrease in HNTM synthesis in the absence of * prosody modifications. If the analyzed values are fine and pure sinusoidal synthesis generates a clean signal, we have a * problem in HNTM harmonic synthesis. If not, we will need to look at HNTM analysis using a frequency domain approach as in the pure * sinusoidal model. * * Note that all frequency values should be in radians in the sinusoidal tracks whereas HNTM uses frequency values in Hz. * The "convert" function handles this conversion as well. 
*
* @author oytun.turk
*
*/
public class HarmonicsToTrackConverter {

    /**
     * Builds sinusoidal tracks from the harmonic parts of an HNTM-analysed signal.
     * <p>
     * Frames are processed in order. The first frame that carries harmonics starts one track per harmonic. For every
     * later frame with harmonics, each harmonic is matched to the existing track whose most recent frequency is
     * closest, provided the distance is below {@code SinusoidalAnalysisParams.DEFAULT_DELTA_IN_HZ} (converted to
     * radians). Tracks that receive no harmonic are turned off, and harmonics that match no track start new tracks.
     * Frames without harmonics are skipped, except that the last frame always triggers the final turn-off pass. Harmonic
     * frequencies are converted from Hz to radians on the way.
     * <p>
     * A progress line is printed to {@code System.out} for every frame that carries harmonics.
     *
     * @param hntmSignal
     *            the analysed signal; its {@code frames}, {@code samplingRateInHz} and
     *            {@code originalDurationInSeconds} are read
     * @param analysisParams
     *            analysis parameters; {@code useHarmonicAmplitudesDirectly} selects whether amplitudes are taken from
     *            the complex harmonic amplitudes or re-synthesised from the regularized cepstrum
     * @return the post-processed tracks, or {@code null} if the signal has no frames or none of its frames carries
     *         harmonics (previously this case ended in a {@code NullPointerException})
     */
    public static SinusoidalTracks convert(HntmSpeechSignal hntmSignal, HntmAnalyzerParams analysisParams) {
        int numFrames = hntmSignal.frames.length;
        float deltaInRadians = SignalProcUtils.hz2radian(SinusoidalAnalysisParams.DEFAULT_DELTA_IN_HZ,
                hntmSignal.samplingRateInHz);

        SinusoidalTracks tr = null;
        float[] currentCeps = null;
        int i, j, k;

        for (i = 0; i < numFrames; i++) {
            if (hntmSignal.frames[i].h.complexAmps != null && hntmSignal.frames[i].h.complexAmps.length > 0) {
                int numHarmonics = hntmSignal.frames[i].h.complexAmps.length;

                // The cepstrum is only needed when amplitudes are not taken directly from the harmonics
                if (!analysisParams.useHarmonicAmplitudesDirectly)
                    currentCeps = hntmSignal.frames[i].h.getCeps(hntmSignal.frames[i].f0InHz,
                            hntmSignal.samplingRateInHz, analysisParams);

                if (tr == null) {
                    // No tracks yet: every harmonic of this frame starts its own track
                    tr = new SinusoidalTracks(numHarmonics, hntmSignal.samplingRateInHz);
                    tr.setSysAmpsAndTimes(hntmSignal, analysisParams);

                    for (j = 0; j < numHarmonics; j++)
                        startNewTrack(tr, hntmSignal, analysisParams, i, j, currentCeps);
                } else {
                    // There are tracks already. First look for "continuations": a harmonic continues a track if it
                    // lies within +-deltaInRadians of the track's most recent frequency.
                    // Tracks that do not continue are "turned off".
                    // Harmonics of the current frame that are not assigned to any track are "births" of new tracks.
                    for (j = 0; j < tr.currentIndex + 1; j++) {
                        if (tr.tracks[j] != null)
                            tr.tracks[j].resetCandidate();
                    }

                    boolean[] bSinAssigneds = new boolean[numHarmonics];

                    // Continuations:
                    for (k = 0; k < numHarmonics; k++) {
                        // Invariant over the track loop below, so compute it once per harmonic
                        float freqInRadians = harmonicFreqInRadians(hntmSignal, i, k);

                        float minDist = Math.abs(freqInRadians - tr.tracks[0].freqs[tr.tracks[0].currentIndex]);
                        int trackInd = (minDist < deltaInRadians) ? 0 : -1;

                        for (j = 1; j < tr.currentIndex + 1; j++) {
                            float tmpDist = Math.abs(freqInRadians - tr.tracks[j].freqs[tr.tracks[j].currentIndex]);
                            if (tmpDist < deltaInRadians && (trackInd == -1 || tmpDist < minDist)) {
                                minDist = tmpDist;
                                trackInd = j;
                            }
                        }

                        if (trackInd > -1) {
                            // If an earlier harmonic of this frame already claimed the track, release it so that it
                            // is born as a new track below. The later harmonic always wins here; no distance
                            // comparison between the two candidates is made.
                            if (tr.tracks[trackInd].newCandidateInd > -1)
                                bSinAssigneds[tr.tracks[trackInd].newCandidateInd] = false;

                            Sinusoid sin = newHarmonicSinusoid(hntmSignal, analysisParams, i, k, currentCeps);
                            tr.tracks[trackInd].newCandidate = new Sinusoid(sin);
                            tr.tracks[trackInd].newCandidateInd = k;
                            bSinAssigneds[k] = true;
                        } else {
                            // Matches no existing track: this harmonic is the birth of a new track
                            bSinAssigneds[k] = false;
                        }
                    }

                    // Here is the actual assignment of sinusoids to existing tracks
                    for (j = 0; j < tr.currentIndex + 1; j++) {
                        if (tr.tracks[j].newCandidate != null) {
                            Sinusoid tmpSin = new Sinusoid(tr.tracks[j].newCandidate);

                            // A track that is not currently active is turned on again with a zero amplitude
                            // sinusoid placed slightly before the current analysis instant
                            if (tr.tracks[j].states[tr.tracks[j].currentIndex] != SinusoidalTrack.ACTIVE) {
                                Sinusoid zeroAmpSin = new Sinusoid(0.0f, tr.tracks[j].freqs[tr.tracks[j].totalSins - 1],
                                        0.0f, Sinusoid.NON_EXISTING_FRAME_INDEX);
                                tr.tracks[j].add(hntmSignal.frames[i].tAnalysisInSeconds
                                        - TrackGenerator.ZERO_AMP_SHIFT_IN_SECONDS, zeroAmpSin,
                                        hntmSignal.frames[i].maximumFrequencyOfVoicingInHz, SinusoidalTrack.TURNED_ON);
                            }

                            tr.tracks[j].add(hntmSignal.frames[i].tAnalysisInSeconds, tmpSin,
                                    hntmSignal.frames[i].maximumFrequencyOfVoicingInHz, SinusoidalTrack.ACTIVE);
                        } else {
                            // Turn off tracks that are not assigned any new sinusoid
                            if (tr.tracks[j].states[tr.tracks[j].currentIndex] != SinusoidalTrack.TURNED_OFF)
                                turnOffTrack(tr.tracks[j], hntmSignal, i);
                        }
                    }

                    // Births: create new tracks from sinusoids that are not assigned to existing tracks
                    for (k = 0; k < bSinAssigneds.length; k++) {
                        if (!bSinAssigneds[k])
                            startNewTrack(tr, hntmSignal, analysisParams, i, k, currentCeps);
                    }
                }

                System.out.println("Track generation using frame " + String.valueOf(i + 1) + " of "
                        + String.valueOf(numFrames));
            }

            // Turn off all active tracks after the last speech frame.
            // tr is still null here if no frame carried harmonics, so guard against that.
            if (i == numFrames - 1 && tr != null) {
                for (j = 0; j < tr.currentIndex + 1; j++) {
                    if (Math.abs(hntmSignal.frames[i].tAnalysisInSeconds
                            - tr.tracks[j].times[tr.tracks[j].totalSins - 1]) < TrackGenerator.ZERO_AMP_SHIFT_IN_SECONDS) {
                        if (tr.tracks[j].states[tr.tracks[j].currentIndex] == SinusoidalTrack.ACTIVE)
                            turnOffTrack(tr.tracks[j], hntmSignal, i);
                    }
                }
            }
        }

        // No frames at all, or no frame with harmonics: there is nothing to build tracks from
        if (tr == null)
            return null;

        for (i = 0; i <= tr.currentIndex; i++)
            tr.tracks[i].correctTrack();

        tr.setOriginalDurationManual(hntmSignal.originalDurationInSeconds);

        SinusoidalTracks trOut = new SinusoidalTracks(tr, 0, tr.currentIndex);
        return TrackGenerator.postProcess(trOut);
    }

    /**
     * Returns the frequency of a harmonic in radians, computed as {@code harmonicInd * f0InHz} of the given frame.
     * <p>
     * NOTE(review): harmonic index 0 therefore maps to 0 Hz. Confirm that {@code complexAmps[0]} really is a 0 Hz term
     * and not the first harmonic at {@code f0InHz}.
     */
    private static float harmonicFreqInRadians(HntmSpeechSignal hntmSignal, int frameInd, int harmonicInd) {
        return SignalProcUtils.hz2radian(harmonicInd * hntmSignal.frames[frameInd].f0InHz, hntmSignal.samplingRateInHz);
    }

    /**
     * Returns the amplitude of a harmonic: either the magnitude of its complex amplitude, or the value obtained from
     * {@code currentCeps} with the configured warping method. Yields 0 when cepstral amplitudes are requested but the
     * warping method is neither of the two handled here.
     */
    private static float harmonicAmplitude(HntmSpeechSignal hntmSignal, HntmAnalyzerParams analysisParams, int frameInd,
            int harmonicInd, float[] currentCeps) {
        if (analysisParams.useHarmonicAmplitudesDirectly)
            return (float) MathUtils.magnitudeComplex(hntmSignal.frames[frameInd].h.complexAmps[harmonicInd]);

        if (analysisParams.regularizedCepstrumWarpingMethod == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING)
            return (float) RegularizedPreWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(currentCeps, harmonicInd
                    * hntmSignal.frames[frameInd].f0InHz, hntmSignal.samplingRateInHz);
        else if (analysisParams.regularizedCepstrumWarpingMethod == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING)
            return (float) RegularizedPostWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(currentCeps, harmonicInd
                    * hntmSignal.frames[frameInd].f0InHz, hntmSignal.samplingRateInHz);

        return 0.0f;
    }

    /**
     * Creates the sinusoid for one harmonic of one frame: amplitude as given by {@code harmonicAmplitude}, frequency in
     * radians, phase taken from the complex harmonic amplitude, and the frame index.
     */
    private static Sinusoid newHarmonicSinusoid(HntmSpeechSignal hntmSignal, HntmAnalyzerParams analysisParams,
            int frameInd, int harmonicInd, float[] currentCeps) {
        float amp = harmonicAmplitude(hntmSignal, analysisParams, frameInd, harmonicInd, currentCeps);
        float freqInRadians = harmonicFreqInRadians(hntmSignal, frameInd, harmonicInd);

        return new Sinusoid(amp, freqInRadians,
                MathUtils.phaseInRadiansFloat(hntmSignal.frames[frameInd].h.complexAmps[harmonicInd]), frameInd);
    }

    /**
     * Starts a new track for one harmonic: the track begins with a zero amplitude sinusoid slightly before the frame's
     * analysis instant (to allow smooth synthesis, i.e. "turning on" the track), followed by the harmonic itself.
     */
    private static void startNewTrack(SinusoidalTracks tr, HntmSpeechSignal hntmSignal,
            HntmAnalyzerParams analysisParams, int frameInd, int harmonicInd, float[] currentCeps) {
        Sinusoid zeroAmpSin = new Sinusoid(0.0f, harmonicFreqInRadians(hntmSignal, frameInd, harmonicInd), 0.0f,
                Sinusoid.NON_EXISTING_FRAME_INDEX);
        tr.add(new SinusoidalTrack(hntmSignal.frames[frameInd].tAnalysisInSeconds
                - TrackGenerator.ZERO_AMP_SHIFT_IN_SECONDS, zeroAmpSin,
                hntmSignal.frames[frameInd].maximumFrequencyOfVoicingInHz, SinusoidalTrack.TURNED_ON));

        Sinusoid sin = newHarmonicSinusoid(hntmSignal, analysisParams, frameInd, harmonicInd, currentCeps);
        // The track that was just added is addressed through tr.currentIndex
        tr.tracks[tr.currentIndex].add(hntmSignal.frames[frameInd].tAnalysisInSeconds, sin,
                hntmSignal.frames[frameInd].maximumFrequencyOfVoicingInHz, SinusoidalTrack.ACTIVE);
    }

    /**
     * Turns a track off by appending a zero amplitude sinusoid, at the track's last frequency, slightly after the
     * analysis instant of the given frame.
     */
    private static void turnOffTrack(SinusoidalTrack track, HntmSpeechSignal hntmSignal, int frameInd) {
        Sinusoid zeroAmpSin = new Sinusoid(0.0f, track.freqs[track.totalSins - 1], 0.0f,
                Sinusoid.NON_EXISTING_FRAME_INDEX);
        track.add(hntmSignal.frames[frameInd].tAnalysisInSeconds + TrackGenerator.ZERO_AMP_SHIFT_IN_SECONDS, zeroAmpSin,
                hntmSignal.frames[frameInd].maximumFrequencyOfVoicingInHz, SinusoidalTrack.TURNED_OFF);
    }
}