/** * Copyright 2007 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * Permission is hereby granted, free of charge, to use and distribute * this software and its documentation without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of this work, and to * permit persons to whom this work is furnished to do so, subject to * the following conditions: * * 1. The code must retain the above copyright notice, this list of * conditions and the following disclaimer. * 2. Any modifications must be clearly marked as such. * 3. Original authors' names are not deleted. * 4. The authors' names are not used to endorse or promote products * derived from this software without specific prior written * permission. * * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF * THIS SOFTWARE. 
*/ package marytts.signalproc.sinusoidal.hntm.modification; import java.util.Arrays; import marytts.signalproc.adaptation.prosody.BasicProsodyModifierParams; import marytts.signalproc.analysis.RegularizedCepstrumEstimator; import marytts.signalproc.analysis.RegularizedPostWarpedCepstrumEstimator; import marytts.signalproc.analysis.RegularizedPreWarpedCepstrumEstimator; import marytts.signalproc.process.TDPSOLAInstants; import marytts.signalproc.process.TDPSOLAProcessor; import marytts.signalproc.sinusoidal.hntm.analysis.HntmAnalyzer; import marytts.signalproc.sinusoidal.hntm.analysis.HntmAnalyzerParams; import marytts.signalproc.sinusoidal.hntm.analysis.HntmPlusTransientsSpeechSignal; import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechFrame; import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechSignal; import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechSignalWithContext; import marytts.signalproc.sinusoidal.hntm.analysis.TransientSegment; import marytts.signalproc.sinusoidal.hntm.synthesis.NoisePartWaveformSynthesizer; import marytts.signalproc.window.Window; import marytts.util.math.ArrayUtils; import marytts.util.math.MathUtils; import marytts.util.signal.SignalProcUtils; /** * Prosody modification for HNM as described in: * * Stylianou, Y., 1996, "Harmonic plus Noise Models for Speech, combined with Statistical Methods, for Speech and Speaker * Modification", Ph.D. thesis, Ecole Nationale Supérieure des Télécommunications. 
 * (Chapter 3, A Harmonic plus Noise Model, HNM)
 * 
 * @author oytun.turk
 * 
 */
public class HntmProsodyModifier {

	// Note that pmodParams are changed as well:
	// - pmodParams.tScales/tScalesTimes are sorted in place below;
	// - NOTE(review): pmodParams is also *reassigned* to a new object near the end of this
	//   method — that reassignment is local to this method and is NOT visible to callers.
	/**
	 * Applies time- and pitch-scale prosody modification to an HNM-analysed speech signal.
	 * <p>
	 * Time scaling is realised by remapping analysis frames to TD-PSOLA synthesis instants
	 * (frames are repeated or skipped as dictated by the scale contour); pitch scaling is
	 * realised per frame by resampling the harmonic amplitude envelope and scaling f0.
	 * Transient segments (if the signal carries any) are shielded from duration modification
	 * and only time-shifted.
	 * 
	 * @param hntmSignal
	 *            analysed input signal; may be an {@link HntmPlusTransientsSpeechSignal}
	 * @param leftContexts
	 *            per-frame left context frames, may be null
	 * @param rightContexts
	 *            per-frame right context frames, may be null
	 * @param pmodParams
	 *            prosody modification parameters; its tScales/tScalesTimes arrays are sorted in place
	 * @param analysisParams
	 *            parameters used during HNM analysis (noise model, cepstrum settings, etc.)
	 * @return the modified signal plus per-frame contexts, or null if a scale array and its
	 *         instants array differ in length
	 */
	public static HntmSpeechSignalWithContext modify(HntmSpeechSignal hntmSignal, HntmSpeechFrame[] leftContexts,
			HntmSpeechFrame[] rightContexts, BasicProsodyModifierParams pmodParams, HntmAnalyzerParams analysisParams) {
		int i, j;
		int currentHarmonicNo;
		HntmSpeechSignalWithContext output = null;

		if (!pmodParams.willProsodyBeModified())
			// Nothing to do: wrap the input unchanged.
			return new HntmSpeechSignalWithContext(hntmSignal, leftContexts, rightContexts);
		else {
			// Pre-process tScales and tScaleTimes to make sure transients are not duration modified but only shifted
			// Sort input time scales if required
			int[] sortedIndices;

			if (pmodParams.tScalesTimes != null) {
				// Sort the instants ascending and reorder the scales to match.
				sortedIndices = MathUtils.quickSort(pmodParams.tScalesTimes);
				pmodParams.tScales = MathUtils.sortAs(pmodParams.tScales, sortedIndices);
			}

			// Per-analysis-frame scale contours; one extra slot extrapolated past the last frame.
			float[] tScalesMod = new float[hntmSignal.frames.length + 1];
			float[] allScalesTimes = new float[hntmSignal.frames.length + 1];
			float[] pScalesMod = new float[hntmSignal.frames.length + 1];
			for (i = 0; i < hntmSignal.frames.length; i++)
				allScalesTimes[i] = hntmSignal.frames[i].tAnalysisInSeconds;
			// Extrapolate one extra instant by repeating the last inter-frame spacing.
			// NOTE(review): assumes at least two analysis frames — TODO confirm callers guarantee this.
			allScalesTimes[hntmSignal.frames.length] = hntmSignal.frames[hntmSignal.frames.length - 1].tAnalysisInSeconds
					+ (hntmSignal.frames[hntmSignal.frames.length - 1].tAnalysisInSeconds
							- hntmSignal.frames[hntmSignal.frames.length - 2].tAnalysisInSeconds);

			if (pmodParams.tScalesTimes != null) {
				if (pmodParams.tScales.length != pmodParams.tScalesTimes.length) {
					System.out.println("Error! Time scale array and associated instants should be of identical length");
					return null;
				}

				// Map tScalesTimes to the analysis time axis (which is now in allScalesTimes)
				int scaleIndex;
				float alpha;
				for (i = 0; i < allScalesTimes.length; i++) {
					// For time scales: find the user-specified instant closest to this analysis
					// instant and linearly interpolate between it and its neighbour on the
					// appropriate side; clamp at the ends of the user contour.
					scaleIndex = MathUtils.findClosest(pmodParams.tScalesTimes, allScalesTimes[i]);
					if (allScalesTimes[i] > pmodParams.tScalesTimes[scaleIndex]) {
						if (scaleIndex < pmodParams.tScalesTimes.length - 1) {
							// Guard against division by (near-)zero spacing between instants.
							if ((pmodParams.tScalesTimes[scaleIndex + 1] - pmodParams.tScalesTimes[scaleIndex]) > 1e-10)
								alpha = (pmodParams.tScalesTimes[scaleIndex + 1] - allScalesTimes[i])
										/ (pmodParams.tScalesTimes[scaleIndex + 1] - pmodParams.tScalesTimes[scaleIndex]);
							else
								alpha = 0.5f;

							tScalesMod[i] = alpha * pmodParams.tScales[scaleIndex] + (1.0f - alpha)
									* pmodParams.tScales[scaleIndex + 1];
						} else
							tScalesMod[i] = pmodParams.tScales[scaleIndex];
					} else if (allScalesTimes[i] < pmodParams.tScalesTimes[scaleIndex]) {
						if (scaleIndex > 0) {
							if ((pmodParams.tScalesTimes[scaleIndex] - pmodParams.tScalesTimes[scaleIndex - 1]) > 1e-10)
								alpha = (pmodParams.tScalesTimes[scaleIndex] - allScalesTimes[i])
										/ (pmodParams.tScalesTimes[scaleIndex] - pmodParams.tScalesTimes[scaleIndex - 1]);
							else
								alpha = 0.5f;

							tScalesMod[i] = alpha * pmodParams.tScales[scaleIndex - 1] + (1.0f - alpha)
									* pmodParams.tScales[scaleIndex];
						} else
							tScalesMod[i] = pmodParams.tScales[scaleIndex];
					} else
						tScalesMod[i] = pmodParams.tScales[scaleIndex];
					// }
					// }
				}
			} else
				// No instants given: a single uniform time-scale factor everywhere.
				Arrays.fill(tScalesMod, pmodParams.tScales[0]);

			if (pmodParams.pScalesTimes != null) {
				if (pmodParams.pScales.length != pmodParams.pScalesTimes.length) {
					System.out.println("Error! Pitch scale array and associated instants should be of identical length");
					return null;
				}

				// Map pScalesTimes to the analysis time axis (which is now in allScalesTimes)
				// (Same closest-instant interpolation scheme as the time-scale mapping above.)
				int scaleIndex;
				float alpha;
				for (i = 0; i < allScalesTimes.length; i++) {
					// For pitch scales
					scaleIndex = MathUtils.findClosest(pmodParams.pScalesTimes, allScalesTimes[i]);
					if (allScalesTimes[i] > pmodParams.pScalesTimes[scaleIndex]) {
						if (scaleIndex < pmodParams.pScalesTimes.length - 1) {
							if ((pmodParams.pScalesTimes[scaleIndex + 1] - pmodParams.pScalesTimes[scaleIndex]) > 1e-10)
								alpha = (pmodParams.pScalesTimes[scaleIndex + 1] - allScalesTimes[i])
										/ (pmodParams.pScalesTimes[scaleIndex + 1] - pmodParams.pScalesTimes[scaleIndex]);
							else
								alpha = 0.5f;

							pScalesMod[i] = alpha * pmodParams.pScales[scaleIndex] + (1.0f - alpha)
									* pmodParams.pScales[scaleIndex + 1];
						} else
							pScalesMod[i] = pmodParams.pScales[scaleIndex];
					} else if (allScalesTimes[i] < pmodParams.pScalesTimes[scaleIndex]) {
						if (scaleIndex > 0) {
							if ((pmodParams.pScalesTimes[scaleIndex] - pmodParams.pScalesTimes[scaleIndex - 1]) > 1e-10)
								alpha = (pmodParams.pScalesTimes[scaleIndex] - allScalesTimes[i])
										/ (pmodParams.pScalesTimes[scaleIndex] - pmodParams.pScalesTimes[scaleIndex - 1]);
							else
								alpha = 0.5f;

							pScalesMod[i] = alpha * pmodParams.pScales[scaleIndex - 1] + (1.0f - alpha)
									* pmodParams.pScales[scaleIndex];
						} else
							pScalesMod[i] = pmodParams.pScales[scaleIndex];
					} else
						pScalesMod[i] = pmodParams.pScales[scaleIndex];
					// }
					// }
				}
			} else
				Arrays.fill(pScalesMod, pmodParams.pScales[0]);

			// Handle transient part by time shifting segments as necessary
			if (hntmSignal instanceof HntmPlusTransientsSpeechSignal
					&& ((HntmPlusTransientsSpeechSignal) hntmSignal).transients != null) {
				int numTransientSegments = ((HntmPlusTransientsSpeechSignal) hntmSignal).transients.segments.length;
				if (output == null)
					output = new HntmSpeechSignalWithContext();
				output.hntmSignal = new HntmPlusTransientsSpeechSignal(hntmSignal.frames.length, hntmSignal.samplingRateInHz,
						hntmSignal.originalDurationInSeconds, numTransientSegments);
				output.leftContexts = new HntmSpeechFrame[hntmSignal.frames.length];
				output.rightContexts = new HntmSpeechFrame[hntmSignal.frames.length];

				if (numTransientSegments > 0) {
					// Build extra (time, scale=1.0) breakpoints pinning each transient segment
					// (and a 1 ms margin around it) to scale 1.0, then merge them with the
					// frame-based contour so transients are shifted but not stretched.
					float[] tempScales = new float[4 * numTransientSegments];
					float[] tempScalesTimes = new float[4 * numTransientSegments];
					float[] tempScales2 = ArrayUtils.copy(tScalesMod);
					float[] tempScalesTimes2 = ArrayUtils.copy(allScalesTimes);
					int ind = 0; // NOTE(review): never used after initialisation — dead local.
					for (i = 0; i < numTransientSegments; i++) {
						// Pin segment start and end to scale 1.0.
						tempScalesTimes[2 * i] = ((HntmPlusTransientsSpeechSignal) hntmSignal).transients.segments[i].startTime;
						tempScales[2 * i] = 1.0f;
						tempScalesTimes[2 * i + 1] = ((HntmPlusTransientsSpeechSignal) hntmSignal).transients.segments[i]
								.getEndTime(hntmSignal.samplingRateInHz);
						tempScales[2 * i + 1] = 1.0f;

						if (tempScalesTimes2 != null) {
							// Force any frame-contour breakpoint inside the segment to 1.0 too.
							for (j = 0; j < tempScalesTimes2.length; j++) {
								if (tempScalesTimes2[j] >= tempScalesTimes[2 * i]
										&& tempScalesTimes2[j] <= tempScalesTimes[2 * i + 1])
									tempScales2[j] = 1.0f;
							}
						}
					}

					for (i = numTransientSegments; i < 2 * numTransientSegments; i++) {
						// Breakpoints 1 ms outside each segment take the neighbouring frame's scale,
						// so the contour ramps back to the requested scale just outside the segment.
						tempScalesTimes[2 * i] = ((HntmPlusTransientsSpeechSignal) hntmSignal).transients.segments[i
								- numTransientSegments].startTime - 0.001f;
						tempScales[2 * i] = 1.0f;
						for (j = 0; j < allScalesTimes.length; j++) {
							if (tempScalesTimes[2 * i] > allScalesTimes[j]) {
								tempScales[2 * i] = tScalesMod[j];
								break;
							}
						}

						tempScalesTimes[2 * i + 1] = ((HntmPlusTransientsSpeechSignal) hntmSignal).transients.segments[i
								- numTransientSegments].getEndTime(hntmSignal.samplingRateInHz) + 0.001f;
						tempScales[2 * i + 1] = 1.0f;
						for (j = allScalesTimes.length - 1; j >= 0; j--) {
							if (tempScalesTimes[2 * i + 1] < allScalesTimes[j]) {
								tempScales[2 * i + 1] = tScalesMod[j];
								break;
							}
						}
					}

					// Merge transient breakpoints with the frame contour and re-sort by time.
					tScalesMod = ArrayUtils.combine(tempScales, tempScales2);
					allScalesTimes = ArrayUtils.combine(tempScalesTimes, tempScalesTimes2);
					sortedIndices = MathUtils.quickSort(allScalesTimes);
					tScalesMod = MathUtils.sortAs(tScalesMod, sortedIndices);

					// Copy the transient segments and shift their start times onto the
					// time-scaled axis.
					for (i = 0; i < numTransientSegments; i++) {
						((HntmPlusTransientsSpeechSignal) output.hntmSignal).transients.segments[i] = new TransientSegment(
								((HntmPlusTransientsSpeechSignal) hntmSignal).transients.segments[i]);
						((HntmPlusTransientsSpeechSignal) output.hntmSignal).transients.segments[i].startTime = SignalProcUtils
								.timeScaledTime(((HntmPlusTransientsSpeechSignal) hntmSignal).transients.segments[i].startTime,
										tScalesMod, allScalesTimes);
					}
				}
			} else {
				// No transients: plain HNM output container.
				if (output == null)
					output = new HntmSpeechSignalWithContext();
				output.hntmSignal = new HntmSpeechSignal(hntmSignal.frames.length, hntmSignal.samplingRateInHz,
						hntmSignal.originalDurationInSeconds);
				output.leftContexts = new HntmSpeechFrame[hntmSignal.frames.length];
				output.rightContexts = new HntmSpeechFrame[hntmSignal.frames.length];
			}

			//
			/*
			 * if (output==null) output = new HntmSpeechSignalWithContext();
			 * 
			 * output.hntmSignal = new HntmSpeechSignal(hntmSignal.frames.length, hntmSignal.samplingRateInHz,
			 * hntmSignal.originalDurationInSeconds); for (i=0; i<hntmSignal.frames.length; i++) { output.hntmSignal.frames[i] =
			 * new HntmSpeechFrame(hntmSignal.frames[i]); output.hntmSignal.frames[i].tAnalysisInSeconds =
			 * SignalProcUtils.timeScaledTime(hntmSignal.frames[i].tAnalysisInSeconds, tScalesMod, tScalesTimesMod); }
			 * 
			 * output.hntmSignal.originalDurationInSeconds = SignalProcUtils.timeScaledTime(hntmSignal.originalDurationInSeconds,
			 * tScalesMod, tScalesTimesMod);
			 */

			// Analysis instants (plus one extrapolated instant, mirroring allScalesTimes above).
			float[] tAnalysis = new float[hntmSignal.frames.length + 1];
			for (i = 0; i < hntmSignal.frames.length; i++)
				tAnalysis[i] = hntmSignal.frames[i].tAnalysisInSeconds;
			tAnalysis[hntmSignal.frames.length] = hntmSignal.frames[hntmSignal.frames.length - 1].tAnalysisInSeconds
					+ (hntmSignal.frames[hntmSignal.frames.length - 1].tAnalysisInSeconds
							- hntmSignal.frames[hntmSignal.frames.length - 2].tAnalysisInSeconds);
			// Voicing flags per frame: f0 above 10 Hz counts as voiced.
			boolean[] vuvs = new boolean[hntmSignal.frames.length + 1];
			for (i = 0; i < hntmSignal.frames.length; i++) {
				if (hntmSignal.frames[i].f0InHz > 10.0)
					vuvs[i] = true;
				else
					vuvs[i] = false;
			}
			vuvs[hntmSignal.frames.length] = vuvs[hntmSignal.frames.length - 1];

			// Compute TD-PSOLA synthesis instants plus per-frame repeat/skip counts.
			TDPSOLAInstants synthesisInstants = TDPSOLAProcessor.transformAnalysisInstants(tAnalysis,
					hntmSignal.samplingRateInHz, vuvs, tScalesMod, pScalesMod);

			// Time scaling
			if (output == null)
				output = new HntmSpeechSignalWithContext();
			// NOTE(review): output.hntmSignal allocated in the transient branch above is
			// replaced here (transients already copied into the old object are dropped for
			// the plain HntmSpeechSignal) — looks intentional for the non-transient path,
			// but verify for HntmPlusTransientsSpeechSignal inputs.
			output.hntmSignal = new HntmSpeechSignal(synthesisInstants.synthesisInstantsInSeconds.length,
					hntmSignal.samplingRateInHz, hntmSignal.originalDurationInSeconds);
			output.leftContexts = new HntmSpeechFrame[synthesisInstants.synthesisInstantsInSeconds.length];
			output.rightContexts = new HntmSpeechFrame[synthesisInstants.synthesisInstantsInSeconds.length];
			int currentSynthesisIndex = 0;
			boolean bBroke = false;
			for (i = 0; i < synthesisInstants.repeatSkipCounts.length; i++) // This is of the same length with total analysis
																			// frames
			{
				// repeatSkipCounts[i] extra copies of frame i are placed at successive
				// synthesis instants (deep-copied so later pitch scaling is per-instance).
				for (j = 0; j <= synthesisInstants.repeatSkipCounts[i]; j++) {
					if (i < hntmSignal.frames.length) {
						output.hntmSignal.frames[currentSynthesisIndex] = new HntmSpeechFrame(hntmSignal.frames[i]);
						if (leftContexts != null)
							output.leftContexts[currentSynthesisIndex] = new HntmSpeechFrame(leftContexts[i]);
						if (rightContexts != null)
							output.rightContexts[currentSynthesisIndex] = new HntmSpeechFrame(rightContexts[i]);
					} else {
						// Past the last analysis frame: repeat the final frame/contexts.
						output.hntmSignal.frames[currentSynthesisIndex] = new HntmSpeechFrame(
								hntmSignal.frames[hntmSignal.frames.length - 1]);
						if (leftContexts != null)
							output.leftContexts[currentSynthesisIndex] = new HntmSpeechFrame(
									leftContexts[hntmSignal.frames.length - 1]);
						if (rightContexts != null)
							output.rightContexts[currentSynthesisIndex] = new HntmSpeechFrame(
									rightContexts[hntmSignal.frames.length - 1]);
					}

					output.hntmSignal.frames[currentSynthesisIndex].tAnalysisInSeconds = synthesisInstants.synthesisInstantsInSeconds[currentSynthesisIndex];

					currentSynthesisIndex++;
					if (currentSynthesisIndex >= output.hntmSignal.frames.length) {
						bBroke = true;
						break;
					}
				}

				if (bBroke)
					break;
			}

			// New duration = time of the last synthesis frame.
			output.hntmSignal.originalDurationInSeconds = output.hntmSignal.frames[output.hntmSignal.frames.length - 1].tAnalysisInSeconds;

			// Time scale noise part if it is based on any waveform representation
			if (analysisParams.noiseModel == HntmAnalyzerParams.WAVEFORM
					|| analysisParams.noiseModel == HntmAnalyzerParams.VOICEDNOISE_LPC_UNVOICEDNOISE_WAVEFORM
					|| analysisParams.noiseModel == HntmAnalyzerParams.UNVOICEDNOISE_LPC_VOICEDNOISE_WAVEFORM) {
				// Synthesize original noise waveform
				double[] noisePartWaveform = NoisePartWaveformSynthesizer.synthesize(hntmSignal, leftContexts, rightContexts,
						analysisParams);

				// Time scale noise waveform using TD-PSOLA: windowed analysis segments are
				// overlap-added at the synthesis instants.
				int noiseWaveformLenMod = SignalProcUtils.time2sample(output.hntmSignal.originalDurationInSeconds,
						output.hntmSignal.samplingRateInHz);
				double[] noisePartWaveformMod = new double[noiseWaveformLenMod];
				double[] winWgtSum = new double[noiseWaveformLenMod];
				Arrays.fill(noisePartWaveformMod, 0.0);
				Arrays.fill(winWgtSum, 0.0);
				Window winNoise = null;
				double[] wgt = null;
				int wsNoise, halfWsNoise;
				int analysisStartInd, analysisEndInd;
				int synthesisMidInd, synthesisStartInd, synthesisEndInd;
				currentSynthesisIndex = 0;
				bBroke = false;
				int k, kStart;
				boolean invert = false; // alternate segments are copied time-reversed to decorrelate repeats
				for (i = 1; i < synthesisInstants.repeatSkipCounts.length - 1; i++) // TO DO: handle first and last noise waveform
																					// frames
				{
					for (j = 0; j <= synthesisInstants.repeatSkipCounts[i]; j++) {
						// Analysis segment spans the previous to the next analysis instant.
						analysisStartInd = SignalProcUtils.time2sample(hntmSignal.frames[i - 1].tAnalysisInSeconds,
								hntmSignal.samplingRateInHz);
						if (i < hntmSignal.frames.length - 1)
							analysisEndInd = SignalProcUtils.time2sample(hntmSignal.frames[i + 1].tAnalysisInSeconds,
									hntmSignal.samplingRateInHz);
						else
							analysisEndInd = noisePartWaveform.length - 1;

						wsNoise = analysisEndInd - analysisStartInd + 1;
						if (wsNoise > 0) {
							halfWsNoise = (int) Math.floor(0.5 * wsNoise + 0.5);
							winNoise = Window.get(analysisParams.harmonicAnalysisWindowType, wsNoise);
							wgt = winNoise.getCoeffs();

							// Centre the window on the synthesis instant of the current frame.
							synthesisMidInd = SignalProcUtils.time2sample(
									output.hntmSignal.frames[currentSynthesisIndex].tAnalysisInSeconds,
									output.hntmSignal.samplingRateInHz);
							synthesisStartInd = synthesisMidInd - halfWsNoise;
							synthesisEndInd = synthesisStartInd + wsNoise - 1;
							kStart = Math.max(0, synthesisStartInd);
							for (k = kStart; k <= Math.min(synthesisEndInd, noiseWaveformLenMod - 1); k++) {
								int kIndex;
								if (invert) {
									// Read the analysis segment backwards on alternate passes.
									kIndex = analysisEndInd - (k - kStart);
								} else {
									kIndex = k - kStart + analysisStartInd;
								}
								int noisePartWaveformSize = noisePartWaveform.length; // NOTE(review): unused — dead local.
								// NOTE(review): exception used as an index clamp; per the original
								// comment, analysisEndInd is occasionally 1 too large. A bounds check
								// on kIndex would avoid exception-based control flow — confirm before
								// changing, behavior is load-bearing here.
								try {
									noisePartWaveformMod[k] += noisePartWaveform[kIndex] * wgt[k - kStart];
								} catch (ArrayIndexOutOfBoundsException e) {
									// value of analysisEndInd seems to be 1 too large *sometimes*
									noisePartWaveformMod[k] += noisePartWaveform[kIndex - 1] * wgt[k - kStart];
								}
							}
						}

						currentSynthesisIndex++;
						invert = !invert;
						if (currentSynthesisIndex >= output.hntmSignal.frames.length) {
							bBroke = true;
							break;
						}
					}

					if (bBroke)
						break;
				}

				// NOTE(review): winWgtSum is never accumulated in the loop above (no
				// winWgtSum[k] += wgt[k - kStart]), so every entry stays 0.0 and this
				// normalization loop is a no-op. Presumably window weights were meant to be
				// summed for overlap-add normalization — confirm before "fixing", as enabling
				// it would change the output level.
				for (i = 0; i < winWgtSum.length; i++) {
					if (winWgtSum[i] > 0.0)
						noisePartWaveformMod[i] /= winWgtSum[i];
				}

				HntmAnalyzer.packNoisePartWaveforms(output.hntmSignal, noisePartWaveformMod);
			}

			//
			// NOT EFFECTIVE SINCE we do not use the newPhases! Synthesis uses complexAmps only!
			// Phase envelope estimation and unwrapping to ensure phase continuity in frequency domain
			double[][] modifiedPhases = null; // NOTE(review): result never read — kept to preserve behavior of the call below.
			if (HntmAnalyzerParams.UNWRAP_PHASES_ALONG_HARMONICS_AFTER_TIME_SCALING)
				modifiedPhases = HntmAnalyzer.unwrapPhasesAlongHarmonics(output.hntmSignal);
			//

			//
			// float[] tSynthesis = new float[hntmSignalMod.frames.length];
			// for (i=0; i<hntmSignalMod.frames.length; i++)
			// tSynthesis[i] = hntmSignalMod.frames[i].tAnalysisInSeconds;
			// MaryUtils.plot(tAnalysis);
			// MaryUtils.plot(tSynthesis);

			// NOTE(review): reassigns the local reference only; the caller's pmodParams
			// object is not replaced (though its arrays were sorted in place earlier).
			pmodParams = new BasicProsodyModifierParams(tScalesMod, allScalesTimes, pScalesMod, allScalesTimes);

			// Pitch scale modification
			if (pmodParams.pScales != null) {
				float pScale;
				int pScaleInd;
				boolean isVoiced;
				int newTotalHarmonics;
				float[] newPhases; // NOTE(review): declared but never used — dead local.
				float harmonicEnergyOrig; // NOTE(review): accumulated but never read afterwards.
				float harmonicEnergyMod; // NOTE(review): accumulated but never read afterwards.
				int k;
				int leftHarmonicInd, rightHarmonicInd;
				float[] currentCeps = null;
				for (i = 0; i < output.hntmSignal.frames.length; i++) {
					// A frame is treated as voiced iff it carries harmonic complex amplitudes.
					isVoiced = false;
					if (output.hntmSignal.frames[i].h != null && output.hntmSignal.frames[i].h.complexAmps != null
							&& output.hntmSignal.frames[i].h.complexAmps.length > 0)
						isVoiced = true;

					if (isVoiced) {
						if (!analysisParams.useHarmonicAmplitudesDirectly)
							currentCeps = output.hntmSignal.frames[i].h.getCeps(output.hntmSignal.frames[i].f0InHz,
									output.hntmSignal.samplingRateInHz, analysisParams);

						// Look up the pitch-scale factor closest in time to this frame.
						pScaleInd = MathUtils.findClosest(allScalesTimes, output.hntmSignal.frames[i].tAnalysisInSeconds);
						pScale = pScalesMod[pScaleInd];
						// Keep roughly the same bandwidth: harmonic count shrinks as f0 grows.
						newTotalHarmonics = (int) Math.floor(output.hntmSignal.frames[i].h.complexAmps.length / pScale + 0.5);
						if (newTotalHarmonics > 0) {
							// Pass 1: amplitudes at the ORIGINAL harmonic frequencies (for energy bookkeeping).
							harmonicEnergyOrig = 0.0f;
							double[] amps = new double[output.hntmSignal.frames[i].h.complexAmps.length];
							for (k = 0; k < output.hntmSignal.frames[i].h.complexAmps.length; k++) {
								currentHarmonicNo = (k + 1);
								if (!analysisParams.useHarmonicAmplitudesDirectly) {
									// Sample the regularized-cepstrum spectral envelope at the harmonic.
									if (analysisParams.regularizedCepstrumWarpingMethod == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING)
										amps[k] = RegularizedPreWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(currentCeps,
												currentHarmonicNo * output.hntmSignal.frames[i].f0InHz,
												output.hntmSignal.samplingRateInHz);
									else if (analysisParams.regularizedCepstrumWarpingMethod == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING)
										amps[k] = RegularizedPostWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(currentCeps,
												currentHarmonicNo * output.hntmSignal.frames[i].f0InHz,
												output.hntmSignal.samplingRateInHz);
								} else {
									// Linear interpolation using neighbouring harmonic amplitudes
									leftHarmonicInd = (int) Math.floor(currentHarmonicNo * pScale) - 1;
									if (leftHarmonicInd < 0)
										amps[k] = MathUtils.magnitudeComplex(output.hntmSignal.frames[i].h.complexAmps[0]);
									else {
										rightHarmonicInd = leftHarmonicInd + 1;
										if (rightHarmonicInd > output.hntmSignal.frames[i].h.complexAmps.length - 1)
											amps[k] = MathUtils
													.magnitudeComplex(output.hntmSignal.frames[i].h.complexAmps[output.hntmSignal.frames[i].h.complexAmps.length - 1]);
										else
											amps[k] = MathUtils.interpolatedSample(
													(leftHarmonicInd + 1) * output.hntmSignal.frames[i].f0InHz,
													currentHarmonicNo * pScale * output.hntmSignal.frames[i].f0InHz,
													(rightHarmonicInd + 1) * output.hntmSignal.frames[i].f0InHz,
													MathUtils.magnitudeComplex(output.hntmSignal.frames[i].h.complexAmps[leftHarmonicInd]),
													MathUtils.magnitudeComplex(output.hntmSignal.frames[i].h.complexAmps[rightHarmonicInd]));
									}
								}

								harmonicEnergyOrig += amps[k] * amps[k];
							}

							// 1. Resample complex amplitude envelopes
							output.hntmSignal.frames[i].h.complexAmps = MathUtils.interpolate(
									output.hntmSignal.frames[i].h.complexAmps, newTotalHarmonics);

							// 2. Scale f0
							output.hntmSignal.frames[i].f0InHz *= pScale;

							// Pass 2: amplitudes at the NEW harmonic frequencies.
							// NOTE(review): linearAmps/freqsInHz/harmonicEnergyMod computed below are
							// never consumed — energy renormalization appears unfinished.
							double[] linearAmps = new double[newTotalHarmonics];
							double[] freqsInHz = new double[newTotalHarmonics];
							harmonicEnergyMod = 0.0f;
							double[] ampsMod = new double[newTotalHarmonics];
							if (!analysisParams.useHarmonicAmplitudesDirectly)
								currentCeps = output.hntmSignal.frames[i].h.getCeps(output.hntmSignal.frames[i].f0InHz,
										output.hntmSignal.samplingRateInHz, analysisParams);

							for (k = 0; k < newTotalHarmonics; k++) {
								currentHarmonicNo = (k + 1);
								if (!analysisParams.useHarmonicAmplitudesDirectly) {
									// NOTE(review): constants here are qualified via the subclasses
									// (RegularizedPre/PostWarpedCepstrumEstimator) while the first pass
									// uses RegularizedCepstrumEstimator — presumably inherited, hence the
									// same values; confirm.
									if (analysisParams.regularizedCepstrumWarpingMethod == RegularizedPreWarpedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING)
										ampsMod[k] = RegularizedPreWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
												currentCeps, currentHarmonicNo * output.hntmSignal.frames[i].f0InHz,
												output.hntmSignal.samplingRateInHz);
									else if (analysisParams.regularizedCepstrumWarpingMethod == RegularizedPostWarpedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING)
										ampsMod[k] = RegularizedPostWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
												currentCeps, currentHarmonicNo * output.hntmSignal.frames[i].f0InHz,
												output.hntmSignal.samplingRateInHz);
								} else {
									// Linear interpolation using neighbouring harmonic amplitudes
									// (complexAmps has already been resampled to newTotalHarmonics above).
									leftHarmonicInd = (int) Math.floor(currentHarmonicNo * pScale) - 1;
									if (leftHarmonicInd < 0)
										ampsMod[k] = MathUtils.magnitudeComplex(output.hntmSignal.frames[i].h.complexAmps[0]);
									else {
										rightHarmonicInd = leftHarmonicInd + 1;
										if (rightHarmonicInd > output.hntmSignal.frames[i].h.complexAmps.length - 1)
											ampsMod[k] = MathUtils
													.magnitudeComplex(output.hntmSignal.frames[i].h.complexAmps[output.hntmSignal.frames[i].h.complexAmps.length - 1]);
										else
											ampsMod[k] = MathUtils.interpolatedSample(
													(leftHarmonicInd + 1) * output.hntmSignal.frames[i].f0InHz,
													currentHarmonicNo * pScale * output.hntmSignal.frames[i].f0InHz,
													(rightHarmonicInd + 1) * output.hntmSignal.frames[i].f0InHz,
													MathUtils.magnitudeComplex(output.hntmSignal.frames[i].h.complexAmps[leftHarmonicInd]),
													MathUtils.magnitudeComplex(output.hntmSignal.frames[i].h.complexAmps[rightHarmonicInd]));
									}
								}

								harmonicEnergyMod += ampsMod[k] * ampsMod[k];
								linearAmps[k] = ampsMod[k]; // Not energy scaled yet
								freqsInHz[k] = currentHarmonicNo * output.hntmSignal.frames[i].f0InHz;
							}

							// double[] vocalTractDBOrig =
							// RegularizedPreWarpedCepstrumEstimator.cepstrum2dbSpectrumValues(hntmSignalMod.frames[i].h.ceps,
							// SignalProcUtils.halfSpectrumSize(4096)-1, hntmSignalMod.samplingRateInHz);
							// MaryUtils.plot(vocalTractDBOrig);

							// double[] vocalTractDBMod =
							// RegularizedPreWarpedCepstrumEstimator.cepstrum2dbSpectrumValues(hntmSignalMod.frames[i].h.ceps,
							// SignalProcUtils.halfSpectrumSize(4096)-1, hntmSignalMod.samplingRateInHz);
							// MaryUtils.plot(vocalTractDBMod);
						} else
							// Pitch scale would leave no harmonics: mark the frame unvoiced.
							output.hntmSignal.frames[i].h.complexAmps = null;
					}
				}
			}

			//
			// Phase envelope estimation and unwrapping to ensure phase continuity in frequency domain
			if (HntmAnalyzerParams.UNWRAP_PHASES_ALONG_HARMONICS_AFTER_PITCH_SCALING)
				HntmAnalyzer.unwrapPhasesAlongHarmonics(output.hntmSignal);
			//
		}

		return output;
	}
}