/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.sinusoidal.hntm.analysis;
import java.io.IOException;
import java.util.Arrays;
import marytts.signalproc.analysis.Labels;
import marytts.signalproc.analysis.LpcAnalyser;
import marytts.signalproc.analysis.PitchMarks;
import marytts.signalproc.analysis.PitchReaderWriter;
import marytts.signalproc.analysis.RegularizedCepstrumEstimator;
import marytts.signalproc.analysis.RegularizedPostWarpedCepstrumEstimator;
import marytts.signalproc.analysis.RegularizedPreWarpedCepstrumEstimator;
import marytts.signalproc.analysis.LpcAnalyser.LpCoeffs;
import marytts.signalproc.sinusoidal.NonharmonicSinusoidalSpeechFrame;
import marytts.signalproc.sinusoidal.PeakMatchedSinusoidalSynthesizer;
import marytts.signalproc.sinusoidal.SinusoidalAnalysisParams;
import marytts.signalproc.sinusoidal.SinusoidalAnalyzer;
import marytts.signalproc.sinusoidal.SinusoidalTracks;
import marytts.signalproc.sinusoidal.hntm.analysis.pitch.HnmPitchVoicingAnalyzer;
import marytts.signalproc.sinusoidal.hntm.synthesis.HarmonicPartLinearPhaseInterpolatorSynthesizer;
import marytts.signalproc.sinusoidal.hntm.synthesis.HntmSynthesizedSignal;
import marytts.signalproc.sinusoidal.hntm.synthesis.HntmSynthesizerParams;
import marytts.signalproc.sinusoidal.hntm.synthesis.hybrid.HarmonicsToTrackConverter;
import marytts.signalproc.window.GaussWindow;
import marytts.signalproc.window.Window;
import marytts.util.math.ArrayUtils;
import marytts.util.math.ComplexNumber;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;
import Jampack.H;
import Jampack.Inv;
import Jampack.JampackException;
import Jampack.Parameters;
import Jampack.Times;
import Jampack.Z;
import Jampack.Zdiagmat;
import Jampack.Zmat;
/**
* This class implements a harmonic+noise model for speech as described in
*
* Stylianou, Y., 1996, "Harmonic plus Noise Models for Speech, combined with Statistical Methods, for Speech and Speaker
* Modification", Ph.D. thesis, Ecole Nationale Supérieure des Télécommunications. (Chapter 3, A Harmonic plus Noise Model, HNM)
*
* @author Oytun Türk
*
*/
public class HntmAnalyzer {
public HntmAnalyzer() {
}
public HntmSpeechSignal analyze(short[] x, int fs, PitchReaderWriter f0) {
double[] xDouble = ArrayUtils.copyShort2Double(x);
return analyze(xDouble, fs, f0);
}
public HntmSpeechSignal analyze(double[] x, int fs, PitchReaderWriter f0) {
HntmAnalyzerParams analysisParams = new HntmAnalyzerParams(); // Using default analysis parameters
HntmSynthesizerParams synthesisParamsBeforeNoiseAnalysis = new HntmSynthesizerParams(); // Using defaulot synthesis
// parameters before noise
// analysis
return analyze(x, fs, f0, null, analysisParams, synthesisParamsBeforeNoiseAnalysis, null);
}
public HntmSpeechSignal analyze(short[] x, int fs, PitchReaderWriter f0, Labels labels, HntmAnalyzerParams analysisParams,
HntmSynthesizerParams synthesisParamsBeforeNoiseAnalysis) {
return analyze(x, fs, f0, labels, analysisParams, synthesisParamsBeforeNoiseAnalysis, null);
}
public HntmSpeechSignal analyze(short[] x, int fs, PitchReaderWriter f0, Labels labels, HntmAnalyzerParams analysisParams,
HntmSynthesizerParams synthesisParamsBeforeNoiseAnalysis, String analysisResultsFile) {
int pitchMarkOffset = 0;
PitchMarks pm = SignalProcUtils.pitchContour2pitchMarks(f0.contour, fs, x.length, f0.header.windowSizeInSeconds,
f0.header.skipSizeInSeconds, false, pitchMarkOffset);
return analyze(x, fs, pm, f0.header.windowSizeInSeconds, f0.header.skipSizeInSeconds,
ArrayUtils.copyDouble2Float(f0.contour), labels, analysisParams, synthesisParamsBeforeNoiseAnalysis,
analysisResultsFile);
}
public HntmSpeechSignal analyze(short[] x, int fs, PitchMarks pm, double f0WindowSizeInSeconds, double f0SkipSizeInSeconds,
float[] f0Contour, Labels labels, HntmAnalyzerParams analysisParams,
HntmSynthesizerParams synthesisParamsBeforeNoiseAnalysis, String analysisResultsFile) {
double[] xDouble = ArrayUtils.copyShort2Double(x);
return analyze(xDouble, fs, pm, f0WindowSizeInSeconds, f0SkipSizeInSeconds, f0Contour, labels, analysisParams,
synthesisParamsBeforeNoiseAnalysis, analysisResultsFile);
}
public HntmSpeechSignal analyze(double[] x, int fs, PitchReaderWriter f0, Labels labels, HntmAnalyzerParams analysisParams,
HntmSynthesizerParams synthesisParamsBeforeNoiseAnalysis, String analysisResultsFile) {
int pitchMarkOffset = 0;
PitchMarks pm = SignalProcUtils.pitchContour2pitchMarks(f0.contour, fs, x.length, f0.header.windowSizeInSeconds,
f0.header.skipSizeInSeconds, false, pitchMarkOffset);
return analyze(x, fs, pm, f0.header.windowSizeInSeconds, f0.header.skipSizeInSeconds,
ArrayUtils.copyDouble2Float(f0.contour), labels, analysisParams, synthesisParamsBeforeNoiseAnalysis,
analysisResultsFile);
}
public HntmSpeechSignal analyze(double[] x, int fs, PitchMarks pm, double f0WindowSizeInSeconds, double f0SkipSizeInSeconds,
float[] f0Contour, Labels labels, HntmAnalyzerParams analysisParams,
HntmSynthesizerParams synthesisParamsBeforeNoiseAnalysis, String analysisResultsFile) {
HarmonicAndTransientAnalysisOutput output = analyzeHarmonicAndTransientParts(x, fs, pm, f0WindowSizeInSeconds,
f0SkipSizeInSeconds, f0Contour, labels, analysisParams);
analyzeNoisePart(x, output.hnmSignal, analysisParams, synthesisParamsBeforeNoiseAnalysis, output.isInTransientSegments);
if (analysisResultsFile != null) {
try {
output.hnmSignal.write(analysisResultsFile);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
return output.hnmSignal;
}
public HarmonicAndTransientAnalysisOutput analyzeHarmonicAndTransientParts(double[] x, int fs, PitchReaderWriter f0,
Labels labels, HntmAnalyzerParams analysisParams) {
int pitchMarkOffset = 0;
PitchMarks pm = SignalProcUtils.pitchContour2pitchMarks(f0.contour, fs, x.length, f0.header.windowSizeInSeconds,
f0.header.skipSizeInSeconds, false, pitchMarkOffset);
return analyzeHarmonicAndTransientParts(x, fs, pm, f0.header.windowSizeInSeconds, f0.header.skipSizeInSeconds,
ArrayUtils.copyDouble2Float(f0.contour), labels, analysisParams);
}
public HarmonicAndTransientAnalysisOutput analyzeHarmonicAndTransientParts(double[] x, int fs, PitchMarks pm,
double f0WindowSizeInSeconds, double f0SkipSizeInSeconds, float[] f0Contour, Labels labels,
HntmAnalyzerParams analysisParams) {
HarmonicAndTransientAnalysisOutput output = new HarmonicAndTransientAnalysisOutput();
output.hnmSignal = null;
float originalDurationInSeconds = SignalProcUtils.sample2time(x.length, fs);
int lpOrder = SignalProcUtils.getLPOrder(fs);
int i, j, k;
// // TO DO
// Step1. Initial pitch estimation: Current version just reads from a file
if (pm != null) {
// FileUtils.writeTextFile(pm.pitchMarks, "d:\\pm.txt");
float[] initialF0s = ArrayUtils.subarray(f0Contour, 0, f0Contour.length);
// float[] initialF0s = HnmPitchVoicingAnalyzer.estimateInitialPitch(x, samplingRate, windowSizeInSeconds,
// skipSizeInSeconds, f0MinInHz, f0MaxInHz, windowType);
//
// Step2: Do for each frame (at 10 ms skip rate):
// 2.a. Voiced/Unvoiced decision
// 2.b. If voiced, maximum frequency of voicing estimation
// Otherwise, maximum frequency of voicing is set to 0.0
analysisParams.hnmPitchVoicingAnalyzerParams.f0AnalysisWindowSizeInSeconds = (float) f0WindowSizeInSeconds;
analysisParams.hnmPitchVoicingAnalyzerParams.f0AnalysisSkipSizeInSeconds = (float) f0SkipSizeInSeconds;
float[] maxFrequencyOfVoicings = HnmPitchVoicingAnalyzer.analyzeVoicings(x, fs, initialF0s,
analysisParams.hnmPitchVoicingAnalyzerParams, analysisParams.isSilentAnalysis);
float maxFreqOfVoicingInHz;
// maxFreqOfVoicingInHz = HnmAnalyzer.FIXED_MAX_FREQ_OF_VOICING_FOR_QUICK_TEST; //This should come from the above
// automatic analysis
// 2.c. Refined pitch estimation
float[] f0s = ArrayUtils.subarray(f0Contour, 0, f0Contour.length);
// float[] f0s = HnmPitchVoicingAnalyzer.estimateRefinedPitch(fftSize, fs, leftNeighInHz, rightNeighInHz,
// searchStepInHz, initialF0s, maxFrequencyOfVoicings);
// //
// Step3. Determine analysis time instants based on refined pitch values.
// (Pitch synchronous if voiced, 10 ms skip if unvoiced)
double numPeriods = analysisParams.numPeriodsHarmonicsExtraction;
double f0InHz = f0s[0];
double T0Double;
double assumedF0ForUnvoicedInHz = 100.0;
boolean isVoiced, isNoised;
if (f0InHz > 10.0)
isVoiced = true;
else {
isVoiced = false;
f0InHz = assumedF0ForUnvoicedInHz;
}
int ws;
// int totalFrm = (int)Math.floor(pm.pitchMarks.length-numPeriods+0.5);
int totalFrm = pm.pitchMarks.length - 1;
if (totalFrm > pm.pitchMarks.length - 1)
totalFrm = pm.pitchMarks.length - 1;
// Extract frames and analyze them
double[] frm = null; // Extracted pitch synchronously
double[] frmWindowed = null;
int pmInd = 0;
boolean isOutputToTextFile = false;
Window win;
int closestInd;
String[] transientPhonemesList = { "p", "t", "k", "pf", "ts", "tS" };
if (analysisParams.harmonicModel == HntmAnalyzerParams.HARMONICS_PLUS_NOISE)
output.hnmSignal = new HntmSpeechSignal(totalFrm, fs, originalDurationInSeconds);
else if (analysisParams.harmonicModel == HntmAnalyzerParams.HARMONICS_PLUS_TRANSIENTS_PLUS_NOISE && labels != null)
output.hnmSignal = new HntmPlusTransientsSpeechSignal(totalFrm, fs, originalDurationInSeconds,
labels.items.length);
boolean isPrevVoiced = false;
int numHarmonics = 0;
int prevNumHarmonics = 0;
ComplexNumber[] harmonics = null;
ComplexNumber[] noiseHarmonics = null;
double[] phases;
double[] dPhases;
double[] dPhasesPrev = null;
int MValue;
int maxVoicingIndex;
int currentLabInd = 0;
boolean isInTransientSegment = false;
int transientSegmentInd = 0;
output.isInTransientSegments = new boolean[totalFrm];
Arrays.fill(output.isInTransientSegments, false);
for (i = 0; i < totalFrm; i++) {
f0InHz = pm.f0s[i];
// T0 = pm.pitchMarks[i+1]-pm.pitchMarks[i];
if (f0InHz > 10.0)
T0Double = SignalProcUtils.time2sampleDouble(1.0 / f0InHz, fs);
else
T0Double = SignalProcUtils.time2sampleDouble(1.0 / assumedF0ForUnvoicedInHz, fs);
ws = (int) Math.floor(numPeriods * T0Double + 0.5);
// if (ws%2==0) //Always use an odd window size to have a zero-phase analysis window
// ws++;
output.hnmSignal.frames[i].tAnalysisInSeconds = (((float) pm.pitchMarks[i + 1]) / fs); // Middle of analysis frame
if (analysisParams.harmonicModel == HntmAnalyzerParams.HARMONICS_PLUS_TRANSIENTS_PLUS_NOISE && labels != null) {
while (labels.items[currentLabInd].time < output.hnmSignal.frames[i].tAnalysisInSeconds) {
currentLabInd++;
if (currentLabInd > labels.items.length - 1) {
currentLabInd = labels.items.length - 1;
break;
}
}
if (!isInTransientSegment) // Perhaps start of a new transient segment
{
for (j = 0; j < transientPhonemesList.length; j++) {
if (labels.items[currentLabInd].phn.compareTo(transientPhonemesList[j]) == 0) {
isInTransientSegment = true;
((HntmPlusTransientsSpeechSignal) output.hnmSignal).transients.segments[transientSegmentInd] = new TransientSegment();
((HntmPlusTransientsSpeechSignal) output.hnmSignal).transients.segments[transientSegmentInd].startTime = Math
.max(0.0f, (((float) pm.pitchMarks[i]) / fs)
- analysisParams.overlapBetweenTransientAndNontransientSectionsInSeconds);
break;
}
}
} else // Perhaps end of an existing transient segment
{
boolean isTransientPhoneme = false;
for (j = 0; j < transientPhonemesList.length; j++) {
if (labels.items[currentLabInd].phn.compareTo(transientPhonemesList[j]) == 0) {
isTransientPhoneme = true;
break;
}
}
if (!isTransientPhoneme) // End of transient segment, put it in transient part
{
float endTime = Math.min((((float) pm.pitchMarks[i] + 0.5f * ws) / fs)
+ analysisParams.overlapBetweenTransientAndNontransientSectionsInSeconds,
output.hnmSignal.originalDurationInSeconds);
int waveformStartInd = Math
.max(0,
SignalProcUtils
.time2sample(
((HntmPlusTransientsSpeechSignal) output.hnmSignal).transients.segments[transientSegmentInd].startTime,
fs));
int waveformEndInd = Math.min(x.length - 1, SignalProcUtils.time2sample(endTime, fs));
if (waveformEndInd - waveformStartInd + 1 > 0) {
((HntmPlusTransientsSpeechSignal) output.hnmSignal).transients.segments[transientSegmentInd].waveform = new int[waveformEndInd
- waveformStartInd + 1];
for (j = waveformStartInd; j <= waveformEndInd; j++)
((HntmPlusTransientsSpeechSignal) output.hnmSignal).transients.segments[transientSegmentInd].waveform[j
- waveformStartInd] = (int) x[j];
}
transientSegmentInd++;
isInTransientSegment = false;
}
}
}
maxVoicingIndex = SignalProcUtils.time2frameIndex(output.hnmSignal.frames[i].tAnalysisInSeconds,
analysisParams.hnmPitchVoicingAnalyzerParams.mvfAnalysisWindowSizeInSeconds,
analysisParams.hnmPitchVoicingAnalyzerParams.mvfAnalysisSkipSizeInSeconds);
maxVoicingIndex = Math.min(maxVoicingIndex, maxFrequencyOfVoicings.length - 1);
maxFreqOfVoicingInHz = maxFrequencyOfVoicings[maxVoicingIndex];
// if (hnmSignal.frames[i].tAnalysisInSeconds<0.7 && f0InHz>10.0)
if (f0InHz > 10.0)
output.hnmSignal.frames[i].maximumFrequencyOfVoicingInHz = maxFreqOfVoicingInHz;
else
output.hnmSignal.frames[i].maximumFrequencyOfVoicingInHz = 0.0f;
numHarmonics = (int) Math.floor(output.hnmSignal.frames[i].maximumFrequencyOfVoicingInHz / f0InHz + 0.5);
isVoiced = numHarmonics > 0 ? true : false;
isNoised = output.hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < 0.5 * fs ? true : false;
if (isInTransientSegment) {
output.hnmSignal.frames[i].h = null;
output.hnmSignal.frames[i].n = null;
} else {
if (!isVoiced) {
f0InHz = assumedF0ForUnvoicedInHz;
T0Double = SignalProcUtils.time2sampleDouble(1.0 / f0InHz, fs);
ws = (int) Math.floor(numPeriods * T0Double + 0.5);
// if (ws%2==0) //Always use an odd window size to have a zero-phase analysis window
// ws++;
// output.hnmSignal.frames[i].tAnalysisInSeconds = (float)((pm.pitchMarks[i]+0.5*ws)/fs); //Middle of
// analysis frame
}
frm = new double[ws];
Arrays.fill(frm, 0.0);
int frmStartIndex;
if (i == 0)
frmStartIndex = 0;
else
frmStartIndex = SignalProcUtils.time2sample(output.hnmSignal.frames[i].tAnalysisInSeconds - 0.5
* numPeriods / f0InHz, fs);
int frmEndIndex = frmStartIndex + ws - 1;
// System.out.println(String.valueOf(frmStartIndex) + " " + String.valueOf(frmEndIndex));
int count = 0;
for (j = Math.max(0, frmStartIndex); j < Math.min(frmEndIndex, x.length - 1); j++)
frm[count++] = x[j];
/*
* for (j=pm.pitchMarks[i]; j<Math.min(pm.pitchMarks[i]+ws-1, x.length); j++) frm[j-pm.pitchMarks[i]] = x[j];
*/
win = Window.get(analysisParams.harmonicAnalysisWindowType, ws);
win.normalizePeakValue(1.0f);
double[] wgt = win.getCoeffs();
// double[] wgtSquared = new double[wgt.length];
// for (j=0; j<wgt.length; j++)
// wgtSquared[j] = wgt[j]*wgt[j];
// Step4. Estimate complex amplitudes of harmonics if voiced
// The phase of the complex amplitude is the phase and its magnitude is the absolute amplitude of the harmonic
if (isVoiced) {
// Time-domain full cross-correlation, i.e. harmonics are correlated
if (!analysisParams.useJampackInAnalysis)
harmonics = estimateComplexAmplitudes(frm, wgt, f0InHz, numHarmonics, fs,
analysisParams.hnmPitchVoicingAnalyzerParams.lastCorrelatedHarmonicNeighbour);
else {
try {
harmonics = estimateComplexAmplitudesJampack(frm, wgt, f0InHz, numHarmonics, fs,
analysisParams.hnmPitchVoicingAnalyzerParams.lastCorrelatedHarmonicNeighbour);
} catch (JampackException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
// harmonics = estimateComplexAmplitudesTD(frm, f0InHz, numHarmonics,fs);
// harmonics = estimateComplexAmplitudesSplitOptimization(frm, wgt, f0InHz, numHarmonics, fs);
// harmonicAmps = estimateComplexAmplitudesUncorrelated(frm, wgtSquared, numHarmonics, f0InHz, fs);
numHarmonics = harmonics.length;
// Only for visualization
// double[] absMags = MathUtils.magnitudeComplex(harmonicAmps);
// double[] dbMags = MathUtils.amp2db(absMags);
// MaryUtils.plot(dbMags);
//
output.hnmSignal.frames[i].f0InHz = (float) f0InHz;
output.hnmSignal.frames[i].h = new FrameHarmonicPart();
} else {
output.hnmSignal.frames[i].f0InHz = 0.0f;
numHarmonics = 0;
}
output.hnmSignal.frames[i].n = null;
// Step6. Estimate amplitude envelopes
if (numHarmonics > 0) {
if (isVoiced) {
frmWindowed = win.apply(frm, 0);
LpCoeffs lpcs = LpcAnalyser.calcLPC(frmWindowed, lpOrder, 0.0f);
output.hnmSignal.frames[i].n = new FrameNoisePartLpc();
((FrameNoisePartLpc) output.hnmSignal.frames[i].n).setLpCoeffs(lpcs.getA(), (float) lpcs.getGain());
/*
* //Only for display purposes double[] envelope = new
* double[SignalProcUtils.halfSpectrumSize(fftSize)]; for (int ff=0; ff<envelope.length; ff++) {
* envelope[ff] = LpcAnalyser.calcSpecValLinear(hnmSignal.frames[i].lpCoeffs,
* hnmSignal.frames[i].lpGain, SignalProcUtils.index2freq(ff, fs, envelope.length-1), fs); }
* MaryUtils.plot(MathUtils.linear2db(envelope)); SignalProcUtils.displayDFTSpectrumInDB(frm,
* fftSize); MaryUtils.plot(MathUtils.linear2db(linearAmps)); //
*/
output.hnmSignal.frames[i].h.complexAmps = ArrayUtils.copy(harmonics);
if (i == 10) {
// The following is only for visualization
int fftSize = SignalProcUtils.getDFTSize(fs);
double[] frameDftDB = MathUtils.amp2db(SignalProcUtils
.getFrameHalfMagnitudeSpectrum(frm, fftSize));
double[] dbAmps = output.hnmSignal.frames[i].h.getDBAmps();
double[] vocalTractDB = RegularizedPreWarpedCepstrumEstimator.cepstrum2dbSpectrumValues(
output.hnmSignal.frames[i].h.getCeps(output.hnmSignal.frames[i].f0InHz, fs,
analysisParams), SignalProcUtils.halfSpectrumSize(fftSize) - 1, fs);
// FileUtils.toTextFile(freqsInHz, "d:\\freqsInHz.txt");
// FileUtils.toTextFile(frameDftDB, "d:\\frameDftDB.txt");
// FileUtils.toTextFile(dbAmps, "d:\\dbAmps.txt");
// FileUtils.toTextFile(vocalTractDB, "d:\\vocalTractDB.txt");
}
//
}
//
}
}
if (isVoiced && !isInTransientSegment)
isPrevVoiced = true;
else {
prevNumHarmonics = 0;
isPrevVoiced = false;
}
output.isInTransientSegments[i] = isInTransientSegment;
if (!analysisParams.isSilentAnalysis) {
if (analysisParams.harmonicModel == HntmAnalyzerParams.HARMONICS_PLUS_NOISE)
System.out.println("Harmonic analysis completed at "
+ String.valueOf(output.hnmSignal.frames[i].tAnalysisInSeconds) + "s. for frame "
+ String.valueOf(i + 1) + " of " + String.valueOf(totalFrm));
else if (analysisParams.harmonicModel == HntmAnalyzerParams.HARMONICS_PLUS_TRANSIENTS_PLUS_NOISE)
System.out.println("Harmonic and transient analysis completed at "
+ String.valueOf(output.hnmSignal.frames[i].tAnalysisInSeconds) + "s. for frame "
+ String.valueOf(i + 1) + " of " + String.valueOf(totalFrm));
}
}
// Set delta times
for (i = 0; i < output.hnmSignal.frames.length; i++) {
if (i == 0)
output.hnmSignal.frames[i].deltaAnalysisTimeInSeconds = output.hnmSignal.frames[i].tAnalysisInSeconds;
else if (i == output.hnmSignal.frames.length - 1)
output.hnmSignal.frames[i].deltaAnalysisTimeInSeconds = output.hnmSignal.originalDurationInSeconds
- output.hnmSignal.frames[i].tAnalysisInSeconds;
else
output.hnmSignal.frames[i].deltaAnalysisTimeInSeconds = output.hnmSignal.frames[i + 1].tAnalysisInSeconds
- output.hnmSignal.frames[i].tAnalysisInSeconds;
}
}
if (output.hnmSignal instanceof HntmPlusTransientsSpeechSignal) {
int numTransientSegments = 0;
for (i = 0; i < ((HntmPlusTransientsSpeechSignal) output.hnmSignal).transients.segments.length; i++) {
if (((HntmPlusTransientsSpeechSignal) output.hnmSignal).transients.segments[i] != null)
numTransientSegments++;
}
if (numTransientSegments > 0) {
TransientPart tempPart = new TransientPart(numTransientSegments);
int count = 0;
for (i = 0; i < ((HntmPlusTransientsSpeechSignal) output.hnmSignal).transients.segments.length; i++) {
if (((HntmPlusTransientsSpeechSignal) output.hnmSignal).transients.segments[i] != null) {
tempPart.segments[count++] = new TransientSegment(
((HntmPlusTransientsSpeechSignal) output.hnmSignal).transients.segments[i]);
if (count >= numTransientSegments)
break;
}
}
((HntmPlusTransientsSpeechSignal) output.hnmSignal).transients = new TransientPart(tempPart);
} else
((HntmPlusTransientsSpeechSignal) output.hnmSignal).transients = null;
}
// NOT EFFECTIVE SINCE we do not use the newPhases!
// Synthesis uses complexAmps only!
// Phase envelope estimation and unwrapping to ensure phase continuity in frequency domain
double[][] modifiedPhases = null;
if (HntmAnalyzerParams.UNWRAP_PHASES_ALONG_HARMONICS_AFTER_ANALYSIS)
modifiedPhases = unwrapPhasesAlongHarmonics(output.hnmSignal);
//
return output;
}
public void analyzeNoisePart(double[] originalSignal, HntmSpeechSignal hnmSignal, HntmAnalyzerParams analysisParams,
HntmSynthesizerParams synthesisParamsForNoiseAnalysis, boolean[] isInTransientSegments) {
// Re-synthesize harmonic and transient parts, obtain noise waveform by simple subtraction
HntmSynthesizedSignal s = new HntmSynthesizedSignal();
// The following lines enable direct amplitudes to be used when synthesizing harmonic part to estimate the noise part
// and makes this estimation independent of actual harmonic part amplitude representation used in synthesis and
// modifications
boolean useHarmonicAmplitudesDirectlyTemp = analysisParams.useHarmonicAmplitudesDirectly;
if (!analysisParams.useHarmonicAmplitudesDirectly)
analysisParams.useHarmonicAmplitudesDirectly = true;
//
if (analysisParams.harmonicSynthesisMethodBeforeNoiseAnalysis == HntmSynthesizerParams.LINEAR_PHASE_INTERPOLATION) {
// s.harmonicPart = HarmonicPartLinearPhaseInterpolatorSynthesizer.synthesize(hnmSignal, analysisParams,
// synthesisParamsForNoiseAnalysis);
HarmonicPartLinearPhaseInterpolatorSynthesizer hs = new HarmonicPartLinearPhaseInterpolatorSynthesizer(hnmSignal,
analysisParams, synthesisParamsForNoiseAnalysis);
s.harmonicPart = hs.synthesizeAll();
} else if (analysisParams.harmonicSynthesisMethodBeforeNoiseAnalysis == HntmSynthesizerParams.CUBIC_PHASE_INTERPOLATION) {
// Convert to pure sinusoidal tracks
SinusoidalTracks st = HarmonicsToTrackConverter.convert(hnmSignal, analysisParams);
//
PeakMatchedSinusoidalSynthesizer ss = new PeakMatchedSinusoidalSynthesizer(hnmSignal.samplingRateInHz);
s.harmonicPart = ss.synthesize(st, true);
}
analysisParams.useHarmonicAmplitudesDirectly = useHarmonicAmplitudesDirectlyTemp;
double[] xHarmTransResynth = SignalProcUtils.addSignals(s.harmonicPart, s.transientPart);
double[] xDiff = SignalProcUtils.addSignals(originalSignal, 1.0, xHarmTransResynth, -1.0);
float originalDurationInSeconds = SignalProcUtils.sample2time(xDiff.length, hnmSignal.samplingRateInHz);
int lpOrder;
if (analysisParams.computeNoisePartLpOrderFromSamplingRate)
lpOrder = SignalProcUtils.getLPOrder(hnmSignal.samplingRateInHz);
else
lpOrder = analysisParams.noisePartLpOrder;
// Only effective for FREQUENCY_DOMAIN_PEAK_PICKING_HARMONICS_ANALYSIS
SinusoidalAnalysisParams sinAnaParams = null;
boolean bRefinePeakEstimatesParabola = false;
boolean bRefinePeakEstimatesBias = false;
boolean bSpectralReassignment = false;
boolean bAdjustNeighFreqDependent = false;
//
int i, j, k;
double[] xPreemphasized = null;
if (analysisParams.preemphasisCoefNoise > 0.0)
xPreemphasized = SignalProcUtils.applyPreemphasis(xDiff, analysisParams.preemphasisCoefNoise);
else
xPreemphasized = ArrayUtils.copy(xDiff);
// // TO DO
// Step1. Initial pitch estimation: Current version just reads from a file
if (hnmSignal != null) {
// Step3. Determine analysis time instants based on refined pitch values.
// (Pitch synchronous if voiced, 10 ms skip if unvoiced)
double numPeriods = analysisParams.numPeriodsHarmonicsExtraction;
double f0InHz = hnmSignal.frames[0].f0InHz;
double T0Double;
double assumedF0ForUnvoicedInHz = 100.0;
boolean isVoiced, isNoised;
if (f0InHz > 10.0)
isVoiced = true;
else {
isVoiced = false;
f0InHz = assumedF0ForUnvoicedInHz;
}
int wsNoise = SignalProcUtils.time2sample(analysisParams.noiseAnalysisWindowDurationInSeconds,
hnmSignal.samplingRateInHz);
if (wsNoise % 2 == 0) // Always use an odd window size to have a zero-phase analysis window
wsNoise++;
Window winNoise = Window.get(analysisParams.noiseAnalysisWindowType, wsNoise);
winNoise.normalizePeakValue(1.0f);
double[] wgtNoise = winNoise.getCoeffs();
double[] wgtSquaredNoise = new double[wgtNoise.length];
for (j = 0; j < wgtNoise.length; j++)
wgtSquaredNoise[j] = wgtNoise[j] * wgtNoise[j];
int fftSizeNoise = SignalProcUtils.getDFTSize(hnmSignal.samplingRateInHz);
int totalFrm = hnmSignal.frames.length;
// Extract frames and analyze them
double[] frmNoise = new double[wsNoise]; // Extracted at fixed window size around analysis time instant since LP
// analysis requires longer windows (40 ms)
int noiseFrmStartInd;
boolean isPrevVoiced = false;
int numHarmonics = 0;
int prevNumHarmonics = 0;
ComplexNumber[] noiseHarmonics = null;
int numNoiseHarmonics = (int) Math.floor((0.5 * hnmSignal.samplingRateInHz) / analysisParams.noiseF0InHz + 0.5);
double[] freqsInHzNoise = new double[numNoiseHarmonics];
for (j = 0; j < numNoiseHarmonics; j++)
freqsInHzNoise[j] = analysisParams.noiseF0InHz * (j + 1);
double[][] M = null;
double[][] MTransW = null;
double[][] MTransWM = null;
double[][] lambdaR = null;
double[][] inverted = null;
if (analysisParams.noiseModel == HntmAnalyzerParams.PSEUDO_HARMONIC) {
if (analysisParams.regularizedCepstrumWarpingMethod == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING)
M = RegularizedPreWarpedCepstrumEstimator.precomputeM(freqsInHzNoise, hnmSignal.samplingRateInHz,
analysisParams.noisePartCepstrumOrder);
else if (analysisParams.regularizedCepstrumWarpingMethod == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING)
M = RegularizedPostWarpedCepstrumEstimator.precomputeM(freqsInHzNoise, hnmSignal.samplingRateInHz,
analysisParams.noisePartCepstrumOrder);
if (M != null) {
MTransW = RegularizedCepstrumEstimator.precomputeMTransW(M, null);
MTransWM = RegularizedCepstrumEstimator.precomputeMTransWM(MTransW, M);
lambdaR = RegularizedCepstrumEstimator.precomputeLambdaR(
analysisParams.regularizedCepstrumEstimationLambdaNoise, analysisParams.noisePartCepstrumOrder);
inverted = RegularizedCepstrumEstimator.precomputeInverted(MTransWM, lambdaR);
}
}
int waveformNoiseStartInd = 0;
int waveformNoiseEndInd = 0;
double[][] frameWaveforms = null;
if (analysisParams.noiseModel == HntmAnalyzerParams.WAVEFORM
|| analysisParams.noiseModel == HntmAnalyzerParams.VOICEDNOISE_LPC_UNVOICEDNOISE_WAVEFORM
|| analysisParams.noiseModel == HntmAnalyzerParams.UNVOICEDNOISE_LPC_VOICEDNOISE_WAVEFORM) {
frameWaveforms = new double[totalFrm][];
for (i = 0; i < totalFrm; i++)
frameWaveforms[i] = null;
}
for (i = 0; i < totalFrm; i++) {
f0InHz = hnmSignal.frames[i].f0InHz;
if (f0InHz > 10.0) {
T0Double = SignalProcUtils.time2sampleDouble(1.0 / f0InHz, hnmSignal.samplingRateInHz);
numHarmonics = (int) Math.floor(hnmSignal.frames[i].maximumFrequencyOfVoicingInHz / f0InHz + 0.5);
} else {
T0Double = SignalProcUtils.time2sampleDouble(1.0 / assumedF0ForUnvoicedInHz, hnmSignal.samplingRateInHz);
numHarmonics = 0;
}
isVoiced = numHarmonics > 0 ? true : false;
isNoised = hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < 0.5 * hnmSignal.samplingRateInHz ? true : false;
if (i > 0) {
prevNumHarmonics = (int) Math.floor(hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz / f0InHz + 0.5);
isPrevVoiced = prevNumHarmonics > 0 ? true : false;
}
if (!isInTransientSegments[i]) {
if (!isVoiced)
f0InHz = assumedF0ForUnvoicedInHz;
T0Double = SignalProcUtils.time2sampleDouble(1.0 / f0InHz, hnmSignal.samplingRateInHz);
// Perform full-spectrum LPC analysis for generating noise part
Arrays.fill(frmNoise, 0.0);
noiseFrmStartInd = Math.max(
0,
SignalProcUtils.time2sample(hnmSignal.frames[i].tAnalysisInSeconds - 0.5f
* analysisParams.noiseAnalysisWindowDurationInSeconds, hnmSignal.samplingRateInHz));
for (j = noiseFrmStartInd; j < Math.min(noiseFrmStartInd + wsNoise, xDiff.length); j++)
frmNoise[j - noiseFrmStartInd] = xPreemphasized[j];
// hnmSignal.frames[i].harmonicTotalEnergyRatio = 1.0f;
if (isNoised) {
double[] y = null;
if (hnmSignal.frames[i].maximumFrequencyOfVoicingInHz
- analysisParams.overlapBetweenHarmonicAndNoiseRegionsInHz > 0.0f)
y = SignalProcUtils.fdFilter(frmNoise, hnmSignal.frames[i].maximumFrequencyOfVoicingInHz
- analysisParams.overlapBetweenHarmonicAndNoiseRegionsInHz,
0.5f * hnmSignal.samplingRateInHz, hnmSignal.samplingRateInHz, fftSizeNoise);
if (analysisParams.hpfBeforeNoiseAnalysis && y != null)
frmNoise = ArrayUtils.copy(y); // Use fdfo only for computing energy ratio between noise and speech
// (if we get this working, we can remove filtering from above and
// include only gain ratio computation)
frmNoise = SignalProcUtils.replaceNaNsWith(frmNoise, 0.0);
frmNoise = MathUtils.add(frmNoise, MathUtils.random(frmNoise.length, -1.0e-20, 1.0e-20));
float origAverageSampleEnergy = (float) SignalProcUtils.getAverageSampleEnergy(frmNoise);
float origNoiseStd = (float) MathUtils.standardDeviation(frmNoise);
if (analysisParams.noiseModel == HntmAnalyzerParams.LPC
|| (analysisParams.noiseModel == HntmAnalyzerParams.VOICEDNOISE_LPC_UNVOICEDNOISE_WAVEFORM && isVoiced)
|| (analysisParams.noiseModel == HntmAnalyzerParams.UNVOICEDNOISE_LPC_VOICEDNOISE_WAVEFORM && !isVoiced)) {
frmNoise = winNoise.apply(frmNoise, 0);
LpCoeffs lpcs = LpcAnalyser.calcLPC(frmNoise, lpOrder, 0.0f);
hnmSignal.frames[i].n = new FrameNoisePartLpc();
((FrameNoisePartLpc) hnmSignal.frames[i].n).setLpCoeffs(lpcs.getA(), (float) lpcs.getGain());
// hnmSignal.frames[i].setLpCoeffs(lpcs.getLPRefc(), 1.0f); //Reflection coefficients (Lattice filter
// required for synthesis!
((FrameNoisePartLpc) hnmSignal.frames[i].n).origAverageSampleEnergy = origAverageSampleEnergy;
((FrameNoisePartLpc) hnmSignal.frames[i].n).origNoiseStd = origNoiseStd;
// Only for display purposes...
// SignalProcUtils.displayLPSpectrumInDB(((FrameNoisePartLpc)hnmSignal.frames[i].n).lpCoeffs,
// ((FrameNoisePartLpc)hnmSignal.frames[i].n).gain, fftSizeNoise);
} else if (analysisParams.noiseModel == HntmAnalyzerParams.PSEUDO_HARMONIC) {
// Note that for noise we use the uncorrelated version of the complex amplitude estimator
// Correlated version resulted in ill-conditioning
// Also, analysis was pretty slow since the number of harmonics is large for pseudo-harmonics of
// noise,
// i.e. for 16 KHz 5 to 8 KHz bandwidth in steps of 100 Hz produces 50 to 80 pseudo-harmonics
// (1) Uncorrelated approach as in Stylianou´s thesis
// noiseHarmonics= estimateComplexAmplitudesUncorrelated(frmNoise, wgtSquaredNoise, numNoiseHarmonics,
// NOISE_F0_IN_HZ, fs);
// OR... (2)Expensive approach which does not work very well
noiseHarmonics = estimateComplexAmplitudes(frmNoise, wgtNoise, analysisParams.noiseF0InHz,
numNoiseHarmonics, hnmSignal.samplingRateInHz,
analysisParams.hnmPitchVoicingAnalyzerParams.lastCorrelatedHarmonicNeighbour);
// OR... (3) Uncorrelated approach using full autocorrelation matrix (checking if there is a problem
// in estimateComplexAmplitudesUncorrelated
// noiseHarmonics= estimateComplexAmplitudesUncorrelated2(frmNoise, wgtSquared, numNoiseHarmonics,
// NOISE_F0_IN_HZ, fs);
double[] linearAmpsNoise = new double[numNoiseHarmonics];
for (j = 0; j < numNoiseHarmonics; j++)
linearAmpsNoise[j] = MathUtils.magnitudeComplex(noiseHarmonics[j]);
// double[] vocalTractDB = MathUtils.amp2db(linearAmpsNoise);
// MaryUtils.plot(vocalTractDB);
hnmSignal.frames[i].n = new FrameNoisePartPseudoHarmonic();
if (!analysisParams.useNoiseAmplitudesDirectly) {
double[] noiseWeights = null;
if (analysisParams.useWeightingInRegularizedCesptrumEstimationNoise) {
GaussWindow g = new GaussWindow(2 * linearAmpsNoise.length);
g.normalizeRange(0.1f, 1.0f);
noiseWeights = g.getCoeffsRightHalf();
if (analysisParams.regularizedCepstrumWarpingMethod == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING)
((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps = RegularizedPreWarpedCepstrumEstimator
.freqsLinearAmps2cepstrum(linearAmpsNoise, freqsInHzNoise,
hnmSignal.samplingRateInHz, analysisParams.noisePartCepstrumOrder,
noiseWeights, analysisParams.regularizedCepstrumEstimationLambdaNoise);
else if (analysisParams.regularizedCepstrumWarpingMethod == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING)
((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps = RegularizedPostWarpedCepstrumEstimator
.freqsLinearAmps2cepstrum(linearAmpsNoise, freqsInHzNoise,
hnmSignal.samplingRateInHz, analysisParams.noisePartCepstrumOrder,
noiseWeights, analysisParams.regularizedCepstrumEstimationLambdaNoise);
} else {
// (1) This is how amplitudes are represented in Stylianou´s thesis
((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps = RegularizedCepstrumEstimator
.freqsLinearAmps2cepstrum(linearAmpsNoise, MTransW, inverted);
}
} else
((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps = ArrayUtils.subarrayDouble2Float(
linearAmpsNoise, 0, linearAmpsNoise.length); // Use amplitudes directly
/*
* //The following is only for visualization //int fftSize = 4096; //double[] vocalTractDB =
* RegularizedPreWarpedCepstrumEstimator
* .cepstrum2logAmpHalfSpectrum(((FrameNoisePartPseudoHarmonic)hnmSignal.frames[i].n).ceps, fftSize,
* hnmSignal.samplingRateInHz); double[] vocalTractDB = new double[numNoiseHarmonics]; for (j=0;
* j<numNoiseHarmonics; j++) vocalTractDB[j] =
* RegularizedPreWarpedCepstrumEstimator.cepstrum2linearSpectrumValue
* (((FrameNoisePartPseudoHarmonic)hnmSignal.frames[i].n).ceps, (j+1)*HnmAnalyzer.NOISE_F0_IN_HZ,
* hnmSignal.samplingRateInHz); vocalTractDB = MathUtils.amp2db(vocalTractDB);
* MaryUtils.plot(vocalTractDB); //
*/
} else if (analysisParams.noiseModel == HntmAnalyzerParams.WAVEFORM
|| (analysisParams.noiseModel == HntmAnalyzerParams.VOICEDNOISE_LPC_UNVOICEDNOISE_WAVEFORM && !isVoiced)
|| (analysisParams.noiseModel == HntmAnalyzerParams.UNVOICEDNOISE_LPC_VOICEDNOISE_WAVEFORM && isVoiced)) {
if (i < totalFrm - 1)
waveformNoiseEndInd = Math.max(0, SignalProcUtils.time2sample(
hnmSignal.frames[i + 1].tAnalysisInSeconds, hnmSignal.samplingRateInHz));
else
waveformNoiseEndInd = SignalProcUtils.time2sample(hnmSignal.originalDurationInSeconds,
hnmSignal.samplingRateInHz);
frameWaveforms[i] = ArrayUtils.copy(frmNoise);
if (!analysisParams.overlapNoiseWaveformModel)
frameWaveforms[i] = ArrayUtils.subarray(frameWaveforms[i], 0, waveformNoiseEndInd
- waveformNoiseStartInd + 1);
if (isVoiced && analysisParams.hpfBeforeNoiseAnalysis && analysisParams.decimateNoiseWaveform)
frameWaveforms[i] = SignalProcUtils.decimate(frameWaveforms[i], 0.5 * hnmSignal.samplingRateInHz
/ (0.5 * hnmSignal.samplingRateInHz - hnmSignal.frames[i].maximumFrequencyOfVoicingInHz));
waveformNoiseStartInd = waveformNoiseEndInd + 1;
}
} else
hnmSignal.frames[i].n = null;
//
}
if (!analysisParams.isSilentAnalysis)
System.out.println("Noise analysis completed at " + String.valueOf(hnmSignal.frames[i].tAnalysisInSeconds)
+ "s. for frame " + String.valueOf(i + 1) + " of " + String.valueOf(totalFrm));
}
if (analysisParams.noiseModel == HntmAnalyzerParams.WAVEFORM
|| analysisParams.noiseModel == HntmAnalyzerParams.VOICEDNOISE_LPC_UNVOICEDNOISE_WAVEFORM
|| analysisParams.noiseModel == HntmAnalyzerParams.UNVOICEDNOISE_LPC_VOICEDNOISE_WAVEFORM) {
// Synthesize the noise waveform from overlapping waveform frames
double[] noisePartWaveform = HntmAnalyzerNoisePartWaveformSynthesizer.synthesize(hnmSignal, frameWaveforms,
analysisParams, synthesisParamsForNoiseAnalysis);
packNoisePartWaveforms(hnmSignal, noisePartWaveform);
}
}
}
// Pack the noise part waveform in non-overlapping chunks in hnmSignal
public static void packNoisePartWaveforms(HntmSpeechSignal hnmSignal, double[] noisePartWaveform) {
int frameStartInd = 0;
int frameEndInd;
double[] frameWaveform = null;
for (int i = 0; i < hnmSignal.frames.length; i++) {
if (i < hnmSignal.frames.length - 1)
frameEndInd = SignalProcUtils.time2sample(hnmSignal.frames[i].tAnalysisInSeconds, hnmSignal.samplingRateInHz);
else
frameEndInd = noisePartWaveform.length - 1;
frameWaveform = ArrayUtils.subarray(noisePartWaveform, frameStartInd, frameEndInd - frameStartInd + 1);
hnmSignal.frames[i].n = new FrameNoisePartWaveform(frameWaveform);
frameStartInd = frameEndInd + 1;
}
}
public ComplexNumber[] estimateComplexAmplitudes(double[] s, double[] wgt, double f0InHz, int L, double samplingRateInHz,
int lastCorrelatedHarmonicNeighbour) {
int t, i, k;
ComplexNumber[] xpart = null;
double harmonicSample;
double noiseSample;
int M = s.length;
// assert M % 2==1; //Frame length should be odd
int N;
double tShift;
if (M % 2 == 1) {
N = (M - 1) / 2;
tShift = 0.0;
} else {
N = M / 2;
tShift = 0.5 / samplingRateInHz;
}
ComplexNumber[][] B = new ComplexNumber[M][2 * L + 1];
double omega;
ComplexNumber[][] W = MathUtils.diagonalComplexMatrix(wgt);
for (k = -L; k <= L; k++) {
for (t = 0; t < M; t++) {
omega = MathUtils.TWOPI * k * f0InHz * ((t + tShift) / samplingRateInHz);
B[t][k + L] = new ComplexNumber((float) Math.cos(omega), (float) Math.sin(omega));
}
}
ComplexNumber[][] BTWTW = MathUtils.matrixProduct(MathUtils.hermitianTranspoze(B), MathUtils.transpoze(W));
BTWTW = MathUtils.matrixProduct(BTWTW, W);
ComplexNumber[] b = MathUtils.matrixProduct(BTWTW, s); // MathUtils.multiply(s,2.0));
ComplexNumber[][] R = MathUtils.matrixProduct(BTWTW, B);
// Set some R entries equal to zero to neglect interaction between far harmonics
if (lastCorrelatedHarmonicNeighbour > -1 && lastCorrelatedHarmonicNeighbour < L) {
for (i = 0; i < 2 * L + 1; i++) {
for (k = 0; k < 2 * L + 1; k++) {
if (i > k + lastCorrelatedHarmonicNeighbour || k > i + lastCorrelatedHarmonicNeighbour)
R[i][k] = new ComplexNumber(0.0f, 0.0f);
}
}
}
//
/*
* //Use matrix inversion ComplexNumber[][] invR = MathUtils.inverse(R); ComplexNumber[] x =
* MathUtils.matrixProduct(invR,MathUtils.multiplyComplex(b, 1.0));
*/
// Use generalized Levinson
ComplexNumber[] r = new ComplexNumber[R.length];
for (i = 0; i < R.length; i++)
r[i] = new ComplexNumber(R[i][0]);
// FileUtils.toTextFile(R, "d:/string_Rall.txt");
// FileUtils.toTextFile(r, "d:/string_r.txt");
// FileUtils.toTextFile(b, "d:/string_b.txt");
ComplexNumber[] x = MathUtils.levinson(r, MathUtils.multiplyComplex(b, 1.0));
// FileUtils.toTextFile(x, "d:/string_x.txt");
xpart = new ComplexNumber[L];
for (k = L + 1; k <= 2 * L; k++)
xpart[k - (L + 1)] = new ComplexNumber(2.0f * x[k].real, 2.0f * x[k].imag);
// MaryUtils.plot(MathUtils.amp2db(MathUtils.magnitudeComplex(xpart)));
// Display
// MaryUtils.plot(MathUtils.amp2db(MathUtils.magnitudeComplex(x)));
// MaryUtils.plot(MathUtils.amp2db(MathUtils.magnitudeComplex(xpart)));
//
// StringUtils.toTextFile(MathUtils.phaseInRadians(xpart), "d:\\hamming.txt");
return xpart;
}
public ComplexNumber[] estimateComplexAmplitudesJampack(double[] frm, double[] wgt, double f0InHz, int L,
double samplingRateInHz, int lastCorrelatedHarmonicNeighbour) throws JampackException {
if (Parameters.getBaseIndex() != 0)
Parameters.setBaseIndex(0);
int t, i, k;
ComplexNumber[] xpart = null;
double harmonicSample;
double noiseSample;
int M = frm.length;
// assert M % 2==1; //Frame length should be odd
int N;
double tShift;
if (M % 2 == 1) {
N = (M - 1) / 2;
tShift = 0.0;
} else {
N = M / 2;
tShift = 0.5 / samplingRateInHz;
}
Zmat B = new Zmat(M, 2 * L + 1);
double omega;
Zdiagmat W = new Zdiagmat(wgt.length);
for (i = 0; i < wgt.length; i++)
W.put(i, new Z(wgt[i], 0.0));
for (k = -L; k <= L; k++) {
for (t = 0; t < M; t++) {
omega = MathUtils.TWOPI * k * f0InHz * ((t + tShift) / samplingRateInHz);
B.put(t, k + L, new Z(Math.cos(omega), Math.sin(omega)));
}
}
Zmat s = new Zmat(frm.length, 1);
for (i = 0; i < frm.length; i++)
s.put(i, 0, new Z(frm[i], 0.0));
Zmat BT = H.o(B);
Zmat BTWTW = Times.o(BT, W);
BTWTW = Times.o(BTWTW, W);
Zmat b = Times.o(BTWTW, s);
Zmat R = Times.o(BTWTW, B);
// Set some R entries equal to zero to neglect interaction between far harmonics
if (lastCorrelatedHarmonicNeighbour > -1 && lastCorrelatedHarmonicNeighbour < L) {
for (i = 0; i < 2 * L + 1; i++) {
for (k = 0; k < 2 * L + 1; k++) {
if (i > k + lastCorrelatedHarmonicNeighbour || k > i + lastCorrelatedHarmonicNeighbour)
R.put(i, k, new Z(0.0, 0.0));
}
}
}
//
// Use matrix inversion
Zmat invR = Inv.o(R);
Zmat x = Times.o(invR, b);
xpart = new ComplexNumber[L];
for (k = L + 1; k <= 2 * L; k++)
xpart[k - (L + 1)] = new ComplexNumber(2.0f * x.get(k, 0).re, 2.0f * x.get(k, 0).im);
return xpart;
}
public ComplexNumber[] estimateComplexAmplitudesTD(double[] x, double f0InHz, int L, double samplingRateInHz) {
int N = x.length;
double[][] Q = new double[N][2 * L];
double w0InRadians = SignalProcUtils.hz2radian(f0InHz, (int) samplingRateInHz);
int i, j;
for (i = 0; i < N; i++) {
for (j = 1; j <= L; j++)
Q[i][j - 1] = Math.cos(i * j * w0InRadians);
for (j = L + 1; j <= 2 * L; j++)
Q[i][j - 1] = Math.sin(i * (j - L) * w0InRadians);
}
double[][] QT = MathUtils.transpoze(Q);
double[][] QTQInv = MathUtils.inverse(MathUtils.matrixProduct(QT, Q));
double[] hopt = MathUtils.matrixProduct(MathUtils.matrixProduct(QTQInv, QT), x);
ComplexNumber[] xpart = new ComplexNumber[L];
for (i = 0; i < L; i++)
xpart[i] = new ComplexNumber((float) hopt[i], (float) hopt[i + L]);
return xpart;
}
// Complex amplitude estimation for harmonics in time domain (Diagonal correlation matrix approach, harmonics assumed
// independent)
// The main advantage is the operation being in time domain.
// Therefore, we can use window sizes as short as two pitch periods and track rapid changes in amplitudes and phases
// N: local pitch period in samples
// wgtSquared: window weights squared
// frm: speech frame to be analysed (its length should be 2*N+1)
// Uses Equation 3.32 in Stylianou`s thesis
// This requires harmonics to be uncorrelated.
// We use this for estimating pseudo-harmonic amplitudes of the noise part.
// Note that this function is equivalent to peak-picking in the frequency domain in Quatieri´s sinusoidal framework.
public ComplexNumber[] estimateComplexAmplitudesUncorrelated(double[] frm, double[] wgtSquared, int L, double f0InHz,
double samplingRateInHz) {
int M = frm.length;
int N;
double tShift;
if (M % 2 == 1) {
N = (M - 1) / 2;
tShift = 0.0;
} else {
N = M / 2;
tShift = 0.5 / samplingRateInHz;
}
ComplexNumber tmp;
int t, k;
double omega;
double denum = 0.0;
for (t = 0; t < M; t++)
denum += wgtSquared[t];
ComplexNumber[] Ak = new ComplexNumber[L];
for (k = 1; k <= L; k++) {
Ak[k - 1] = new ComplexNumber(0.0f, 0.0f);
for (t = 0; t < M; t++) {
omega = -1.0 * MathUtils.TWOPI * k * f0InHz * ((double) (t + tShift) / samplingRateInHz);
tmp = new ComplexNumber((float) (wgtSquared[t] * frm[t] * Math.cos(omega)),
(float) (wgtSquared[t] * frm[t] * Math.sin(omega)));
Ak[k - 1] = MathUtils.addComplex(Ak[k - 1], tmp);
}
Ak[k - 1] = MathUtils.divide(Ak[k - 1], denum);
}
return Ak;
}
public ComplexNumber[] estimateComplexAmplitudesPeakPicking(double[] windowedFrm, int spectralEnvelopeType, boolean isVoiced,
float f0, float maximumFreqOfVoicingInHz, boolean bEstimateHNMVoicing, SinusoidalAnalysisParams params) {
int k;
ComplexNumber[] x = null;
int numHarmonics = (int) Math.floor(maximumFreqOfVoicingInHz / f0 + 0.5);
if (numHarmonics > 0) {
/*
* float[] initialPeakLocationsInHz = new float[numHarmonics+1]; initialPeakLocationsInHz[0] = 0.0f; for (int i=1;
* i<numHarmonics+1; i++) initialPeakLocationsInHz[i] = i*f0;
*/
float[] initialPeakLocationsInHz = new float[numHarmonics];
for (int i = 0; i < numHarmonics; i++)
initialPeakLocationsInHz[i] = (i + 1) * f0;
NonharmonicSinusoidalSpeechFrame nhs = SinusoidalAnalyzer.analyze_frame(windowedFrm, false, spectralEnvelopeType,
isVoiced, f0, maximumFreqOfVoicingInHz, bEstimateHNMVoicing, params, initialPeakLocationsInHz);
x = new ComplexNumber[nhs.sinusoids.length];
for (int i = 0; i < nhs.sinusoids.length; i++)
x[i] = MathUtils.ampPhase2ComplexNumber(nhs.sinusoids[i].amp, nhs.sinusoids[i].phase);
}
return x;
}
// This is an implementation of the harmonics parameter estimation algorithm
// described in Stylianou`s PhD Thesis, Appendix A
public ComplexNumber[] estimateComplexAmplitudesSplitOptimization(double[] x, double[] w, double f0InHz, int L,
double samplingRateInHz) {
int M = x.length;
if (M % 2 != 1) {
System.out.println("Error! Frame length should be odd...");
return null;
}
int N = (M - 1) / 2;
double w0InRadians = SignalProcUtils.hz2radian(f0InHz, (int) samplingRateInHz);
double[][] W = MathUtils.diagonalMatrix(w);
double[][] B = new double[M][2 * L + 1];
int i, j;
for (i = 1; i <= L; i++) {
for (j = -N; j <= N; j++) {
B[j + N][2 * (i - 1)] = Math.cos(i * j * w0InRadians);
B[j + N][2 * (i - 1) + 1] = Math.sin(i * j * w0InRadians);
}
}
for (j = -N; j <= N; j++)
B[j + N][2 * L] = 1.0;
double[][] BTWT = MathUtils.matrixProduct(MathUtils.transpoze(B), MathUtils.transpoze(W));
double[] Ws = MathUtils.matrixProduct(W, x);
double[] b = MathUtils.matrixProduct(BTWT, Ws);
double[][] Aodd = new double[L + 1][L + 1];
double[][] Aeven = new double[L][L];
double[] r = new double[2 * L + 1];
int n;
for (i = 0; i <= 2 * L; i++) {
r[i] = 0.0;
for (n = 1; n <= N; n++)
r[i] += w[n + N] * w[n + N] * Math.cos(i * n * w0InRadians);
}
for (i = 1; i <= L; i++) {
for (j = 1; j <= L; j++)
Aeven[i - 1][j - 1] = r[Math.abs(i - j)] - r[i + j];
}
for (i = 1; i <= L; i++) {
for (j = 1; j <= L; j++)
Aodd[i - 1][j - 1] = r[Math.abs(i - j)] - r[i + j];
}
for (i = 1; i <= L; i++)
Aodd[i - 1][L] = 2 * r[i] + w[N] * w[N];
Aodd[L][L] = 2 * r[0] + w[N] * w[N];
double[] bodd = new double[L + 1];
double[] beven = new double[L];
for (i = 0; i < L + 1; i++)
bodd[i] = b[2 * i];
for (i = 0; i < L; i++)
beven[i] = b[2 * i + 1];
// Direct solutions using matrix inversion
double[] cSol = MathUtils.matrixProduct(MathUtils.inverse(Aodd), bodd);
double[] sSol = MathUtils.matrixProduct(MathUtils.inverse(Aeven), beven); //
//
// TO DO: We can use Gauss-Seidel method, or Successive Over Relaxation method to solve the two systems of linear
// equations without matrix inversion
// We´ll have to convert Matlab codes of these methods into Java...
// Note that:
// cSol = [c1 c2 ... sL a0]^T
// sSol = [s1 s2 ... sL]^T
// The harmonic amplitudes are {ak} = {a0, sqrt(c1^2+s1^2), sqrt(c2^2+s2^2), ..., sqrt(cL^2+sL^2)
// and the phases are {phik} = {0.0, -arctan(s1/c1), -arctan(s2/c2), ..., -arctan(sL/cL)
ComplexNumber[] xpart = null;
int k;
xpart = new ComplexNumber[L];
for (k = 1; k <= L; k++)
xpart[k - 1] = new ComplexNumber((float) cSol[k - 1], (float) (-1.0 * sSol[k - 1]));
return xpart;
}
// Phase envelope estimation and unwrapping to ensure phase continuity in frequency domain
public static double[][] unwrapPhasesAlongHarmonics(HntmSpeechSignal hntmSignal) {
double[] maximumFrequencyOfVoicingsInHz = hntmSignal.getMaximumFrequencyOfVoicings();
double[][] phases = hntmSignal.getPhasesInRadians();
double[][] newPhases = null;
if (phases != null) {
int i, k;
int maxNumHarmonics = 0;
int numHarmonicsCurrentFrame;
int totalFrames = maximumFrequencyOfVoicingsInHz.length;
assert phases.length == totalFrames;
for (i = 0; i < totalFrames; i++) {
if (maximumFrequencyOfVoicingsInHz[i] > 0.0f && phases[i] != null) {
numHarmonicsCurrentFrame = phases.length;
if (numHarmonicsCurrentFrame > maxNumHarmonics)
maxNumHarmonics = numHarmonicsCurrentFrame;
}
}
double[] dphaseks = new double[maxNumHarmonics];
Arrays.fill(dphaseks, 0.0f);
newPhases = new double[phases.length][];
for (i = 0; i < phases.length; i++) {
if (phases[i] != null)
newPhases[i] = new double[phases[i].length];
}
boolean isPrevTrackVoiced;
int Mk;
for (i = 0; i < totalFrames; i++) {
if (maximumFrequencyOfVoicingsInHz[i] > 0.0f && phases[i] != null) {
System.arraycopy(phases[i], 0, newPhases[i], 0, phases[i].length);
for (k = 1; k < phases[i].length - 1; k++) {
isPrevTrackVoiced = false;
if (i > 0 && phases[i - 1] != null && phases[i - 1].length > k)
isPrevTrackVoiced = true;
if (!isPrevTrackVoiced) // First voiced frame of a voiced segment
dphaseks[k - 1] = phases[i][k] - phases[i][k - 1];
Mk = (int) (Math.floor((dphaseks[k - 1] + phases[i][k] - phases[i][k + 1]) / (MathUtils.TWOPI) + 0.5));
newPhases[i][k + 1] += Mk * MathUtils.TWOPI;
dphaseks[k] = newPhases[i][k + 1] - phases[i][k];
}
}
}
}
return newPhases;
}
}