/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.sinusoidal.pitch;
import marytts.signalproc.sinusoidal.NonharmonicSinusoidalSpeechFrame;
import marytts.signalproc.sinusoidal.NonharmonicSinusoidalSpeechSignal;
import marytts.util.math.MathUtils;
/**
* Sinusoidal model based pitch tracker
*
* @author Oytun Türk
*
*/
public class BaseSinusoidalPitchTracker {
float[] f0s; // f0 values in Hz
double[] Qs; // Performance measure for each frame
public class F0Value {
public float f0;
public double maxQ;
public F0Value() {
f0 = 0.0f;
maxQ = 0.0;
}
}
public BaseSinusoidalPitchTracker() {
}
public float[] pitchTrack(NonharmonicSinusoidalSpeechSignal sinSignal, int samplingRate, float searchStepInHz,
float minFreqInHz, float maxFreqInHz) {
f0s = null;
Qs = null;
if (sinSignal.framesSins.length > 0) {
f0s = new float[sinSignal.framesSins.length];
Qs = new double[sinSignal.framesSins.length];
F0Value v;
int i;
for (i = 0; i < sinSignal.framesSins.length; i++) {
v = pitchAnalyzeFrame(sinSignal.framesSins[i], samplingRate, searchStepInHz, minFreqInHz, maxFreqInHz);
f0s[i] = v.f0;
Qs[i] = v.maxQ;
// System.out.println("f0=" + String.valueOf(f0s[i]) + " " + String.valueOf(i+1) + " of " +
// String.valueOf(sinSignal.framesSins.length));
}
f0s = postProcessTrack(f0s, Qs);
}
return f0s;
}
public F0Value pitchAnalyzeFrame(NonharmonicSinusoidalSpeechFrame sinFrame, int samplingRate, float searchStepInHz,
float minFreqInHz, float maxFreqInHz) {
F0Value v = new F0Value();
double[] Q = null;
float w0;
int i;
if (sinFrame != null) {
int numCandidates = (int) Math.floor((maxFreqInHz - minFreqInHz) / searchStepInHz + 1 + 0.5);
Q = new double[numCandidates];
int stInd, enInd;
for (i = 0; i < numCandidates; i++) {
w0 = i * searchStepInHz + minFreqInHz;
Q[i] = performanceCriterion(sinFrame, w0, samplingRate);
}
// MaryUtils.plot(Q, true, 1000);
}
// Search for distinct peaks in the Q-function
if (Q != null) {
v.maxQ = MathUtils.getMax(Q);
int numNeighs = (int) Math.floor(10.0f / searchStepInHz + 0.5);
int[] maxInds = MathUtils.getExtrema(Q, numNeighs, numNeighs, true, 0, Q.length - 1, 0.1 * v.maxQ);
if (maxInds != null) {
int maxInd = 0;
v.maxQ = Q[maxInds[maxInd]];
for (i = 1; i < maxInds.length; i++) {
if (Q[maxInds[i]] > v.maxQ) {
v.maxQ = Q[maxInds[i]];
maxInd = i;
}
}
v.f0 = maxInds[maxInd] * searchStepInHz + minFreqInHz;
}
}
if (v.maxQ < 5.0e-5)
v.f0 = 0.0f;
char chTab = 9;
System.out.println(String.valueOf(v.f0) + chTab + String.valueOf(v.maxQ));
return v;
}
// Baseline version that does nothing, implement functionality in derived classes
public double performanceCriterion(NonharmonicSinusoidalSpeechFrame sinFrame, float f0Candidate, int samplingRate) {
return -1.0f;
}
// Post process f0 values to eliminate obvious pitch halving errors
// Note that comb filter based sinusoidal pitch tracker solves pitch doubling automatically.
// But there is always a pitch halving possibility
// This function tries to eliminate obvious halving errors,
// i.e. isolated f0 values that are approximately half of the neighboring f0 values
// The function also checks for isolated voiced or unvoiced f0 values and tries to correct them
public float[] postProcessTrack(float[] f0sIn, double[] QsIn) {
float[] f0sOut = null;
if (f0sIn != null) {
int i, j;
int numfrm = f0sIn.length;
float avgContextF0;
int contextCount = 1;
f0sOut = new float[numfrm];
System.arraycopy(f0sIn, 0, f0sOut, 0, numfrm);
// Search for isolated unvoiceds & voiceds:
for (i = 1; i < numfrm - 1; i++) {
if (f0sOut[i] <= 10.0f && f0sOut[i - 1] > 10.0f && f0sOut[i + 1] > 10.0f) // isolated unvoiced
f0sOut[i] = 0.5f * (f0sOut[i - 1] + f0sOut[i + 1]);
else if (f0sOut[i] > 10.0f && f0sOut[i - 1] <= 10.0f && f0sOut[i + 1] <= 10.0f) // isolated voiced
f0sOut[i] = 0.0f;
}
// Search for isolated halvings & doublings:
// (Doubling check should not be necessary for sinusoidal based pitch trackers!)
for (i = contextCount; i < numfrm - contextCount; i++) {
boolean bAllVoiced = true;
for (j = -contextCount; j <= contextCount; j++) {
if (f0sOut[i + j] < 10.0f) {
bAllVoiced = false;
break;
}
}
if (bAllVoiced) {
avgContextF0 = 0.0f;
for (j = -contextCount; j < 0; j++)
avgContextF0 += f0sOut[i + j];
for (j = 1; j <= contextCount; j++)
avgContextF0 += f0sOut[i + j];
avgContextF0 = avgContextF0 / (2.0f * contextCount);
if (Math.abs(f0sOut[i] - 0.5 * avgContextF0) < 10.0f) // isolated halving
f0sOut[i] *= 2.0f;
else if (Math.abs(f0sOut[i] - 2.0 * avgContextF0) < 10.0f) // isolated doubling
f0sOut[i] *= 0.5f;
}
}
}
return f0sOut;
}
}