/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.sinusoidal.hntm.analysis.pitch;
import java.io.File;
import java.io.IOException;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;
import marytts.signalproc.analysis.PitchMarks;
import marytts.signalproc.analysis.PitchReaderWriter;
import marytts.signalproc.sinusoidal.NonharmonicSinusoidalSpeechFrame;
import marytts.signalproc.sinusoidal.NonharmonicSinusoidalSpeechSignal;
import marytts.signalproc.sinusoidal.PitchSynchronousSinusoidalAnalyzer;
import marytts.signalproc.sinusoidal.SinusoidalAnalysisParams;
import marytts.signalproc.sinusoidal.pitch.BaseSinusoidalPitchTracker;
import marytts.signalproc.window.Window;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;
/**
 * Sinusoidal-model pitch tracker whose candidate score is based on Quatieri's book.
 * <p>
 * The class only supplies the per-frame score ({@link #performanceCriterion}); the search over F0 candidates is
 * inherited from {@link BaseSinusoidalPitchTracker}.
 *
 * @author Oytun Türk
 */
public class HarmonicPitchTracker extends BaseSinusoidalPitchTracker {

    /** Score returned when a frame/candidate pair cannot be evaluated at all. */
    private static final double INVALID_CANDIDATE_SCORE = -1e+50;

    public HarmonicPitchTracker() {
    }

    /**
     * Scores how well a fundamental-frequency candidate explains the sinusoids of one analysis frame.
     * <p>
     * With {@code A_l}/{@code f_l} the amplitude/frequency of the l-th sinusoid, {@code S(f)} the value of
     * {@code sinFrame.systemAmps} at the bin {@code freq2index(f)} and {@code N = sinFrame.systemAmps.length}, the
     * returned value is
     *
     * <pre>
     * Q = sum_{l=1..K} A_l * sum_{k=1..K} S(k*f0) * |sinc(f_l - k*f0, 10*N)|  -  0.5 * sum_{k=1..Kw0} S(k*f0)^2
     * </pre>
     *
     * where {@code K} is the number of leading sinusoids below {@code max(1000, f0 + 50)} Hz (capped at
     * {@code sinusoids.length - 1}) and {@code Kw0 = floor(maxFreq / f0 + 1.5)}.
     *
     * @param sinFrame
     *            frame holding the sinusoids and the sampled system amplitudes
     * @param f0Candidate
     *            candidate fundamental frequency in Hz
     * @param samplingRate
     *            sampling rate in Hz
     * @return the score of the candidate, or {@code -1e+50} when the frame has no sinusoid below the frequency limit,
     *         has no sinusoids at all, or the candidate is not a positive frequency
     */
    public double performanceCriterion(NonharmonicSinusoidalSpeechFrame sinFrame, float f0Candidate, int samplingRate) {
        // An empty frame used to fail on sinusoids[0]; a zero candidate used to turn Kw0 into Integer.MAX_VALUE and
        // make the harmonic loop spin forever. Neither can yield a meaningful score.
        if (sinFrame.sinusoids == null || sinFrame.sinusoids.length == 0 || !(f0Candidate > 0.0f)) {
            return INVALID_CANDIDATE_SCORE;
        }

        final double maxFreqInHz = Math.max(1000.0, f0Candidate + 50.0);
        final double maxFreqInRadians = SignalProcUtils.hz2radian(maxFreqInHz, samplingRate);
        final int Kw0 = Math.max(0, (int) Math.floor(maxFreqInHz / f0Candidate + 0.5 + 1));

        // Count the leading sinusoids below the frequency limit; the count never exceeds the last valid index.
        final int lastSinusoidIndex = sinFrame.sinusoids.length - 1;
        int K = 0;
        while (K < lastSinusoidIndex && sinFrame.sinusoids[K].freq < maxFreqInRadians) {
            K++;
        }
        if (K < 1) {
            return INVALID_CANDIDATE_SCORE;
        }

        final int maxFreqIndex = sinFrame.systemAmps.length - 1;
        final int sincLength = 10 * sinFrame.systemAmps.length;

        // The harmonic frequencies and their amplitude bins do not depend on the sinusoid, so compute them once.
        final double[] harmonicFreqsInHz = new double[K];
        final int[] harmonicBins = new int[K];
        for (int k = 1; k <= K; k++) {
            final double kw0 = k * f0Candidate;
            harmonicFreqsInHz[k - 1] = kw0;
            harmonicBins[k - 1] = SignalProcUtils.freq2index(kw0, samplingRate, maxFreqIndex);
        }

        // NOTE(review): the inner harmonic sum is bounded by K (number of sinusoids), not Kw0 (number of harmonics
        // below the frequency limit) - kept as in the original; confirm against the reference formulation.
        double Q = 0.0;
        for (int l = 0; l < K; l++) {
            final double freqHz = SignalProcUtils.radian2hz(sinFrame.sinusoids[l].freq, samplingRate);
            double harmonicMatch = 0.0;
            for (int k = 0; k < K; k++) {
                harmonicMatch += sinFrame.systemAmps[harmonicBins[k]]
                        * Math.abs(MathUtils.sinc(freqHz - harmonicFreqsInHz[k], sincLength));
            }
            Q += sinFrame.sinusoids[l].amp * harmonicMatch;
        }

        // Energy of the system amplitudes sampled at the harmonics of the candidate.
        double harmonicEnergy = 0.0;
        for (int k = 1; k <= Kw0; k++) {
            final double kw0 = k * f0Candidate;
            final int kw0Ind = SignalProcUtils.freq2index(kw0, samplingRate, maxFreqIndex);
            harmonicEnergy += sinFrame.systemAmps[kw0Ind] * sinFrame.systemAmps[kw0Ind];
        }

        return Q - 0.5 * harmonicEnergy;
    }

    /**
     * Returns {@code path} without its file-name extension; the path is returned unchanged when the file name has no
     * dot.
     */
    private static String stripExtension(String path) {
        final int dot = path.lastIndexOf('.');
        final int separator = Math.max(path.lastIndexOf('/'), path.lastIndexOf(File.separatorChar));
        // A dot inside a directory name must not be mistaken for an extension.
        return dot > separator ? path.substring(0, dot) : path;
    }

    /**
     * Command-line demo: tracks the pitch of an audio file and writes the result next to it.
     * <p>
     * Reads the audio file {@code args[0]} and the pitch file with the same base name and extension {@code .ptc},
     * runs the tracker between 40 Hz and 400 Hz in 0.5 Hz steps, writes the contour to the same base name with
     * extension {@code .ptcSin} and prints one line per frame to standard output.
     *
     * @param args
     *            {@code args[0]} is the path of the audio file
     * @throws UnsupportedAudioFileException
     *             if the audio file format is not supported
     * @throws IOException
     *             if reading the input or writing the output fails
     */
    public static void main(String[] args) throws UnsupportedAudioFileException, IOException {
        if (args.length < 1) {
            System.err.println("Usage: java " + HarmonicPitchTracker.class.getName() + " <audio file>");
            return;
        }
        final String audioFile = args[0];
        final String baseName = stripExtension(audioFile);

        final int samplingRate;
        final double[] x;
        final AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(audioFile));
        try {
            samplingRate = (int) inputAudio.getFormat().getSampleRate();
            x = new AudioDoubleDataSource(inputAudio).getAllData();
        } finally {
            // All samples are in memory now; release the underlying file handle.
            inputAudio.close();
        }

        // Pitch search grid
        final float searchStepInHz = 0.5f;
        final float minFreqInHz = 40.0f;
        final float maxFreqInHz = 400.0f;

        // Sinusoidal analysis settings
        final float windowSizeInSeconds = SinusoidalAnalysisParams.DEFAULT_ANALYSIS_WINDOW_SIZE;
        final float skipSizeInSeconds = SinusoidalAnalysisParams.DEFAULT_ANALYSIS_SKIP_SIZE;
        final float deltaInHz = SinusoidalAnalysisParams.DEFAULT_DELTA_IN_HZ;
        // Alternatives: SinusoidalAnalysisParams.LP_SPEC, SinusoidalAnalysisParams.REGULARIZED_CEPS
        final int spectralEnvelopeType = SinusoidalAnalysisParams.SEEVOC_SPEC;
        final double startFreqInHz = 0.0;
        final double endFreqInHz = 0.5 * samplingRate;
        final int windowType = Window.HAMMING;
        final boolean bRefinePeakEstimatesParabola = false;
        final boolean bRefinePeakEstimatesBias = false;
        final boolean bSpectralReassignment = false;
        final boolean bAdjustNeighFreqDependent = false;
        final SinusoidalAnalysisParams params = new SinusoidalAnalysisParams(samplingRate, startFreqInHz, endFreqInHz,
                windowType, bRefinePeakEstimatesParabola, bRefinePeakEstimatesBias, bSpectralReassignment,
                bAdjustNeighFreqDependent);

        // The existing pitch contour is handed to the sinusoidal analyzer.
        final PitchReaderWriter f0 = new PitchReaderWriter(baseName + ".ptc");

        final PitchSynchronousSinusoidalAnalyzer sa = new PitchSynchronousSinusoidalAnalyzer(params);
        final NonharmonicSinusoidalSpeechSignal ss = sa.extractSinusoidsFixedRate(x, windowSizeInSeconds,
                skipSizeInSeconds, deltaInHz, spectralEnvelopeType, f0.contour, (float) f0.header.windowSizeInSeconds,
                (float) f0.header.skipSizeInSeconds);

        final HarmonicPitchTracker p = new HarmonicPitchTracker();
        final float[] f0s = p.pitchTrack(ss, samplingRate, searchStepInHz, minFreqInHz, maxFreqInHz);

        PitchReaderWriter.write_pitch_file(baseName + ".ptcSin", f0s, windowSizeInSeconds, skipSizeInSeconds,
                samplingRate);

        // One line per frame: the time of the frame centre and the estimated F0.
        for (int i = 0; i < f0s.length; i++) {
            System.out.println(String.valueOf(i * skipSizeInSeconds + 0.5f * windowSizeInSeconds) + " sec. = "
                    + String.valueOf(f0s[i]));
        }
    }
}