/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.sinusoidal;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;
import marytts.signalproc.analysis.PitchMarks;
import marytts.signalproc.analysis.PitchReaderWriter;
import marytts.signalproc.window.Window;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;
/**
* A pitch synchronous analyzer for sinusoidal models
*
* @author Oytun Türk
*
*/
public class PitchSynchronousSinusoidalAnalyzer extends SinusoidalAnalyzer {
    /** Default length of the analysis window, expressed in pitch periods. */
    public static float DEFAULT_ANALYSIS_PERIODS = 2.5f;

    /** f0 values above this threshold mark a frame as voiced. */
    private static final double VOICING_F0_THRESHOLD = 10.0;

    /**
     * Creates a pitch synchronous analyzer.
     *
     * @param paramsIn
     *            analysis parameters; handed unchanged to the {@code SinusoidalAnalyzer} constructor
     */
    public PitchSynchronousSinusoidalAnalyzer(SinusoidalAnalysisParams paramsIn) {
        super(paramsIn);
    }

    /**
     * Pitch synchronous analysis using {@link #DEFAULT_ANALYSIS_PERIODS} periods per frame and an adaptive skip size.
     *
     * @param x
     *            signal to be analyzed
     * @param pm
     *            pitch marks of {@code x}
     * @return the sinusoidal tracks, or whatever the track generator returned (possibly {@code null})
     */
    public SinusoidalTracks analyzePitchSynchronous(double[] x, PitchMarks pm) {
        return analyzePitchSynchronous(x, pm, DEFAULT_ANALYSIS_PERIODS, -1.0f);
    }

    /**
     * Pitch synchronous analysis with an adaptive skip size.
     *
     * @param x
     *            signal to be analyzed
     * @param pm
     *            pitch marks of {@code x}
     * @param numPeriods
     *            number of pitch periods covered by each analysis frame
     * @return the sinusoidal tracks, or whatever the track generator returned (possibly {@code null})
     */
    public SinusoidalTracks analyzePitchSynchronous(double[] x, PitchMarks pm, float numPeriods) {
        return analyzePitchSynchronous(x, pm, numPeriods, -1.0f);
    }

    /**
     * Pitch synchronous analysis using a fixed skip size and the default frequency deviation
     * {@code SinusoidalAnalysisParams.DEFAULT_DELTA_IN_HZ}.
     *
     * @param x
     *            signal to be analyzed
     * @param pm
     *            pitch marks of {@code x}
     * @param numPeriods
     *            number of pitch periods covered by each analysis frame
     * @param skipSizeInSeconds
     *            fixed skip size in seconds; a non-positive value (e.g. -1.0f) selects adaptive skipping
     * @return the sinusoidal tracks, or whatever the track generator returned (possibly {@code null})
     */
    public SinusoidalTracks analyzePitchSynchronous(double[] x, PitchMarks pm, float numPeriods, float skipSizeInSeconds) {
        return analyzePitchSynchronous(x, pm, numPeriods, skipSizeInSeconds, SinusoidalAnalysisParams.DEFAULT_DELTA_IN_HZ);
    }

    /**
     * Pitch synchronous analysis using the {@code SinusoidalAnalysisParams.LP_SPEC} spectral envelope.
     *
     * @param x
     *            signal to be analyzed
     * @param pm
     *            pitch marks of {@code x}
     * @param numPeriods
     *            number of pitch periods covered by each analysis frame
     * @param skipSizeInSeconds
     *            fixed skip size in seconds; a non-positive value selects adaptive skipping
     * @param deltaInHz
     *            maximum allowed frequency deviation when creating sinusoidal tracks
     * @return the sinusoidal tracks, or whatever the track generator returned (possibly {@code null})
     */
    public SinusoidalTracks analyzePitchSynchronous(double[] x, PitchMarks pm, float numPeriods, float skipSizeInSeconds,
            float deltaInHz) {
        return analyzePitchSynchronous(x, pm, numPeriods, skipSizeInSeconds, deltaInHz, SinusoidalAnalysisParams.LP_SPEC);
    }

    /**
     * Pitch synchronous analysis without initial peak locations.
     *
     * @param x
     *            signal to be analyzed
     * @param pm
     *            pitch marks of {@code x}
     * @param numPeriods
     *            number of pitch periods covered by each analysis frame
     * @param skipSizeInSeconds
     *            fixed skip size in seconds; a non-positive value selects adaptive skipping
     * @param deltaInHz
     *            maximum allowed frequency deviation when creating sinusoidal tracks
     * @param spectralEnvelopeType
     *            spectral envelope estimation method (NO_SPEC, LP_SPEC, SEEVOC_SPEC or REGULARIZED_CEPS)
     * @return the sinusoidal tracks, or whatever the track generator returned (possibly {@code null})
     */
    public SinusoidalTracks analyzePitchSynchronous(double[] x, PitchMarks pm, float numPeriods, float skipSizeInSeconds,
            float deltaInHz, int spectralEnvelopeType) {
        return analyzePitchSynchronous(x, pm, numPeriods, skipSizeInSeconds, deltaInHz, spectralEnvelopeType, null);
    }

    /**
     * Pitch synchronous analysis: extracts the sinusoids of every frame, links them into tracks and, when tracks were
     * produced, computes their statistics and stores the absolute maximum and total energy of the input on them.
     *
     * @param x
     *            speech/audio signal to be analyzed
     * @param pm
     *            pitch marks of {@code x} (sample indices of pitch period start instants)
     * @param numPeriods
     *            number of pitch periods to be used in analysis
     * @param skipSizeInSeconds
     *            skip size for fixed skip rate but pitch synchronous analysis (enter -1.0f for using adaptive skip rates of
     *            one complete pitch period)
     * @param deltaInHz
     *            maximum allowed frequency deviation when creating sinusoidal tracks
     * @param spectralEnvelopeType
     *            spectral envelope estimation method with possible values NO_SPEC (do not compute spectral envelope),
     *            LP_SPEC (linear prediction based envelope), SEEVOC_SPEC (Spectral Envelope Estimation Vocoder based
     *            envelope), REGULARIZED_CEPS (regularized cepstrum based envelope)
     * @param initialPeakLocationsInHz
     *            optional initial peak locations in Hz; {@code null} to let the frame analysis search for peaks itself
     * @return the tracks returned by the track generator; {@code null} is passed through unchanged
     */
    public SinusoidalTracks analyzePitchSynchronous(double[] x, PitchMarks pm, float numPeriods, float skipSizeInSeconds,
            float deltaInHz, int spectralEnvelopeType, float[] initialPeakLocationsInHz) {
        NonharmonicSinusoidalSpeechSignal sinSignal = extracSinusoidsPitchSynchronous(x, pm, numPeriods, skipSizeInSeconds,
                deltaInHz, spectralEnvelopeType, initialPeakLocationsInHz);

        // Extract sinusoidal tracks
        TrackGenerator tg = new TrackGenerator();
        SinusoidalTracks sinTracks = tg.generateTracks(sinSignal, deltaInHz, params.fs);

        // Everything that touches sinTracks stays inside the null guard; previously the two
        // field assignments were outside of it and threw a NullPointerException for null tracks.
        if (sinTracks != null) {
            sinTracks.getTrackStatistics();
            getGrossStatistics(sinTracks);
            sinTracks.absMaxOriginal = (float) params.absMax;
            sinTracks.totalEnergy = (float) params.totalEnergy;
        }

        // Add post-processing functionality to here
        return sinTracks;
    }

    /**
     * Extracts frame-wise sinusoids using the {@code SinusoidalAnalysisParams.LP_SPEC} spectral envelope.
     *
     * @param x
     *            signal to be analyzed
     * @param pm
     *            pitch marks of {@code x}
     * @param numPeriods
     *            number of pitch periods covered by each analysis frame
     * @param skipSizeInSeconds
     *            fixed skip size in seconds; a non-positive value selects adaptive skipping
     * @param deltaInHz
     *            forwarded to the full overload (not used by the extraction itself)
     * @return the non-null analysis frames, or {@code null} if no frame produced sinusoids
     */
    public NonharmonicSinusoidalSpeechSignal extracSinusoidsPitchSynchronous(double[] x, PitchMarks pm, float numPeriods,
            float skipSizeInSeconds, float deltaInHz) {
        return extracSinusoidsPitchSynchronous(x, pm, numPeriods, skipSizeInSeconds, deltaInHz, SinusoidalAnalysisParams.LP_SPEC);
    }

    /**
     * Extracts frame-wise sinusoids without initial peak locations.
     *
     * @param x
     *            signal to be analyzed
     * @param pm
     *            pitch marks of {@code x}
     * @param numPeriods
     *            number of pitch periods covered by each analysis frame
     * @param skipSizeInSeconds
     *            fixed skip size in seconds; a non-positive value selects adaptive skipping
     * @param deltaInHz
     *            forwarded to the full overload (not used by the extraction itself)
     * @param spectralEnvelopeType
     *            spectral envelope estimation method
     * @return the non-null analysis frames, or {@code null} if no frame produced sinusoids
     */
    public NonharmonicSinusoidalSpeechSignal extracSinusoidsPitchSynchronous(double[] x, PitchMarks pm, float numPeriods,
            float skipSizeInSeconds, float deltaInHz, int spectralEnvelopeType) {
        // Forward the caller's envelope type; it used to be silently replaced by LP_SPEC.
        return extracSinusoidsPitchSynchronous(x, pm, numPeriods, skipSizeInSeconds, deltaInHz, spectralEnvelopeType, null);
    }

    /**
     * Cuts the signal into pitch synchronous frames, windows each frame and analyzes it with {@code analyze_frame}.
     * <p>
     * Side effects on {@code params}: {@code absMax} and {@code totalEnergy} are set from {@code x}; {@code ss} is set when
     * a fixed skip size is requested; {@code ws} and {@code win} are overwritten for every frame. Progress is printed to
     * standard output for every frame.
     *
     * @param x
     *            signal to be analyzed
     * @param pm
     *            pitch marks of {@code x}; {@code pm.pitchMarks} and {@code pm.f0s} are read
     * @param numPeriods
     *            number of pitch periods covered by each analysis frame
     * @param skipSizeInSeconds
     *            fixed skip size in seconds; a non-positive value makes each frame start at the next pitch mark
     * @param deltaInHz
     *            not used by this method
     * @param spectralEnvelopeType
     *            spectral envelope estimation method handed to the frame analysis
     * @param initialPeakLocationsInHz
     *            optional initial peak locations in Hz, or {@code null}
     * @return a signal holding copies of all non-null frames, or {@code null} if there is none
     */
    public NonharmonicSinusoidalSpeechSignal extracSinusoidsPitchSynchronous(double[] x, PitchMarks pm, float numPeriods,
            float skipSizeInSeconds, float deltaInHz, int spectralEnvelopeType, float[] initialPeakLocationsInHz) {
        params.absMax = MathUtils.getAbsMax(x);
        params.totalEnergy = SignalProcUtils.energy(x);

        // Fixed skip rate but pitch synchronous analysis is useful for time/pitch scale modification
        final boolean bFixedSkipRate = skipSizeInSeconds > 0.0f;
        if (bFixedSkipRate)
            params.ss = (int) Math.floor(skipSizeInSeconds * params.fs + 0.5);

        int totalFrm;
        if (bFixedSkipRate) {
            // NOTE(review): if params.ss is an int this is an integer division, so the added 0.5 has no
            // effect and the frame count is truncated rather than rounded - confirm the intent.
            totalFrm = (int) (x.length / params.ss + 0.5);
        } else {
            totalFrm = (int) Math.floor(pm.pitchMarks.length - numPeriods + 0.5);
            if (totalFrm > pm.pitchMarks.length - 1)
                totalFrm = pm.pitchMarks.length - 1;
        }
        // Fewer pitch marks than analysis periods would give a negative count and a
        // NegativeArraySizeException below; treat it as "no frames" instead.
        if (totalFrm < 0)
            totalFrm = 0;

        NonharmonicSinusoidalSpeechSignal sinSignal = new NonharmonicSinusoidalSpeechSignal(totalFrm);
        final boolean isOutputToTextFile = false;
        int totalNonNull = 0;
        int pmInd = 0;
        int currentTimeInd = 0;
        int i, j;

        // Extract frames and analyze them
        for (i = 0; i < totalFrm; i++) {
            int T0; // local pitch period in samples
            float f0;
            boolean isVoiced;
            int frameStart; // index of the first sample of this frame in x

            if (!bFixedSkipRate) {
                frameStart = pm.pitchMarks[i];
                T0 = pm.pitchMarks[i + 1] - pm.pitchMarks[i];
                f0 = pm.f0s[i];
                isVoiced = pm.f0s[i] > VOICING_F0_THRESHOLD;
            } else {
                frameStart = currentTimeInd;

                // Advance to the first pitch mark at or after the current frame start
                while (pm.pitchMarks[pmInd] < currentTimeInd) {
                    pmInd++;
                    if (pmInd > pm.pitchMarks.length - 1) {
                        pmInd = pm.pitchMarks.length - 1;
                        break;
                    }
                }

                if (pmInd < pm.pitchMarks.length - 1) {
                    T0 = pm.pitchMarks[pmInd + 1] - pm.pitchMarks[pmInd];
                    isVoiced = pm.f0s[pmInd] > VOICING_F0_THRESHOLD;
                } else {
                    // Last pitch mark: fall back to the preceding period
                    T0 = pm.pitchMarks[pmInd] - pm.pitchMarks[pmInd - 1];
                    isVoiced = pm.f0s[pmInd - 1] > VOICING_F0_THRESHOLD;
                }
                f0 = ((float) params.fs) / T0;
            }

            params.ws = (int) Math.floor(numPeriods * T0 + 0.5);
            if (params.ws % 2 == 0) // Always use an odd window size to have a zero-phase analysis window
                params.ws++;
            params.ws = Math.max(params.ws, params.minWindowSize);

            // A new array is zero-filled, so samples beyond the end of x stay zero.
            double[] frm = new double[params.ws];
            // NOTE(review): at most ws - 1 samples are copied, i.e. the last frame sample is always zero.
            // Kept as in the original implementation; confirm whether ws samples were intended.
            int frameEnd = Math.min(frameStart + params.ws - 1, x.length);
            if (frameEnd > frameStart)
                System.arraycopy(x, frameStart, frm, 0, frameEnd - frameStart);

            params.win = Window.get(params.windowType, params.ws);
            params.win.applyInline(frm, 0, params.ws);

            // Time stamp of the frame: middle of the analysis window
            float currentTime = (float) ((frameStart + 0.5f * params.ws) / params.fs);
            if (bFixedSkipRate)
                currentTimeInd += params.ss;

            NonharmonicSinusoidalSpeechFrame frame;
            if (initialPeakLocationsInHz == null)
                frame = (NonharmonicSinusoidalSpeechFrame) analyze_frame(frm, isOutputToTextFile, spectralEnvelopeType,
                        isVoiced, f0, params);
            else
                frame = (NonharmonicSinusoidalSpeechFrame) analyze_frame(frm, isOutputToTextFile, spectralEnvelopeType,
                        isVoiced, f0, initialPeakLocationsInHz[initialPeakLocationsInHz.length - 1] + 50.0f, false, params,
                        initialPeakLocationsInHz);
            sinSignal.framesSins[i] = frame;

            int peakCount = 0;
            if (frame != null) {
                for (j = 0; j < frame.sinusoids.length; j++)
                    frame.sinusoids[j].frameIndex = i;
                frame.time = currentTime;
                peakCount = frame.sinusoids.length;
                totalNonNull++;
            }

            System.out.println("Analysis complete at " + String.valueOf(currentTime) + "s. for frame " + String.valueOf(i + 1)
                    + " of " + String.valueOf(totalFrm) + "(found " + String.valueOf(peakCount) + " peaks)");
        }

        if (totalNonNull <= 0)
            return null;

        // Collect non-null sinusoids only
        NonharmonicSinusoidalSpeechSignal sinSignal2 = new NonharmonicSinusoidalSpeechSignal(totalNonNull);
        int ind = 0;
        for (i = 0; i < totalFrm; i++) {
            if (sinSignal.framesSins[i] != null) {
                sinSignal2.framesSins[ind] = new NonharmonicSinusoidalSpeechFrame(sinSignal.framesSins[i]);
                ind++;
            }
        }
        sinSignal2.originalDurationInSeconds = ((float) x.length) / params.fs;

        return sinSignal2;
    }

    /**
     * Returns {@code path} with its file name extension replaced by {@code newExtension}; when the file name has no
     * extension, {@code newExtension} is simply appended.
     */
    private static String replaceExtension(String path, String newExtension) {
        int dot = path.lastIndexOf('.');
        int separator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        String stem = dot > separator ? path.substring(0, dot) : path;
        return stem + newExtension;
    }

    /**
     * Command line demo: analyzes the audio file given as first argument, using the pitch contour stored next to it in a
     * file with the same name and the extension {@code .ptc}.
     *
     * @param args
     *            {@code args[0]} is the path of the audio file
     * @throws UnsupportedAudioFileException
     *             if the audio file format is not supported
     * @throws IOException
     *             if a file cannot be read
     */
    public static void main(String[] args) throws UnsupportedAudioFileException, IOException {
        if (args == null || args.length < 1) {
            System.err.println("Usage: PitchSynchronousSinusoidalAnalyzer <audioFile>");
            return;
        }

        int samplingRate;
        double[] x;
        AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(args[0]));
        try {
            samplingRate = (int) inputAudio.getFormat().getSampleRate();
            AudioDoubleDataSource signal = new AudioDoubleDataSource(inputAudio);
            x = signal.getAllData();
        } finally {
            // Release the audio file even if reading fails
            inputAudio.close();
        }

        String strPitchFile = replaceExtension(args[0], ".ptc");
        PitchReaderWriter f0 = new PitchReaderWriter(strPitchFile);
        int pitchMarkOffset = 0;
        PitchMarks pm = SignalProcUtils.pitchContour2pitchMarks(f0.contour, samplingRate, x.length,
                f0.header.windowSizeInSeconds, f0.header.skipSizeInSeconds, true, pitchMarkOffset);

        double startFreqInHz = 0.0;
        double endFreqInHz = 0.5 * samplingRate;
        int windowType = Window.HAMMING;
        boolean bRefinePeakEstimatesParabolaIn = true;
        boolean bRefinePeakEstimatesBiasIn = true;
        boolean bSpectralReassignmentIn = true;
        boolean bAdjustNeighFreqDependentIn = true;

        SinusoidalAnalysisParams params = new SinusoidalAnalysisParams(samplingRate, startFreqInHz, endFreqInHz, windowType,
                bRefinePeakEstimatesParabolaIn, bRefinePeakEstimatesBiasIn, bSpectralReassignmentIn, bAdjustNeighFreqDependentIn);
        PitchSynchronousSinusoidalAnalyzer sa = new PitchSynchronousSinusoidalAnalyzer(params);
        sa.analyzePitchSynchronous(x, pm);
    }
}