/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.sinusoidal;
import java.io.File;
import java.io.IOException;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;
import marytts.signalproc.analysis.PitchMarks;
import marytts.signalproc.analysis.PitchReaderWriter;
import marytts.signalproc.filter.ComplementaryFilterBankAnalyser;
import marytts.signalproc.filter.FIRBandPassFilterBankAnalyser;
import marytts.signalproc.filter.FIRWaveletFilterBankAnalyser;
import marytts.signalproc.filter.FilterBankAnalyserBase;
import marytts.signalproc.filter.Subband;
import marytts.signalproc.window.Window;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;
/**
* A basic multi-resolution version of the sinusoidal analyzer. The idea is to adjust time-frequency resolution to resolve
* sinusoids better using a wavelet transform like approach. For this purpose, the original signal is subband filtered and
* sinusoidal parameters are extracted from the subbands using different window and skip sizes. This class has not been tested
* sufficiently and the subband decomposition procedure does not seem to be appropriate for this kind of analysis.
*
* @author Oytun Türk
*
*/
public class MultiresolutionSinusoidalAnalyzer extends BaseSinusoidalAnalyzer {
    /**
     * Filterbank used to split the input signal into subbands. Remains {@code null} when the filterbank type given to the
     * constructor is none of the {@link FilterBankAnalyserBase} constants handled there.
     */
    public FilterBankAnalyserBase filterbankAnalyser;

    /** Filterbank type as passed to the constructor (compared against the {@link FilterBankAnalyserBase} constants). */
    public int multiresolutionFilterbankType;

    /** Number of bands; rounded up to a power of two by the constructor for the complementary filterbank. */
    public int numBands;

    /** Sampling rate as passed to the constructor. */
    public int samplingRate;

    /**
     * Creates the analyzer and the filterbank matching the requested type.
     *
     * @param multiresolutionFilterbankTypeIn
     *            one of {@code FilterBankAnalyserBase.FIR_BANDPASS_FILTERBANK},
     *            {@code FilterBankAnalyserBase.FIR_WAVELET_FILTERBANK} or
     *            {@code FilterBankAnalyserBase.COMPLEMENTARY_FILTERBANK}; for any other value no filterbank is created
     * @param numBandsIn
     *            requested number of bands; for the complementary filterbank a value that is not a power of two is rounded up
     *            to the next power of two (minimum 2) and a notice is printed to standard output
     * @param samplingRateIn
     *            sampling rate handed to the filterbank constructors
     */
    public MultiresolutionSinusoidalAnalyzer(int multiresolutionFilterbankTypeIn, int numBandsIn, int samplingRateIn) {
        multiresolutionFilterbankType = multiresolutionFilterbankTypeIn;
        numBands = numBandsIn;
        samplingRate = samplingRateIn;
        filterbankAnalyser = null;

        if (multiresolutionFilterbankType == FilterBankAnalyserBase.FIR_BANDPASS_FILTERBANK) {
            double overlapAround1000Hz = 100.0;
            filterbankAnalyser = new FIRBandPassFilterBankAnalyser(numBands, samplingRate, overlapAround1000Hz);
        } else if (multiresolutionFilterbankType == FilterBankAnalyserBase.FIR_WAVELET_FILTERBANK) {
            filterbankAnalyser = new FIRWaveletFilterBankAnalyser(numBands, samplingRate);
        } else if (multiresolutionFilterbankType == FilterBankAnalyserBase.COMPLEMENTARY_FILTERBANK) {
            if (!MathUtils.isPowerOfTwo(numBands)) {
                // Round up to the next power of two, starting from 2
                int tmpNumBands = 2;
                while (tmpNumBands < numBands) {
                    tmpNumBands *= 2;
                }
                numBands = tmpNumBands;
                System.out.println("Number of bands should be a power of two for the complementary filterbank");
            }

            int baseFilterOrder = SignalProcUtils.getFIRFilterOrder(samplingRate);
            int numLevels = numBands - 1;
            filterbankAnalyser = new ComplementaryFilterBankAnalyser(numLevels, baseFilterOrder);
        }
    }

    /**
     * Fixed rate version: delegates to the full {@code analyze} overload with pitch synchronous analysis switched off
     * ({@code bPitchSynchronousAnalysis=false}, {@code pm=null}, {@code numPeriods=0.0f}).
     *
     * @param x
     *            input signal
     * @param lowestBandWindowSizeInSeconds
     *            analysis window size for the first subband; it is halved for each following subband
     * @param windowType
     *            window type forwarded to {@link SinusoidalAnalysisParams}
     * @param bRefinePeakEstimatesParabola
     *            forwarded to {@link SinusoidalAnalysisParams}
     * @param bRefinePeakEstimatesBias
     *            forwarded to {@link SinusoidalAnalysisParams}
     * @param bSpectralReassignment
     *            forwarded to {@link SinusoidalAnalysisParams}
     * @param bAdjustNeighFreqDependent
     *            forwarded to {@link SinusoidalAnalysisParams}
     * @param bFreqLimitedAnalysis
     *            restrict each subband analysis to the subband's frequency range; only honoured for the FIR bandpass
     *            filterbank
     * @return one {@link SinusoidalTracks} entry per subband
     */
    public SinusoidalTracks[] analyze(double[] x, double lowestBandWindowSizeInSeconds, int windowType,
            boolean bRefinePeakEstimatesParabola, boolean bRefinePeakEstimatesBias, boolean bSpectralReassignment,
            boolean bAdjustNeighFreqDependent, boolean bFreqLimitedAnalysis) {
        return analyze(x, lowestBandWindowSizeInSeconds, windowType, bRefinePeakEstimatesParabola, bRefinePeakEstimatesBias,
                bSpectralReassignment, bAdjustNeighFreqDependent, bFreqLimitedAnalysis, false, null, 0.0f);
    }

    /**
     * Fixed rate and pitch synchronous version. Set {@code bPitchSynchronousAnalysis=false} to get the fixed rate version; in
     * that case {@code pm} can be anything (i.e. {@code null}) and {@code numPeriods} can be a dummy value since they are only
     * used for pitch synchronous processing.
     * <p>
     * The input is split into subbands by the configured filterbank. With pitch synchronous analysis enabled, only the first
     * subband (index 0) is analysed pitch synchronously; all other subbands are analysed at a fixed rate. Window and skip
     * sizes are halved for each following subband.
     *
     * @param x
     *            input signal
     * @param lowestBandWindowSizeInSeconds
     *            analysis window size for the first subband
     * @param windowType
     *            window type forwarded to {@link SinusoidalAnalysisParams}
     * @param bRefinePeakEstimatesParabola
     *            forwarded to {@link SinusoidalAnalysisParams}
     * @param bRefinePeakEstimatesBias
     *            forwarded to {@link SinusoidalAnalysisParams}
     * @param bSpectralReassignment
     *            forwarded to {@link SinusoidalAnalysisParams}
     * @param bAdjustNeighFreqDependent
     *            forwarded to {@link SinusoidalAnalysisParams}
     * @param bFreqLimitedAnalysis
     *            restrict each subband analysis to the subband's frequency range; ignored (treated as {@code false}) unless
     *            the FIR bandpass filterbank is used
     * @param bPitchSynchronousAnalysis
     *            analyse the first subband pitch synchronously
     * @param pm
     *            pitch marks; only used when {@code bPitchSynchronousAnalysis=true}
     * @param numPeriods
     *            number of pitch periods per analysis window; only used when {@code bPitchSynchronousAnalysis=true}
     * @return array holding one {@link SinusoidalTracks} per subband; it has at least {@code numBands} entries, and all
     *         entries are {@code null} when no filterbank was created by the constructor
     */
    public SinusoidalTracks[] analyze(double[] x, double lowestBandWindowSizeInSeconds, int windowType,
            boolean bRefinePeakEstimatesParabola, boolean bRefinePeakEstimatesBias, boolean bSpectralReassignment,
            boolean bAdjustNeighFreqDependent, boolean bFreqLimitedAnalysis, boolean bPitchSynchronousAnalysis,
            PitchMarks pm, float numPeriods) {
        if (filterbankAnalyser == null) {
            return new SinusoidalTracks[numBands];
        }

        boolean isBandPass = (multiresolutionFilterbankType == FilterBankAnalyserBase.FIR_BANDPASS_FILTERBANK);
        boolean isWavelet = (multiresolutionFilterbankType == FilterBankAnalyserBase.FIR_WAVELET_FILTERBANK);

        // When there is downsampling, no need for frequency limited analysis
        boolean limitToSubbandRange = bFreqLimitedAnalysis && isBandPass;

        Subband[] subbands = filterbankAnalyser.apply(x);

        // Size the result so that every subband has a slot even if the filterbank returns more than numBands subbands
        SinusoidalTracks[] subbandTracks = new SinusoidalTracks[Math.max(numBands, subbands.length)];

        for (int i = 0; i < subbands.length; i++) {
            SinusoidalAnalysisParams params = createSubbandParams(subbands[i], limitToSubbandRange, windowType,
                    bRefinePeakEstimatesParabola, bRefinePeakEstimatesBias, bSpectralReassignment,
                    bAdjustNeighFreqDependent);

            // Window size (and with it the skip size) is halved for each following subband
            float winSizeInSeconds = (float) (lowestBandWindowSizeInSeconds / Math.pow(2.0, i));
            float skipSizeInSeconds = 0.5f * winSizeInSeconds;
            float deltaInHz = 50.0f; // Also make this frequency range dependent??

            if (bPitchSynchronousAnalysis && i == 0) {
                // Pitch synchronous subband analysis is only performed at the lowest frequency subband.
                // winSizeInSeconds is used here only for determining the skip rate.
                PitchSynchronousSinusoidalAnalyzer sa = new PitchSynchronousSinusoidalAnalyzer(params);
                float numPeriodsCurrent = (float) (numPeriods / Math.pow(2.0, i));
                subbandTracks[i] = sa.analyzePitchSynchronous(x, pm, numPeriodsCurrent, skipSizeInSeconds, deltaInHz,
                        SinusoidalAnalysisParams.LP_SPEC);
            } else {
                SinusoidalAnalyzer sa = new SinusoidalAnalyzer(params);

                // NOTE(review): only the wavelet filterbank analyses the subband waveform; the other filterbank types
                // analyse the full-band input x using parameters built from the subband's sampling rate. Confirm this is
                // intended for the complementary filterbank.
                if (isWavelet) {
                    subbandTracks[i] = sa.analyzeFixedRate(subbands[i].waveform, winSizeInSeconds, skipSizeInSeconds,
                            deltaInHz, SinusoidalAnalysisParams.LP_SPEC);
                } else {
                    subbandTracks[i] = sa.analyzeFixedRate(x, winSizeInSeconds, skipSizeInSeconds, deltaInHz,
                            SinusoidalAnalysisParams.LP_SPEC);
                }

                // Normalize overlapping frequency region gains if an overlapping subband structure is used
                if (isBandPass) {
                    normalizeSinusoidalAmplitudes(subbandTracks[i], samplingRate,
                            ((FIRBandPassFilterBankAnalyser) filterbankAnalyser).normalizationFilterTransformedIR);
                }
            }
        }

        return subbandTracks;
    }

    /**
     * Builds the analysis parameters for one subband: either limited to the subband's own frequency range or covering
     * everything from 0 Hz up to half of the subband's sampling rate.
     */
    private static SinusoidalAnalysisParams createSubbandParams(Subband subband, boolean limitToSubbandRange,
            int windowType, boolean bRefinePeakEstimatesParabola, boolean bRefinePeakEstimatesBias,
            boolean bSpectralReassignment, boolean bAdjustNeighFreqDependent) {
        if (limitToSubbandRange) {
            return new SinusoidalAnalysisParams((int) (subband.samplingRate), subband.lowestFreqInHz,
                    subband.highestFreqInHz, windowType, bRefinePeakEstimatesParabola, bRefinePeakEstimatesBias,
                    bSpectralReassignment, bAdjustNeighFreqDependent);
        }

        return new SinusoidalAnalysisParams((int) (subband.samplingRate), 0.0, 0.5 * subband.samplingRate, windowType,
                bRefinePeakEstimatesParabola, bRefinePeakEstimatesBias, bSpectralReassignment,
                bAdjustNeighFreqDependent);
    }

    /**
     * Normalizes sinusoidal amplitudes when an overlapping subband filterbank structure is used. Every amplitude is
     * multiplied in place by the entry of {@code normalizationFilterTransformedIR} selected by the sinusoid's frequency.
     *
     * @param sinTracks
     *            tracks whose amplitudes are scaled in place
     * @param samplingRateIn
     *            sampling rate passed to {@code SignalProcUtils.freq2index} for the frequency to index mapping
     * @param normalizationFilterTransformedIR
     *            per-frequency-index gain factors; its last index is passed to {@code freq2index} as the maximum frequency
     *            index
     */
    public void normalizeSinusoidalAmplitudes(SinusoidalTracks sinTracks, int samplingRateIn,
            double[] normalizationFilterTransformedIR) {
        int maxFreq = normalizationFilterTransformedIR.length - 1;

        for (int i = 0; i < sinTracks.tracks.length; i++) {
            for (int j = 0; j < sinTracks.tracks[i].totalSins; j++) {
                // Track frequencies are converted from radians to Hz (using the tracks' own fs) before the index lookup
                double freqInHz = SignalProcUtils.radian2hz(sinTracks.tracks[i].freqs[j], sinTracks.fs);
                int k = SignalProcUtils.freq2index(freqInHz, samplingRateIn, maxFreq);
                sinTracks.tracks[i].amps[j] *= normalizationFilterTransformedIR[k];
            }
        }
    }

    /**
     * Demo entry point: reads the audio file given as first argument and runs a four band multiresolution analysis with the
     * FIR wavelet filterbank. The resulting tracks are not written anywhere.
     *
     * @param args
     *            {@code args[0]} is the path of the audio file to analyse
     * @throws UnsupportedAudioFileException
     *             if the audio file format is not supported
     * @throws IOException
     *             if the audio file cannot be read
     */
    public static void main(String[] args) throws UnsupportedAudioFileException, IOException {
        if (args.length < 1) {
            System.err.println("Usage: MultiresolutionSinusoidalAnalyzer <audio file>");
            return;
        }

        int samplingRate;
        double[] x;
        AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(args[0]));
        try {
            samplingRate = (int) inputAudio.getFormat().getSampleRate();
            AudioDoubleDataSource signal = new AudioDoubleDataSource(inputAudio);
            x = signal.getAllData();
        } finally {
            // Release the underlying file handle once all samples have been read
            inputAudio.close();
        }

        int multiresolutionFilterbankType;
        // multiresolutionFilterbankType = FilterBankAnalyserBase.FIR_BANDPASS_FILTERBANK;
        multiresolutionFilterbankType = FilterBankAnalyserBase.FIR_WAVELET_FILTERBANK;
        // multiresolutionFilterbankType = FilterBankAnalyserBase.COMPLEMENTARY_FILTERBANK;

        int numBands = 4;
        double lowestBandWindowSizeInSeconds = 0.020;
        int windowType = Window.HAMMING;
        boolean bRefinePeakEstimatesParabola = true;
        boolean bRefinePeakEstimatesBias = true;
        boolean bSpectralReassignment = true;
        boolean bAdjustNeighFreqDependent = true;
        boolean bFreqLimitedAnalysis = false;
        boolean bPitchSynchronous = false;
        float numPeriods = 2.5f;

        MultiresolutionSinusoidalAnalyzer msa = new MultiresolutionSinusoidalAnalyzer(multiresolutionFilterbankType,
                numBands, samplingRate);

        SinusoidalTracks[] subbandTracks = null;
        if (!bPitchSynchronous) {
            subbandTracks = msa.analyze(x, lowestBandWindowSizeInSeconds, windowType, bRefinePeakEstimatesParabola,
                    bRefinePeakEstimatesBias, bSpectralReassignment, bAdjustNeighFreqDependent, bFreqLimitedAnalysis);
        } else {
            // The pitch file is expected next to the audio file; the last four characters of the audio path
            // (e.g. ".wav") are replaced by ".ptc"
            String strPitchFile = args[0].substring(0, args[0].length() - 4) + ".ptc";
            PitchReaderWriter f0 = new PitchReaderWriter(strPitchFile);
            int pitchMarkOffset = 0;
            PitchMarks pm = SignalProcUtils.pitchContour2pitchMarks(f0.contour, samplingRate, x.length,
                    f0.header.windowSizeInSeconds, f0.header.skipSizeInSeconds, true, pitchMarkOffset);
            subbandTracks = msa.analyze(x, lowestBandWindowSizeInSeconds, windowType, bRefinePeakEstimatesParabola,
                    bRefinePeakEstimatesBias, bSpectralReassignment, bAdjustNeighFreqDependent, bFreqLimitedAnalysis,
                    true, pm, numPeriods);
        }
    }
}