/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* Permission is hereby granted, free of charge, to use and distribute
* this software and its documentation without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of this work, and to
* permit persons to whom this work is furnished to do so, subject to
* the following conditions:
*
* 1. The code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* 2. Any modifications must be clearly marked as such.
* 3. Original authors' names are not deleted.
* 4. The authors' names are not used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
* CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
* THIS SOFTWARE.
*/
package marytts.signalproc.sinusoidal;
import marytts.signalproc.sinusoidal.hntm.analysis.pitch.HnmPitchVoicingAnalyzerParams;
import marytts.signalproc.window.Window;
import marytts.util.signal.SignalProcUtils;
/**
 * Parameters of sinusoidal model based analysis.
 *
 * <p>
 * This is a plain, mutable parameter holder: settings are exposed as non-private fields and no synchronization is
 * performed by this class.
 *
 * @author oytun.turk
 *
 */
public class SinusoidalAnalysisParams {
    // Static values
    public static final float DEFAULT_DELTA_IN_HZ = 50.0f; // Default frequency delta in Hz (not used within this class)
    public static final float DEFAULT_ANALYSIS_WINDOW_SIZE = 0.020f; // Default fixed rate analysis window size
                                                                     // (presumably in seconds, like MIN_WINDOW_SIZE)
    public static final float DEFAULT_ANALYSIS_SKIP_SIZE = 0.010f; // Default fixed rate analysis skip size
                                                                   // (presumably in seconds, like MIN_WINDOW_SIZE)
    public static final double MIN_ENERGY_TH = 1e-50; // Minimum energy threshold to analyze a frame
    public static final double MIN_PEAK_IN_DB_LOW = -200.0; // Minimum allowed peak value in decibels for lower frequencies
    public static final double MIN_PEAK_IN_DB_HIGH = -200.0; // Minimum allowed peak value in decibels for higher frequencies
    public static final double MIN_VOICED_FREQ_IN_HZ = 4000.0; // Minimum voiced freq allowed (for voiced regions only)
    public static final double MAX_VOICED_FREQ_IN_HZ = 5000.0; // Maximum voiced freq allowed (for voiced regions only)
    public static final boolean DEFAULT_REFINE_PEAK_ESTIMATES_PARABOLA = true; // Parabola fitting based refinement of peak
                                                                                // amplitude values to cope with windowing
                                                                                // effects
    public static final boolean DEFAULT_REFINE_PEAK_ESTIMATES_BIAS = true; // Bias removal based refinement of peak amplitude
                                                                            // values to cope with windowing effects
    public static final int DEFAULT_FREQ_SAMP_NEIGHS_LOW = 2; // Default search range for low frequencies for spectral peak
                                                              // detection
    public static final int DEFAULT_FREQ_SAMP_NEIGHS_HIGH = 2; // Default search range for high frequencies for spectral
                                                               // peak detection
    public static final float MIN_WINDOW_SIZE = 0.020f; // Minimum window size in seconds; multiplied by the sampling rate
                                                        // to obtain minWindowSize in samples

    // Spectral envelope types
    public static final int NO_SPEC = -1; // No spectral envelope information is extracted
    public static final int LP_SPEC = 0; // Linear Prediction (LP) based envelope (Makhoul, 1971)
    public static final int SEEVOC_SPEC = 1; // Spectral Envelope Estimation Vocoder (SEEVOC) based envelope (Paul, 1981)
    public static final int REGULARIZED_CEPS = 2; // Regularized cepstrum based envelope (Cappe, et. al. 1995, Stylianou,
                                                  // et. al. 1995)

    // Instance parameters
    HnmPitchVoicingAnalyzerParams hnmPitchVoicingAnalyzerParams; // Pitch/voicing analyzer parameters (package-private)
    public int fs; // Sampling rate in Hz
    public int windowType; // Type of window (See class Window for details)
    public int fftSize; // FFT size in points
    public int LPOrder; // LP analysis order
    public int lifterOrder; // Cepstral lifting order
    public double startFreq; // Lowest analysis frequency in Hz
    public double endFreq; // Highest analysis frequency in Hz
    public boolean bRefinePeakEstimatesParabola; // Refine peak and frequency estimates by fitting parabolas?
    public boolean bRefinePeakEstimatesBias; // Further refine peak and frequency estimates by correcting bias?
                                             // (Only effective when bRefinePeakEstimatesParabola=true)
    public boolean bSpectralReassignment; // Refine spectral peak frequencies considering windowing effect?
    public int ws; // Window size in samples
    public int ss; // Skip size in samples
    public Window win; // Windowing applier
    public boolean bAdjustNeighFreqDependent; // Adjust number of neighbouring samples to search for a peak adaptively
                                              // depending on frequency?
    public int minWindowSize; // Minimum window size allowed to satisfy 100 Hz criterion for unvoiced sounds computed from
                              // MIN_WINDOW_SIZE and sampling rate
    public double absMax; // Keep absolute max of the input signal for normalization after resynthesis
    public double totalEnergy; // Keep total energy for normalization after resynthesis
    public int regularizedCepstrumWarpingMethod; // Warping method selector; valid values are not defined in this class
                                                 // (presumably relevant for REGULARIZED_CEPS -- confirm against callers)

    /**
     * Copy constructor.
     *
     * <p>
     * A new {@code HnmPitchVoicingAnalyzerParams} is constructed from the one held by {@code paramsIn}; every other field is
     * copied by plain assignment. In particular the {@link #win} reference is shared with {@code paramsIn}, not cloned.
     *
     * @param paramsIn
     *            the parameter set to copy; must not be {@code null}
     * @throws NullPointerException
     *             if {@code paramsIn} is {@code null}
     */
    public SinusoidalAnalysisParams(SinusoidalAnalysisParams paramsIn) {
        if (paramsIn == null) {
            // Same exception type the first dereference would raise, but with an explicit message
            throw new NullPointerException("paramsIn must not be null");
        }

        hnmPitchVoicingAnalyzerParams = new HnmPitchVoicingAnalyzerParams(paramsIn.hnmPitchVoicingAnalyzerParams);

        fs = paramsIn.fs;
        windowType = paramsIn.windowType;
        fftSize = paramsIn.fftSize;
        LPOrder = paramsIn.LPOrder;
        lifterOrder = paramsIn.lifterOrder;

        startFreq = paramsIn.startFreq;
        endFreq = paramsIn.endFreq;

        bRefinePeakEstimatesParabola = paramsIn.bRefinePeakEstimatesParabola;
        bRefinePeakEstimatesBias = paramsIn.bRefinePeakEstimatesBias;
        bSpectralReassignment = paramsIn.bSpectralReassignment;

        ws = paramsIn.ws;
        ss = paramsIn.ss;
        win = paramsIn.win; // shared reference
        bAdjustNeighFreqDependent = paramsIn.bAdjustNeighFreqDependent;
        minWindowSize = paramsIn.minWindowSize;

        absMax = paramsIn.absMax;
        totalEnergy = paramsIn.totalEnergy;
        regularizedCepstrumWarpingMethod = paramsIn.regularizedCepstrumWarpingMethod;
    }

    /**
     * Creates a parameter set for the given sampling rate and analysis options.
     *
     * <p>
     * The FFT size is taken from {@link #getDefaultFFTSize(int)}, and the LP and lifter orders from
     * {@code SignalProcUtils.getLPOrder} and {@code SignalProcUtils.getLifterOrder}. {@link #absMax} starts at
     * {@code -1.0} and {@link #totalEnergy} at {@code 0.0}. The fields {@link #ws}, {@link #ss}, {@link #win} and
     * {@link #regularizedCepstrumWarpingMethod} are not set here and keep their Java default values.
     *
     * @param samplingRate
     *            sampling rate in Hz
     * @param startFreqInHz
     *            lowest analysis frequency in Hz; negative values are replaced by {@code 0.0}
     * @param endFreqInHz
     *            highest analysis frequency in Hz; negative values are replaced by half the sampling rate. Other values
     *            (including values above half the sampling rate) are stored unchanged
     * @param windowTypeIn
     *            type of window (see class {@code Window})
     * @param bRefinePeakEstimatesParabolaIn
     *            refine peak and frequency estimates by fitting parabolas?
     * @param bRefinePeakEstimatesBiasIn
     *            further refine peak and frequency estimates by correcting bias?
     * @param bSpectralReassignmentIn
     *            refine spectral peak frequencies considering windowing effect?
     * @param bAdjustNeighFreqDependentIn
     *            adjust number of neighbouring samples to search for a peak depending on frequency?
     */
    public SinusoidalAnalysisParams(int samplingRate, double startFreqInHz, double endFreqInHz, int windowTypeIn,
            boolean bRefinePeakEstimatesParabolaIn, boolean bRefinePeakEstimatesBiasIn, boolean bSpectralReassignmentIn,
            boolean bAdjustNeighFreqDependentIn) {
        hnmPitchVoicingAnalyzerParams = new HnmPitchVoicingAnalyzerParams();

        fs = samplingRate;

        startFreq = startFreqInHz;
        if (startFreq < 0.0) {
            startFreq = 0.0;
        }

        endFreq = endFreqInHz;
        if (endFreq < 0.0) {
            endFreq = 0.5 * fs;
        }

        windowType = windowTypeIn;

        // Assigned directly instead of through the overridable setSinAnaFFTSize(), so that a subclass override can
        // never run against a partially constructed object.
        fftSize = getDefaultFFTSize(fs);

        bRefinePeakEstimatesParabola = bRefinePeakEstimatesParabolaIn;
        bRefinePeakEstimatesBias = bRefinePeakEstimatesBiasIn;
        bSpectralReassignment = bSpectralReassignmentIn;
        bAdjustNeighFreqDependent = bAdjustNeighFreqDependentIn;

        // Round fs * MIN_WINDOW_SIZE to the nearest integer number of samples
        minWindowSize = (int) (Math.floor(fs * MIN_WINDOW_SIZE + 0.5));
        if (minWindowSize % 2 == 0) { // Always use an odd window size to have a zero-phase analysis window
            minWindowSize++;
        }

        absMax = -1.0;
        totalEnergy = 0.0;

        LPOrder = SignalProcUtils.getLPOrder(fs);
        lifterOrder = SignalProcUtils.getLifterOrder(fs);
    }

    /**
     * Returns the default FFT size for a sampling rate.
     *
     * @param samplingRate
     *            sampling rate in Hz
     * @return {@code 1024} for rates below 10000, {@code 2048} for rates below 20000, and {@code 4096} otherwise
     */
    public static int getDefaultFFTSize(int samplingRate) {
        if (samplingRate < 10000) {
            return 1024;
        } else if (samplingRate < 20000) {
            return 2048;
        } else {
            return 4096;
        }
    }

    /**
     * Sets the FFT size used for sinusoidal analysis.
     *
     * @param fftSizeIn
     *            FFT size in points; stored as-is without validation
     */
    public void setSinAnaFFTSize(int fftSizeIn) {
        fftSize = fftSizeIn;
    }
}