/** * Copyright 2007 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * Permission is hereby granted, free of charge, to use and distribute * this software and its documentation without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of this work, and to * permit persons to whom this work is furnished to do so, subject to * the following conditions: * * 1. The code must retain the above copyright notice, this list of * conditions and the following disclaimer. * 2. Any modifications must be clearly marked as such. * 3. Original authors' names are not deleted. * 4. The authors' names are not used to endorse or promote products * derived from this software without specific prior written * permission. * * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF * THIS SOFTWARE. */ package marytts.signalproc.sinusoidal.hntm.analysis; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import marytts.signalproc.analysis.RegularizedCepstrumEstimator; import marytts.signalproc.sinusoidal.hntm.analysis.pitch.HnmPitchVoicingAnalyzerParams; import marytts.signalproc.sinusoidal.hntm.synthesis.HntmSynthesizerParams; import marytts.signalproc.window.Window; /** * Analysis parameters for harmonics plus noise model (HNM) * * @author Oytun Türk * */ public class HntmAnalyzerParams { public HnmPitchVoicingAnalyzerParams hnmPitchVoicingAnalyzerParams; // Parameters of pitch and voicing analyzer public boolean useJampackInAnalysis; // Use Jampack library for matrix operations (suggested for increased speed) public boolean isSilentAnalysis; // If false, displays a single line of message per frame during analysis public boolean readAnalysisResultsFromFile; // If true, analysis results are read from an existing binary file public int harmonicModel; // Harmonic model type public static final int HARMONICS_PLUS_NOISE = 1; public static final int HARMONICS_PLUS_TRANSIENTS_PLUS_NOISE = 2; public int noiseModel; // Noise model type public static final int WAVEFORM = 1; // Noise part model based on frame waveform (i.e. no model, overlap-add noise part // generation) public static final int LPC = 2; // Noise part model based on LPC public static final int PSEUDO_HARMONIC = 3; // Noise part model based on pseude harmonics for f0=NOISE_F0_IN_HZ public static final int VOICEDNOISE_LPC_UNVOICEDNOISE_WAVEFORM = 4; // noise part model based on LPC for voiced parts and // waveform for unvoiced parts public static final int UNVOICEDNOISE_LPC_VOICEDNOISE_WAVEFORM = 5; // noise part model based on LPC for unvoiced parts and // waveform for voiced parts public int regularizedCepstrumWarpingMethod; // Warping method for regularized cepstral envelope to be fitted to harmonic // amplitudes public int harmonicSynthesisMethodBeforeNoiseAnalysis; // Synthesize harmonic part before noise analysis for subtraction? public boolean useHarmonicAmplitudesDirectly; // If true, regularized cepstral envelope is not used public float regularizedCepstrumLambdaHarmonic; // Regularization parameter public boolean useWeightingInRegularizedCepstrumEstimationHarmonic; // If true, lower freuqnecies are assigned relatively more // weight in regularized cepstrum estimation public int harmonicPartCepstrumOrderPreMel; // Cepstrum order prior to mel scaling public int harmonicPartCepstrumOrder; // Cepstrum order in regularized cepstrum estimation public boolean computeNoisePartLpOrderFromSamplingRate; // If true, noise part LP order is auto-detected from sampling rate public int noisePartLpOrder; // Linear prediction order of noise part if it is not auto-detected from sampling rate public float preemphasisCoefNoise; // Pre-emphasis coefficient for the noise part public boolean hpfBeforeNoiseAnalysis; // Apply highpass filter before analyzing a noise frame? public boolean decimateNoiseWaveform; // Decimate voiced segment noise parts? public boolean overlapNoiseWaveformModel; // Perform overlap add processing for waveform based noise model // These parameters are effective only when the noise model is pseudo-harmonic public boolean useNoiseAmplitudesDirectly; // If true, regularized cepstral envelope is not used public float regularizedCepstrumEstimationLambdaNoise; // Regularization parameter public boolean useWeightingInRegularizedCesptrumEstimationNoise; // If true, lower freuqnecies are assigned relatively more // weight in regularized cepstrum estimation public int noisePartCepstrumOderPre; // Cepstrum order prior to mel scaling public int noisePartCepstrumOrder; // Cepstrum order in regularized cepstrum estimation public boolean usePosteriorMelWarpingNoise; // Perform posteriro mel-scale warping? public float noiseF0InHz; // Fixed f0 for noise part (to determine analysis window size) public float hpfTransitionBandwidthInHz; // Transition bandwidth of the highpass filter that separates noise part from // harmonic part public float noiseAnalysisWindowDurationInSeconds; // Fixed duration of noise analysis windows public float overlapBetweenHarmonicAndNoiseRegionsInHz; // Overlap amount in frequency between harmonic and noise regions public float overlapBetweenTransientAndNontransientSectionsInSeconds; // Overlap amount in time between transient and // non-transient segments public int harmonicAnalysisWindowType; // Window type for harmonic analysis public int noiseAnalysisWindowType; // Window type for noise analysis public int numHarmonicsForVoicing; // Number of lowest harmonics to use for voicing detection public float harmonicsNeigh; // A parameter between 0.0 and 1.0: How much the search range for voicing detection will be // extended beyond the first and the last harmonic // 0.3 means the region [0.7xf0, 4.3xf0] will be considered in voicing decision public float numPeriodsHarmonicsExtraction; // Total periods for hamronic part extraction public float fftPeakPickerPeriods; // Total periods for frequency domain peak picking public static boolean UNWRAP_PHASES_ALONG_HARMONICS_AFTER_ANALYSIS = false; // Apply phase unwrapping along harmonic tracks // after analysis? public static boolean UNWRAP_PHASES_ALONG_HARMONICS_AFTER_TIME_SCALING = false; // Apply phase unwrapping along harmonic // tracks after time scaling? public static boolean UNWRAP_PHASES_ALONG_HARMONICS_AFTER_PITCH_SCALING = false; // Apply phase unwrapping along harmonic // tracks after pitch scaling? public HntmAnalyzerParams() { hnmPitchVoicingAnalyzerParams = new HnmPitchVoicingAnalyzerParams(); useJampackInAnalysis = true; isSilentAnalysis = false; harmonicModel = HARMONICS_PLUS_NOISE; noiseModel = WAVEFORM; regularizedCepstrumWarpingMethod = RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING; harmonicSynthesisMethodBeforeNoiseAnalysis = HntmSynthesizerParams.LINEAR_PHASE_INTERPOLATION; useHarmonicAmplitudesDirectly = true; // Use amplitudes directly, the following are only effective if this is false regularizedCepstrumLambdaHarmonic = 1.0e-5f; // Reducing this may increase harmonic amplitude estimation accuracy useWeightingInRegularizedCepstrumEstimationHarmonic = false; harmonicPartCepstrumOrder = 24; // Cepstrum order to represent harmonic amplitudes harmonicPartCepstrumOrderPreMel = 40; // Pre-cepstrum order to compute linear cepstral coefficients // 0 means auto computation from number of harmonics (See // RegularizedPostWarpedCepstrumEstimator.getAutoCepsOrderPre()). computeNoisePartLpOrderFromSamplingRate = false; // If true, noise LP order is determined using sampling rate (might be // high) noisePartLpOrder = 12; // Effective only if the above parameter is false preemphasisCoefNoise = 0.97f; hpfBeforeNoiseAnalysis = true; // False means the noise part will be full-band decimateNoiseWaveform = false; // Apply decimation when noise part is waveform (only in voiced parts) overlapNoiseWaveformModel = true; // Keep overlapping chunks of noise waveform for synthesis useNoiseAmplitudesDirectly = true; // If noise part is PSEUDE_HARMONICU and if this is true, use amplitudes directly. The // following are only effective if this is false regularizedCepstrumEstimationLambdaNoise = 2.0e-4f; // Reducing this may increase harmonic amplitude estimation accuracy useWeightingInRegularizedCesptrumEstimationNoise = false; noisePartCepstrumOderPre = 12; // Effective only for REGULARIZED_CEPS and PSEUDO_HARMONIC noise part types noisePartCepstrumOrder = 20; // Effective only for REGULARIZED_CEPS and PSEUDO_HARMONIC noise part types usePosteriorMelWarpingNoise = true; // If true, post-warping using Mel-scale is used, otherwise prior warping using // Bark-scale is employed noiseF0InHz = 100.0f; // Pseudo-pitch for unvoiced portions (will be used for pseudo harmonic modelling of the noise part) hpfTransitionBandwidthInHz = 0.0f; noiseAnalysisWindowDurationInSeconds = 0.060f; // Fixed window size for noise analysis, should be generally large (>=0.040 // seconds) overlapBetweenHarmonicAndNoiseRegionsInHz = 0.0f; overlapBetweenTransientAndNontransientSectionsInSeconds = 0.005f; harmonicAnalysisWindowType = Window.HAMMING; noiseAnalysisWindowType = Window.HAMMING; // Default search range for voicing detection, i.e. voicing criterion will be computed for frequency range: // [DEFAULT_VOICING_START_HARMONIC x f0, DEFAULT_VOICING_START_HARMONIC x f0] where f0 is the fundamental frequency // estimate numHarmonicsForVoicing = 4; harmonicsNeigh = 0.3f; // Between 0.0 and 1.0: How much the search range for voicing detection will be extended beyond the // first and the last harmonic // 0.3 means the region [0.7xf0, 4.3xf0] will be considered in voicing decision numPeriodsHarmonicsExtraction = 2.0f; fftPeakPickerPeriods = 3.0f; } public HntmAnalyzerParams(String binaryFile) { try { read(binaryFile); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public HntmAnalyzerParams(DataInputStream dis) { try { read(dis); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public HntmAnalyzerParams(HntmAnalyzerParams existing) { hnmPitchVoicingAnalyzerParams = new HnmPitchVoicingAnalyzerParams(existing.hnmPitchVoicingAnalyzerParams); useJampackInAnalysis = existing.useJampackInAnalysis; isSilentAnalysis = existing.isSilentAnalysis; harmonicModel = existing.harmonicModel; noiseModel = existing.noiseModel; regularizedCepstrumWarpingMethod = existing.regularizedCepstrumWarpingMethod; harmonicSynthesisMethodBeforeNoiseAnalysis = existing.harmonicSynthesisMethodBeforeNoiseAnalysis; useHarmonicAmplitudesDirectly = existing.useHarmonicAmplitudesDirectly; regularizedCepstrumLambdaHarmonic = existing.regularizedCepstrumLambdaHarmonic; useWeightingInRegularizedCepstrumEstimationHarmonic = existing.useWeightingInRegularizedCepstrumEstimationHarmonic; harmonicPartCepstrumOrderPreMel = existing.harmonicPartCepstrumOrderPreMel; harmonicPartCepstrumOrder = existing.harmonicPartCepstrumOrder; computeNoisePartLpOrderFromSamplingRate = existing.computeNoisePartLpOrderFromSamplingRate; noisePartLpOrder = existing.noisePartLpOrder; preemphasisCoefNoise = existing.preemphasisCoefNoise; hpfBeforeNoiseAnalysis = existing.hpfBeforeNoiseAnalysis; decimateNoiseWaveform = existing.decimateNoiseWaveform; overlapNoiseWaveformModel = existing.overlapNoiseWaveformModel; useNoiseAmplitudesDirectly = existing.useNoiseAmplitudesDirectly; regularizedCepstrumEstimationLambdaNoise = existing.regularizedCepstrumEstimationLambdaNoise; useWeightingInRegularizedCesptrumEstimationNoise = existing.useWeightingInRegularizedCesptrumEstimationNoise; noisePartCepstrumOderPre = existing.noisePartCepstrumOderPre; noisePartCepstrumOrder = existing.noisePartCepstrumOrder; usePosteriorMelWarpingNoise = existing.usePosteriorMelWarpingNoise; noiseF0InHz = existing.noiseF0InHz; hpfTransitionBandwidthInHz = existing.hpfTransitionBandwidthInHz; noiseAnalysisWindowDurationInSeconds = existing.noiseAnalysisWindowDurationInSeconds; overlapBetweenHarmonicAndNoiseRegionsInHz = existing.overlapBetweenHarmonicAndNoiseRegionsInHz; overlapBetweenTransientAndNontransientSectionsInSeconds = existing.overlapBetweenTransientAndNontransientSectionsInSeconds; harmonicAnalysisWindowType = existing.harmonicAnalysisWindowType; noiseAnalysisWindowType = existing.noiseAnalysisWindowType; numHarmonicsForVoicing = existing.numHarmonicsForVoicing; harmonicsNeigh = existing.harmonicsNeigh; numPeriodsHarmonicsExtraction = existing.numPeriodsHarmonicsExtraction; fftPeakPickerPeriods = existing.fftPeakPickerPeriods; } public boolean equals(HntmAnalyzerParams existing) { if (!hnmPitchVoicingAnalyzerParams.equals(existing.hnmPitchVoicingAnalyzerParams)) return false; if (useJampackInAnalysis != existing.useJampackInAnalysis) return false; if (isSilentAnalysis != existing.isSilentAnalysis) return false; if (harmonicModel != existing.harmonicModel) return false; if (noiseModel != existing.noiseModel) return false; if (regularizedCepstrumWarpingMethod != existing.regularizedCepstrumWarpingMethod) return false; if (harmonicSynthesisMethodBeforeNoiseAnalysis != existing.harmonicSynthesisMethodBeforeNoiseAnalysis) return false; if (useHarmonicAmplitudesDirectly != existing.useHarmonicAmplitudesDirectly) return false; if (regularizedCepstrumLambdaHarmonic != existing.regularizedCepstrumLambdaHarmonic) return false; if (useWeightingInRegularizedCepstrumEstimationHarmonic != existing.useWeightingInRegularizedCepstrumEstimationHarmonic) return false; if (harmonicPartCepstrumOrderPreMel != existing.harmonicPartCepstrumOrderPreMel) return false; if (harmonicPartCepstrumOrder != existing.harmonicPartCepstrumOrder) return false; if (computeNoisePartLpOrderFromSamplingRate != existing.computeNoisePartLpOrderFromSamplingRate) return false; if (noisePartLpOrder != existing.noisePartLpOrder) return false; if (preemphasisCoefNoise != existing.preemphasisCoefNoise) return false; if (hpfBeforeNoiseAnalysis != existing.hpfBeforeNoiseAnalysis) return false; if (decimateNoiseWaveform != existing.decimateNoiseWaveform) return false; if (overlapNoiseWaveformModel != existing.overlapNoiseWaveformModel) return false; if (useNoiseAmplitudesDirectly != existing.useNoiseAmplitudesDirectly) return false; if (regularizedCepstrumEstimationLambdaNoise != existing.regularizedCepstrumEstimationLambdaNoise) return false; if (useWeightingInRegularizedCesptrumEstimationNoise != existing.useWeightingInRegularizedCesptrumEstimationNoise) return false; if (noisePartCepstrumOderPre != existing.noisePartCepstrumOderPre) return false; if (noisePartCepstrumOrder != existing.noisePartCepstrumOrder) return false; if (usePosteriorMelWarpingNoise != existing.usePosteriorMelWarpingNoise) return false; if (noiseF0InHz != existing.noiseF0InHz) return false; if (hpfTransitionBandwidthInHz != existing.hpfTransitionBandwidthInHz) return false; if (noiseAnalysisWindowDurationInSeconds != existing.noiseAnalysisWindowDurationInSeconds) return false; if (overlapBetweenHarmonicAndNoiseRegionsInHz != existing.overlapBetweenHarmonicAndNoiseRegionsInHz) return false; if (overlapBetweenTransientAndNontransientSectionsInSeconds != existing.overlapBetweenTransientAndNontransientSectionsInSeconds) return false; if (harmonicAnalysisWindowType != existing.harmonicAnalysisWindowType) return false; if (noiseAnalysisWindowType != existing.noiseAnalysisWindowType) return false; if (numHarmonicsForVoicing != existing.numHarmonicsForVoicing) return false; if (harmonicsNeigh != existing.harmonicsNeigh) return false; if (numPeriodsHarmonicsExtraction != existing.numPeriodsHarmonicsExtraction) return false; if (fftPeakPickerPeriods != existing.fftPeakPickerPeriods) return false; return true; } public void write(String binaryFile) throws IOException { DataOutputStream dos = new DataOutputStream(new FileOutputStream(new File(binaryFile))); write(dos); } public void write(DataOutputStream dos) throws IOException { hnmPitchVoicingAnalyzerParams.write(dos); dos.writeBoolean(useJampackInAnalysis); dos.writeBoolean(isSilentAnalysis); dos.writeInt(harmonicModel); dos.writeInt(noiseModel); dos.writeInt(regularizedCepstrumWarpingMethod); dos.writeInt(harmonicSynthesisMethodBeforeNoiseAnalysis); dos.writeBoolean(useHarmonicAmplitudesDirectly); dos.writeFloat(regularizedCepstrumLambdaHarmonic); dos.writeBoolean(useWeightingInRegularizedCepstrumEstimationHarmonic); dos.writeInt(harmonicPartCepstrumOrderPreMel); dos.writeInt(harmonicPartCepstrumOrder); dos.writeBoolean(computeNoisePartLpOrderFromSamplingRate); dos.writeInt(noisePartLpOrder); dos.writeFloat(preemphasisCoefNoise); dos.writeBoolean(hpfBeforeNoiseAnalysis); dos.writeBoolean(decimateNoiseWaveform); dos.writeBoolean(overlapNoiseWaveformModel); dos.writeBoolean(useNoiseAmplitudesDirectly); dos.writeFloat(regularizedCepstrumEstimationLambdaNoise); dos.writeBoolean(useWeightingInRegularizedCesptrumEstimationNoise); dos.writeInt(noisePartCepstrumOderPre); dos.writeInt(noisePartCepstrumOrder); dos.writeBoolean(usePosteriorMelWarpingNoise); dos.writeFloat(noiseF0InHz); dos.writeFloat(hpfTransitionBandwidthInHz); dos.writeFloat(noiseAnalysisWindowDurationInSeconds); dos.writeFloat(overlapBetweenHarmonicAndNoiseRegionsInHz); dos.writeFloat(overlapBetweenTransientAndNontransientSectionsInSeconds); dos.writeInt(harmonicAnalysisWindowType); dos.writeInt(noiseAnalysisWindowType); dos.writeInt(numHarmonicsForVoicing); dos.writeFloat(harmonicsNeigh); dos.writeFloat(numPeriodsHarmonicsExtraction); dos.writeFloat(fftPeakPickerPeriods); } public void read(String binaryFile) throws IOException { DataInputStream dis = new DataInputStream(new FileInputStream(new File(binaryFile))); read(dis); } public void read(DataInputStream dis) throws IOException { hnmPitchVoicingAnalyzerParams = new HnmPitchVoicingAnalyzerParams(dis); useJampackInAnalysis = dis.readBoolean(); isSilentAnalysis = dis.readBoolean(); harmonicModel = dis.readInt(); noiseModel = dis.readInt(); regularizedCepstrumWarpingMethod = dis.readInt(); harmonicSynthesisMethodBeforeNoiseAnalysis = dis.readInt(); useHarmonicAmplitudesDirectly = dis.readBoolean(); regularizedCepstrumLambdaHarmonic = dis.readFloat(); useWeightingInRegularizedCepstrumEstimationHarmonic = dis.readBoolean(); harmonicPartCepstrumOrderPreMel = dis.readInt(); harmonicPartCepstrumOrder = dis.readInt(); computeNoisePartLpOrderFromSamplingRate = dis.readBoolean(); noisePartLpOrder = dis.readInt(); preemphasisCoefNoise = dis.readFloat(); hpfBeforeNoiseAnalysis = dis.readBoolean(); decimateNoiseWaveform = dis.readBoolean(); overlapNoiseWaveformModel = dis.readBoolean(); useNoiseAmplitudesDirectly = dis.readBoolean(); regularizedCepstrumEstimationLambdaNoise = dis.readFloat(); useWeightingInRegularizedCesptrumEstimationNoise = dis.readBoolean(); noisePartCepstrumOderPre = dis.readInt(); noisePartCepstrumOrder = dis.readInt(); usePosteriorMelWarpingNoise = dis.readBoolean(); noiseF0InHz = dis.readFloat(); hpfTransitionBandwidthInHz = dis.readFloat(); noiseAnalysisWindowDurationInSeconds = dis.readFloat(); overlapBetweenHarmonicAndNoiseRegionsInHz = dis.readFloat(); overlapBetweenTransientAndNontransientSectionsInSeconds = dis.readFloat(); harmonicAnalysisWindowType = dis.readInt(); noiseAnalysisWindowType = dis.readInt(); numHarmonicsForVoicing = dis.readInt(); harmonicsNeigh = dis.readFloat(); numPeriodsHarmonicsExtraction = dis.readFloat(); fftPeakPickerPeriods = dis.readFloat(); } }