// HntmAnalyzerParams.java example (Explorer: marytts-master)
/**
 * Copyright 2007 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 * 
 * Permission is hereby granted, free of charge, to use and distribute
 * this software and its documentation without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of this work, and to
 * permit persons to whom this work is furnished to do so, subject to
 * the following conditions:
 * 
 * 1. The code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 * 2. Any modifications must be clearly marked as such.
 * 3. Original authors' names are not deleted.
 * 4. The authors' names are not used to endorse or promote products
 *    derived from this software without specific prior written
 *    permission.
 *
 * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
 * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 * THIS SOFTWARE.
 */

package marytts.signalproc.sinusoidal.hntm.analysis;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import marytts.signalproc.analysis.RegularizedCepstrumEstimator;
import marytts.signalproc.sinusoidal.hntm.analysis.pitch.HnmPitchVoicingAnalyzerParams;
import marytts.signalproc.sinusoidal.hntm.synthesis.HntmSynthesizerParams;
import marytts.signalproc.window.Window;

/**
 * Analysis parameters for the harmonics plus noise model (HNM).
 * <p>
 * This is a plain parameter holder with public fields. Instances can be created with defaults, copied, compared field by
 * field, and stored to / loaded from a binary stream. The field order used by {@link #write(DataOutputStream)} and
 * {@link #read(DataInputStream)} defines the binary layout, so the two methods must always be kept in sync.
 * <p>
 * NOTE(review): {@link #readAnalysisResultsFromFile} is neither copied by the copy constructor, nor compared by
 * {@link #equals(HntmAnalyzerParams)}, nor serialized. Presumably it is a run-time only switch -- confirm before relying on
 * it surviving a copy or a write/read round trip.
 * 
 * @author Oytun Türk
 * 
 */
public class HntmAnalyzerParams {

	/** Parameters of the pitch and voicing analyzer. */
	public HnmPitchVoicingAnalyzerParams hnmPitchVoicingAnalyzerParams;

	/** Use the Jampack library for matrix operations (suggested for increased speed). */
	public boolean useJampackInAnalysis;

	/** If false, a single line of message is displayed per frame during analysis. */
	public boolean isSilentAnalysis;

	/** If true, analysis results are read from an existing binary file. Defaults to false; not serialized. */
	public boolean readAnalysisResultsFromFile;

	/** Harmonic model type: {@link #HARMONICS_PLUS_NOISE} or {@link #HARMONICS_PLUS_TRANSIENTS_PLUS_NOISE}. */
	public int harmonicModel;
	public static final int HARMONICS_PLUS_NOISE = 1;
	public static final int HARMONICS_PLUS_TRANSIENTS_PLUS_NOISE = 2;

	/** Noise model type: one of the noise model constants declared below. */
	public int noiseModel;
	/** Noise part model based on the frame waveform (i.e. no model, overlap-add noise part generation). */
	public static final int WAVEFORM = 1;
	/** Noise part model based on LPC. */
	public static final int LPC = 2;
	/** Noise part model based on pseudo harmonics for a fixed f0 (see {@link #noiseF0InHz}). */
	public static final int PSEUDO_HARMONIC = 3;
	/** Noise part model based on LPC for voiced parts and on the waveform for unvoiced parts. */
	public static final int VOICEDNOISE_LPC_UNVOICEDNOISE_WAVEFORM = 4;
	/** Noise part model based on LPC for unvoiced parts and on the waveform for voiced parts. */
	public static final int UNVOICEDNOISE_LPC_VOICEDNOISE_WAVEFORM = 5;

	/** Warping method for the regularized cepstral envelope to be fitted to harmonic amplitudes. */
	public int regularizedCepstrumWarpingMethod;
	/** Method used to synthesize the harmonic part before noise analysis (for subtraction). */
	public int harmonicSynthesisMethodBeforeNoiseAnalysis;

	/** If true, the regularized cepstral envelope is not used for the harmonic part. */
	public boolean useHarmonicAmplitudesDirectly;
	/** Regularization parameter for the harmonic part. */
	public float regularizedCepstrumLambdaHarmonic;
	/** If true, lower frequencies are assigned relatively more weight in regularized cepstrum estimation. */
	public boolean useWeightingInRegularizedCepstrumEstimationHarmonic;
	/** Cepstrum order prior to mel scaling. */
	public int harmonicPartCepstrumOrderPreMel;
	/** Cepstrum order in regularized cepstrum estimation. */
	public int harmonicPartCepstrumOrder;

	/** If true, the noise part LP order is auto-detected from the sampling rate. */
	public boolean computeNoisePartLpOrderFromSamplingRate;
	/** Linear prediction order of the noise part if it is not auto-detected from the sampling rate. */
	public int noisePartLpOrder;
	/** Pre-emphasis coefficient for the noise part. */
	public float preemphasisCoefNoise;
	/** Apply a highpass filter before analyzing a noise frame? */
	public boolean hpfBeforeNoiseAnalysis;
	/** Decimate voiced segment noise parts? */
	public boolean decimateNoiseWaveform;
	/** Perform overlap-add processing for the waveform based noise model. */
	public boolean overlapNoiseWaveformModel;

	// The following six parameters are effective only when the noise model is pseudo-harmonic.
	// The misspelled field names are part of the public interface and are kept for compatibility.
	/** If true, the regularized cepstral envelope is not used for the noise part. */
	public boolean useNoiseAmplitudesDirectly;
	/** Regularization parameter for the noise part. */
	public float regularizedCepstrumEstimationLambdaNoise;
	/** If true, lower frequencies are assigned relatively more weight in regularized cepstrum estimation. */
	public boolean useWeightingInRegularizedCesptrumEstimationNoise;
	/** Cepstrum order prior to mel scaling. */
	public int noisePartCepstrumOderPre;
	/** Cepstrum order in regularized cepstrum estimation. */
	public int noisePartCepstrumOrder;
	/** Perform posterior mel-scale warping? */
	public boolean usePosteriorMelWarpingNoise;

	/** Fixed f0 for the noise part (to determine the analysis window size). */
	public float noiseF0InHz;
	/** Transition bandwidth of the highpass filter that separates the noise part from the harmonic part. */
	public float hpfTransitionBandwidthInHz;
	/** Fixed duration of noise analysis windows. */
	public float noiseAnalysisWindowDurationInSeconds;
	/** Overlap amount in frequency between harmonic and noise regions. */
	public float overlapBetweenHarmonicAndNoiseRegionsInHz;
	/** Overlap amount in time between transient and non-transient segments. */
	public float overlapBetweenTransientAndNontransientSectionsInSeconds;

	/** Window type for harmonic analysis. */
	public int harmonicAnalysisWindowType;
	/** Window type for noise analysis. */
	public int noiseAnalysisWindowType;

	/** Number of lowest harmonics to use for voicing detection. */
	public int numHarmonicsForVoicing;
	/**
	 * A parameter between 0.0 and 1.0: how much the search range for voicing detection will be extended beyond the first and
	 * the last harmonic. 0.3 means the region [0.7xf0, 4.3xf0] will be considered in the voicing decision.
	 */
	public float harmonicsNeigh;

	/** Total periods for harmonic part extraction. */
	public float numPeriodsHarmonicsExtraction;
	/** Total periods for frequency domain peak picking. */
	public float fftPeakPickerPeriods;

	// Global (static, mutable) switches; they are not part of the per-instance state and are not serialized.
	/** Apply phase unwrapping along harmonic tracks after analysis? */
	public static boolean UNWRAP_PHASES_ALONG_HARMONICS_AFTER_ANALYSIS = false;
	/** Apply phase unwrapping along harmonic tracks after time scaling? */
	public static boolean UNWRAP_PHASES_ALONG_HARMONICS_AFTER_TIME_SCALING = false;
	/** Apply phase unwrapping along harmonic tracks after pitch scaling? */
	public static boolean UNWRAP_PHASES_ALONG_HARMONICS_AFTER_PITCH_SCALING = false;

	/**
	 * Creates a parameter set initialized with the default values.
	 */
	public HntmAnalyzerParams() {
		hnmPitchVoicingAnalyzerParams = new HnmPitchVoicingAnalyzerParams();

		useJampackInAnalysis = true;

		isSilentAnalysis = false;
		readAnalysisResultsFromFile = false; // Same as the implicit Java default, spelled out for clarity

		harmonicModel = HARMONICS_PLUS_NOISE;
		noiseModel = WAVEFORM;

		regularizedCepstrumWarpingMethod = RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING;
		harmonicSynthesisMethodBeforeNoiseAnalysis = HntmSynthesizerParams.LINEAR_PHASE_INTERPOLATION;

		// Use amplitudes directly; the remaining harmonic cepstrum settings are only effective if this is false
		useHarmonicAmplitudesDirectly = true;
		regularizedCepstrumLambdaHarmonic = 1.0e-5f; // Reducing this may increase harmonic amplitude estimation accuracy
		useWeightingInRegularizedCepstrumEstimationHarmonic = false;

		harmonicPartCepstrumOrder = 24; // Cepstrum order to represent harmonic amplitudes
		// Pre-cepstrum order to compute linear cepstral coefficients.
		// 0 means auto computation from number of harmonics (see
		// RegularizedPostWarpedCepstrumEstimator.getAutoCepsOrderPre()).
		harmonicPartCepstrumOrderPreMel = 40;

		// If true, noise LP order is determined using the sampling rate (might be high)
		computeNoisePartLpOrderFromSamplingRate = false;
		noisePartLpOrder = 12; // Effective only if the above parameter is false
		preemphasisCoefNoise = 0.97f;
		hpfBeforeNoiseAnalysis = true; // False means the noise part will be full-band
		decimateNoiseWaveform = false; // Apply decimation when noise part is waveform (only in voiced parts)
		overlapNoiseWaveformModel = true; // Keep overlapping chunks of noise waveform for synthesis

		// If the noise part is PSEUDO_HARMONIC and this is true, amplitudes are used directly.
		// The remaining noise cepstrum settings are only effective if this is false.
		useNoiseAmplitudesDirectly = true;
		regularizedCepstrumEstimationLambdaNoise = 2.0e-4f; // Regularization parameter for the noise part
		useWeightingInRegularizedCesptrumEstimationNoise = false;
		noisePartCepstrumOderPre = 12; // Effective only for REGULARIZED_CEPS and PSEUDO_HARMONIC noise part types
		noisePartCepstrumOrder = 20; // Effective only for REGULARIZED_CEPS and PSEUDO_HARMONIC noise part types
		// If true, post-warping using Mel-scale is used, otherwise prior warping using Bark-scale is employed
		usePosteriorMelWarpingNoise = true;

		// Pseudo-pitch for unvoiced portions (will be used for pseudo harmonic modelling of the noise part)
		noiseF0InHz = 100.0f;
		hpfTransitionBandwidthInHz = 0.0f;
		// Fixed window size for noise analysis, should be generally large (>=0.040 seconds)
		noiseAnalysisWindowDurationInSeconds = 0.060f;
		overlapBetweenHarmonicAndNoiseRegionsInHz = 0.0f;
		overlapBetweenTransientAndNontransientSectionsInSeconds = 0.005f;

		harmonicAnalysisWindowType = Window.HAMMING;
		noiseAnalysisWindowType = Window.HAMMING;

		// Default search range for voicing detection: the lowest numHarmonicsForVoicing harmonics, extended on both
		// sides by harmonicsNeigh (a value between 0.0 and 1.0).
		// With 4 harmonics and 0.3 the region [0.7xf0, 4.3xf0] will be considered in the voicing decision,
		// where f0 is the fundamental frequency estimate.
		numHarmonicsForVoicing = 4;
		harmonicsNeigh = 0.3f;

		numPeriodsHarmonicsExtraction = 2.0f;
		fftPeakPickerPeriods = 3.0f;
	}

	/**
	 * Creates a parameter set by loading it from a binary file.
	 * <p>
	 * Loading is best-effort: an {@link IOException} is printed to standard error and not propagated, in which case the fields
	 * that were not read keep their Java default values (including a possibly null
	 * {@link #hnmPitchVoicingAnalyzerParams}).
	 * 
	 * @param binaryFile
	 *            path of the file to read
	 */
	public HntmAnalyzerParams(String binaryFile) {
		try {
			read(binaryFile);
		} catch (IOException e) {
			// Deliberately not rethrown to keep the constructor signature; the object may be only partially initialized
			e.printStackTrace();
		}
	}

	/**
	 * Creates a parameter set by loading it from an already open binary stream.
	 * <p>
	 * Loading is best-effort: an {@link IOException} is printed to standard error and not propagated, in which case the fields
	 * that were not read keep their Java default values. The stream is not closed.
	 * 
	 * @param dis
	 *            stream positioned at the start of a serialized parameter set
	 */
	public HntmAnalyzerParams(DataInputStream dis) {
		try {
			read(dis);
		} catch (IOException e) {
			// Deliberately not rethrown to keep the constructor signature; the object may be only partially initialized
			e.printStackTrace();
		}
	}

	/**
	 * Copy constructor. The pitch/voicing parameters are copied through their own copy constructor; all other copied fields
	 * are primitives.
	 * <p>
	 * NOTE(review): {@link #readAnalysisResultsFromFile} is not copied and therefore stays false in the new instance --
	 * confirm that this is intended.
	 * 
	 * @param existing
	 *            parameter set to copy from
	 */
	public HntmAnalyzerParams(HntmAnalyzerParams existing) {
		hnmPitchVoicingAnalyzerParams = new HnmPitchVoicingAnalyzerParams(existing.hnmPitchVoicingAnalyzerParams);

		useJampackInAnalysis = existing.useJampackInAnalysis;

		isSilentAnalysis = existing.isSilentAnalysis;

		harmonicModel = existing.harmonicModel;
		noiseModel = existing.noiseModel;
		regularizedCepstrumWarpingMethod = existing.regularizedCepstrumWarpingMethod;
		harmonicSynthesisMethodBeforeNoiseAnalysis = existing.harmonicSynthesisMethodBeforeNoiseAnalysis;

		useHarmonicAmplitudesDirectly = existing.useHarmonicAmplitudesDirectly;
		regularizedCepstrumLambdaHarmonic = existing.regularizedCepstrumLambdaHarmonic;
		useWeightingInRegularizedCepstrumEstimationHarmonic = existing.useWeightingInRegularizedCepstrumEstimationHarmonic;
		harmonicPartCepstrumOrderPreMel = existing.harmonicPartCepstrumOrderPreMel;
		harmonicPartCepstrumOrder = existing.harmonicPartCepstrumOrder;

		computeNoisePartLpOrderFromSamplingRate = existing.computeNoisePartLpOrderFromSamplingRate;
		noisePartLpOrder = existing.noisePartLpOrder;
		preemphasisCoefNoise = existing.preemphasisCoefNoise;
		hpfBeforeNoiseAnalysis = existing.hpfBeforeNoiseAnalysis;
		decimateNoiseWaveform = existing.decimateNoiseWaveform;
		overlapNoiseWaveformModel = existing.overlapNoiseWaveformModel;

		useNoiseAmplitudesDirectly = existing.useNoiseAmplitudesDirectly;
		regularizedCepstrumEstimationLambdaNoise = existing.regularizedCepstrumEstimationLambdaNoise;
		useWeightingInRegularizedCesptrumEstimationNoise = existing.useWeightingInRegularizedCesptrumEstimationNoise;
		noisePartCepstrumOderPre = existing.noisePartCepstrumOderPre;
		noisePartCepstrumOrder = existing.noisePartCepstrumOrder;
		usePosteriorMelWarpingNoise = existing.usePosteriorMelWarpingNoise;

		noiseF0InHz = existing.noiseF0InHz;
		hpfTransitionBandwidthInHz = existing.hpfTransitionBandwidthInHz;
		noiseAnalysisWindowDurationInSeconds = existing.noiseAnalysisWindowDurationInSeconds;
		overlapBetweenHarmonicAndNoiseRegionsInHz = existing.overlapBetweenHarmonicAndNoiseRegionsInHz;
		overlapBetweenTransientAndNontransientSectionsInSeconds = existing.overlapBetweenTransientAndNontransientSectionsInSeconds;

		harmonicAnalysisWindowType = existing.harmonicAnalysisWindowType;
		noiseAnalysisWindowType = existing.noiseAnalysisWindowType;

		numHarmonicsForVoicing = existing.numHarmonicsForVoicing;
		harmonicsNeigh = existing.harmonicsNeigh;

		numPeriodsHarmonicsExtraction = existing.numPeriodsHarmonicsExtraction;
		fftPeakPickerPeriods = existing.fftPeakPickerPeriods;
	}

	/**
	 * Compares this parameter set with another one field by field.
	 * <p>
	 * This is an overload taking {@code HntmAnalyzerParams}; it does not override {@link Object#equals(Object)}. Float fields
	 * are compared with {@code !=}, and {@link #readAnalysisResultsFromFile} is not part of the comparison.
	 * 
	 * @param existing
	 *            parameter set to compare with; may be null
	 * @return true if all compared fields are equal; false otherwise, including when {@code existing} is null
	 */
	public boolean equals(HntmAnalyzerParams existing) {
		// A null argument is never equal (previously this raised a NullPointerException)
		if (existing == null)
			return false;

		// The nested parameters can be null on either side after a failed load; treat two nulls as equal.
		// The call below keeps the original static types so the same equals overload is selected.
		if (hnmPitchVoicingAnalyzerParams == null) {
			if (existing.hnmPitchVoicingAnalyzerParams != null)
				return false;
		} else if (existing.hnmPitchVoicingAnalyzerParams == null
				|| !hnmPitchVoicingAnalyzerParams.equals(existing.hnmPitchVoicingAnalyzerParams)) {
			return false;
		}

		if (useJampackInAnalysis != existing.useJampackInAnalysis)
			return false;

		if (isSilentAnalysis != existing.isSilentAnalysis)
			return false;

		if (harmonicModel != existing.harmonicModel)
			return false;
		if (noiseModel != existing.noiseModel)
			return false;
		if (regularizedCepstrumWarpingMethod != existing.regularizedCepstrumWarpingMethod)
			return false;
		if (harmonicSynthesisMethodBeforeNoiseAnalysis != existing.harmonicSynthesisMethodBeforeNoiseAnalysis)
			return false;

		if (useHarmonicAmplitudesDirectly != existing.useHarmonicAmplitudesDirectly)
			return false;
		if (regularizedCepstrumLambdaHarmonic != existing.regularizedCepstrumLambdaHarmonic)
			return false;
		if (useWeightingInRegularizedCepstrumEstimationHarmonic != existing.useWeightingInRegularizedCepstrumEstimationHarmonic)
			return false;
		if (harmonicPartCepstrumOrderPreMel != existing.harmonicPartCepstrumOrderPreMel)
			return false;
		if (harmonicPartCepstrumOrder != existing.harmonicPartCepstrumOrder)
			return false;

		if (computeNoisePartLpOrderFromSamplingRate != existing.computeNoisePartLpOrderFromSamplingRate)
			return false;
		if (noisePartLpOrder != existing.noisePartLpOrder)
			return false;
		if (preemphasisCoefNoise != existing.preemphasisCoefNoise)
			return false;
		if (hpfBeforeNoiseAnalysis != existing.hpfBeforeNoiseAnalysis)
			return false;
		if (decimateNoiseWaveform != existing.decimateNoiseWaveform)
			return false;
		if (overlapNoiseWaveformModel != existing.overlapNoiseWaveformModel)
			return false;

		if (useNoiseAmplitudesDirectly != existing.useNoiseAmplitudesDirectly)
			return false;
		if (regularizedCepstrumEstimationLambdaNoise != existing.regularizedCepstrumEstimationLambdaNoise)
			return false;
		if (useWeightingInRegularizedCesptrumEstimationNoise != existing.useWeightingInRegularizedCesptrumEstimationNoise)
			return false;
		if (noisePartCepstrumOderPre != existing.noisePartCepstrumOderPre)
			return false;
		if (noisePartCepstrumOrder != existing.noisePartCepstrumOrder)
			return false;
		if (usePosteriorMelWarpingNoise != existing.usePosteriorMelWarpingNoise)
			return false;

		if (noiseF0InHz != existing.noiseF0InHz)
			return false;
		if (hpfTransitionBandwidthInHz != existing.hpfTransitionBandwidthInHz)
			return false;
		if (noiseAnalysisWindowDurationInSeconds != existing.noiseAnalysisWindowDurationInSeconds)
			return false;
		if (overlapBetweenHarmonicAndNoiseRegionsInHz != existing.overlapBetweenHarmonicAndNoiseRegionsInHz)
			return false;
		if (overlapBetweenTransientAndNontransientSectionsInSeconds != existing.overlapBetweenTransientAndNontransientSectionsInSeconds)
			return false;

		if (harmonicAnalysisWindowType != existing.harmonicAnalysisWindowType)
			return false;
		if (noiseAnalysisWindowType != existing.noiseAnalysisWindowType)
			return false;

		if (numHarmonicsForVoicing != existing.numHarmonicsForVoicing)
			return false;
		if (harmonicsNeigh != existing.harmonicsNeigh)
			return false;

		if (numPeriodsHarmonicsExtraction != existing.numPeriodsHarmonicsExtraction)
			return false;
		if (fftPeakPickerPeriods != existing.fftPeakPickerPeriods)
			return false;

		return true;
	}

	/**
	 * Writes this parameter set to a binary file and closes the file afterwards.
	 * 
	 * @param binaryFile
	 *            path of the file to write
	 * @throws IOException
	 *             if the file cannot be opened, written or closed
	 */
	public void write(String binaryFile) throws IOException {
		DataOutputStream dos = new DataOutputStream(new FileOutputStream(new File(binaryFile)));

		// This method owns the stream it opened, so release the file handle even when writing fails
		try {
			write(dos);
		} finally {
			dos.close();
		}
	}

	/**
	 * Writes this parameter set to an open binary stream. The stream is neither flushed nor closed here.
	 * <p>
	 * The write order below is the binary format; it must mirror {@link #read(DataInputStream)} exactly.
	 * 
	 * @param dos
	 *            stream to write to
	 * @throws IOException
	 *             if writing fails
	 */
	public void write(DataOutputStream dos) throws IOException {
		hnmPitchVoicingAnalyzerParams.write(dos);

		dos.writeBoolean(useJampackInAnalysis);

		dos.writeBoolean(isSilentAnalysis);

		dos.writeInt(harmonicModel);

		dos.writeInt(noiseModel);

		dos.writeInt(regularizedCepstrumWarpingMethod);
		dos.writeInt(harmonicSynthesisMethodBeforeNoiseAnalysis);

		dos.writeBoolean(useHarmonicAmplitudesDirectly);
		dos.writeFloat(regularizedCepstrumLambdaHarmonic);
		dos.writeBoolean(useWeightingInRegularizedCepstrumEstimationHarmonic);
		dos.writeInt(harmonicPartCepstrumOrderPreMel);
		dos.writeInt(harmonicPartCepstrumOrder);

		dos.writeBoolean(computeNoisePartLpOrderFromSamplingRate);
		dos.writeInt(noisePartLpOrder);
		dos.writeFloat(preemphasisCoefNoise);
		dos.writeBoolean(hpfBeforeNoiseAnalysis);
		dos.writeBoolean(decimateNoiseWaveform);
		dos.writeBoolean(overlapNoiseWaveformModel);

		dos.writeBoolean(useNoiseAmplitudesDirectly);
		dos.writeFloat(regularizedCepstrumEstimationLambdaNoise);
		dos.writeBoolean(useWeightingInRegularizedCesptrumEstimationNoise);
		dos.writeInt(noisePartCepstrumOderPre);
		dos.writeInt(noisePartCepstrumOrder);
		dos.writeBoolean(usePosteriorMelWarpingNoise);

		dos.writeFloat(noiseF0InHz);
		dos.writeFloat(hpfTransitionBandwidthInHz);
		dos.writeFloat(noiseAnalysisWindowDurationInSeconds);
		dos.writeFloat(overlapBetweenHarmonicAndNoiseRegionsInHz);
		dos.writeFloat(overlapBetweenTransientAndNontransientSectionsInSeconds);

		dos.writeInt(harmonicAnalysisWindowType);
		dos.writeInt(noiseAnalysisWindowType);

		dos.writeInt(numHarmonicsForVoicing);
		dos.writeFloat(harmonicsNeigh);

		dos.writeFloat(numPeriodsHarmonicsExtraction);
		dos.writeFloat(fftPeakPickerPeriods);
	}

	/**
	 * Loads this parameter set from a binary file and closes the file afterwards.
	 * 
	 * @param binaryFile
	 *            path of the file to read
	 * @throws IOException
	 *             if the file cannot be opened, read or closed
	 */
	public void read(String binaryFile) throws IOException {
		DataInputStream dis = new DataInputStream(new FileInputStream(new File(binaryFile)));

		// This method owns the stream it opened, so release the file handle even when reading fails
		try {
			read(dis);
		} finally {
			dis.close();
		}
	}

	/**
	 * Loads this parameter set from an open binary stream, overwriting the current field values. The stream is not closed.
	 * <p>
	 * The read order below is the binary format; it must mirror {@link #write(DataOutputStream)} exactly.
	 * 
	 * @param dis
	 *            stream positioned at the start of a serialized parameter set
	 * @throws IOException
	 *             if reading fails
	 */
	public void read(DataInputStream dis) throws IOException {
		hnmPitchVoicingAnalyzerParams = new HnmPitchVoicingAnalyzerParams(dis);

		useJampackInAnalysis = dis.readBoolean();

		isSilentAnalysis = dis.readBoolean();

		harmonicModel = dis.readInt();

		noiseModel = dis.readInt();

		regularizedCepstrumWarpingMethod = dis.readInt();
		harmonicSynthesisMethodBeforeNoiseAnalysis = dis.readInt();

		useHarmonicAmplitudesDirectly = dis.readBoolean();
		regularizedCepstrumLambdaHarmonic = dis.readFloat();
		useWeightingInRegularizedCepstrumEstimationHarmonic = dis.readBoolean();
		harmonicPartCepstrumOrderPreMel = dis.readInt();
		harmonicPartCepstrumOrder = dis.readInt();

		computeNoisePartLpOrderFromSamplingRate = dis.readBoolean();
		noisePartLpOrder = dis.readInt();
		preemphasisCoefNoise = dis.readFloat();
		hpfBeforeNoiseAnalysis = dis.readBoolean();
		decimateNoiseWaveform = dis.readBoolean();
		overlapNoiseWaveformModel = dis.readBoolean();

		useNoiseAmplitudesDirectly = dis.readBoolean();
		regularizedCepstrumEstimationLambdaNoise = dis.readFloat();
		useWeightingInRegularizedCesptrumEstimationNoise = dis.readBoolean();
		noisePartCepstrumOderPre = dis.readInt();
		noisePartCepstrumOrder = dis.readInt();
		usePosteriorMelWarpingNoise = dis.readBoolean();

		noiseF0InHz = dis.readFloat();
		hpfTransitionBandwidthInHz = dis.readFloat();
		noiseAnalysisWindowDurationInSeconds = dis.readFloat();
		overlapBetweenHarmonicAndNoiseRegionsInHz = dis.readFloat();
		overlapBetweenTransientAndNontransientSectionsInSeconds = dis.readFloat();

		harmonicAnalysisWindowType = dis.readInt();
		noiseAnalysisWindowType = dis.readInt();

		numHarmonicsForVoicing = dis.readInt();
		harmonicsNeigh = dis.readFloat();

		numPeriodsHarmonicsExtraction = dis.readFloat();
		fftPeakPickerPeriods = dis.readFloat();
	}
}