/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* Permission is hereby granted, free of charge, to use and distribute
* this software and its documentation without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of this work, and to
* permit persons to whom this work is furnished to do so, subject to
* the following conditions:
*
* 1. The code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* 2. Any modifications must be clearly marked as such.
* 3. Original authors' names are not deleted.
* 4. The authors' names are not used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
* CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
* THIS SOFTWARE.
*/
package marytts.signalproc.sinusoidal.hntm.analysis.pitch;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
/**
* A class for maximum frequency of voicing, f0, and voicing analysis for HNMs
*
* @author oytun.turk
*
*/
public class HnmPitchVoicingAnalyzerParams {
    /** Max. freq. of voicing analysis window size in seconds. */
    public float mvfAnalysisWindowSizeInSeconds;

    /** Max. freq. of voicing analysis skip size in seconds. */
    public float mvfAnalysisSkipSizeInSeconds;

    /** F0 detection window size in seconds. */
    public float f0AnalysisWindowSizeInSeconds;

    /** F0 detection skip size in seconds. */
    public float f0AnalysisSkipSizeInSeconds;

    /** DFT length for frequency domain analyses. */
    public int fftSize;

    /** Total consecutive median-moving average filtering steps to smooth out the max. freq. of voicing curve. */
    public int numFilteringStages;

    /** Length of median filter for smoothing the max. freq. of voicing contour. */
    public int medianFilterLength;

    /** Length of first moving averaging filter for smoothing the max. freq. of voicing contour. */
    public int movingAverageFilterLength;

    /** Total number of periods to use for initial pitch estimation. */
    public float numPeriodsInitialPitchEstimation;

    /** Decreased ==> Voicing increases (Orig: 2.0f). */
    public float cumulativeAmpThreshold;

    /** Decreased ==> Voicing increases (Orig: 13.0f). */
    public float maximumAmpThresholdInDB;

    /** Increased ==> Voicing increases (Orig: 20.0f). */
    public float harmonicDeviationPercent;

    /** Decreased ==> Voicing increases (Orig: 12.0f). */
    public float sharpPeakAmpDiffInDB;

    /** Minimum number of total harmonics to be included in voiced region (effective only when f0>10.0). */
    public int minimumTotalHarmonics;

    /** Maximum number of total harmonics to be included in voiced region (effective only when f0>10.0). */
    public int maximumTotalHarmonics;

    /** All voiced sections will have at least this freq. of voicing. */
    public float minimumVoicedFrequencyOfVoicing;

    /** All voiced sections will have at most this freq. of voicing. */
    public float maximumVoicedFrequencyOfVoicing;

    /** The max. freq. of voicing contour is shifted by this amount finally. */
    public float maximumFrequencyOfVoicingFinalShift;

    /** Between 0.0 and 1.0, decrease ==> Max. voicing freq increases. */
    public float runningMeanVoicingThreshold;

    /** Assume correlation between at most among this many harmonics (-1 ==> full correlation approach). */
    public int lastCorrelatedHarmonicNeighbour;

    /** Multiplied with f0, gives the minimum frequency above which voicing detection will be carried out. */
    public float vuvSearchMinHarmonicMultiplier;

    /** Multiplied with f0, gives the maximum frequency below which voicing detection will be carried out. */
    public float vuvSearchMaxHarmonicMultiplier;

    /**
     * Should be between 0.0f and 100.0f. 50.0f means the peak in the band should be greater than 50% of the half of
     * the band samples.
     */
    public float neighsPercent;

    /**
     * Creates a parameter set populated with the default analysis settings.
     */
    public HnmPitchVoicingAnalyzerParams() {
        mvfAnalysisWindowSizeInSeconds = 0.040f;
        mvfAnalysisSkipSizeInSeconds = 0.010f;
        f0AnalysisWindowSizeInSeconds = 0.040f;
        f0AnalysisSkipSizeInSeconds = 0.005f;
        fftSize = 4096;

        // Smoothing of the max. freq. of voicing curve/contour
        numFilteringStages = 2;
        medianFilterLength = 12;
        movingAverageFilterLength = 12;

        numPeriodsInitialPitchEstimation = 3.0f;

        // Voicing decision thresholds (see the field documentation for the direction of each effect)
        cumulativeAmpThreshold = 2.0f;
        maximumAmpThresholdInDB = 13.0f;
        harmonicDeviationPercent = 20.0f;
        sharpPeakAmpDiffInDB = 12.0f;

        minimumTotalHarmonics = 0;
        maximumTotalHarmonics = 100;

        // NOTE(review): the minimum and maximum defaults below are identical; confirm this is intended.
        minimumVoicedFrequencyOfVoicing = 5000.0f;
        maximumVoicedFrequencyOfVoicing = 5000.0f;
        maximumFrequencyOfVoicingFinalShift = 0.0f;
        runningMeanVoicingThreshold = 0.5f;
        lastCorrelatedHarmonicNeighbour = -1; // -1 ==> full correlation approach

        // For voicing detection
        vuvSearchMinHarmonicMultiplier = 0.7f;
        vuvSearchMaxHarmonicMultiplier = 4.3f;

        neighsPercent = 50.0f;
    }

    /**
     * Creates a parameter set by reading all fields from a binary stream via {@link #read(DataInputStream)}.
     * <p>
     * An {@link IOException} raised while reading is not propagated: its stack trace is printed and construction
     * completes. In that case the fields that had not been read yet keep their Java default value (zero); the defaults
     * of the no-argument constructor are NOT applied.
     *
     * @param dis
     *            stream positioned at the start of a record produced by {@link #write(DataOutputStream)}
     */
    public HnmPitchVoicingAnalyzerParams(DataInputStream dis) {
        try {
            read(dis);
        } catch (IOException e) {
            // The constructor signature declares no checked exception, so the failure is only reported here.
            e.printStackTrace();
        }
    }

    /**
     * Copy constructor: creates a parameter set with the same field values as {@code existing}.
     *
     * @param existing
     *            the parameter set to copy; must not be {@code null}
     */
    public HnmPitchVoicingAnalyzerParams(HnmPitchVoicingAnalyzerParams existing) {
        mvfAnalysisWindowSizeInSeconds = existing.mvfAnalysisWindowSizeInSeconds;
        mvfAnalysisSkipSizeInSeconds = existing.mvfAnalysisSkipSizeInSeconds;
        f0AnalysisWindowSizeInSeconds = existing.f0AnalysisWindowSizeInSeconds;
        f0AnalysisSkipSizeInSeconds = existing.f0AnalysisSkipSizeInSeconds;
        fftSize = existing.fftSize;
        numFilteringStages = existing.numFilteringStages;
        medianFilterLength = existing.medianFilterLength;
        movingAverageFilterLength = existing.movingAverageFilterLength;
        numPeriodsInitialPitchEstimation = existing.numPeriodsInitialPitchEstimation;
        cumulativeAmpThreshold = existing.cumulativeAmpThreshold;
        maximumAmpThresholdInDB = existing.maximumAmpThresholdInDB;
        harmonicDeviationPercent = existing.harmonicDeviationPercent;
        sharpPeakAmpDiffInDB = existing.sharpPeakAmpDiffInDB;
        minimumTotalHarmonics = existing.minimumTotalHarmonics;
        maximumTotalHarmonics = existing.maximumTotalHarmonics;
        minimumVoicedFrequencyOfVoicing = existing.minimumVoicedFrequencyOfVoicing;
        maximumVoicedFrequencyOfVoicing = existing.maximumVoicedFrequencyOfVoicing;
        maximumFrequencyOfVoicingFinalShift = existing.maximumFrequencyOfVoicingFinalShift;
        runningMeanVoicingThreshold = existing.runningMeanVoicingThreshold;
        lastCorrelatedHarmonicNeighbour = existing.lastCorrelatedHarmonicNeighbour;
        vuvSearchMinHarmonicMultiplier = existing.vuvSearchMinHarmonicMultiplier;
        vuvSearchMaxHarmonicMultiplier = existing.vuvSearchMaxHarmonicMultiplier;
        neighsPercent = existing.neighsPercent;
    }

    /**
     * Field-by-field comparison with another parameter set.
     * <p>
     * Note that this is an overload taking {@code HnmPitchVoicingAnalyzerParams}; it does not override
     * {@link Object#equals(Object)}. Float fields are compared with {@code ==}, so a field holding NaN never compares
     * equal.
     *
     * @param existing
     *            the parameter set to compare against; may be {@code null}
     * @return {@code true} if {@code existing} is non-null and every field has the same value, {@code false} otherwise
     */
    public boolean equals(HnmPitchVoicingAnalyzerParams existing) {
        if (existing == null) {
            return false; // previously a NullPointerException
        }
        return mvfAnalysisWindowSizeInSeconds == existing.mvfAnalysisWindowSizeInSeconds
                && mvfAnalysisSkipSizeInSeconds == existing.mvfAnalysisSkipSizeInSeconds
                && f0AnalysisWindowSizeInSeconds == existing.f0AnalysisWindowSizeInSeconds
                && f0AnalysisSkipSizeInSeconds == existing.f0AnalysisSkipSizeInSeconds
                && fftSize == existing.fftSize
                && numFilteringStages == existing.numFilteringStages
                && medianFilterLength == existing.medianFilterLength
                && movingAverageFilterLength == existing.movingAverageFilterLength
                && numPeriodsInitialPitchEstimation == existing.numPeriodsInitialPitchEstimation
                && cumulativeAmpThreshold == existing.cumulativeAmpThreshold
                && maximumAmpThresholdInDB == existing.maximumAmpThresholdInDB
                && harmonicDeviationPercent == existing.harmonicDeviationPercent
                && sharpPeakAmpDiffInDB == existing.sharpPeakAmpDiffInDB
                && minimumTotalHarmonics == existing.minimumTotalHarmonics
                && maximumTotalHarmonics == existing.maximumTotalHarmonics
                && minimumVoicedFrequencyOfVoicing == existing.minimumVoicedFrequencyOfVoicing
                && maximumVoicedFrequencyOfVoicing == existing.maximumVoicedFrequencyOfVoicing
                && maximumFrequencyOfVoicingFinalShift == existing.maximumFrequencyOfVoicingFinalShift
                && runningMeanVoicingThreshold == existing.runningMeanVoicingThreshold
                && lastCorrelatedHarmonicNeighbour == existing.lastCorrelatedHarmonicNeighbour
                && vuvSearchMinHarmonicMultiplier == existing.vuvSearchMinHarmonicMultiplier
                && vuvSearchMaxHarmonicMultiplier == existing.vuvSearchMaxHarmonicMultiplier
                && neighsPercent == existing.neighsPercent;
    }

    /**
     * Writes all fields to a binary stream, in declaration order, as 4-byte floats and 4-byte ints.
     * <p>
     * Every float field is written with {@code writeFloat} so that the layout matches what
     * {@link #read(DataInputStream)} consumes. Earlier code wrote seven of the float fields with {@code writeDouble}
     * (8 bytes each) while {@code read} used {@code readFloat} (4 bytes each), so a written record could not be read
     * back correctly.
     *
     * @param dos
     *            the stream to write to
     * @throws IOException
     *             if writing to the stream fails
     */
    public void write(DataOutputStream dos) throws IOException {
        dos.writeFloat(mvfAnalysisWindowSizeInSeconds);
        dos.writeFloat(mvfAnalysisSkipSizeInSeconds);
        dos.writeFloat(f0AnalysisWindowSizeInSeconds);
        dos.writeFloat(f0AnalysisSkipSizeInSeconds);
        dos.writeInt(fftSize);
        dos.writeInt(numFilteringStages);
        dos.writeInt(medianFilterLength);
        dos.writeInt(movingAverageFilterLength);
        dos.writeFloat(numPeriodsInitialPitchEstimation);
        dos.writeFloat(cumulativeAmpThreshold);
        dos.writeFloat(maximumAmpThresholdInDB);
        dos.writeFloat(harmonicDeviationPercent);
        dos.writeFloat(sharpPeakAmpDiffInDB);
        dos.writeInt(minimumTotalHarmonics);
        dos.writeInt(maximumTotalHarmonics);
        dos.writeFloat(minimumVoicedFrequencyOfVoicing);
        dos.writeFloat(maximumVoicedFrequencyOfVoicing);
        dos.writeFloat(maximumFrequencyOfVoicingFinalShift);
        dos.writeFloat(runningMeanVoicingThreshold);
        dos.writeInt(lastCorrelatedHarmonicNeighbour);
        dos.writeFloat(vuvSearchMinHarmonicMultiplier);
        dos.writeFloat(vuvSearchMaxHarmonicMultiplier);
        dos.writeFloat(neighsPercent);
    }

    /**
     * Reads all fields from a binary stream, in the same order and with the same types as
     * {@link #write(DataOutputStream)} emits them.
     *
     * @param dis
     *            the stream to read from
     * @throws IOException
     *             if reading fails, including when the stream ends before all fields have been read
     */
    public void read(DataInputStream dis) throws IOException {
        mvfAnalysisWindowSizeInSeconds = dis.readFloat();
        mvfAnalysisSkipSizeInSeconds = dis.readFloat();
        f0AnalysisWindowSizeInSeconds = dis.readFloat();
        f0AnalysisSkipSizeInSeconds = dis.readFloat();
        fftSize = dis.readInt();
        numFilteringStages = dis.readInt();
        medianFilterLength = dis.readInt();
        movingAverageFilterLength = dis.readInt();
        numPeriodsInitialPitchEstimation = dis.readFloat();
        cumulativeAmpThreshold = dis.readFloat();
        maximumAmpThresholdInDB = dis.readFloat();
        harmonicDeviationPercent = dis.readFloat();
        sharpPeakAmpDiffInDB = dis.readFloat();
        minimumTotalHarmonics = dis.readInt();
        maximumTotalHarmonics = dis.readInt();
        minimumVoicedFrequencyOfVoicing = dis.readFloat();
        maximumVoicedFrequencyOfVoicing = dis.readFloat();
        maximumFrequencyOfVoicingFinalShift = dis.readFloat();
        runningMeanVoicingThreshold = dis.readFloat();
        lastCorrelatedHarmonicNeighbour = dis.readInt();
        vuvSearchMinHarmonicMultiplier = dis.readFloat();
        vuvSearchMaxHarmonicMultiplier = dis.readFloat();
        neighsPercent = dis.readFloat();
    }
}