/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.frequencywarp;
import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.frontend.DoubleData;
import edu.cmu.sphinx.util.props.*;
/**
* Filters an input power spectrum through a PLP filterbank. The filters in the filterbank are placed along the frequency
* axis so as to mimic the critical bands, representing the different perceptual effects at different frequency bands. The
* filter outputs are also scaled for equal loudness preemphasis. The filter shapes are defined by the {@link PLPFilter}
* class. Like the {@link MelFrequencyFilterBank2}, this filter bank has characteristics defined by the {@link
* #PROP_NUMBER_FILTERS number of filters}, the {@link #PROP_MIN_FREQ minimum frequency}, and the {@link #PROP_MAX_FREQ
* maximum frequency}. Unlike the {@link MelFrequencyFilterBank2}, the minimum and maximum frequencies here refer to the
* <b>center</b> frequencies of the filters located at the leftmost and rightmost positions, and not to the edges.
* Therefore, this filter bank spans a frequency range that goes beyond the limits suggested by the minimum and maximum
* frequencies.
*
* @author <a href="mailto:rsingh@cs.cmu.edu">rsingh</a>
* @version 1.0
* @see PLPFilter
*/
public class PLPFrequencyFilterBank extends BaseDataProcessor {

    /** The property for the number of filters in the filterbank. */
    @S4Integer(defaultValue = 32)
    public static final String PROP_NUMBER_FILTERS = "numberFilters";

    /** The property for the center frequency of the lowest filter in the filterbank. */
    @S4Double(defaultValue = 130.0)
    public static final String PROP_MIN_FREQ = "minimumFrequency";

    /** The property for the center frequency of the highest filter in the filterbank. */
    @S4Double(defaultValue = 3600.0)
    public static final String PROP_MAX_FREQ = "maximumFrequency";

    /** Sample rate of the data the current filterbank was built for. */
    private int sampleRate;

    /** FFT size inferred from the length of the incoming power spectrum. */
    private int numberFftPoints;

    private int numberFilters;
    private double minFreq;
    private double maxFreq;

    /** Critical band filters; {@code null} until the first spectrum has been processed. */
    private PLPFilter[] criticalBandFilter;

    /** Equal loudness scaling factor for each filter, parallel to {@link #criticalBandFilter}. */
    private double[] equalLoudnessScaling;

    /**
     * Creates a filter bank that is configured directly instead of through a property sheet.
     *
     * @param minFreq       value used as the minimum analysis frequency, in Hz
     * @param maxFreq       value used as the maximum analysis frequency, in Hz
     * @param numberFilters number of filters in the filterbank
     */
    public PLPFrequencyFilterBank(double minFreq, double maxFreq, int numberFilters) {
        initLogger();
        this.minFreq = minFreq;
        this.maxFreq = maxFreq;
        this.numberFilters = numberFilters;
    }

    /** Creates an unconfigured filter bank; the settings are supplied via {@link #newProperties}. */
    public PLPFrequencyFilterBank() {
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        minFreq = ps.getDouble(PROP_MIN_FREQ);
        maxFreq = ps.getDouble(PROP_MAX_FREQ);
        numberFilters = ps.getInt(PROP_NUMBER_FILTERS);
    }

    /** Initializes this PLPFrequencyFilterBank object */
    @Override
    public void initialize() {
        super.initialize();
    }

    /**
     * Builds the PLP filterbank from the current {@code numberFftPoints}, {@code sampleRate}, {@code minFreq},
     * {@code maxFreq} and {@code numberFilters}. The center frequencies of the filters are uniformly spaced on the
     * Bark scale, starting at the minimum analysis frequency.
     * <p>
     * The {@code criticalBandFilter} field is only assigned once every filter has been created, so a failed build
     * never leaves a partially initialized filterbank behind.
     *
     * @throws IllegalArgumentException if the number of FFT points is zero or the number of filters is less than one
     */
    private void buildCriticalBandFilterbank() throws IllegalArgumentException {
        // Validate before allocating anything: a negative filter count must be reported as an
        // IllegalArgumentException, not as a NegativeArraySizeException from the array creation.
        if (numberFftPoints == 0) {
            throw new IllegalArgumentException("Number of FFT points is zero");
        }
        if (numberFilters < 1) {
            throw new IllegalArgumentException("Number of filters illegal: "
                    + numberFilters);
        }

        // Frequency (in Hz) of every bin of the one-sided spectrum, from 0 up to the Nyquist frequency.
        int numberDFTPoints = (numberFftPoints >> 1) + 1;
        double[] dftFrequencies = new double[numberDFTPoints];
        // Divide in floating point so that odd sample rates (e.g. 11025 Hz) are not truncated.
        double nyquistFreq = sampleRate / 2.0;
        for (int i = 0; i < numberDFTPoints; i++) {
            dftFrequencies[i] = i * nyquistFreq / (numberDFTPoints - 1);
        }

        /*
         * Find the center frequencies of the filters on the Bark scale, translate them to linear
         * frequency and create PLP filters with these center frequencies.
         */
        /* This is the same class of warper called by PLPFilter.java */
        FrequencyWarper bark = new FrequencyWarper();
        double minBarkFreq = bark.hertzToBark(minFreq);
        double maxBarkFreq = bark.hertzToBark(maxFreq);

        // NOTE(review): with a step of range / (numberFilters + 1) and centers at min + i * step,
        // the highest center lies below maxFreq rather than exactly on it. Kept unchanged because
        // altering the spacing would change the features produced for existing models - confirm intent.
        double deltaBarkFreq = (maxBarkFreq - minBarkFreq) / (numberFilters + 1);

        PLPFilter[] filters = new PLPFilter[numberFilters];
        for (int i = 0; i < numberFilters; i++) {
            double centerFreq = bark.barkToHertz(minBarkFreq + i * deltaBarkFreq);
            filters[i] = new PLPFilter(dftFrequencies, centerFreq);
        }
        this.criticalBandFilter = filters;
    }

    /**
     * Returns the equal loudness preemphasis factor at the given frequency. The value computed is
     * <p>
     * E(f) = (f^2 / (f^2 + 1.6e5))^2 * (f^2 + 1.44e6) / (f^2 + 9.61e6)
     * <p>
     * with f used directly as passed in, i.e. no conversion to radians is performed. This is the more modern
     * variant from HTK. It replaces the older formulation
     * <p>
     * E(w) = (w^2+56.8e6)*w^4/((w^2+6.3e6)^2(w^2+0.38e9)(w^6+9.58e26))
     * <p>
     * where w is the frequency in radians/second.
     *
     * @param freq the frequency at which to evaluate the function (callers pass a filter center frequency in Hz)
     * @return the equal loudness scaling factor for {@code freq}
     */
    private double loudnessScalingFunction(double freq) {
        double fsq = freq * freq;
        double fsub = fsq / (fsq + 1.6e5);
        return fsub * fsub * ((fsq + 1.44e6) / (fsq + 9.61e6));
    }

    /**
     * Creates the array of equal loudness preemphasis scaling terms, one per filter, evaluated at each filter's
     * center frequency. Must be called after {@link #buildCriticalBandFilterbank()}.
     */
    private void buildEqualLoudnessScalingFactors() {
        double[] scaling = new double[numberFilters];
        for (int i = 0; i < numberFilters; i++) {
            scaling[i] = loudnessScalingFunction(criticalBandFilter[i].centerFreqInHz);
        }
        equalLoudnessScaling = scaling;
    }

    /**
     * Filters an input power spectrum through the PLP filterbank and applies the equal loudness scaling. The
     * filterbank is (re)built lazily on the first spectrum and whenever the sample rate of the input changes.
     *
     * @param input input power spectrum
     * @return PLP power spectrum, with one value per filter
     * @throws java.lang.IllegalArgumentException if the filterbank cannot be built, or if the length of the input
     *                                            does not match the length the filterbank was built for
     */
    private DoubleData process(DoubleData input) throws
            IllegalArgumentException {
        double[] in = input.getValues();

        if (criticalBandFilter == null ||
                sampleRate != input.getSampleRate()) {
            // Drop any previous filterbank first: if the rebuild fails, the next frame retries it
            // instead of silently running with stale or half-initialized state.
            criticalBandFilter = null;
            equalLoudnessScaling = null;
            numberFftPoints = (in.length - 1) << 1;
            sampleRate = input.getSampleRate();
            buildCriticalBandFilterbank();
            buildEqualLoudnessScalingFactors();
        } else if (in.length != ((numberFftPoints >> 1) + 1)) {
            // The second value printed is the expected spectrum length, (numberFftPoints / 2) + 1.
            throw new IllegalArgumentException
                    ("Window size is incorrect: in.length == " + in.length +
                            ", numberFftPoints == " + ((numberFftPoints >> 1) + 1));
        }

        double[] outputPLPSpectralArray = new double[numberFilters];

        // Filter the input power spectrum
        for (int i = 0; i < numberFilters; i++) {
            // First compute critical band filter output
            outputPLPSpectralArray[i] = criticalBandFilter[i].filterOutput(in);
            // Then scale it for equal loudness preemphasis
            outputPLPSpectralArray[i] *= equalLoudnessScaling[i];
        }

        return new DoubleData(outputPLPSpectralArray, input.getSampleRate(),
                input.getFirstSampleNumber());
    }

    /**
     * Reads the next Data object, which is the power spectrum of an audio input frame. However, it can also be other
     * Data objects like a Signal, which is returned unmodified.
     *
     * @return the next available Data object, returns null if no Data object is available
     * @throws DataProcessingException if there is a data processing error
     */
    @Override
    public Data getData() throws DataProcessingException {
        Data input = getPredecessor().getData();
        // instanceof is false for null, so a missing Data object falls through and is returned as null.
        if (input instanceof DoubleData) {
            return process((DoubleData) input);
        }
        return input;
    }
}