/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.frequencywarp;
import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.frontend.DoubleData;
import edu.cmu.sphinx.util.props.*;
/**
* Computes the PLP cepstrum from a given PLP Spectrum. The power spectrum has the amplitude compressed by computing the
* cube root of the PLP spectrum. This operation is an approximation to the power law of hearing and simulates the
* non-linear relationship between sound intensity and perceived loudness. Computationally, this operation is used to
* reduce the spectral amplitude of the critical band to enable all-pole modeling with relatively low order AR filters.
* The inverse discrete cosine transform (IDCT) is then applied to the compressed spectrum to obtain the
* autocorrelation coefficients. A linear prediction filter is then estimated from the autocorrelation values, and
* the linear prediction cepstrum (LPC cepstrum) is finally computed from the LP filter.
*
* @author <a href="mailto:rsingh@cs.cmu.edu">rsingh</a>
* @version 1.0
* @see LinearPredictor
*/
public class PLPCepstrumProducer extends BaseDataProcessor {

    /** The property for the number of filters in the filter bank. */
    @S4Integer(defaultValue = 32)
    public static final String PROP_NUMBER_FILTERS = "numberFilters";

    /** The property specifying the length of the cepstrum data. */
    @S4Integer(defaultValue = 13)
    public static final String PROP_CEPSTRUM_LENGTH = "cepstrumLength";

    /** The property specifying the LPC order. */
    @S4Integer(defaultValue = 14)
    public static final String PROP_LPC_ORDER = "lpcOrder";

    /** Weight applied to the first spectral term when computing the IDCT. */
    private static final double FIRST_TERM_WEIGHT = 0.5;

    private int cepstrumSize;      // size of a Cepstrum
    private int LPCOrder;          // LPC Order to compute cepstrum
    private int numberPLPFilters;  // number of PLP filters

    /**
     * IDCT cosine table of dimensions {@code [LPCOrder + 1][numberPLPFilters]}; {@code null} until it is built by
     * {@link #initialize()} or on demand when the first frame is processed.
     */
    private double[][] cosine;

    /**
     * Creates a producer configured programmatically rather than through a property sheet.
     *
     * @param numberPLPFilters the number of PLP filters, i.e. the expected length of every input spectrum
     * @param cepstrumSize     the number of cepstral values requested from the linear predictor
     * @param LPCOrder         the order of the linear predictor
     */
    public PLPCepstrumProducer(int numberPLPFilters, int cepstrumSize, int LPCOrder) {
        initLogger();
        this.numberPLPFilters = numberPLPFilters;
        this.cepstrumSize = cepstrumSize;
        this.LPCOrder = LPCOrder;
    }

    /** Creates an unconfigured producer; its settings are read later in {@link #newProperties(PropertySheet)}. */
    public PLPCepstrumProducer() {
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        numberPLPFilters = ps.getInt(PROP_NUMBER_FILTERS);
        cepstrumSize = ps.getInt(PROP_CEPSTRUM_LENGTH);
        LPCOrder = ps.getInt(PROP_LPC_ORDER);
    }

    /** Initializes this processor and precomputes the cosine table used by the IDCT. */
    @Override
    public void initialize() {
        super.initialize();
        computeCosine();
    }

    /**
     * Computes the cosine values for the IDCT: {@code cosine[i][j] = cos(2 * PI * i * (j + 0.5) / period)} with
     * {@code period = 2 * numberPLPFilters}, for {@code 0 <= i <= LPCOrder} and {@code 0 <= j < numberPLPFilters}.
     */
    private void computeCosine() {
        cosine = new double[LPCOrder + 1][numberPLPFilters];
        double period = (double) 2 * numberPLPFilters;
        for (int i = 0; i <= LPCOrder; i++) {
            double frequency = 2 * Math.PI * i / period;
            for (int j = 0; j < numberPLPFilters; j++) {
                cosine[i][j] = Math.cos(frequency * (j + 0.5));
            }
        }
    }

    /**
     * Builds the cosine table if it is missing or if its dimensions no longer match the current LPC order and
     * filter count. This covers instances created through the three-argument constructor that start processing
     * without {@link #initialize()} having been called.
     */
    private void ensureCosineTable() {
        boolean stale = cosine == null
                || cosine.length != LPCOrder + 1
                || (cosine.length > 0 && cosine[0].length != numberPLPFilters);
        if (stale) {
            computeCosine();
        }
    }

    /**
     * Applies the intensity loudness power law. This operation is an approximation to the power law of hearing and
     * simulates the non-linear relationship between sound intensity and perceived loudness. Computationally, this
     * operation is used to reduce the spectral amplitude of the critical band to enable all-pole modeling with
     * relatively low order AR filters.
     *
     * @param inspectrum the spectrum to compress; it is not modified
     * @return a new array of the same length holding the cube root of every input value
     */
    private double[] powerLawCompress(double[] inspectrum) {
        double[] compressedspectrum = new double[inspectrum.length];
        for (int i = 0; i < inspectrum.length; i++) {
            // Math.cbrt is a true cube root; Math.pow(x, 1.0 / 3.0) only approximates it because 1/3 is not
            // exactly representable, and it yields NaN for negative arguments.
            compressedspectrum[i] = Math.cbrt(inspectrum[i]);
        }
        return compressedspectrum;
    }

    /**
     * Returns the next Data object, which is the PLP cepstrum of the input frame when the predecessor delivers a
     * {@link DoubleData}. Any other Data object (including {@code null}) is passed through unchanged.
     *
     * @return the next available Data object, returns null if no Data object is available
     * @throws DataProcessingException if there is an error reading the Data objects
     */
    @Override
    public Data getData() throws DataProcessingException {
        Data input = getPredecessor().getData();
        // instanceof is false for null, so no separate null check is needed.
        if (input instanceof DoubleData) {
            return process((DoubleData) input);
        }
        return input;
    }

    /**
     * Process data, creating the PLP cepstrum from an input PLP spectrum frame.
     *
     * @param input a PLP Spectrum frame
     * @return a PLP Data frame carrying the sample rate and first sample number of the input
     * @throws IllegalArgumentException if the length of the input spectrum differs from the number of PLP filters
     */
    private Data process(DoubleData input) throws IllegalArgumentException {
        double[] plpspectrum = input.getValues();
        if (plpspectrum.length != numberPLPFilters) {
            throw new IllegalArgumentException
                    ("PLPSpectrum size is incorrect: plpspectrum.length == " +
                            plpspectrum.length + ", numberPLPFilters == " +
                            numberPLPFilters);
        }

        ensureCosineTable();

        // power law compress spectrum
        double[] compressedspectrum = powerLawCompress(plpspectrum);

        // compute autocorrelation values
        double[] autocor = applyCosine(compressedspectrum);

        // Compute LPC Parameters
        LinearPredictor predictor = new LinearPredictor(LPCOrder);
        predictor.getARFilter(autocor);

        // Compute LPC Cepstra
        double[] cepstrumDouble = predictor.getData(cepstrumSize);

        return new DoubleData(cepstrumDouble, input.getSampleRate(), input.getFirstSampleNumber());
    }

    /**
     * Computes the inverse discrete cosine transform of the given (compressed) spectrum. For every
     * {@code 0 <= i <= LPCOrder} the result is the cosine-weighted sum of the spectral values, with the first
     * spectral term weighted by 0.5, divided by the number of PLP filters.
     *
     * @param plpspectrum the PLPSpectrum data
     * @return autocorrelation computed from PLP spectral values; all zeros when there are no PLP filters
     */
    private double[] applyCosine(double[] plpspectrum) {
        double[] autocor = new double[LPCOrder + 1];
        if (numberPLPFilters <= 0) {
            // Nothing to sum over; every coefficient stays at zero.
            return autocor;
        }

        double period = numberPLPFilters;

        // apply the idct
        for (int i = 0; i <= LPCOrder; i++) {
            double[] cosineRow = cosine[i];
            double sum = 0.0;
            sum += (FIRST_TERM_WEIGHT * plpspectrum[0] * cosineRow[0]);
            for (int j = 1; j < numberPLPFilters; j++) {
                sum += (plpspectrum[j] * cosineRow[j]);
            }
            autocor[i] = sum / period;
        }
        return autocor;
    }
}