/**
*
* Funf: Open Sensing Framework
* Copyright (C) 2010-2011 Nadav Aharony, Wei Pan, Alex Pentland.
* Acknowledgments: Alan Gardner
* Contact: nadav@media.mit.edu
*
* This file is part of Funf.
*
* Funf is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* Funf is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Funf. If not, see <http://www.gnu.org/licenses/>.
*
*/
package edu.mit.media.funf.math;
public class MFCC
{
private static double minMelFreq = 0;
private static double maxMelFreq = 4000;
private static double lifterExp = 0.6;
private int numCoeffs;
private int melBands;
private int numFreqs;
private double sampleRate;
public Matrix melWeights = null;
public Matrix dctMat = null;
public double[] lifterWeights;
public MFCC(int fftSize, int numCoeffs, int melBands, double sampleRate)
{
// Precompute mel-scale auditory perceptual spectrum
melWeights = new Matrix(melBands, fftSize, 0);
// Number of non-redundant frequency bins
numFreqs = fftSize/2 + 1;
this.numCoeffs = numCoeffs;
this.melBands = melBands;
this.sampleRate = sampleRate;
double fftFreqs[] = new double[fftSize];
for (int i = 0; i < fftSize; i ++)
{
fftFreqs[i] = (double)i/(double)fftSize*this.sampleRate;
}
double minMel = fhz2mel(minMelFreq);
double maxMel = fhz2mel(maxMelFreq);
double binFreqs[] = new double[melBands + 2];
for (int i = 0; i < melBands + 2; i ++)
{
binFreqs[i] = fmel2hz(minMel + (double)i/((double)melBands + 1.0) * (maxMel - minMel));
}
for (int i = 0; i < melBands; i ++)
{
for (int j = 0; j < fftSize; j ++)
{
double loSlope = (fftFreqs[j] - binFreqs[i])/(binFreqs[i+1] - binFreqs[i]);
double hiSlope = (binFreqs[i+2] - fftFreqs[j])/(binFreqs[i+2] - binFreqs[i+1]);
melWeights.A[i][j] = Math.max(0, Math.min(loSlope, hiSlope));
}
}
// Keep only positive frequency parts of Fourier transform
melWeights = melWeights.getMatrix(0, melBands - 1, 0, numFreqs - 1);
// Precompute DCT matrix
dctMat = new Matrix(numCoeffs, melBands, 0);
double scale = Math.sqrt(2.0/melBands);
for (int i = 0; i < numCoeffs; i ++)
{
for (int j = 0; j < melBands; j ++)
{
double phase = j*2 + 1;
dctMat.A[i][j] = Math.cos((double)i*phase/(2.0*(double)melBands)*Math.PI)*scale;
}
}
double root2 = 1.0/Math.sqrt(2.0);
for (int j = 0; j < melBands; j ++)
{
dctMat.A[0][j] *= root2;
}
// Precompute liftering vector
lifterWeights = new double[numCoeffs];
lifterWeights[0] = 1.0;
for (int i = 1; i < numCoeffs; i ++)
{
lifterWeights[i] = Math.pow((double)i, lifterExp);
}
}
public double[] cepstrum(double[] re, double[] im)
{
Matrix powerSpec = new Matrix(numFreqs, 1);
for (int i = 0; i < numFreqs; i ++)
{
powerSpec.A[i][0] = re[i]*re[i] + im[i]*im[i];
}
// melWeights - melBands x numFreqs
// powerSpec - numFreqs x 1
// melWeights*powerSpec - melBands x 1
// aSpec - melBands x 1
// dctMat - numCoeffs x melBands
// dctMat*log(aSpec) - numCoeffs x 1
Matrix aSpec = melWeights.times(powerSpec);
Matrix logMelSpec = new Matrix(melBands, 1);
for (int i = 0; i < melBands; i ++)
{
logMelSpec.A[i][0] = Math.log(aSpec.A[i][0]);
}
Matrix melCeps = dctMat.times(logMelSpec);
double[] ceps = new double[numCoeffs];
for (int i = 0; i < numCoeffs; i ++)
{
ceps[i] = lifterWeights[i]*melCeps.A[i][0];
}
return ceps;
}
public double fmel2hz(double mel)
{
return 700.0*(Math.pow(10.0, mel/2595.0) - 1.0);
}
public double fhz2mel(double freq)
{
return 2595.0*Math.log10(1.0 + freq/700.0);
}
}