MFCC.java example

Explorer
newFunf-master
/**
 * 
 * Funf: Open Sensing Framework
 * Copyright (C) 2010-2011 Nadav Aharony, Wei Pan, Alex Pentland.
 * Acknowledgments: Alan Gardner
 * Contact: nadav@media.mit.edu
 * 
 * This file is part of Funf.
 * 
 * Funf is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 * 
 * Funf is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with Funf. If not, see <http://www.gnu.org/licenses/>.
 * 
 */
package edu.mit.media.funf.math;


public class MFCC
{
	private static double minMelFreq = 0;
	private static double maxMelFreq = 4000;
	private static double lifterExp = 0.6;
	private int numCoeffs;
	private int melBands;
	private int numFreqs;
	private double sampleRate;
	public Matrix melWeights = null;
	public Matrix dctMat = null;
	public double[] lifterWeights;

	public MFCC(int fftSize, int numCoeffs, int melBands, double sampleRate)
	{
	    // Precompute mel-scale auditory perceptual spectrum
	    melWeights = new Matrix(melBands, fftSize, 0);
	    
	    // Number of non-redundant frequency bins
	    numFreqs = fftSize/2 + 1;
	    this.numCoeffs = numCoeffs;
	    this.melBands = melBands;
	    this.sampleRate = sampleRate;
	    
	    double fftFreqs[] = new double[fftSize];
	    for (int i = 0; i < fftSize; i ++)
	    {
	    	fftFreqs[i] = (double)i/(double)fftSize*this.sampleRate;
	    }
	    
	    double minMel = fhz2mel(minMelFreq);
	    double maxMel = fhz2mel(maxMelFreq);
	    
	    double binFreqs[] = new double[melBands + 2];
	    for (int i = 0; i < melBands + 2; i ++)
	    {
	    	binFreqs[i] = fmel2hz(minMel + (double)i/((double)melBands + 1.0) * (maxMel - minMel));
	    }
	    
	    for (int i = 0; i < melBands; i ++)
	    {
	    	for (int j = 0; j < fftSize; j ++)
	    	{
	    		double loSlope = (fftFreqs[j] - binFreqs[i])/(binFreqs[i+1] - binFreqs[i]);
		    	double hiSlope = (binFreqs[i+2] - fftFreqs[j])/(binFreqs[i+2] - binFreqs[i+1]);
		    	melWeights.A[i][j] = Math.max(0, Math.min(loSlope, hiSlope));
	    	}
	    }
	    
	    // Keep only positive frequency parts of Fourier transform
	    melWeights = melWeights.getMatrix(0, melBands - 1, 0, numFreqs - 1);
	    
	    // Precompute DCT matrix
	    dctMat = new Matrix(numCoeffs, melBands, 0);
	    double scale = Math.sqrt(2.0/melBands);
	    for (int i = 0; i < numCoeffs; i ++)
	    {
	    	for (int j = 0; j < melBands; j ++)
	    	{
	    		double phase = j*2 + 1;
		    	dctMat.A[i][j] = Math.cos((double)i*phase/(2.0*(double)melBands)*Math.PI)*scale;
	    	}
	    }
	    double root2 = 1.0/Math.sqrt(2.0);
	    for (int j = 0; j < melBands; j ++)
	    {
	    	dctMat.A[0][j] *= root2;
	    }
	    
	    // Precompute liftering vector
	    lifterWeights = new double[numCoeffs];
	    lifterWeights[0] = 1.0;
	    for (int i = 1; i < numCoeffs; i ++)
	    {
	    	lifterWeights[i] = Math.pow((double)i, lifterExp);
	    }
	}
	
	public double[] cepstrum(double[] re, double[] im)
	{
		Matrix powerSpec = new Matrix(numFreqs, 1);
		for (int i = 0; i < numFreqs; i ++)
		{
			powerSpec.A[i][0] = re[i]*re[i] + im[i]*im[i];
		}

		// melWeights - melBands x numFreqs
		// powerSpec  - numFreqs x 1
		// melWeights*powerSpec - melBands x 1
		// aSpec      - melBands x 1
		// dctMat     - numCoeffs x melBands
		// dctMat*log(aSpec) - numCoeffs x 1
		
		Matrix aSpec = melWeights.times(powerSpec);
		Matrix logMelSpec = new Matrix(melBands, 1);
		for (int i = 0; i < melBands; i ++)
		{
			logMelSpec.A[i][0] = Math.log(aSpec.A[i][0]);
		}

		Matrix melCeps = dctMat.times(logMelSpec);
			
		double[] ceps = new double[numCoeffs];
		for (int i = 0; i < numCoeffs; i ++)
		{
			ceps[i] = lifterWeights[i]*melCeps.A[i][0];
		}

		return ceps;
	}

	
	public double fmel2hz(double mel)
	{
		return 700.0*(Math.pow(10.0, mel/2595.0) - 1.0);
	}
	
	public double fhz2mel(double freq)
	{
		return 2595.0*Math.log10(1.0 + freq/700.0);
	}

}