/* * Copyright (c) 2007 - 2008 by Damien Di Fede <ddf@compartmental.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Library General Public License as published * by the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package ddf.minim.analysis; import ddf.minim.Minim; /** * FFT stands for Fast Fourier Transform. It is an efficient way to calculate the Complex * Discrete Fourier Transform. There is not much to say about this class other than the fact * that when you want to analyze the spectrum of an audio buffer you will almost always use * this class. One restriction of this class is that the audio buffers you want to analyze * must have a length that is a power of two. If you try to construct an FFT with a * <code>timeSize</code> that is not a power of two, an IllegalArgumentException will be * thrown. * <p> * A Fourier Transform is an algorithm that transforms a signal in the time * domain, such as a sample buffer, into a signal in the frequency domain, often * called the spectrum. The spectrum does not represent individual frequencies, * but actually represents frequency bands centered on particular frequencies. * The center frequency of each band is usually expressed as a fraction of the * sampling rate of the time domain signal and is equal to the index of the * frequency band divided by the total number of bands. The total number of * frequency bands is usually equal to the length of the time domain signal, but * access is only provided to frequency bands with indices less than half the * length, because they correspond to frequencies below the <a * href="http://en.wikipedia.org/wiki/Nyquist_frequency">Nyquist frequency</a>. * In other words, given a signal of length <code>N</code>, there will be * <code>N/2</code> frequency bands in the spectrum. * <p> * As an example, if you construct an FFT with a * <code>timeSize</code> of 1024 and and a <code>sampleRate</code> of 44100 * Hz, then the spectrum will contain values for frequencies below 22010 Hz, * which is the Nyquist frequency (half the sample rate). If you ask for the * value of band number 5, this will correspond to a frequency band centered on * <code>5/1024 * 44100 = 0.0048828125 * 44100 = 215 Hz</code>. The width of * that frequency band is equal to <code>2/1024</code>, expressed as a * fraction of the total bandwidth of the spectrum. The total bandwith of the * spectrum is equal to the Nyquist frequency, which in this case is 22050, so * the bandwidth is equal to about 50 Hz. It is not necessary for you to * remember all of these relationships, though it is good to be aware of them. * The function <code>getFreq()</code> allows you to query the spectrum with a * frequency in Hz and the function <code>getBandWidth()</code> will return * the bandwidth in Hz of each frequency band in the spectrum. * <p> * <b>Usage</b> * <p> * A typical usage of the FFT is to analyze a signal so that the * frequency spectrum may be represented in some way, typically with vertical * lines. You could do this in Processing with the following code, where * <code>audio</code> is an AudioSource and <code>fft</code> is an FFT. * * <pre> * fft.forward(audio.left); * for (int i = 0; i < fft.specSize(); i++) * { * // draw the line for frequency band i, scaling it by 4 so we can see it a bit better * line(i, height, i, height - fft.getBand(i) * 4); * } * </pre> * * <b>Windowing</b> * <p> * Windowing is the process of shaping the audio samples before transforming them * to the frequency domain. The Fourier Transform assumes the sample buffer is is a * repetitive signal, if a sample buffer is not truly periodic within the measured * interval sharp discontinuities may arise that can introduce spectral leakage. * Spectral leakage is the speading of signal energy across multiple FFT bins. This * "spreading" can drown out narrow band signals and hinder detection. * </p> * <p> * A <a href="http://en.wikipedia.org/wiki/Window_function">windowing function</a> * attempts to reduce spectral leakage by attenuating the measured sample buffer * at its end points to eliminate discontinuities. If you call the <code>window()</code> * function with an appropriate WindowFunction, such as <code>HammingWindow()</code>, * the sample buffers passed to the object for analysis will be shaped by the current * window before being transformed. The result of using a window is to reduce * the leakage in the spectrum somewhat. * <p> * <b>Averages</b> * <p> * FFT also has functions that allow you to request the creation of * an average spectrum. An average spectrum is simply a spectrum with fewer * bands than the full spectrum where each average band is the average of the * amplitudes of some number of contiguous frequency bands in the full spectrum. * <p> * <code>linAverages()</code> allows you to specify the number of averages * that you want and will group frequency bands into groups of equal number. So * if you have a spectrum with 512 frequency bands and you ask for 64 averages, * each average will span 8 bands of the full spectrum. * <p> * <code>logAverages()</code> will group frequency bands by octave and allows * you to specify the size of the smallest octave to use (in Hz) and also how * many bands to split each octave into. So you might ask for the smallest * octave to be 60 Hz and to split each octave into two bands. The result is * that the bandwidth of each average is different. One frequency is an octave * above another when it's frequency is twice that of the lower frequency. So, * 120 Hz is an octave above 60 Hz, 240 Hz is an octave above 120 Hz, and so on. * When octaves are split, they are split based on Hz, so if you split the * octave 60-120 Hz in half, you will get 60-90Hz and 90-120Hz. You can see how * these bandwidths increase as your octave sizes grow. For instance, the last * octave will always span <code>sampleRate/4 - sampleRate/2</code>, which in * the case of audio sampled at 44100 Hz is 11025-22010 Hz. These * logarithmically spaced averages are usually much more useful than the full * spectrum or the linearly spaced averages because they map more directly to * how humans perceive sound. * <p> * <code>calcAvg()</code> allows you to specify the frequency band you want an * average calculated for. You might ask for 60-500Hz and this function will * group together the bands from the full spectrum that fall into that range and * average their amplitudes for you. * <p> * If you don't want any averages calculated, then you can call * <code>noAverages()</code>. This will not impact your ability to use * <code>calcAvg()</code>, it will merely prevent the object from calculating * an average array every time you use <code>forward()</code>. * <p> * <b>Inverse Transform</b> * <p> * FFT also supports taking the inverse transform of a spectrum. * This means that a frequency spectrum will be transformed into a time domain * signal and placed in a provided sample buffer. The length of the time domain * signal will be <code>timeSize()</code> long. The <code>set</code> and * <code>scale</code> functions allow you the ability to shape the spectrum * already stored in the object before taking the inverse transform. You might * use these to filter frequencies in a spectrum or modify it in some other way. * * @example Basics/AnalyzeSound * * @see FourierTransform * @see <a href="http://www.dspguide.com/ch12.htm">The Fast Fourier Transform</a> * * @author Damien Di Fede * */ public class FFT extends FourierTransform { /** * Constructs an FFT that will accept sample buffers that are * <code>timeSize</code> long and have been recorded with a sample rate of * <code>sampleRate</code>. <code>timeSize</code> <em>must</em> be a * power of two. This will throw an exception if it is not. * * @param timeSize * int: the length of the sample buffers you will be analyzing * @param sampleRate * float: the sample rate of the audio you will be analyzing */ public FFT(int timeSize, float sampleRate) { super(timeSize, sampleRate); if ((timeSize & (timeSize - 1)) != 0) { throw new IllegalArgumentException("FFT: timeSize must be a power of two."); } buildReverseTable(); buildTrigTables(); } protected void allocateArrays() { spectrum = new float[timeSize / 2 + 1]; real = new float[timeSize]; imag = new float[timeSize]; } public void scaleBand(int i, float s) { if (s < 0) { Minim.error("Can't scale a frequency band by a negative value."); return; } real[i] *= s; imag[i] *= s; spectrum[i] *= s; if (i != 0 && i != timeSize / 2) { real[timeSize - i] = real[i]; imag[timeSize - i] = -imag[i]; } } public void setBand(int i, float a) { if (a < 0) { Minim.error("Can't set a frequency band to a negative value."); return; } if (real[i] == 0 && imag[i] == 0) { real[i] = a; spectrum[i] = a; } else { real[i] /= spectrum[i]; imag[i] /= spectrum[i]; spectrum[i] = a; real[i] *= spectrum[i]; imag[i] *= spectrum[i]; } if (i != 0 && i != timeSize / 2) { real[timeSize - i] = real[i]; imag[timeSize - i] = -imag[i]; } } // performs an in-place fft on the data in the real and imag arrays // bit reversing is not necessary as the data will already be bit reversed private void fft() { for (int halfSize = 1; halfSize < real.length; halfSize *= 2) { // float k = -(float)Math.PI/halfSize; // phase shift step // float phaseShiftStepR = (float)Math.cos(k); // float phaseShiftStepI = (float)Math.sin(k); // using lookup table float phaseShiftStepR = cos(halfSize); float phaseShiftStepI = sin(halfSize); // current phase shift float currentPhaseShiftR = 1.0f; float currentPhaseShiftI = 0.0f; for (int fftStep = 0; fftStep < halfSize; fftStep++) { for (int i = fftStep; i < real.length; i += 2 * halfSize) { int off = i + halfSize; float tr = (currentPhaseShiftR * real[off]) - (currentPhaseShiftI * imag[off]); float ti = (currentPhaseShiftR * imag[off]) + (currentPhaseShiftI * real[off]); real[off] = real[i] - tr; imag[off] = imag[i] - ti; real[i] += tr; imag[i] += ti; } float tmpR = currentPhaseShiftR; currentPhaseShiftR = (tmpR * phaseShiftStepR) - (currentPhaseShiftI * phaseShiftStepI); currentPhaseShiftI = (tmpR * phaseShiftStepI) + (currentPhaseShiftI * phaseShiftStepR); } } } public void forward(float[] buffer) { if (buffer.length != timeSize) { Minim .error("FFT.forward: The length of the passed sample buffer must be equal to timeSize()."); return; } doWindow(buffer); // copy samples to real/imag in bit-reversed order bitReverseSamples(buffer, 0); // perform the fft fft(); // fill the spectrum buffer with amplitudes fillSpectrum(); } @Override public void forward(float[] buffer, int startAt) { if ( buffer.length - startAt < timeSize ) { Minim.error( "FourierTransform.forward: not enough samples in the buffer between " + startAt + " and " + buffer.length + " to perform a transform." ); return; } currentWindow.apply( buffer, startAt, timeSize ); bitReverseSamples(buffer, startAt); fft(); fillSpectrum(); } /** * Performs a forward transform on the passed buffers. * * @param buffReal the real part of the time domain signal to transform * @param buffImag the imaginary part of the time domain signal to transform */ public void forward(float[] buffReal, float[] buffImag) { if (buffReal.length != timeSize || buffImag.length != timeSize) { Minim .error("FFT.forward: The length of the passed buffers must be equal to timeSize()."); return; } setComplex(buffReal, buffImag); bitReverseComplex(); fft(); fillSpectrum(); } public void inverse(float[] buffer) { if (buffer.length > real.length) { Minim .error("FFT.inverse: the passed array's length must equal FFT.timeSize()."); return; } // conjugate for (int i = 0; i < timeSize; i++) { imag[i] *= -1; } bitReverseComplex(); fft(); // copy the result in real into buffer, scaling as we do for (int i = 0; i < buffer.length; i++) { buffer[i] = real[i] / real.length; } } private int[] reverse; private void buildReverseTable() { int N = timeSize; reverse = new int[N]; // set up the bit reversing table reverse[0] = 0; for (int limit = 1, bit = N / 2; limit < N; limit <<= 1, bit >>= 1) for (int i = 0; i < limit; i++) reverse[i + limit] = reverse[i] + bit; } // copies the values in the samples array into the real array // in bit reversed order. the imag array is filled with zeros. private void bitReverseSamples(float[] samples, int startAt) { for (int i = 0; i < timeSize; ++i) { real[i] = samples[ startAt + reverse[i] ]; imag[i] = 0.0f; } } // bit reverse real[] and imag[] private void bitReverseComplex() { float[] revReal = new float[real.length]; float[] revImag = new float[imag.length]; for (int i = 0; i < real.length; i++) { revReal[i] = real[reverse[i]]; revImag[i] = imag[reverse[i]]; } real = revReal; imag = revImag; } // lookup tables private float[] sinlookup; private float[] coslookup; private float sin(int i) { return sinlookup[i]; } private float cos(int i) { return coslookup[i]; } private void buildTrigTables() { int N = timeSize; sinlookup = new float[N]; coslookup = new float[N]; for (int i = 0; i < N; i++) { sinlookup[i] = (float) Math.sin(-(float) Math.PI / i); coslookup[i] = (float) Math.cos(-(float) Math.PI / i); } } }