FFT.java example

Explorer
jpcsp-master
/*
This file is part of jpcsp.

Jpcsp is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Jpcsp is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Jpcsp.  If not, see <http://www.gnu.org/licenses/>.
 */
package jpcsp.media.codec.util;

import static java.lang.Math.abs;
import static java.lang.Math.cos;
import static java.lang.Math.sin;
import static java.lang.Math.sqrt;
import jpcsp.media.codec.CodecFactory;
import jpcsp.util.Utilities;

import org.apache.log4j.Logger;

public class FFT {
	private static Logger log = CodecFactory.log;
	int nbits;
	boolean inverse;
	int revtab[] = new int[0];
	float tmpBuf[] = new float[0];
	int mdctSize; // size of MDCT (i.e. number of input data * 2)
	int mdctBits; // n = 2^nbits
	// pre/post rotation tables
	float tcos[] = new float[0];
	float tsin[] = new float[0];
	public static final double M_SQRT1_2 = 0.70710678118654752440; // 1/sqrt(2)
	private static final float sqrthalf = (float) M_SQRT1_2;
	private static final float[] ff_cos_16  = new float[16 / 2];
	private static final float[] ff_cos_32  = new float[32 / 2];
	private static final float[] ff_cos_64  = new float[64 / 2];
	private static final float[] ff_cos_128 = new float[128 / 2];
	private static final float[] ff_cos_256 = new float[256 / 2];
	private static final float[] ff_cos_512 = new float[512 / 2];

	public void copy(FFT that) {
		nbits = that.nbits;
		inverse = that.inverse;
		Utilities.copy(revtab, that.revtab);
		Utilities.copy(tmpBuf, that.tmpBuf);
		mdctSize = that.mdctSize;
		mdctBits = that.mdctBits;
		Utilities.copy(tcos, that.tcos);
		Utilities.copy(tsin, that.tsin);
	}

	private static void initFfCosTabs(float[] tab, int m) {
		double freq = 2 * Math.PI / m;
		for (int i = 0; i <= m / 4; i++) {
			tab[i] = (float) cos(i * freq);
		}
		for (int i = 1; i < m / 4; i++) {
			tab[m / 2 - i] = tab[i];
		}
	}

	private static int splitRadixPermutation(int i, int n, boolean inverse) {
		if (n <= 2) {
			return i & 1;
		}
		int m = n >> 1;
		if ((i & m) == 0) {
			return splitRadixPermutation(i, m, inverse) * 2;
		}
		m >>= 1;
		return splitRadixPermutation(i, m, inverse) * 4 + (inverse == ((i & m) == 0) ? 1 : -1);
	}

	private int fftInit(int nbits, boolean inverse) {
		if (nbits < 2 || nbits > 16) {
			revtab = null;
			tmpBuf = null;
			return -1;
		}

		this.nbits = nbits;
		this.inverse = inverse;

		int n = 1 << nbits;
		revtab = new int[n];
		tmpBuf = new float[n * 2];

		initFfCosTabs(ff_cos_16 ,  16);
		initFfCosTabs(ff_cos_32 ,  32);
		initFfCosTabs(ff_cos_64 ,  64);
		initFfCosTabs(ff_cos_128, 128);
		initFfCosTabs(ff_cos_256, 256);
		initFfCosTabs(ff_cos_512, 512);

		for (int i = 0; i < n; i++) {
			revtab[-splitRadixPermutation(i, n, inverse) & (n - 1)] = i;
		}

		return 0;
	}

	public int mdctInit(int nbits, boolean inverse, double scale) {
		int n = 1 << nbits;
		mdctBits = nbits;
		mdctSize = n;
		int n4 = n >> 2;

		int ret = fftInit(mdctBits - 2, inverse);
		if (ret < 0) {
			return ret;
		}

		tcos = new float[n4];
		tsin = new float[n4];

		double theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0);
		scale = sqrt(abs(scale));
		for (int i = 0; i < n4; i++) {
			double alpha = 2 * Math.PI * (i + theta) / n;
			tcos[i] = (float) (-cos(alpha) * scale);
			tsin[i] = (float) (-sin(alpha) * scale);
		}

		return 0;
	}

	/**
	 * Compute inverse MDCT of size N = 2^nbits
	 * @param output N samples
	 * @param input N/2 samples
	 */
	public void imdctCalc(float[] output, int outputOffset, float[] input, int inputOffset) {
		int n = 1 << mdctBits;
		int n2 = n >> 1;
		int n4 = n >> 2;

		imdctHalf(output, outputOffset + n4, input, inputOffset);

		for (int k = 0; k < n4; k++) {
			output[outputOffset + k] = -output[outputOffset + n2 - k - 1];
			output[outputOffset + n - k - 1] = output[outputOffset + n2 + k];
		}
	}

	/**
	 * Compute the middle half of the inverse MDCT of size N = 2^nbits,
	 * thus excluding the parts that can be derived by symmetry
	 * @param output N/2 samples
	 * @param input N/2 samples
	 */
	public void imdctHalf(float[] output, int outputOffset, final float[] input, int inputOffset) {
		int n = 1 << mdctBits;
		int n2 = n >> 1;
		int n4 = n >> 2;
		int n8 = n >> 3;

		// pre rotation
		int in1 = 0;
		int in2 = n2 - 1;
		for (int k = 0; k < n4; k++) {
			int j = revtab[k];
			CMUL(output, outputOffset + j * 2, outputOffset + j * 2 + 1, input[inputOffset + in2], input[inputOffset + in1], tcos[k], tsin[k]);
			in1 += 2;
			in2 -= 2;
		}
		fftCalcFloat(output, outputOffset);

		// post rotation + reordering
		final float[] r = new float[4];
		for (int k = 0; k < n8; k++) {
			CMUL(r, 0, 3, output[outputOffset + (n8 - k - 1) * 2 + 1], output[outputOffset + (n8 - k - 1) * 2 + 0], tsin[n8 - k - 1], tcos[n8 - k - 1]);
			CMUL(r, 2, 1, output[outputOffset + (n8 + k    ) * 2 + 1], output[outputOffset + (n8 + k    ) * 2 + 0], tsin[n8 + k    ], tcos[n8 + k    ]);
			output[outputOffset + (n8 - k - 1) * 2 + 0] = r[0];
			output[outputOffset + (n8 - k - 1) * 2 + 1] = r[1];
			output[outputOffset + (n8 + k    ) * 2 + 0] = r[2];
			output[outputOffset + (n8 + k    ) * 2 + 1] = r[3];
		}
	}

	private static void CMUL(float[] d, int dre, int dim, float are, float aim, float bre, float bim) {
		d[dre] = are * bre - aim * bim;
		d[dim] = are * bim + aim * bre;
	}

	private void fft4(float[] z, int o) {
	    // BF(t3, t1, z[0].re, z[1].re);
	    // BF(t8, t6, z[3].re, z[2].re);
	    // BF(z[2].re, z[0].re, t1, t6);
	    // BF(t4, t2, z[0].im, z[1].im);
	    // BF(t7, t5, z[2].im, z[3].im);
	    // BF(z[3].im, z[1].im, t4, t8);
	    // BF(z[3].re, z[1].re, t3, t7);
	    // BF(z[2].im, z[0].im, t2, t5);
		double t3 = z[o + 0] - z[o + 2];
		double t1 = z[o + 0] + z[o + 2];
		double t8 = z[o + 6] - z[o + 4];
		double t6 = z[o + 6] + z[o + 4];
		z[o + 4] = (float) (t1 - t6);
		z[o + 0] = (float) (t1 + t6);
		double t4 = z[o + 1] - z[o + 3];
		double t2 = z[o + 1] + z[o + 3];
		double t7 = z[o + 5] - z[o + 7];
		double t5 = z[o + 5] + z[o + 7];
		z[o + 7] = (float) (t4 - t8);
		z[o + 3] = (float) (t4 + t8);
		z[o + 6] = (float) (t3 - t7);
		z[o + 2] = (float) (t3 + t7);
		z[o + 5] = (float) (t2 - t5);
		z[o + 1] = (float) (t2 + t5);
	}

	private void fft8(float[] z, int o) {
		fft4(z, o);

	    // BF(t1, z[5].re, z[4].re, -z[5].re);
	    // BF(t2, z[5].im, z[4].im, -z[5].im);
		// BF(t5, z[7].re, z[6].re, -z[7].re);
		// BF(t6, z[7].im, z[6].im, -z[7].im);
		double t1 = z[o +  8] + z[o + 10];
		z[o + 10] = z[o +  8] - z[o + 10];
		double t2 = z[o +  9] + z[o + 11];
		z[o + 11] = z[o +  9] - z[o + 11];
		double t5 = z[o + 12] + z[o + 14];
		z[o + 14] = z[o + 12] - z[o + 14];
		double t6 = z[o + 13] + z[o + 15];
		z[o + 15] = z[o + 13] - z[o + 15];

		// BUTTERFLIES(z[0],z[2],z[4],z[6]);
		double t3 = t5 - t1;
		t5 = t5 + t1;
		z[o +  8] = (float) (z[o + 0] - t5);
		z[o +  0] = (float) (z[o + 0] + t5);
		z[o + 13] = (float) (z[o + 5] - t3);
		z[o +  5] = (float) (z[o + 5] + t3);
		double t4 = t2 - t6;
		t6 = t2 + t6;
		z[o + 12] = (float) (z[o + 4] - t4);
		z[o +  4] = (float) (z[o + 4] + t4);
		z[o +  9] = (float) (z[o + 1] - t6);
		z[o +  1] = (float) (z[o + 1] + t6);

		// TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
		//   CMUL(t1, t2, a2.re, a2.im, wre, -wim);
		t1 =   z[o + 10] * sqrthalf + z[o + 11] * sqrthalf;
		t2 = - z[o + 10] * sqrthalf + z[o + 11] * sqrthalf;
		//   CMUL(t5, t6, a3.re, a3.im, wre,  wim);
		t5 = z[o + 14] * sqrthalf - z[o + 15] * sqrthalf;
		t6 = z[o + 14] * sqrthalf + z[o + 15] * sqrthalf;
		//   BUTTERFLIES(a0,a1,a2,a3)
		t3 = t5 - t1;
		t5 = t5 + t1;
		z[o + 10] = (float) (z[o + 2] - t5);
		z[o +  2] = (float) (z[o + 2] + t5);
		z[o + 15] = (float) (z[o + 7] - t3);
		z[o +  7] = (float) (z[o + 7] + t3);
		t4 = t2 - t6;
		t6 = t2 + t6;
		z[o + 14] = (float) (z[o + 6] - t4);
		z[o +  6] = (float) (z[o + 6] + t4);
		z[o + 11] = (float) (z[o + 3] - t6);
		z[o +  3] = (float) (z[o + 3] + t6);
	}

	private void pass(float[] z, int o, float[] cos, int n) {
		int o0 = o;
		int o1 = o + 2 * n * 2;
		int o2 = o + 4 * n * 2;
		int o3 = o + 6 * n * 2;
		int wre = 0;
		int wim = 2 * n;
		n--;

		// TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);
		double t1 = z[o2 + 0];
		double t2 = z[o2 + 1];
		double t5 = z[o3 + 0];
		double t6 = z[o3 + 1];
		//   BUTTERFLIES(a0,a1,a2,a3)
		double t3 = t5 - t1;
		t5 = t5 + t1;
		z[o2 + 0] = (float) (z[o0 + 0] - t5);
		z[o0 + 0] = (float) (z[o0 + 0] + t5);
		z[o3 + 1] = (float) (z[o1 + 1] - t3);
		z[o1 + 1] = (float) (z[o1 + 1] + t3);
		double t4 = t2 - t6;
		t6 = t2 + t6;
		z[o3 + 0] = (float) (z[o1 + 0] - t4);
		z[o1 + 0] = (float) (z[o1 + 0] + t4);
		z[o2 + 1] = (float) (z[o0 + 1] - t6);
		z[o0 + 1] = (float) (z[o0 + 1] + t6);
		// TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);
		//   CMUL(t1, t2, a2.re, a2.im, wre, -wim);
		t1 =   z[o2 + 2] * cos[wre + 1] + z[o2 + 3] * cos[wim - 1];
		t2 = - z[o2 + 2] * cos[wim - 1] + z[o2 + 3] * cos[wre + 1];
		//   CMUL(t5, t6, a3.re, a3.im, wre,  wim);
		t5 = z[o3 + 2] * cos[wre + 1] - z[o3 + 3] * cos[wim - 1];
		t6 = z[o3 + 2] * cos[wim - 1] + z[o3 + 3] * cos[wre + 1];
		//   BUTTERFLIES(a0,a1,a2,a3)
		t3 = t5 - t1;
		t5 = t5 + t1;
		z[o2 + 2] = (float) (z[o0 + 2] - t5);
		z[o0 + 2] = (float) (z[o0 + 2] + t5);
		z[o3 + 3] = (float) (z[o1 + 3] - t3);
		z[o1 + 3] = (float) (z[o1 + 3] + t3);
		t4 = t2 - t6;
		t6 = t2 + t6;
		z[o3 + 2] = (float) (z[o1 + 2] - t4);
		z[o1 + 2] = (float) (z[o1 + 2] + t4);
		z[o2 + 3] = (float) (z[o0 + 3] - t6);
		z[o0 + 3] = (float) (z[o0 + 3] + t6);

		do {
			o0 += 4;
			o1 += 4;
			o2 += 4;
			o3 += 4;
			wre += 2;
			wim -= 2;
			// TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);
			//   CMUL(t1, t2, a2.re, a2.im, wre, -wim);
			t1 =   z[o2 + 0] * cos[wre] + z[o2 + 1] * cos[wim];
			t2 = - z[o2 + 0] * cos[wim] + z[o2 + 1] * cos[wre];
			//   CMUL(t5, t6, a3.re, a3.im, wre,  wim);
			t5 = z[o3 + 0] * cos[wre] - z[o3 + 1] * cos[wim];
			t6 = z[o3 + 0] * cos[wim] + z[o3 + 1] * cos[wre];
			//   BUTTERFLIES(a0,a1,a2,a3)
			t3 = t5 - t1;
			t5 = t5 + t1;
			z[o2 + 0] = (float) (z[o0 + 0] - t5);
			z[o0 + 0] = (float) (z[o0 + 0] + t5);
			z[o3 + 1] = (float) (z[o1 + 1] - t3);
			z[o1 + 1] = (float) (z[o1 + 1] + t3);
			t4 = t2 - t6;
			t6 = t2 + t6;
			z[o3 + 0] = (float) (z[o1 + 0] - t4);
			z[o1 + 0] = (float) (z[o1 + 0] + t4);
			z[o2 + 1] = (float) (z[o0 + 1] - t6);
			z[o0 + 1] = (float) (z[o0 + 1] + t6);
			// TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);
			//   CMUL(t1, t2, a2.re, a2.im, wre, -wim);
			t1 =   z[o2 + 2] * cos[wre + 1] + z[o2 + 3] * cos[wim - 1];
			t2 = - z[o2 + 2] * cos[wim - 1] + z[o2 + 3] * cos[wre + 1];
			//   CMUL(t5, t6, a3.re, a3.im, wre,  wim);
			t5 = z[o3 + 2] * cos[wre + 1] - z[o3 + 3] * cos[wim - 1];
			t6 = z[o3 + 2] * cos[wim - 1] + z[o3 + 3] * cos[wre + 1];
			//   BUTTERFLIES(a0,a1,a2,a3)
			t3 = t5 - t1;
			t5 = t5 + t1;
			z[o2 + 2] = (float) (z[o0 + 2] - t5);
			z[o0 + 2] = (float) (z[o0 + 2] + t5);
			z[o3 + 3] = (float) (z[o1 + 3] - t3);
			z[o1 + 3] = (float) (z[o1 + 3] + t3);
			t4 = t2 - t6;
			t6 = t2 + t6;
			z[o3 + 2] = (float) (z[o1 + 2] - t4);
			z[o1 + 2] = (float) (z[o1 + 2] + t4);
			z[o2 + 3] = (float) (z[o0 + 3] - t6);
			z[o0 + 3] = (float) (z[o0 + 3] + t6);
		} while (--n != 0);
	}

	private void fft16(float[] z, int o) {
		fft8(z, o);
		fft4(z, o + 16);
		fft4(z, o + 24);
		pass(z, o, ff_cos_16, 2);
	}

	private void fft32(float[] z, int o) {
		fft16(z, o);
		fft8(z, o + 32);
		fft8(z, o + 48);
		pass(z, o, ff_cos_32, 4);
	}

	private void fft64(float[] z, int o) {
		fft32(z, o);
		fft16(z, o + 64);
		fft16(z, o + 96);
		pass(z, o, ff_cos_64, 8);
	}

	private void fft128(float[] z, int o) {
		fft64(z, o);
		fft32(z, o + 128);
		fft32(z, o + 192);
		pass(z, o, ff_cos_128, 16);
	}

	private void fft256(float[] z, int o) {
		fft128(z, o);
		fft64(z, o + 256);
		fft64(z, o + 384);
		pass(z, o, ff_cos_256, 32);
	}

	private void fft512(float[] z, int o) {
		fft256(z, o);
		fft128(z, o + 512);
		fft128(z, o + 768);
		pass(z, o, ff_cos_512, 64);
	}

	public void fftCalcFloat(float[] z, int o) {
		switch (nbits) {
			case 2: fft4  (z, 0); break;
			case 3: fft8  (z, o); break;
			case 4: fft16 (z, 0); break;
			case 5: fft32 (z, 0); break;
			case 6: fft64 (z, o); break;
			case 7: fft128(z, o); break;
			case 8: fft256(z, 0); break;
			case 9: fft512(z, 0); break;
			default:
				log.error(String.format("FFT nbits=%d not implemented", nbits));
				break;
		}
	}

	/**
	 * Compute MDCT of size N = 2^nbits
	 * @param input N samples
	 * @param out N/2 samples
	 */
	public void mdctCalc(float[] output, int outputOffset, float[] input, int inputOffset) {
		int n = 1 << mdctBits;
		int n2 = n >> 1;
		int n4 = n >> 2;
		int n8 = n >> 3;
		int n3 = 3 * n4;

		// pre rotation
		for (int i = 0; i < n8; i++) {
			float re = -input[inputOffset + 2 * i + n3] - input[inputOffset + n3 - 1 - 2 * i];
			float im = -input[inputOffset + n4 + 2 * i] + input[inputOffset + n4 - 1 - 2 * i];
			int j = revtab[i];
			CMUL(output, outputOffset + 2 * j + 0, outputOffset + 2 * j + 1, re, im, -tcos[i], tsin[i]);

			re =  input[inputOffset + 2 * i     ] - input[inputOffset + n2 - 1 - 2 * i];
			im = -input[inputOffset + n2 + 2 * i] - input[inputOffset + n  - 1 - 2 * i];
			j = revtab[n8 + i];
			CMUL(output, outputOffset + 2 * j + 0, outputOffset + 2 * j + 1, re, im, -tcos[n8 + i], tsin[n8 + i]);
		}

		fftCalcFloat(output, outputOffset);

		// post rotation
		final float r[] = new float[4];
		for (int i = 0; i < n8; i++) {
			CMUL(r, 3, 0, output[outputOffset + (n8 - i - 1) * 2 + 0], output[outputOffset + (n8 - i - 1) * 2 + 1], -tsin[n8 - i - 1], -tcos[n8 - i - 1]);
			CMUL(r, 1, 2, output[outputOffset + (n8 + i    ) * 2 + 0], output[outputOffset + (n8 + i    ) * 2 + 1], -tsin[n8 + i    ], -tcos[n8 + i    ]);
			output[outputOffset + (n8 - i - 1) * 2 + 0] = r[0];
			output[outputOffset + (n8 - i - 1) * 2 + 1] = r[1];
			output[outputOffset + (n8 + i    ) * 2 + 0] = r[2];
			output[outputOffset + (n8 + i    ) * 2 + 1] = r[3];
		}
	}
}