/* This file is part of jpcsp. Jpcsp is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Jpcsp is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Jpcsp. If not, see <http://www.gnu.org/licenses/>. */ package jpcsp.media.codec.util; import static java.lang.Math.abs; import static java.lang.Math.cos; import static java.lang.Math.sin; import static java.lang.Math.sqrt; import jpcsp.media.codec.CodecFactory; import jpcsp.util.Utilities; import org.apache.log4j.Logger; public class FFT { private static Logger log = CodecFactory.log; int nbits; boolean inverse; int revtab[] = new int[0]; float tmpBuf[] = new float[0]; int mdctSize; // size of MDCT (i.e. number of input data * 2) int mdctBits; // n = 2^nbits // pre/post rotation tables float tcos[] = new float[0]; float tsin[] = new float[0]; public static final double M_SQRT1_2 = 0.70710678118654752440; // 1/sqrt(2) private static final float sqrthalf = (float) M_SQRT1_2; private static final float[] ff_cos_16 = new float[16 / 2]; private static final float[] ff_cos_32 = new float[32 / 2]; private static final float[] ff_cos_64 = new float[64 / 2]; private static final float[] ff_cos_128 = new float[128 / 2]; private static final float[] ff_cos_256 = new float[256 / 2]; private static final float[] ff_cos_512 = new float[512 / 2]; public void copy(FFT that) { nbits = that.nbits; inverse = that.inverse; Utilities.copy(revtab, that.revtab); Utilities.copy(tmpBuf, that.tmpBuf); mdctSize = that.mdctSize; mdctBits = that.mdctBits; Utilities.copy(tcos, that.tcos); Utilities.copy(tsin, that.tsin); } private static void initFfCosTabs(float[] tab, int m) { double freq = 2 * Math.PI / m; for (int i = 0; i <= m / 4; i++) { tab[i] = (float) cos(i * freq); } for (int i = 1; i < m / 4; i++) { tab[m / 2 - i] = tab[i]; } } private static int splitRadixPermutation(int i, int n, boolean inverse) { if (n <= 2) { return i & 1; } int m = n >> 1; if ((i & m) == 0) { return splitRadixPermutation(i, m, inverse) * 2; } m >>= 1; return splitRadixPermutation(i, m, inverse) * 4 + (inverse == ((i & m) == 0) ? 1 : -1); } private int fftInit(int nbits, boolean inverse) { if (nbits < 2 || nbits > 16) { revtab = null; tmpBuf = null; return -1; } this.nbits = nbits; this.inverse = inverse; int n = 1 << nbits; revtab = new int[n]; tmpBuf = new float[n * 2]; initFfCosTabs(ff_cos_16 , 16); initFfCosTabs(ff_cos_32 , 32); initFfCosTabs(ff_cos_64 , 64); initFfCosTabs(ff_cos_128, 128); initFfCosTabs(ff_cos_256, 256); initFfCosTabs(ff_cos_512, 512); for (int i = 0; i < n; i++) { revtab[-splitRadixPermutation(i, n, inverse) & (n - 1)] = i; } return 0; } public int mdctInit(int nbits, boolean inverse, double scale) { int n = 1 << nbits; mdctBits = nbits; mdctSize = n; int n4 = n >> 2; int ret = fftInit(mdctBits - 2, inverse); if (ret < 0) { return ret; } tcos = new float[n4]; tsin = new float[n4]; double theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0); scale = sqrt(abs(scale)); for (int i = 0; i < n4; i++) { double alpha = 2 * Math.PI * (i + theta) / n; tcos[i] = (float) (-cos(alpha) * scale); tsin[i] = (float) (-sin(alpha) * scale); } return 0; } /** * Compute inverse MDCT of size N = 2^nbits * @param output N samples * @param input N/2 samples */ public void imdctCalc(float[] output, int outputOffset, float[] input, int inputOffset) { int n = 1 << mdctBits; int n2 = n >> 1; int n4 = n >> 2; imdctHalf(output, outputOffset + n4, input, inputOffset); for (int k = 0; k < n4; k++) { output[outputOffset + k] = -output[outputOffset + n2 - k - 1]; output[outputOffset + n - k - 1] = output[outputOffset + n2 + k]; } } /** * Compute the middle half of the inverse MDCT of size N = 2^nbits, * thus excluding the parts that can be derived by symmetry * @param output N/2 samples * @param input N/2 samples */ public void imdctHalf(float[] output, int outputOffset, final float[] input, int inputOffset) { int n = 1 << mdctBits; int n2 = n >> 1; int n4 = n >> 2; int n8 = n >> 3; // pre rotation int in1 = 0; int in2 = n2 - 1; for (int k = 0; k < n4; k++) { int j = revtab[k]; CMUL(output, outputOffset + j * 2, outputOffset + j * 2 + 1, input[inputOffset + in2], input[inputOffset + in1], tcos[k], tsin[k]); in1 += 2; in2 -= 2; } fftCalcFloat(output, outputOffset); // post rotation + reordering final float[] r = new float[4]; for (int k = 0; k < n8; k++) { CMUL(r, 0, 3, output[outputOffset + (n8 - k - 1) * 2 + 1], output[outputOffset + (n8 - k - 1) * 2 + 0], tsin[n8 - k - 1], tcos[n8 - k - 1]); CMUL(r, 2, 1, output[outputOffset + (n8 + k ) * 2 + 1], output[outputOffset + (n8 + k ) * 2 + 0], tsin[n8 + k ], tcos[n8 + k ]); output[outputOffset + (n8 - k - 1) * 2 + 0] = r[0]; output[outputOffset + (n8 - k - 1) * 2 + 1] = r[1]; output[outputOffset + (n8 + k ) * 2 + 0] = r[2]; output[outputOffset + (n8 + k ) * 2 + 1] = r[3]; } } private static void CMUL(float[] d, int dre, int dim, float are, float aim, float bre, float bim) { d[dre] = are * bre - aim * bim; d[dim] = are * bim + aim * bre; } private void fft4(float[] z, int o) { // BF(t3, t1, z[0].re, z[1].re); // BF(t8, t6, z[3].re, z[2].re); // BF(z[2].re, z[0].re, t1, t6); // BF(t4, t2, z[0].im, z[1].im); // BF(t7, t5, z[2].im, z[3].im); // BF(z[3].im, z[1].im, t4, t8); // BF(z[3].re, z[1].re, t3, t7); // BF(z[2].im, z[0].im, t2, t5); double t3 = z[o + 0] - z[o + 2]; double t1 = z[o + 0] + z[o + 2]; double t8 = z[o + 6] - z[o + 4]; double t6 = z[o + 6] + z[o + 4]; z[o + 4] = (float) (t1 - t6); z[o + 0] = (float) (t1 + t6); double t4 = z[o + 1] - z[o + 3]; double t2 = z[o + 1] + z[o + 3]; double t7 = z[o + 5] - z[o + 7]; double t5 = z[o + 5] + z[o + 7]; z[o + 7] = (float) (t4 - t8); z[o + 3] = (float) (t4 + t8); z[o + 6] = (float) (t3 - t7); z[o + 2] = (float) (t3 + t7); z[o + 5] = (float) (t2 - t5); z[o + 1] = (float) (t2 + t5); } private void fft8(float[] z, int o) { fft4(z, o); // BF(t1, z[5].re, z[4].re, -z[5].re); // BF(t2, z[5].im, z[4].im, -z[5].im); // BF(t5, z[7].re, z[6].re, -z[7].re); // BF(t6, z[7].im, z[6].im, -z[7].im); double t1 = z[o + 8] + z[o + 10]; z[o + 10] = z[o + 8] - z[o + 10]; double t2 = z[o + 9] + z[o + 11]; z[o + 11] = z[o + 9] - z[o + 11]; double t5 = z[o + 12] + z[o + 14]; z[o + 14] = z[o + 12] - z[o + 14]; double t6 = z[o + 13] + z[o + 15]; z[o + 15] = z[o + 13] - z[o + 15]; // BUTTERFLIES(z[0],z[2],z[4],z[6]); double t3 = t5 - t1; t5 = t5 + t1; z[o + 8] = (float) (z[o + 0] - t5); z[o + 0] = (float) (z[o + 0] + t5); z[o + 13] = (float) (z[o + 5] - t3); z[o + 5] = (float) (z[o + 5] + t3); double t4 = t2 - t6; t6 = t2 + t6; z[o + 12] = (float) (z[o + 4] - t4); z[o + 4] = (float) (z[o + 4] + t4); z[o + 9] = (float) (z[o + 1] - t6); z[o + 1] = (float) (z[o + 1] + t6); // TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf); // CMUL(t1, t2, a2.re, a2.im, wre, -wim); t1 = z[o + 10] * sqrthalf + z[o + 11] * sqrthalf; t2 = - z[o + 10] * sqrthalf + z[o + 11] * sqrthalf; // CMUL(t5, t6, a3.re, a3.im, wre, wim); t5 = z[o + 14] * sqrthalf - z[o + 15] * sqrthalf; t6 = z[o + 14] * sqrthalf + z[o + 15] * sqrthalf; // BUTTERFLIES(a0,a1,a2,a3) t3 = t5 - t1; t5 = t5 + t1; z[o + 10] = (float) (z[o + 2] - t5); z[o + 2] = (float) (z[o + 2] + t5); z[o + 15] = (float) (z[o + 7] - t3); z[o + 7] = (float) (z[o + 7] + t3); t4 = t2 - t6; t6 = t2 + t6; z[o + 14] = (float) (z[o + 6] - t4); z[o + 6] = (float) (z[o + 6] + t4); z[o + 11] = (float) (z[o + 3] - t6); z[o + 3] = (float) (z[o + 3] + t6); } private void pass(float[] z, int o, float[] cos, int n) { int o0 = o; int o1 = o + 2 * n * 2; int o2 = o + 4 * n * 2; int o3 = o + 6 * n * 2; int wre = 0; int wim = 2 * n; n--; // TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]); double t1 = z[o2 + 0]; double t2 = z[o2 + 1]; double t5 = z[o3 + 0]; double t6 = z[o3 + 1]; // BUTTERFLIES(a0,a1,a2,a3) double t3 = t5 - t1; t5 = t5 + t1; z[o2 + 0] = (float) (z[o0 + 0] - t5); z[o0 + 0] = (float) (z[o0 + 0] + t5); z[o3 + 1] = (float) (z[o1 + 1] - t3); z[o1 + 1] = (float) (z[o1 + 1] + t3); double t4 = t2 - t6; t6 = t2 + t6; z[o3 + 0] = (float) (z[o1 + 0] - t4); z[o1 + 0] = (float) (z[o1 + 0] + t4); z[o2 + 1] = (float) (z[o0 + 1] - t6); z[o0 + 1] = (float) (z[o0 + 1] + t6); // TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]); // CMUL(t1, t2, a2.re, a2.im, wre, -wim); t1 = z[o2 + 2] * cos[wre + 1] + z[o2 + 3] * cos[wim - 1]; t2 = - z[o2 + 2] * cos[wim - 1] + z[o2 + 3] * cos[wre + 1]; // CMUL(t5, t6, a3.re, a3.im, wre, wim); t5 = z[o3 + 2] * cos[wre + 1] - z[o3 + 3] * cos[wim - 1]; t6 = z[o3 + 2] * cos[wim - 1] + z[o3 + 3] * cos[wre + 1]; // BUTTERFLIES(a0,a1,a2,a3) t3 = t5 - t1; t5 = t5 + t1; z[o2 + 2] = (float) (z[o0 + 2] - t5); z[o0 + 2] = (float) (z[o0 + 2] + t5); z[o3 + 3] = (float) (z[o1 + 3] - t3); z[o1 + 3] = (float) (z[o1 + 3] + t3); t4 = t2 - t6; t6 = t2 + t6; z[o3 + 2] = (float) (z[o1 + 2] - t4); z[o1 + 2] = (float) (z[o1 + 2] + t4); z[o2 + 3] = (float) (z[o0 + 3] - t6); z[o0 + 3] = (float) (z[o0 + 3] + t6); do { o0 += 4; o1 += 4; o2 += 4; o3 += 4; wre += 2; wim -= 2; // TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]); // CMUL(t1, t2, a2.re, a2.im, wre, -wim); t1 = z[o2 + 0] * cos[wre] + z[o2 + 1] * cos[wim]; t2 = - z[o2 + 0] * cos[wim] + z[o2 + 1] * cos[wre]; // CMUL(t5, t6, a3.re, a3.im, wre, wim); t5 = z[o3 + 0] * cos[wre] - z[o3 + 1] * cos[wim]; t6 = z[o3 + 0] * cos[wim] + z[o3 + 1] * cos[wre]; // BUTTERFLIES(a0,a1,a2,a3) t3 = t5 - t1; t5 = t5 + t1; z[o2 + 0] = (float) (z[o0 + 0] - t5); z[o0 + 0] = (float) (z[o0 + 0] + t5); z[o3 + 1] = (float) (z[o1 + 1] - t3); z[o1 + 1] = (float) (z[o1 + 1] + t3); t4 = t2 - t6; t6 = t2 + t6; z[o3 + 0] = (float) (z[o1 + 0] - t4); z[o1 + 0] = (float) (z[o1 + 0] + t4); z[o2 + 1] = (float) (z[o0 + 1] - t6); z[o0 + 1] = (float) (z[o0 + 1] + t6); // TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]); // CMUL(t1, t2, a2.re, a2.im, wre, -wim); t1 = z[o2 + 2] * cos[wre + 1] + z[o2 + 3] * cos[wim - 1]; t2 = - z[o2 + 2] * cos[wim - 1] + z[o2 + 3] * cos[wre + 1]; // CMUL(t5, t6, a3.re, a3.im, wre, wim); t5 = z[o3 + 2] * cos[wre + 1] - z[o3 + 3] * cos[wim - 1]; t6 = z[o3 + 2] * cos[wim - 1] + z[o3 + 3] * cos[wre + 1]; // BUTTERFLIES(a0,a1,a2,a3) t3 = t5 - t1; t5 = t5 + t1; z[o2 + 2] = (float) (z[o0 + 2] - t5); z[o0 + 2] = (float) (z[o0 + 2] + t5); z[o3 + 3] = (float) (z[o1 + 3] - t3); z[o1 + 3] = (float) (z[o1 + 3] + t3); t4 = t2 - t6; t6 = t2 + t6; z[o3 + 2] = (float) (z[o1 + 2] - t4); z[o1 + 2] = (float) (z[o1 + 2] + t4); z[o2 + 3] = (float) (z[o0 + 3] - t6); z[o0 + 3] = (float) (z[o0 + 3] + t6); } while (--n != 0); } private void fft16(float[] z, int o) { fft8(z, o); fft4(z, o + 16); fft4(z, o + 24); pass(z, o, ff_cos_16, 2); } private void fft32(float[] z, int o) { fft16(z, o); fft8(z, o + 32); fft8(z, o + 48); pass(z, o, ff_cos_32, 4); } private void fft64(float[] z, int o) { fft32(z, o); fft16(z, o + 64); fft16(z, o + 96); pass(z, o, ff_cos_64, 8); } private void fft128(float[] z, int o) { fft64(z, o); fft32(z, o + 128); fft32(z, o + 192); pass(z, o, ff_cos_128, 16); } private void fft256(float[] z, int o) { fft128(z, o); fft64(z, o + 256); fft64(z, o + 384); pass(z, o, ff_cos_256, 32); } private void fft512(float[] z, int o) { fft256(z, o); fft128(z, o + 512); fft128(z, o + 768); pass(z, o, ff_cos_512, 64); } public void fftCalcFloat(float[] z, int o) { switch (nbits) { case 2: fft4 (z, 0); break; case 3: fft8 (z, o); break; case 4: fft16 (z, 0); break; case 5: fft32 (z, 0); break; case 6: fft64 (z, o); break; case 7: fft128(z, o); break; case 8: fft256(z, 0); break; case 9: fft512(z, 0); break; default: log.error(String.format("FFT nbits=%d not implemented", nbits)); break; } } /** * Compute MDCT of size N = 2^nbits * @param input N samples * @param out N/2 samples */ public void mdctCalc(float[] output, int outputOffset, float[] input, int inputOffset) { int n = 1 << mdctBits; int n2 = n >> 1; int n4 = n >> 2; int n8 = n >> 3; int n3 = 3 * n4; // pre rotation for (int i = 0; i < n8; i++) { float re = -input[inputOffset + 2 * i + n3] - input[inputOffset + n3 - 1 - 2 * i]; float im = -input[inputOffset + n4 + 2 * i] + input[inputOffset + n4 - 1 - 2 * i]; int j = revtab[i]; CMUL(output, outputOffset + 2 * j + 0, outputOffset + 2 * j + 1, re, im, -tcos[i], tsin[i]); re = input[inputOffset + 2 * i ] - input[inputOffset + n2 - 1 - 2 * i]; im = -input[inputOffset + n2 + 2 * i] - input[inputOffset + n - 1 - 2 * i]; j = revtab[n8 + i]; CMUL(output, outputOffset + 2 * j + 0, outputOffset + 2 * j + 1, re, im, -tcos[n8 + i], tsin[n8 + i]); } fftCalcFloat(output, outputOffset); // post rotation final float r[] = new float[4]; for (int i = 0; i < n8; i++) { CMUL(r, 3, 0, output[outputOffset + (n8 - i - 1) * 2 + 0], output[outputOffset + (n8 - i - 1) * 2 + 1], -tsin[n8 - i - 1], -tcos[n8 - i - 1]); CMUL(r, 1, 2, output[outputOffset + (n8 + i ) * 2 + 0], output[outputOffset + (n8 + i ) * 2 + 1], -tsin[n8 + i ], -tcos[n8 + i ]); output[outputOffset + (n8 - i - 1) * 2 + 0] = r[0]; output[outputOffset + (n8 - i - 1) * 2 + 1] = r[1]; output[outputOffset + (n8 + i ) * 2 + 0] = r[2]; output[outputOffset + (n8 + i ) * 2 + 1] = r[3]; } } }