/* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is JTransforms. * * The Initial Developer of the Original Code is * Piotr Wendykier, Emory University. * Portions created by the Initial Developer are Copyright (C) 2007 * the Initial Developer. All Rights Reserved. * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ package edu.emory.mathcs.jtransforms.fft; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import edu.emory.mathcs.utils.ConcurrencyUtils; /** * Computes 2D Discrete Fourier Transform (DFT) of complex and real, single * precision data. The sizes of both dimensions must be power-of-two numbers. * This is a parallel implementation of split-radix algorithm optimized for SMP * systems. <br> * <br> * This code is derived from General Purpose FFT Package written by Takuya Ooura * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html) * * @author Piotr Wendykier (piotr.wendykier@gmail.com) * */ public class FloatFFT_2D { private int n1; private int n2; private int[] ip; private float[] w; private float[] t; private FloatFFT_1D fftn2, fftn1; private int oldNthread; private int nt; /** * Creates new instance of FloatFFT_2D. * * @param n1 * number of rows - must be a power-of-two number * @param n2 * number of columns - must be a power-of-two number */ public FloatFFT_2D(int n1, int n2) { if (!ConcurrencyUtils.isPowerOf2(n1) || !ConcurrencyUtils.isPowerOf2(n2)) throw new IllegalArgumentException("n1, n2 must be power of two numbers"); if (n1 <= 1 || n2 <= 1) { throw new IllegalArgumentException("n1, n2 must be greater than 1"); } this.n1 = n1; this.n2 = n2; ip = new int[2 + (int) Math.ceil(Math.sqrt(Math.max(n1, n2)))]; this.w = new float[(int) Math.ceil(Math.max(Math.max(n1 / 2, n2 / 2), Math.max(n1 / 2, n2 / 4) + n2 / 4))]; fftn2 = new FloatFFT_1D(n2, ip, w); fftn1 = new FloatFFT_1D(n1, ip, w); oldNthread = ConcurrencyUtils.getNumberOfProcessors(); nt = 8 * oldNthread * n1; if (2 * n2 == 4 * oldNthread) { nt >>= 1; } else if (2 * n2 < 4 * oldNthread) { nt >>= 2; } t = new float[nt]; } /** * Computes 2D forward DFT of complex data leaving the result in * <code>a</code>. The data is stored in 1D array in row-major order. * Complex number is stored as two float values in sequence: the real and * imaginary part, i.e. the input array must be of size n1*2*n2. The * physical layout of the input data is as follows: * * <pre> * a[k1*2*n2+2*k2] = Re[k1][k2], * a[k1*2*n2+2*k2+1] = Im[k1][k2], 0<=k1<n1, 0<=k2<n2, * </pre> * * @param a * data to transform */ public void complexForward(float[] a) { int nthread, n, i; int oldn2 = n2; n2 = 2 * n2; n = n1 << 1; if (n < n2) { n = n2; } if (n > (ip[0] << 2)) { makewt(n >> 2); } nthread = ConcurrencyUtils.getNumberOfProcessors(); if (nthread != oldNthread) { nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = new float[nt]; oldNthread = nthread; } if ((nthread > 1) && (n1 * oldn2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { xdft2d0_subth1(0, -1, a, true); cdft2d_subth(-1, a, true); } else { for (i = 0; i < n1; i++) { fftn2.complexForward(a, i * n2); } cdft2d_sub(-1, a, true); } n2 = oldn2; } /** * Computes 2D forward DFT of complex data leaving the result in * <code>a</code>. The data is stored in 2D array. Complex data is * represented by 2 float values in sequence: the real and imaginary part, * i.e. the input array must be of size n1 by 2*n2. The physical layout of * the input data is as follows: * * <pre> * a[k1][2*k2] = Re[k1][k2], * a[k1][2*k2+1] = Im[k1][k2], 0<=k1<n1, 0<=k2<n2, * </pre> * * @param a * data to transform */ public void complexForward(float[][] a) { int nthread, n, i; int oldn2 = n2; n2 = 2 * n2; n = n1 << 1; if (n < n2) { n = n2; } if (n > (ip[0] << 2)) { makewt(n >> 2); } nthread = ConcurrencyUtils.getNumberOfProcessors(); if (nthread != oldNthread) { nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = new float[nt]; oldNthread = nthread; } if ((nthread > 1) && (n1 * oldn2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { xdft2d0_subth1(0, -1, a, true); cdft2d_subth(-1, a, true); } else { for (i = 0; i < n1; i++) { fftn2.complexForward(a[i]); } cdft2d_sub(-1, a, true); } n2 = oldn2; } /** * Computes 2D inverse DFT of complex data leaving the result in * <code>a</code>. The data is stored in 1D array in row-major order. * Complex number is stored as two float values in sequence: the real and * imaginary part, i.e. the input array must be of size n1*2*n2. The * physical layout of the input data is as follows: * * <pre> * a[k1*2*n2+2*k2] = Re[k1][k2], * a[k1*2*n2+2*k2+1] = Im[k1][k2], 0<=k1<n1, 0<=k2<n2, * </pre> * * @param a * data to transform * @param scale * if true then scaling is performed * */ public void complexInverse(float[] a, boolean scale) { int nthread, n, i; int oldn2 = n2; n2 = 2 * n2; n = n1 << 1; if (n < n2) { n = n2; } if (n > (ip[0] << 2)) { makewt(n >> 2); } nthread = ConcurrencyUtils.getNumberOfProcessors(); if (nthread != oldNthread) { nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = new float[nt]; oldNthread = nthread; } if ((nthread > 1) && (n1 * oldn2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { xdft2d0_subth1(0, 1, a, scale); cdft2d_subth(1, a, scale); } else { for (i = 0; i < n1; i++) { fftn2.complexInverse(a, i * n2, scale); } cdft2d_sub(1, a, scale); } n2 = oldn2; } /** * Computes 2D inverse DFT of complex data leaving the result in * <code>a</code>. The data is stored in 2D array. Complex data is * represented by 2 float values in sequence: the real and imaginary part, * i.e. the input array must be of size n1 by 2*n2. The physical layout of * the input data is as follows: * * <pre> * a[k1][2*k2] = Re[k1][k2], * a[k1][2*k2+1] = Im[k1][k2], 0<=k1<n1, 0<=k2<n2, * </pre> * * @param a * data to transform * @param scale * if true then scaling is performed * */ public void complexInverse(float[][] a, boolean scale) { int nthread, n, i; int oldn2 = n2; n2 = 2 * n2; n = n1 << 1; if (n < n2) { n = n2; } if (n > (ip[0] << 2)) { makewt(n >> 2); } nthread = ConcurrencyUtils.getNumberOfProcessors(); if (nthread != oldNthread) { nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = new float[nt]; oldNthread = nthread; } if ((nthread > 1) && (n1 * oldn2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { xdft2d0_subth1(0, 1, a, scale); cdft2d_subth(1, a, scale); } else { for (i = 0; i < n1; i++) { fftn2.complexInverse(a[i], scale); } cdft2d_sub(1, a, scale); } n2 = oldn2; } /** * Computes 2D forward DFT of real data leaving the result in <code>a</code> * . The physical layout of the output data is as follows: * * <pre> * a[k1*n2+2*k2] = Re[k1][k2] = Re[n1-k1][n2-k2], * a[k1*n2+2*k2+1] = Im[k1][k2] = -Im[n1-k1][n2-k2], * 0<k1<n1, 0<k2<n2/2, * a[2*k2] = Re[0][k2] = Re[0][n2-k2], * a[2*k2+1] = Im[0][k2] = -Im[0][n2-k2], * 0<k2<n2/2, * a[k1*n2] = Re[k1][0] = Re[n1-k1][0], * a[k1*n2+1] = Im[k1][0] = -Im[n1-k1][0], * a[(n1-k1)*n2+1] = Re[k1][n2/2] = Re[n1-k1][n2/2], * a[(n1-k1)*n2] = -Im[k1][n2/2] = Im[n1-k1][n2/2], * 0<k1<n1/2, * a[0] = Re[0][0], * a[1] = Re[0][n2/2], * a[(n1/2)*n2] = Re[n1/2][0], * a[(n1/2)*n2+1] = Re[n1/2][n2/2] * </pre> * * This method computes only half of the elements of the real transform. The * other half satisfies the symmetry condition. If you want the full real * forward transform, use <code>realForwardFull</code>. To get back the * original data, use <code>realInverse</code> on the output of this method. * * @param a * data to transform */ public void realForward(float[] a) { int nthread, n, nw, nc, i; n = n1 << 1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw); } nc = ip[1]; if (n2 > (nc << 2)) { nc = n2 >> 2; makect(nc, w, nw); } nthread = ConcurrencyUtils.getNumberOfProcessors(); if (nthread != oldNthread) { nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = new float[nt]; oldNthread = nthread; } if ((nthread > 1) && (n1 * n2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { xdft2d0_subth1(1, 1, a, true); cdft2d_subth(-1, a, true); rdft2d_sub(1, a); } else { for (i = 0; i < n1; i++) { fftn2.realForward(a, i * n2); } cdft2d_sub(-1, a, true); rdft2d_sub(1, a); } } /** * Computes 2D forward DFT of real data leaving the result in <code>a</code> * . The physical layout of the output data is as follows: * * <pre> * a[k1][2*k2] = Re[k1][k2] = Re[n1-k1][n2-k2], * a[k1][2*k2+1] = Im[k1][k2] = -Im[n1-k1][n2-k2], * 0<k1<n1, 0<k2<n2/2, * a[0][2*k2] = Re[0][k2] = Re[0][n2-k2], * a[0][2*k2+1] = Im[0][k2] = -Im[0][n2-k2], * 0<k2<n2/2, * a[k1][0] = Re[k1][0] = Re[n1-k1][0], * a[k1][1] = Im[k1][0] = -Im[n1-k1][0], * a[n1-k1][1] = Re[k1][n2/2] = Re[n1-k1][n2/2], * a[n1-k1][0] = -Im[k1][n2/2] = Im[n1-k1][n2/2], * 0<k1<n1/2, * a[0][0] = Re[0][0], * a[0][1] = Re[0][n2/2], * a[n1/2][0] = Re[n1/2][0], * a[n1/2][1] = Re[n1/2][n2/2] * </pre> * * This method computes only half of the elements of the real transform. The * other half satisfies the symmetry condition. If you want the full real * forward transform, use <code>realForwardFull</code>. To get back the * original data, use <code>realInverse</code> on the output of this method. * * @param a * data to transform */ public void realForward(float[][] a) { int nthread, n, nw, nc, i; n = n1 << 1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw); } nc = ip[1]; if (n2 > (nc << 2)) { nc = n2 >> 2; makect(nc, w, nw); } nthread = ConcurrencyUtils.getNumberOfProcessors(); if (nthread != oldNthread) { nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = new float[nt]; oldNthread = nthread; } if ((nthread > 1) && (n1 * n2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { xdft2d0_subth1(1, 1, a, true); cdft2d_subth(-1, a, true); rdft2d_sub(1, a); } else { for (i = 0; i < n1; i++) { fftn2.realForward(a[i]); } cdft2d_sub(-1, a, true); rdft2d_sub(1, a); } } /** * Computes 2D forward DFT of real data leaving the result in <code>a</code> * . This method computes the full real forward transform, i.e. you will get * the same result as from <code>complexForward</code> called with all * imaginary part equal 0. Because the result is stored in <code>a</code>, * the input array must be of size n1*2*n2, with only the first n1*n2 * elements filled with real data. To get back the original data, use * <code>complexInverse</code> on the output of this method. * * @param a * data to transform */ public void realForwardFull(float[] a) { int nthread, n, nw, nc, i, newn2; n = n1 << 1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw); } nc = ip[1]; if (n2 > (nc << 2)) { nc = n2 >> 2; makect(nc, w, nw); } nthread = ConcurrencyUtils.getNumberOfProcessors(); if (nthread != oldNthread) { nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = new float[nt]; oldNthread = nthread; } if ((nthread > 1) && (n1 * n2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { xdft2d0_subth1(1, 1, a, true); cdft2d_subth(-1, a, true); rdft2d_sub(1, a); } else { for (i = 0; i < n1; i++) { fftn2.realForward(a, i * n2); } cdft2d_sub(-1, a, true); rdft2d_sub(1, a); } newn2 = 2 * n2; int idx1, idx2, idx3; int n1d2 = n1 / 2; for (int k1 = (n1 - 1); k1 >= 1; k1--) { idx1 = k1 * n2; idx2 = 2 * idx1; for (int k2 = 0; k2 < n2; k2 += 2) { a[idx2 + k2] = a[idx1 + k2]; a[idx1 + k2] = 0; a[idx2 + k2 + 1] = a[idx1 + k2 + 1]; a[idx1 + k2 + 1] = 0; } } if ((nthread > 1) && (n1 * n2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { fillSymmetric(a); } else { for (int k1 = 1; k1 < n1d2; k1++) { idx2 = k1 * newn2; idx3 = (n1 - k1) * newn2; a[idx2 + n2] = a[idx3 + 1]; a[idx2 + n2 + 1] = -a[idx3]; } for (int k1 = 1; k1 < n1d2; k1++) { for (int k2 = n2 + 2; k2 < newn2; k2 += 2) { idx2 = k1 * newn2; idx3 = (n1 - k1) * newn2; a[idx2 + k2] = a[idx3 + newn2 - k2]; a[idx2 + k2 + 1] = -a[idx3 + newn2 - k2 + 1]; } } for (int k1 = 0; k1 <= n1 / 2; k1++) { for (int k2 = 0; k2 < newn2; k2 += 2) { idx2 = k1 * newn2 + k2; idx3 = ((n1 - k1) % n1) * newn2 + (newn2 - k2) % newn2; a[idx3] = a[idx2]; a[idx3 + 1] = -a[idx2 + 1]; } } } a[n2] = -a[1]; a[1] = 0; a[n1d2 * newn2 + n2] = -a[n1d2 * newn2 + 1]; a[n1d2 * newn2 + 1] = 0; a[n1d2 * newn2 + n2 + 1] = 0; } /** * Computes 2D forward DFT of real data leaving the result in <code>a</code> * . This method computes the full real forward transform, i.e. you will get * the same result as from <code>complexForward</code> called with all * imaginary part equal 0. Because the result is stored in <code>a</code>, * the input array must be of size n1 by 2*n2, with only the first n1 by n2 * elements filled with real data. To get back the original data, use * <code>complexInverse</code> on the output of this method. * * @param a * data to transform */ public void realForwardFull(float[][] a) { int nthread, n, nw, nc, i, newn2; n = n1 << 1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw); } nc = ip[1]; if (n2 > (nc << 2)) { nc = n2 >> 2; makect(nc, w, nw); } nthread = ConcurrencyUtils.getNumberOfProcessors(); if (nthread != oldNthread) { nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = new float[nt]; oldNthread = nthread; } if ((nthread > 1) && (n1 * n2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { xdft2d0_subth1(1, 1, a, true); cdft2d_subth(-1, a, true); rdft2d_sub(1, a); } else { for (i = 0; i < n1; i++) { fftn2.realForward(a[i]); } cdft2d_sub(-1, a, true); rdft2d_sub(1, a); } newn2 = 2 * n2; int n1d2 = n1 / 2; if ((nthread > 1) && (n1 * n2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { fillSymmetric(a); } else { for (int k1 = 1; k1 < n1d2; k1++) { a[k1][n2] = a[n1 - k1][1]; a[k1][n2 + 1] = -a[n1 - k1][0]; } for (int k1 = 1; k1 < n1d2; k1++) { for (int k2 = n2 + 2; k2 < newn2; k2 += 2) { a[k1][k2] = a[n1 - k1][newn2 - k2]; a[k1][k2 + 1] = -a[n1 - k1][newn2 - k2 + 1]; } } for (int k1 = 0; k1 <= n1 / 2; k1++) { for (int k2 = 0; k2 < newn2; k2 += 2) { a[(n1 - k1) % n1][(newn2 - k2) % newn2] = a[k1][k2]; a[(n1 - k1) % n1][(newn2 - k2) % newn2 + 1] = -a[k1][k2 + 1]; } } } a[0][n2] = -a[0][1]; a[0][1] = 0; a[n1d2][n2] = -a[n1d2][1]; a[n1d2][1] = 0; a[n1d2][n2 + 1] = 0; } /** * Computes 2D inverse DFT of real data leaving the result in <code>a</code> * . The physical layout of the input data has to be as follows: * * <pre> * a[k1*n2+2*k2] = Re[k1][k2] = Re[n1-k1][n2-k2], * a[k1*n2+2*k2+1] = Im[k1][k2] = -Im[n1-k1][n2-k2], * 0<k1<n1, 0<k2<n2/2, * a[2*k2] = Re[0][k2] = Re[0][n2-k2], * a[2*k2+1] = Im[0][k2] = -Im[0][n2-k2], * 0<k2<n2/2, * a[k1*n2] = Re[k1][0] = Re[n1-k1][0], * a[k1*n2+1] = Im[k1][0] = -Im[n1-k1][0], * a[(n1-k1)*n2+1] = Re[k1][n2/2] = Re[n1-k1][n2/2], * a[(n1-k1)*n2] = -Im[k1][n2/2] = Im[n1-k1][n2/2], * 0<k1<n1/2, * a[0] = Re[0][0], * a[1] = Re[0][n2/2], * a[(n1/2)*n2] = Re[n1/2][0], * a[(n1/2)*n2+1] = Re[n1/2][n2/2] * </pre> * * This method computes only half of the elements of the real transform. The * other half satisfies the symmetry condition. If you want the full real * inverse transform, use <code>realInverseFull</code>. * * @param a * data to transform * * @param scale * if true then scaling is performed */ public void realInverse(float[] a, boolean scale) { int nthread, n, nw, nc, i; n = n1 << 1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw); } nc = ip[1]; if (n2 > (nc << 2)) { nc = n2 >> 2; makect(nc, w, nw); } nthread = ConcurrencyUtils.getNumberOfProcessors(); if (nthread != oldNthread) { nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = new float[nt]; oldNthread = nthread; } if ((nthread > 1) && (n1 * n2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { rdft2d_sub(-1, a); cdft2d_subth(1, a, scale); xdft2d0_subth1(1, -1, a, scale); } else { rdft2d_sub(-1, a); cdft2d_sub(1, a, scale); for (i = 0; i < n1; i++) { fftn2.realInverse(a, i * n2, scale); } } } /** * Computes 2D inverse DFT of real data leaving the result in <code>a</code> * . The physical layout of the input data has to be as follows: * * <pre> * a[k1][2*k2] = Re[k1][k2] = Re[n1-k1][n2-k2], * a[k1][2*k2+1] = Im[k1][k2] = -Im[n1-k1][n2-k2], * 0<k1<n1, 0<k2<n2/2, * a[0][2*k2] = Re[0][k2] = Re[0][n2-k2], * a[0][2*k2+1] = Im[0][k2] = -Im[0][n2-k2], * 0<k2<n2/2, * a[k1][0] = Re[k1][0] = Re[n1-k1][0], * a[k1][1] = Im[k1][0] = -Im[n1-k1][0], * a[n1-k1][1] = Re[k1][n2/2] = Re[n1-k1][n2/2], * a[n1-k1][0] = -Im[k1][n2/2] = Im[n1-k1][n2/2], * 0<k1<n1/2, * a[0][0] = Re[0][0], * a[0][1] = Re[0][n2/2], * a[n1/2][0] = Re[n1/2][0], * a[n1/2][1] = Re[n1/2][n2/2] * </pre> * * This method computes only half of the elements of the real transform. The * other half satisfies the symmetry condition. If you want the full real * inverse transform, use <code>realInverseFull</code>. * * @param a * data to transform * * @param scale * if true then scaling is performed */ public void realInverse(float[][] a, boolean scale) { int nthread, n, nw, nc, i; n = n1 << 1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw); } nc = ip[1]; if (n2 > (nc << 2)) { nc = n2 >> 2; makect(nc, w, nw); } nthread = ConcurrencyUtils.getNumberOfProcessors(); if (nthread != oldNthread) { nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = new float[nt]; oldNthread = nthread; } if ((nthread > 1) && (n1 * n2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { rdft2d_sub(-1, a); cdft2d_subth(1, a, scale); xdft2d0_subth1(1, -1, a, scale); } else { rdft2d_sub(-1, a); cdft2d_sub(1, a, scale); for (i = 0; i < n1; i++) { fftn2.realInverse(a[i], scale); } } } /** * Computes 2D inverse DFT of real data leaving the result in <code>a</code> * . This method computes the full real inverse transform, i.e. you will get * the same result as from <code>complexInverse</code> called with all * imaginary part equal 0. Because the result is stored in <code>a</code>, * the input array must be of size n1*2*n2, with only the first n1*n2 * elements filled with real data. * * @param a * data to transform * * @param scale * if true then scaling is performed */ public void realInverseFull(float[] a, boolean scale) { int nthread, n, nw, nc, i, newn2; n = n1 << 1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw); } nc = ip[1]; if (n2 > (nc << 2)) { nc = n2 >> 2; makect(nc, w, nw); } nthread = ConcurrencyUtils.getNumberOfProcessors(); if (nthread != oldNthread) { nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = new float[nt]; oldNthread = nthread; } if ((nthread > 1) && (n1 * n2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { xdft2d0_subth2(1, -1, a, scale); cdft2d_subth(1, a, scale); rdft2d_sub(1, a); } else { for (i = 0; i < n1; i++) { fftn2.realInverse2(a, i * n2, scale); } cdft2d_sub(1, a, scale); rdft2d_sub(1, a); } newn2 = 2 * n2; int idx1, idx2, idx3; int n1d2 = n1 / 2; for (int k1 = (n1 - 1); k1 >= 1; k1--) { idx1 = k1 * n2; idx2 = 2 * idx1; for (int k2 = 0; k2 < n2; k2 += 2) { a[idx2 + k2] = a[idx1 + k2]; a[idx1 + k2] = 0; a[idx2 + k2 + 1] = a[idx1 + k2 + 1]; a[idx1 + k2 + 1] = 0; } } if ((nthread > 1) && (n1 * n2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { fillSymmetric(a); } else { for (int k1 = 1; k1 < n1d2; k1++) { idx2 = k1 * newn2; idx3 = (n1 - k1) * newn2; a[idx2 + n2] = a[idx3 + 1]; a[idx2 + n2 + 1] = -a[idx3]; } for (int k1 = 1; k1 < n1d2; k1++) { for (int k2 = n2 + 2; k2 < newn2; k2 += 2) { idx2 = k1 * newn2; idx3 = (n1 - k1) * newn2; a[idx2 + k2] = a[idx3 + newn2 - k2]; a[idx2 + k2 + 1] = -a[idx3 + newn2 - k2 + 1]; } } for (int k1 = 0; k1 <= n1 / 2; k1++) { for (int k2 = 0; k2 < newn2; k2 += 2) { idx2 = k1 * newn2 + k2; idx3 = ((n1 - k1) % n1) * newn2 + (newn2 - k2) % newn2; a[idx3] = a[idx2]; a[idx3 + 1] = -a[idx2 + 1]; } } } a[n2] = -a[1]; a[1] = 0; a[n1d2 * newn2 + n2] = -a[n1d2 * newn2 + 1]; a[n1d2 * newn2 + 1] = 0; a[n1d2 * newn2 + n2 + 1] = 0; } /** * Computes 2D inverse DFT of real data leaving the result in <code>a</code> * . This method computes the full real inverse transform, i.e. you will get * the same result as from <code>complexInverse</code> called with all * imaginary part equal 0. Because the result is stored in <code>a</code>, * the input array must be of size n1 by 2*n2, with only the first n1 by n2 * elements filled with real data. * * @param a * data to transform * * @param scale * if true then scaling is performed */ public void realInverseFull(float[][] a, boolean scale) { int nthread, n, nw, nc, i, newn2; n = n1 << 1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw); } nc = ip[1]; if (n2 > (nc << 2)) { nc = n2 >> 2; makect(nc, w, nw); } nthread = ConcurrencyUtils.getNumberOfProcessors(); if (nthread != oldNthread) { nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = new float[nt]; oldNthread = nthread; } if ((nthread > 1) && (n1 * n2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { xdft2d0_subth2(1, -1, a, scale); cdft2d_subth(1, a, scale); rdft2d_sub(1, a); } else { for (i = 0; i < n1; i++) { fftn2.realInverse2(a[i], 0, scale); } cdft2d_sub(1, a, scale); rdft2d_sub(1, a); } newn2 = 2 * n2; int n1d2 = n1 / 2; if ((nthread > 1) && (n1 * n2 >= ConcurrencyUtils.getThreadsBeginN_2D())) { fillSymmetric(a); } else { for (int k1 = 1; k1 < n1d2; k1++) { a[k1][n2] = a[n1 - k1][1]; a[k1][n2 + 1] = -a[n1 - k1][0]; } for (int k1 = 1; k1 < n1d2; k1++) { for (int k2 = n2 + 2; k2 < newn2; k2 += 2) { a[k1][k2] = a[n1 - k1][newn2 - k2]; a[k1][k2 + 1] = -a[n1 - k1][newn2 - k2 + 1]; } } for (int k1 = 0; k1 <= n1 / 2; k1++) { for (int k2 = 0; k2 < newn2; k2 += 2) { a[(n1 - k1) % n1][(newn2 - k2) % newn2] = a[k1][k2]; a[(n1 - k1) % n1][(newn2 - k2) % newn2 + 1] = -a[k1][k2 + 1]; } } } a[0][n2] = -a[0][1]; a[0][1] = 0; a[n1d2][n2] = -a[n1d2][1]; a[n1d2][1] = 0; a[n1d2][n2 + 1] = 0; } /* -------- initializing routines -------- */ private void makewt(int nw) { int j, nwh, nw0, nw1; float delta, wn4r, wk1r, wk1i, wk3r, wk3i; ip[0] = nw; ip[1] = 1; if (nw > 2) { nwh = nw >> 1; delta = (float) (Math.atan(1.0) / nwh); wn4r = (float) Math.cos(delta * nwh); w[0] = 1; w[1] = wn4r; if (nwh == 4) { w[2] = (float) Math.cos(delta * 2); w[3] = (float) Math.sin(delta * 2); } else if (nwh > 4) { makeipt(nw); w[2] = (float) (0.5 / Math.cos(delta * 2)); w[3] = (float) (0.5 / Math.cos(delta * 6)); for (j = 4; j < nwh; j += 4) { w[j] = (float) Math.cos(delta * j); w[j + 1] = (float) Math.sin(delta * j); w[j + 2] = (float) Math.cos(3 * delta * j); w[j + 3] = (float) -Math.sin(3 * delta * j); } } nw0 = 0; while (nwh > 2) { nw1 = nw0 + nwh; nwh >>= 1; w[nw1] = 1; w[nw1 + 1] = wn4r; if (nwh == 4) { wk1r = w[nw0 + 4]; wk1i = w[nw0 + 5]; w[nw1 + 2] = wk1r; w[nw1 + 3] = wk1i; } else if (nwh > 4) { wk1r = w[nw0 + 4]; wk3r = w[nw0 + 6]; w[nw1 + 2] = (float) (0.5 / wk1r); w[nw1 + 3] = (float) (0.5 / wk3r); for (j = 4; j < nwh; j += 4) { int idx1 = nw0 + 2 * j; int idx2 = nw1 + j; wk1r = w[idx1]; wk1i = w[idx1 + 1]; wk3r = w[idx1 + 2]; wk3i = w[idx1 + 3]; w[idx2] = wk1r; w[idx2 + 1] = wk1i; w[idx2 + 2] = wk3r; w[idx2 + 3] = wk3i; } } nw0 = nw1; } } } private void makeipt(int nw) { int j, l, m, m2, p, q; ip[2] = 0; ip[3] = 16; m = 2; for (l = nw; l > 32; l >>= 2) { m2 = m << 1; q = m2 << 3; for (j = m; j < m2; j++) { p = ip[j] << 2; ip[m + j] = p; ip[m2 + j] = p + q; } m = m2; } } private void makect(int nc, float[] c, int startc) { int j, nch; float delta; ip[1] = nc; if (nc > 1) { nch = nc >> 1; delta = (float) Math.atan(1.0) / nch; c[startc] = (float) Math.cos(delta * nch); c[startc + nch] = 0.5f * c[startc]; for (j = 1; j < nch; j++) { c[startc + j] = (float) (0.5 * Math.cos(delta * j)); c[startc + nc - j] = (float) (0.5 * Math.sin(delta * j)); } } } /* -------- child routines -------- */ private void rdft2d_sub(int isgn, float[] a) { int n1h, i, j; float xi; int idx1, idx2; n1h = n1 >> 1; if (isgn < 0) { for (i = 1; i < n1h; i++) { j = n1 - i; idx1 = i * n2; idx2 = j * n2; xi = a[idx1] - a[idx2]; a[idx1] += a[idx2]; a[idx2] = xi; xi = a[idx2 + 1] - a[idx1 + 1]; a[idx1 + 1] += a[idx2 + 1]; a[idx2 + 1] = xi; } } else { for (i = 1; i < n1h; i++) { j = n1 - i; idx1 = i * n2; idx2 = j * n2; a[idx2] = 0.5f * (a[idx1] - a[idx2]); a[idx1] -= a[idx2]; a[idx2 + 1] = 0.5f * (a[idx1 + 1] + a[idx2 + 1]); a[idx1 + 1] -= a[idx2 + 1]; } } } private void rdft2d_sub(int isgn, float[][] a) { int n1h, i, j; float xi; n1h = n1 >> 1; if (isgn < 0) { for (i = 1; i < n1h; i++) { j = n1 - i; xi = a[i][0] - a[j][0]; a[i][0] += a[j][0]; a[j][0] = xi; xi = a[j][1] - a[i][1]; a[i][1] += a[j][1]; a[j][1] = xi; } } else { for (i = 1; i < n1h; i++) { j = n1 - i; a[j][0] = 0.5f * (a[i][0] - a[j][0]); a[i][0] -= a[j][0]; a[j][1] = 0.5f * (a[i][1] + a[j][1]); a[i][1] -= a[j][1]; } } } private void cdft2d_sub(int isgn, float[] a, boolean scale) { int i, j; int idx1, idx2, idx3, idx4, idx5; if (isgn == -1) { if (n2 > 4) { for (j = 0; j < n2; j += 8) { for (i = 0; i < n1; i++) { idx1 = i * n2 + j; idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; t[idx2] = a[idx1]; t[idx2 + 1] = a[idx1 + 1]; t[idx3] = a[idx1 + 2]; t[idx3 + 1] = a[idx1 + 3]; t[idx4] = a[idx1 + 4]; t[idx4 + 1] = a[idx1 + 5]; t[idx5] = a[idx1 + 6]; t[idx5 + 1] = a[idx1 + 7]; } fftn1.complexForward(t, 0); fftn1.complexForward(t, 2 * n1); fftn1.complexForward(t, 4 * n1); fftn1.complexForward(t, 6 * n1); for (i = 0; i < n1; i++) { idx1 = i * n2 + j; idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; a[idx1] = t[idx2]; a[idx1 + 1] = t[idx2 + 1]; a[idx1 + 2] = t[idx3]; a[idx1 + 3] = t[idx3 + 1]; a[idx1 + 4] = t[idx4]; a[idx1 + 5] = t[idx4 + 1]; a[idx1 + 6] = t[idx5]; a[idx1 + 7] = t[idx5 + 1]; } } } else if (n2 == 4) { for (i = 0; i < n1; i++) { idx1 = i * n2; idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; t[idx2] = a[idx1]; t[idx2 + 1] = a[idx1 + 1]; t[idx3] = a[idx1 + 2]; t[idx3 + 1] = a[idx1 + 3]; } fftn1.complexForward(t, 0); fftn1.complexForward(t, 2 * n1); for (i = 0; i < n1; i++) { idx1 = i * n2; idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; a[idx1] = t[idx2]; a[idx1 + 1] = t[idx2 + 1]; a[idx1 + 2] = t[idx3]; a[idx1 + 3] = t[idx3 + 1]; } } else if (n2 == 2) { for (i = 0; i < n1; i++) { idx1 = i * n2; idx2 = 2 * i; t[idx2] = a[idx1]; t[idx2 + 1] = a[idx1 + 1]; } fftn1.complexForward(t, 0); for (i = 0; i < n1; i++) { idx1 = i * n2; idx2 = 2 * i; a[idx1] = t[idx2]; a[idx1 + 1] = t[idx2 + 1]; } } } else { if (n2 > 4) { for (j = 0; j < n2; j += 8) { for (i = 0; i < n1; i++) { idx1 = i * n2 + j; idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; t[idx2] = a[idx1]; t[idx2 + 1] = a[idx1 + 1]; t[idx3] = a[idx1 + 2]; t[idx3 + 1] = a[idx1 + 3]; t[idx4] = a[idx1 + 4]; t[idx4 + 1] = a[idx1 + 5]; t[idx5] = a[idx1 + 6]; t[idx5 + 1] = a[idx1 + 7]; } fftn1.complexInverse(t, 0, scale); fftn1.complexInverse(t, 2 * n1, scale); fftn1.complexInverse(t, 4 * n1, scale); fftn1.complexInverse(t, 6 * n1, scale); for (i = 0; i < n1; i++) { idx1 = i * n2 + j; idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; a[idx1] = t[idx2]; a[idx1 + 1] = t[idx2 + 1]; a[idx1 + 2] = t[idx3]; a[idx1 + 3] = t[idx3 + 1]; a[idx1 + 4] = t[idx4]; a[idx1 + 5] = t[idx4 + 1]; a[idx1 + 6] = t[idx5]; a[idx1 + 7] = t[idx5 + 1]; } } } else if (n2 == 4) { for (i = 0; i < n1; i++) { idx1 = i * n2; idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; t[idx2] = a[idx1]; t[idx2 + 1] = a[idx1 + 1]; t[idx3] = a[idx1 + 2]; t[idx3 + 1] = a[idx1 + 3]; } fftn1.complexInverse(t, 0, scale); fftn1.complexInverse(t, 2 * n1, scale); for (i = 0; i < n1; i++) { idx1 = i * n2; idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; a[idx1] = t[idx2]; a[idx1 + 1] = t[idx2 + 1]; a[idx1 + 2] = t[idx3]; a[idx1 + 3] = t[idx3 + 1]; } } else if (n2 == 2) { for (i = 0; i < n1; i++) { idx1 = i * n2; idx2 = 2 * i; t[idx2] = a[idx1]; t[idx2 + 1] = a[idx1 + 1]; } fftn1.complexInverse(t, 0, scale); for (i = 0; i < n1; i++) { idx1 = i * n2; idx2 = 2 * i; a[idx1] = t[idx2]; a[idx1 + 1] = t[idx2 + 1]; } } } } private void cdft2d_sub(int isgn, float[][] a, boolean scale) { int i, j; int idx2, idx3, idx4, idx5; if (isgn == -1) { if (n2 > 4) { for (j = 0; j < n2; j += 8) { for (i = 0; i < n1; i++) { idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; t[idx2] = a[i][j]; t[idx2 + 1] = a[i][j + 1]; t[idx3] = a[i][j + 2]; t[idx3 + 1] = a[i][j + 3]; t[idx4] = a[i][j + 4]; t[idx4 + 1] = a[i][j + 5]; t[idx5] = a[i][j + 6]; t[idx5 + 1] = a[i][j + 7]; } fftn1.complexForward(t, 0); fftn1.complexForward(t, 2 * n1); fftn1.complexForward(t, 4 * n1); fftn1.complexForward(t, 6 * n1); for (i = 0; i < n1; i++) { idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; a[i][j] = t[idx2]; a[i][j + 1] = t[idx2 + 1]; a[i][j + 2] = t[idx3]; a[i][j + 3] = t[idx3 + 1]; a[i][j + 4] = t[idx4]; a[i][j + 5] = t[idx4 + 1]; a[i][j + 6] = t[idx5]; a[i][j + 7] = t[idx5 + 1]; } } } else if (n2 == 4) { for (i = 0; i < n1; i++) { idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; t[idx2] = a[i][0]; t[idx2 + 1] = a[i][1]; t[idx3] = a[i][2]; t[idx3 + 1] = a[i][3]; } fftn1.complexForward(t, 0); fftn1.complexForward(t, 2 * n1); for (i = 0; i < n1; i++) { idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; a[i][0] = t[idx2]; a[i][1] = t[idx2 + 1]; a[i][2] = t[idx3]; a[i][3] = t[idx3 + 1]; } } else if (n2 == 2) { for (i = 0; i < n1; i++) { idx2 = 2 * i; t[idx2] = a[i][0]; t[idx2 + 1] = a[i][1]; } fftn1.complexForward(t, 0); for (i = 0; i < n1; i++) { idx2 = 2 * i; a[i][0] = t[idx2]; a[i][1] = t[idx2 + 1]; } } } else { if (n2 > 4) { for (j = 0; j < n2; j += 8) { for (i = 0; i < n1; i++) { idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; t[idx2] = a[i][j]; t[idx2 + 1] = a[i][j + 1]; t[idx3] = a[i][j + 2]; t[idx3 + 1] = a[i][j + 3]; t[idx4] = a[i][j + 4]; t[idx4 + 1] = a[i][j + 5]; t[idx5] = a[i][j + 6]; t[idx5 + 1] = a[i][j + 7]; } fftn1.complexInverse(t, 0, scale); fftn1.complexInverse(t, 2 * n1, scale); fftn1.complexInverse(t, 4 * n1, scale); fftn1.complexInverse(t, 6 * n1, scale); for (i = 0; i < n1; i++) { idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; a[i][j] = t[idx2]; a[i][j + 1] = t[idx2 + 1]; a[i][j + 2] = t[idx3]; a[i][j + 3] = t[idx3 + 1]; a[i][j + 4] = t[idx4]; a[i][j + 5] = t[idx4 + 1]; a[i][j + 6] = t[idx5]; a[i][j + 7] = t[idx5 + 1]; } } } else if (n2 == 4) { for (i = 0; i < n1; i++) { idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; t[idx2] = a[i][0]; t[idx2 + 1] = a[i][1]; t[idx3] = a[i][2]; t[idx3 + 1] = a[i][3]; } fftn1.complexInverse(t, 0, scale); fftn1.complexInverse(t, 2 * n1, scale); for (i = 0; i < n1; i++) { idx2 = 2 * i; idx3 = 2 * n1 + 2 * i; a[i][0] = t[idx2]; a[i][1] = t[idx2 + 1]; a[i][2] = t[idx3]; a[i][3] = t[idx3 + 1]; } } else if (n2 == 2) { for (i = 0; i < n1; i++) { idx2 = 2 * i; t[idx2] = a[i][0]; t[idx2 + 1] = a[i][1]; } fftn1.complexInverse(t, 0, scale); for (i = 0; i < n1; i++) { idx2 = 2 * i; a[i][0] = t[idx2]; a[i][1] = t[idx2 + 1]; } } } } private void xdft2d0_subth1(final int icr, final int isgn, final float[] a, final boolean scale) { final int nthread; int i; int np = ConcurrencyUtils.getNumberOfProcessors(); if (np > n1) { nthread = n1; } else { nthread = np; } Future[] futures = new Future[nthread]; for (i = 0; i < nthread; i++) { final int n0 = i; futures[i] = ConcurrencyUtils.threadPool.submit(new Runnable() { public void run() { int i; if (icr == 0) { if (isgn == -1) { for (i = n0; i < n1; i += nthread) { fftn2.complexForward(a, i * n2); } } else { for (i = n0; i < n1; i += nthread) { fftn2.complexInverse(a, i * n2, scale); } } } else { if (isgn == 1) { for (i = n0; i < n1; i += nthread) { fftn2.realForward(a, i * n2); } } else { for (i = n0; i < n1; i += nthread) { fftn2.realInverse(a, i * n2, scale); } } } } }); } try { for (int j = 0; j < nthread; j++) { futures[j].get(); } } catch (ExecutionException ex) { ex.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } private void xdft2d0_subth2(final int icr, final int isgn, final float[] a, final boolean scale) { final int nthread; int i; int np = ConcurrencyUtils.getNumberOfProcessors(); if (np > n1) { nthread = n1; } else { nthread = np; } Future[] futures = new Future[nthread]; for (i = 0; i < nthread; i++) { final int n0 = i; futures[i] = ConcurrencyUtils.threadPool.submit(new Runnable() { public void run() { int i; if (icr == 0) { if (isgn == -1) { for (i = n0; i < n1; i += nthread) { fftn2.complexForward(a, i * n2); } } else { for (i = n0; i < n1; i += nthread) { fftn2.complexInverse(a, i * n2, scale); } } } else { if (isgn == 1) { for (i = n0; i < n1; i += nthread) { fftn2.realForward(a, i * n2); } } else { for (i = n0; i < n1; i += nthread) { fftn2.realInverse2(a, i * n2, scale); } } } } }); } try { for (int j = 0; j < nthread; j++) { futures[j].get(); } } catch (ExecutionException ex) { ex.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } private void xdft2d0_subth1(final int icr, final int isgn, final float[][] a, final boolean scale) { final int nthread; int i; int np = ConcurrencyUtils.getNumberOfProcessors(); if (np > n1) { nthread = n1; } else { nthread = np; } Future[] futures = new Future[nthread]; for (i = 0; i < nthread; i++) { final int n0 = i; futures[i] = ConcurrencyUtils.threadPool.submit(new Runnable() { public void run() { int i; if (icr == 0) { if (isgn == -1) { for (i = n0; i < n1; i += nthread) { fftn2.complexForward(a[i]); } } else { for (i = n0; i < n1; i += nthread) { fftn2.complexInverse(a[i], scale); } } } else { if (isgn == 1) { for (i = n0; i < n1; i += nthread) { fftn2.realForward(a[i]); } } else { for (i = n0; i < n1; i += nthread) { fftn2.realInverse(a[i], scale); } } } } }); } try { for (int j = 0; j < nthread; j++) { futures[j].get(); } } catch (ExecutionException ex) { ex.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } private void xdft2d0_subth2(final int icr, final int isgn, final float[][] a, final boolean scale) { final int nthread; int i; int np = ConcurrencyUtils.getNumberOfProcessors(); if (np > n1) { nthread = n1; } else { nthread = np; } Future[] futures = new Future[nthread]; for (i = 0; i < nthread; i++) { final int n0 = i; futures[i] = ConcurrencyUtils.threadPool.submit(new Runnable() { public void run() { int i; if (icr == 0) { if (isgn == -1) { for (i = n0; i < n1; i += nthread) { fftn2.complexForward(a[i]); } } else { for (i = n0; i < n1; i += nthread) { fftn2.complexInverse(a[i], scale); } } } else { if (isgn == 1) { for (i = n0; i < n1; i += nthread) { fftn2.realForward(a[i]); } } else { for (i = n0; i < n1; i += nthread) { fftn2.realInverse2(a[i], 0, scale); } } } } }); } try { for (int j = 0; j < nthread; j++) { futures[j].get(); } } catch (ExecutionException ex) { ex.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } private void cdft2d_subth(final int isgn, final float[] a, final boolean scale) { int nthread; int nt, i; int np = ConcurrencyUtils.getNumberOfProcessors(); nthread = np; nt = 8 * n1; if (n2 == 4 * np) { nt >>= 1; } else if (n2 < 4 * np) { nthread = n2 >> 1; nt >>= 2; } Future[] futures = new Future[nthread]; final int nthread_f = nthread; for (i = 0; i < nthread; i++) { final int n0 = i; final int startt = nt * i; futures[i] = ConcurrencyUtils.threadPool.submit(new Runnable() { public void run() { int i, j; int idx1, idx2, idx3, idx4, idx5; if (isgn == -1) { if (n2 > 4 * nthread_f) { for (j = 8 * n0; j < n2; j += 8 * nthread_f) { for (i = 0; i < n1; i++) { idx1 = i * n2 + j; idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; t[idx2] = a[idx1]; t[idx2 + 1] = a[idx1 + 1]; t[idx3] = a[idx1 + 2]; t[idx3 + 1] = a[idx1 + 3]; t[idx4] = a[idx1 + 4]; t[idx4 + 1] = a[idx1 + 5]; t[idx5] = a[idx1 + 6]; t[idx5 + 1] = a[idx1 + 7]; } fftn1.complexForward(t, startt); fftn1.complexForward(t, startt + 2 * n1); fftn1.complexForward(t, startt + 4 * n1); fftn1.complexForward(t, startt + 6 * n1); for (i = 0; i < n1; i++) { idx1 = i * n2 + j; idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; a[idx1] = t[idx2]; a[idx1 + 1] = t[idx2 + 1]; a[idx1 + 2] = t[idx3]; a[idx1 + 3] = t[idx3 + 1]; a[idx1 + 4] = t[idx4]; a[idx1 + 5] = t[idx4 + 1]; a[idx1 + 6] = t[idx5]; a[idx1 + 7] = t[idx5 + 1]; } } } else if (n2 == 4 * nthread_f) { for (i = 0; i < n1; i++) { idx1 = i * n2 + 4 * n0; idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; t[idx2] = a[idx1]; t[idx2 + 1] = a[idx1 + 1]; t[idx3] = a[idx1 + 2]; t[idx3 + 1] = a[idx1 + 3]; } fftn1.complexForward(t, startt); fftn1.complexForward(t, startt + 2 * n1); for (i = 0; i < n1; i++) { idx1 = i * n2 + 4 * n0; idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; a[idx1] = t[idx2]; a[idx1 + 1] = t[idx2 + 1]; a[idx1 + 2] = t[idx3]; a[idx1 + 3] = t[idx3 + 1]; } } else if (n2 == 2 * nthread_f) { for (i = 0; i < n1; i++) { idx1 = i * n2 + 2 * n0; idx2 = startt + 2 * i; t[idx2] = a[idx1]; t[idx2 + 1] = a[idx1 + 1]; } fftn1.complexForward(t, startt); for (i = 0; i < n1; i++) { idx1 = i * n2 + 2 * n0; idx2 = startt + 2 * i; a[idx1] = t[idx2]; a[idx1 + 1] = t[idx2 + 1]; } } } else { if (n2 > 4 * nthread_f) { for (j = 8 * n0; j < n2; j += 8 * nthread_f) { for (i = 0; i < n1; i++) { idx1 = i * n2 + j; idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; t[idx2] = a[idx1]; t[idx2 + 1] = a[idx1 + 1]; t[idx3] = a[idx1 + 2]; t[idx3 + 1] = a[idx1 + 3]; t[idx4] = a[idx1 + 4]; t[idx4 + 1] = a[idx1 + 5]; t[idx5] = a[idx1 + 6]; t[idx5 + 1] = a[idx1 + 7]; } fftn1.complexInverse(t, startt, scale); fftn1.complexInverse(t, startt + 2 * n1, scale); fftn1.complexInverse(t, startt + 4 * n1, scale); fftn1.complexInverse(t, startt + 6 * n1, scale); for (i = 0; i < n1; i++) { idx1 = i * n2 + j; idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; a[idx1] = t[idx2]; a[idx1 + 1] = t[idx2 + 1]; a[idx1 + 2] = t[idx3]; a[idx1 + 3] = t[idx3 + 1]; a[idx1 + 4] = t[idx4]; a[idx1 + 5] = t[idx4 + 1]; a[idx1 + 6] = t[idx5]; a[idx1 + 7] = t[idx5 + 1]; } } } else if (n2 == 4 * nthread_f) { for (i = 0; i < n1; i++) { idx1 = i * n2 + 4 * n0; idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; t[idx2] = a[idx1]; t[idx2 + 1] = a[idx1 + 1]; t[idx3] = a[idx1 + 2]; t[idx3 + 1] = a[idx1 + 3]; } fftn1.complexInverse(t, startt, scale); fftn1.complexInverse(t, startt + 2 * n1, scale); for (i = 0; i < n1; i++) { idx1 = i * n2 + 4 * n0; idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; a[idx1] = t[idx2]; a[idx1 + 1] = t[idx2 + 1]; a[idx1 + 2] = t[idx3]; a[idx1 + 3] = t[idx3 + 1]; } } else if (n2 == 2 * nthread_f) { for (i = 0; i < n1; i++) { idx1 = i * n2 + 2 * n0; idx2 = startt + 2 * i; t[idx2] = a[idx1]; t[idx2 + 1] = a[idx1 + 1]; } fftn1.complexInverse(t, startt, scale); for (i = 0; i < n1; i++) { idx1 = i * n2 + 2 * n0; idx2 = startt + 2 * i; a[idx1] = t[idx2]; a[idx1 + 1] = t[idx2 + 1]; } } } } }); } try { for (int j = 0; j < nthread; j++) { futures[j].get(); } } catch (ExecutionException ex) { ex.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } private void cdft2d_subth(final int isgn, final float[][] a, final boolean scale) { int nthread; int nt, i; int np = ConcurrencyUtils.getNumberOfProcessors(); nthread = np; nt = 8 * n1; if (n2 == 4 * np) { nt >>= 1; } else if (n2 < 4 * np) { nthread = n2 >> 1; nt >>= 2; } Future[] futures = new Future[nthread]; final int nthread_f = nthread; for (i = 0; i < nthread; i++) { final int n0 = i; final int startt = nt * i; futures[i] = ConcurrencyUtils.threadPool.submit(new Runnable() { public void run() { int i, j; int idx2, idx3, idx4, idx5; if (isgn == -1) { if (n2 > 4 * nthread_f) { for (j = 8 * n0; j < n2; j += 8 * nthread_f) { for (i = 0; i < n1; i++) { idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; t[idx2] = a[i][j]; t[idx2 + 1] = a[i][j + 1]; t[idx3] = a[i][j + 2]; t[idx3 + 1] = a[i][j + 3]; t[idx4] = a[i][j + 4]; t[idx4 + 1] = a[i][j + 5]; t[idx5] = a[i][j + 6]; t[idx5 + 1] = a[i][j + 7]; } fftn1.complexForward(t, startt); fftn1.complexForward(t, startt + 2 * n1); fftn1.complexForward(t, startt + 4 * n1); fftn1.complexForward(t, startt + 6 * n1); for (i = 0; i < n1; i++) { idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; a[i][j] = t[idx2]; a[i][j + 1] = t[idx2 + 1]; a[i][j + 2] = t[idx3]; a[i][j + 3] = t[idx3 + 1]; a[i][j + 4] = t[idx4]; a[i][j + 5] = t[idx4 + 1]; a[i][j + 6] = t[idx5]; a[i][j + 7] = t[idx5 + 1]; } } } else if (n2 == 4 * nthread_f) { for (i = 0; i < n1; i++) { idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; t[idx2] = a[i][4 * n0]; t[idx2 + 1] = a[i][4 * n0 + 1]; t[idx3] = a[i][4 * n0 + 2]; t[idx3 + 1] = a[i][4 * n0 + 3]; } fftn1.complexForward(t, startt); fftn1.complexForward(t, startt + 2 * n1); for (i = 0; i < n1; i++) { idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; a[i][4 * n0] = t[idx2]; a[i][4 * n0 + 1] = t[idx2 + 1]; a[i][4 * n0 + 2] = t[idx3]; a[i][4 * n0 + 3] = t[idx3 + 1]; } } else if (n2 == 2 * nthread_f) { for (i = 0; i < n1; i++) { idx2 = startt + 2 * i; t[idx2] = a[i][2 * n0]; t[idx2 + 1] = a[i][2 * n0 + 1]; } fftn1.complexForward(t, startt); for (i = 0; i < n1; i++) { idx2 = startt + 2 * i; a[i][2 * n0] = t[idx2]; a[i][2 * n0 + 1] = t[idx2 + 1]; } } } else { if (n2 > 4 * nthread_f) { for (j = 8 * n0; j < n2; j += 8 * nthread_f) { for (i = 0; i < n1; i++) { idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; t[idx2] = a[i][j]; t[idx2 + 1] = a[i][j + 1]; t[idx3] = a[i][j + 2]; t[idx3 + 1] = a[i][j + 3]; t[idx4] = a[i][j + 4]; t[idx4 + 1] = a[i][j + 5]; t[idx5] = a[i][j + 6]; t[idx5 + 1] = a[i][j + 7]; } fftn1.complexInverse(t, startt, scale); fftn1.complexInverse(t, startt + 2 * n1, scale); fftn1.complexInverse(t, startt + 4 * n1, scale); fftn1.complexInverse(t, startt + 6 * n1, scale); for (i = 0; i < n1; i++) { idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; idx4 = idx3 + 2 * n1; idx5 = idx4 + 2 * n1; a[i][j] = t[idx2]; a[i][j + 1] = t[idx2 + 1]; a[i][j + 2] = t[idx3]; a[i][j + 3] = t[idx3 + 1]; a[i][j + 4] = t[idx4]; a[i][j + 5] = t[idx4 + 1]; a[i][j + 6] = t[idx5]; a[i][j + 7] = t[idx5 + 1]; } } } else if (n2 == 4 * nthread_f) { for (i = 0; i < n1; i++) { idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; t[idx2] = a[i][4 * n0]; t[idx2 + 1] = a[i][4 * n0 + 1]; t[idx3] = a[i][4 * n0 + 2]; t[idx3 + 1] = a[i][4 * n0 + 3]; } fftn1.complexInverse(t, startt, scale); fftn1.complexInverse(t, startt + 2 * n1, scale); for (i = 0; i < n1; i++) { idx2 = startt + 2 * i; idx3 = startt + 2 * n1 + 2 * i; a[i][4 * n0] = t[idx2]; a[i][4 * n0 + 1] = t[idx2 + 1]; a[i][4 * n0 + 2] = t[idx3]; a[i][4 * n0 + 3] = t[idx3 + 1]; } } else if (n2 == 2 * nthread_f) { for (i = 0; i < n1; i++) { idx2 = startt + 2 * i; t[idx2] = a[i][2 * n0]; t[idx2 + 1] = a[i][2 * n0 + 1]; } fftn1.complexInverse(t, startt, scale); for (i = 0; i < n1; i++) { idx2 = startt + 2 * i; a[i][2 * n0] = t[idx2]; a[i][2 * n0 + 1] = t[idx2 + 1]; } } } } }); } try { for (int j = 0; j < nthread; j++) { futures[j].get(); } } catch (ExecutionException ex) { ex.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } private void fillSymmetric(final float[] a) { int np = ConcurrencyUtils.getNumberOfProcessors(); Future[] futures = new Future[np]; int n1d2 = n1 / 2; int l1k = n1d2 / np; final int newn2 = 2 * n2; for (int i = 0; i < np; i++) { final int l1offa, l1stopa, l2offa, l2stopa; if (i == 0) l1offa = i * l1k + 1; else { l1offa = i * l1k; } l1stopa = i * l1k + l1k; l2offa = i * l1k; if (i == np - 1) { l2stopa = i * l1k + l1k + 1; } else { l2stopa = i * l1k + l1k; } futures[i] = ConcurrencyUtils.threadPool.submit(new Runnable() { public void run() { int idx1, idx2; for (int k1 = l1offa; k1 < l1stopa; k1++) { idx1 = k1 * newn2; idx2 = (n1 - k1) * newn2; a[idx1 + n2] = a[idx2 + 1]; a[idx1 + n2 + 1] = -a[idx2]; } for (int k1 = l1offa; k1 < l1stopa; k1++) { for (int k2 = n2 + 2; k2 < newn2; k2 = k2 + 2) { idx1 = k1 * newn2; idx2 = (n1 - k1) * newn2 + newn2 - k2; a[idx1 + k2] = a[idx2]; a[idx1 + k2 + 1] = -a[idx2 + 1]; } } for (int k1 = l2offa; k1 < l2stopa; k1++) { for (int k2 = 0; k2 < newn2; k2 = k2 + 2) { idx1 = ((n1 - k1) % n1) * newn2 + (newn2 - k2) % newn2; idx2 = k1 * newn2 + k2; a[idx1] = a[idx2]; a[idx1 + 1] = -a[idx2 + 1]; } } } }); } try { for (int j = 0; j < np; j++) { futures[j].get(); } } catch (ExecutionException ex) { ex.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } private void fillSymmetric(final float[][] a) { int np = ConcurrencyUtils.getNumberOfProcessors(); Future[] futures = new Future[np]; int n1d2 = n1 / 2; int l1k = n1d2 / np; final int newn2 = 2 * n2; for (int i = 0; i < np; i++) { final int l1offa, l1stopa, l2offa, l2stopa; if (i == 0) l1offa = i * l1k + 1; else { l1offa = i * l1k; } l1stopa = i * l1k + l1k; l2offa = i * l1k; if (i == np - 1) { l2stopa = i * l1k + l1k + 1; } else { l2stopa = i * l1k + l1k; } futures[i] = ConcurrencyUtils.threadPool.submit(new Runnable() { public void run() { for (int k1 = l1offa; k1 < l1stopa; k1++) { a[k1][n2] = a[n1 - k1][1]; a[k1][n2 + 1] = -a[n1 - k1][0]; } for (int k1 = l1offa; k1 < l1stopa; k1++) { for (int k2 = n2 + 2; k2 < newn2; k2 = k2 + 2) { a[k1][k2] = a[n1 - k1][newn2 - k2]; a[k1][k2 + 1] = -a[n1 - k1][newn2 - k2 + 1]; } } for (int k1 = l2offa; k1 < l2stopa; k1++) { for (int k2 = 0; k2 < newn2; k2 = k2 + 2) { a[(n1 - k1) % n1][(newn2 - k2) % newn2] = a[k1][k2]; a[(n1 - k1) % n1][(newn2 - k2) % newn2 + 1] = -a[k1][k2 + 1]; } } } }); } try { for (int j = 0; j < np; j++) { futures[j].get(); } } catch (ExecutionException ex) { ex.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } }