/*- * #%L * Fiji distribution of ImageJ for the life sciences. * %% * Copyright (C) 2007 - 2017 Fiji developers. * %% * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as * published by the Free Software Foundation, either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program. If not, see * <http://www.gnu.org/licenses/gpl-2.0.html>. * #L% */ package spim.process.cuda; import mpicbg.imglib.util.Util; /** * Executes gaussian convolution using the native CUDA implementation * (https://github.com/StephanPreibisch/SeparableConvolutionCUDALib) * * @author Stephan Preibisch (stephan.preibisch@gmx.de) */ public class CUDASeparableConvolutionFunctions { public static enum OutOfBounds{ ZERO, VALUE, EXTEND_BORDER_PIXELS }; final static int[] supportedKernelSizes = new int[]{ 7, 15, 31, 63, 127 }; final CUDASeparableConvolution cuda; final int cudaDeviceId; public CUDASeparableConvolutionFunctions( final CUDASeparableConvolution cuda, final int cudaDeviceId ) { this.cuda = cuda; this.cudaDeviceId = cudaDeviceId; } /** * Compute Gaussian convolution with and extend-border-pixels outofbounds strategy * * @param img - the image (1d/2d/3d) as flat float array * @param dim - the dimensions of the image in 1d/2d/3d * @param sigma - the sigma for all dimensions * @return */ public boolean gauss( final float[] img, final int[] dim, final double sigma ) { return gauss( img, dim, sigma, OutOfBounds.EXTEND_BORDER_PIXELS, 0, cuda, cudaDeviceId ); } /** * Compute Gaussian convolution with and extend-border-pixels outofbounds strategy * * @param img - the image (1d/2d/3d) as flat float array * @param dim - the dimensions of the image in 1d/2d/3d * @param sigma - the sigma per dimension * @return */ public boolean gauss( final float[] img, final int[] dim, final double[] sigma ) { return gauss( img, dim, sigma, OutOfBounds.EXTEND_BORDER_PIXELS, 0, cuda, cudaDeviceId ); } public boolean gauss( final float[] img, final int[] dim, final float[] sigma ) { final double[] sigmaD = new double[ sigma.length ]; for ( int d = 0; d < sigmaD.length; ++d ) sigmaD[ d ] = sigma[ d ]; return gauss( img, dim, sigmaD, OutOfBounds.EXTEND_BORDER_PIXELS, 0, cuda, cudaDeviceId ); } /** * Compute Gaussian convolution * * @param img - the image (1d/2d/3d) as flat float array * @param dim - the dimensions of the image in 1d/2d/3d * @param sigma - the sigma for all dimensions * @param oobs - the OutOfBounds strategy * @return */ public boolean gauss( final float[] img, final int[] dim, final double sigma, final OutOfBounds oobs, final float oobsValue ) { return gauss( img, dim, sigma, oobs, oobsValue, cuda, cudaDeviceId ); } /** * Compute Gaussian convolution * * @param img - the image (1d/2d/3d) as flat float array * @param dim - the dimensions of the image in 1d/2d/3d * @param sigma - the sigma per dimension * @param oobs - the OutOfBounds strategy * @param oobsValue - the value for out of image pixels if the OutOfBoundsStrategy is OutOfBounds.VALUE * @return */ public boolean gauss( final float[] img, final int[] dim, final double[] sigma, final OutOfBounds oobs, final float oobsValue ) { return gauss( img, dim, sigma, oobs, oobsValue, cuda, cudaDeviceId ); } /** * Compute Gaussian convolution * * @param img - the image (1d/2d/3d) as flat float array * @param dim - the dimensions of the image in 1d/2d/3d * @param sigma - the sigma for all dimensions * @param oobs - the OutOfBounds strategy * @param oobsValue - the value for out of image pixels if the OutOfBoundsStrategy is OutOfBounds.VALUE * @param cuda - The {@link CUDASeparableConvolution} interface that loaded the external native library * @param cudaDeviceId - which CUDA device to use, -1 means single-threaded computation on CPU using native code * @return */ final public static boolean gauss( final float[] img, final int[] dim, final double sigma, final OutOfBounds oobs, final float oobsValue, final CUDASeparableConvolution cuda, final int cudaDeviceId ) { if ( dim == null || dim.length == 0 || dim.length > 3 ) return false; final double[] sigmas = new double[ dim.length ]; for ( int d = 0; d < dim.length; ++d ) sigmas[ d ] = sigma; return gauss( img, dim, sigmas, oobs, oobsValue, cuda, cudaDeviceId ); } /** * Compute Gaussian convolution * * @param img - the image (1d/2d/3d) as flat float array * @param dim - the dimensions of the image in 1d/2d/3d * @param sigma - the sigma per dimension * @param oobs - the OutOfBounds strategy * @param oobsValue - the value for out of image pixels if the OutOfBoundsStrategy is OutOfBounds.VALUE * @param cuda - The {@link CUDASeparableConvolution} interface that loaded the external native library * @param cudaDeviceId - which CUDA device to use, -1 means single-threaded computation on CPU using native code * @return */ final public static boolean gauss( final float[] img, final int[] dim, final double[] sigma, final OutOfBounds oobs, final float oobsValue, final CUDASeparableConvolution cuda, final int cudaDeviceId ) { if ( dim == null || img == null || sigma == null ) { System.out.println( "Input(s) are null." ); return false; } final int n = dim.length; if ( sigma.length != n || n > 3 || n == 0 ) { System.out.println( "Inputs inconsistent or wrong dimensionality." ); return false; } final float[][] kernelsCUDA = getCUDAKernels( sigma, supportedKernelSizes ); final int size = kernelsCUDA[ 0 ].length; // query the common parameters final int w,h,d; final float[] kernelX, kernelY, kernelZ; if ( n == 1 ) { w = dim[ 0 ]; h = d = 1; kernelX = kernelsCUDA[ 0 ]; kernelY = kernelZ = null; } else if ( n == 2 ) { w = dim[ 0 ]; h = dim[ 1 ]; d = 1; kernelX = kernelsCUDA[ 0 ]; kernelY = kernelsCUDA[ 1 ]; kernelZ = null; } else { w = dim[ 0 ]; h = dim[ 1 ]; d = dim[ 2 ]; kernelX = kernelsCUDA[ 0 ]; kernelY = kernelsCUDA[ 1 ]; kernelZ = kernelsCUDA[ 2 ]; } switch ( size ) { case 7: cuda.convolve_7( img, kernelX, kernelY, kernelZ, w, h, d, kernelX != null, kernelY != null, kernelZ != null, oobs.ordinal(), oobsValue, cudaDeviceId ); break; case 15: cuda.convolve_15( img, kernelX, kernelY, kernelZ, w, h, d, kernelX != null, kernelY != null, kernelZ != null, oobs.ordinal(), oobsValue, cudaDeviceId ); break; case 31: cuda.convolve_31( img, kernelX, kernelY, kernelZ, w, h, d, kernelX != null, kernelY != null, kernelZ != null, oobs.ordinal(), oobsValue, cudaDeviceId ); break; case 63: cuda.convolve_63( img, kernelX, kernelY, kernelZ, w, h, d, kernelX != null, kernelY != null, kernelZ != null, oobs.ordinal(), oobsValue, cudaDeviceId ); break; case 127: cuda.convolve_127( img, kernelX, kernelY, kernelZ, w, h, d, kernelX != null, kernelY != null, kernelZ != null, oobs.ordinal(), oobsValue, cudaDeviceId ); break; default: return false; } return true; } public static float[][] getCUDAKernels( final double[] sigma, final int[] supportedKernelSizes ) { final int n = sigma.length; final double[][] kernels = new double[ n ][]; int maxLength = -1; for ( int d = 0; d < n; ++d ) { kernels[ d ] = Util.createGaussianKernel1DDouble( sigma[ d ], true ); maxLength = Math.max( maxLength, kernels[ d ].length ); } int size = Integer.MAX_VALUE; for ( final int s : supportedKernelSizes ) if ( maxLength <= s ) size = Math.min( s, size ); if ( size == Integer.MAX_VALUE ) { System.out.println( "Kernel bigger than maximally supported size. Quitting." ); return null; } final float[][] kernelsCUDA = new float[ kernels.length ][]; for ( int d = 0; d < kernels.length; ++d ) kernelsCUDA[ d ] = getFloatKernelPadded( kernels[ d ], size ); return kernelsCUDA; } public static float[] getFloatKernelPadded( final double[] kernel, final int size ) { if ( kernel.length > size ) return null; final float[] k = new float[ size ]; final int s = ( size - kernel.length )/2; for ( int i = 0; i < kernel.length; ++i ) k[ s + i ] = (float)kernel[ i ]; return k; } }