MVDeconFFTThreads.java example

Explorer
SPIM_Registration-master
- src
  - main
    - java
/*-
 * #%L
 * Fiji distribution of ImageJ for the life sciences.
 * %%
 * Copyright (C) 2007 - 2017 Fiji developers.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 2 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/gpl-2.0.html>.
 * #L%
 */
package spim.process.fusion.deconvolution;

import java.util.concurrent.atomic.AtomicInteger;

import net.imglib2.algorithm.fft2.FFTConvolution;
import net.imglib2.img.Img;
import net.imglib2.img.ImgFactory;
import net.imglib2.img.array.ArrayImg;
import net.imglib2.img.basictypeaccess.array.FloatArray;
import net.imglib2.type.numeric.real.FloatType;
import net.imglib2.util.Util;
import net.imglib2.view.Views;
import spim.process.cuda.Block;
import spim.process.interestpointdetection.DifferenceOfGaussianCUDA.CUDAOutput;

/**
 * Static helpers that convolve an image block by block, either on the CPU via
 * {@link FFTConvolution} or on a CUDA device via {@code MVDeconFFT.cuda}, together
 * with factories for worker threads that claim block indices from a shared counter.
 */
public class MVDeconFFTThreads
{
	/**
	 * Convolves a single block on the CPU and prints the time spent in each of the
	 * three phases (copy, compute, paste) to standard output.
	 *
	 * @param blockStruct describes the block; used to copy data out of {@code image} and to paste into {@code result}
	 * @param image the input image, mirror-extended (single boundary) while the block is copied
	 * @param result the image the convolved block is pasted into
	 * @param block scratch image that receives the copy and is convolved in place
	 * @param fftConvolution1 the convolution to run; its input and output are both set to {@code block}
	 * @param i index of the block, used only in the log output
	 */
	final protected static void convolve1BlockCPU(
			final Block blockStruct, final Img< FloatType > image, final Img< FloatType > result,
			final Img< FloatType > block, final FFTConvolution< FloatType > fftConvolution1, final int i )
	{
		final String logPrefix = " block " + i + "(CPU): ";

		final long copyStart = System.currentTimeMillis();
		blockStruct.copyBlock( Views.extendMirrorSingle( image ), block );
		System.out.println( logPrefix + "copy " + (System.currentTimeMillis() - copyStart) );

		final long computeStart = System.currentTimeMillis();
		convolveInPlaceCPU( fftConvolution1, block );
		System.out.println( logPrefix + "compute " + (System.currentTimeMillis() - computeStart) );

		final long pasteStart = System.currentTimeMillis();
		blockStruct.pasteBlock( result, block );
		System.out.println( logPrefix + "paste " + (System.currentTimeMillis() - pasteStart) );
	}

	/**
	 * Convolves a single block on the CPU without any timing output. The input is
	 * extended with the constant value 1 while the block is copied.
	 *
	 * @param blockStruct describes the block; used to copy data out of {@code image} and to paste into {@code result}
	 * @param image the input image
	 * @param result the image the convolved block is pasted into
	 * @param block scratch image that receives the copy and is convolved in place
	 * @param fftConvolution2 the convolution to run; its input and output are both set to {@code block}
	 */
	final protected static void convolve2BlockCPU(
			final Block blockStruct, final Img< FloatType > image, final Img< FloatType > result,
			final Img< FloatType > block, final FFTConvolution< FloatType > fftConvolution2 )
	{
		// outside of the deconvolved space (psi) the ratio is defined to be 1
		blockStruct.copyBlock( Views.extendValue( image, new FloatType( 1.0f ) ), block );

		convolveInPlaceCPU( fftConvolution2, block );

		blockStruct.pasteBlock( result, block );
	}

	/**
	 * Convolves a single block on a CUDA device and prints the time spent in each of
	 * the three phases (copy, compute, paste) to standard output.
	 *
	 * @param blockStruct describes the block; used to copy data out of {@code image} and to paste into {@code result}
	 * @param deviceId id of the CUDA device handed to the native convolution; also part of the log output
	 * @param image the input image, mirror-extended (single boundary) while the block is copied
	 * @param result the image the convolved block is pasted into
	 * @param block scratch image; it is cast to {@link ArrayImg}, so any other implementation fails with a {@link ClassCastException}
	 * @param kernel1 the convolution kernel; it is cast to {@link ArrayImg} as well
	 * @param i index of the block, used only in the log output
	 */
	final protected static void convolve1BlockCUDA(
			final Block blockStruct, final int deviceId, final Img< FloatType > image,
			final Img< FloatType > result, final Img< FloatType > block, final Img< FloatType > kernel1, final int i )
	{
		final long copyStart = System.currentTimeMillis();
		blockStruct.copyBlock( Views.extendMirrorSingle( image ), block );
		System.out.println( " block " + i + "(CPU  " + deviceId + "): copy " + (System.currentTimeMillis() - copyStart) );

		// the convolution with kernel1 itself runs through the CUDA bindings
		final long computeStart = System.currentTimeMillis();
		convolveInPlaceCUDA( block, kernel1, deviceId );
		System.out.println( " block " + i + "(CUDA " + deviceId + "): compute " + (System.currentTimeMillis() - computeStart) );

		final long pasteStart = System.currentTimeMillis();
		blockStruct.pasteBlock( result, block );
		System.out.println( " block " + i + "(CPU  " + deviceId + "): paste " + (System.currentTimeMillis() - pasteStart) );
	}

	/**
	 * Convolves a single block on a CUDA device without any timing output. The input
	 * is extended with the constant value 1 while the block is copied.
	 *
	 * @param blockStruct describes the block; used to copy data out of {@code image} and to paste into {@code result}
	 * @param deviceId id of the CUDA device handed to the native convolution
	 * @param image the input image
	 * @param result the image the convolved block is pasted into
	 * @param block scratch image; it is cast to {@link ArrayImg}, so any other implementation fails with a {@link ClassCastException}
	 * @param kernel2 the convolution kernel; it is cast to {@link ArrayImg} as well
	 */
	final protected static void convolve2BlockCUDA(
			final Block blockStruct, final int deviceId, final Img< FloatType > image,
			final Img< FloatType > result, final Img< FloatType > block, final Img< FloatType > kernel2 )
	{
		// outside of the deconvolved space (psi) the ratio is defined to be 1
		blockStruct.copyBlock( Views.extendValue( image, new FloatType( 1.0f ) ), block );

		// the convolution with kernel2 itself runs through the CUDA bindings
		convolveInPlaceCUDA( block, kernel2, deviceId );

		blockStruct.pasteBlock( result, block );
	}

	/**
	 * Builds (but does not start) a thread that processes blocks with
	 * {@link #convolve1BlockCUDA}. The thread allocates one scratch block of size
	 * {@code blockSize} when it runs and then keeps claiming the next index from
	 * {@code ai} until the index reaches {@code blocks.length}.
	 *
	 * @param ai shared counter handing out block indices
	 * @param blockFactory factory used to allocate the scratch block
	 * @param blocks all blocks to be processed
	 * @param blockSize dimensions of the scratch block
	 * @param image the input image
	 * @param result the image the convolved blocks are pasted into
	 * @param deviceId id of the CUDA device this thread uses
	 * @param kernel1 the convolution kernel
	 * @return the newly created, not yet started thread
	 */
	final protected static Thread getCUDAThread1(
			final AtomicInteger ai, final ImgFactory< FloatType > blockFactory, final Block[] blocks, final int[] blockSize,
			final Img< FloatType > image, final Img< FloatType > result, final int deviceId, final Img< FloatType > kernel1 )
	{
		return new Thread( new Runnable()
		{
			@Override
			public void run()
			{
				// one scratch block per thread, reused for every block this thread claims
				final Img< FloatType > scratch = blockFactory.create( Util.int2long( blockSize ), new FloatType() );

				for ( int next = ai.getAndIncrement(); next < blocks.length; next = ai.getAndIncrement() )
					convolve1BlockCUDA( blocks[ next ], deviceId, image, result, scratch, kernel1, next );
			}
		});
	}

	/**
	 * Builds (but does not start) a thread that processes blocks with
	 * {@link #convolve2BlockCUDA}. The thread allocates one scratch block of size
	 * {@code blockSize} when it runs and then keeps claiming the next index from
	 * {@code ai} until the index reaches {@code blocks.length}.
	 *
	 * @param ai shared counter handing out block indices
	 * @param blockFactory factory used to allocate the scratch block
	 * @param blocks all blocks to be processed
	 * @param blockSize dimensions of the scratch block
	 * @param image the input image
	 * @param result the image the convolved blocks are pasted into
	 * @param deviceId id of the CUDA device this thread uses
	 * @param kernel2 the convolution kernel
	 * @return the newly created, not yet started thread
	 */
	final protected static Thread getCUDAThread2(
			final AtomicInteger ai, final ImgFactory< FloatType > blockFactory, final Block[] blocks, final int[] blockSize,
			final Img< FloatType > image, final Img< FloatType > result, final int deviceId, final Img< FloatType > kernel2 )
	{
		return new Thread( new Runnable()
		{
			@Override
			public void run()
			{
				// one scratch block per thread, reused for every block this thread claims
				final Img< FloatType > scratch = blockFactory.create( Util.int2long( blockSize ), new FloatType() );

				for ( int next = ai.getAndIncrement(); next < blocks.length; next = ai.getAndIncrement() )
					convolve2BlockCUDA( blocks[ next ], deviceId, image, result, scratch, kernel2 );
			}
		});
	}

	/**
	 * Runs the given FFT convolution with {@code block} as both input and output.
	 */
	private static void convolveInPlaceCPU( final FFTConvolution< FloatType > fftConvolution, final Img< FloatType > block )
	{
		fftConvolution.setImg( block );
		fftConvolution.setOutput( block );
		fftConvolution.convolve();
	}

	/**
	 * Hands the backing float arrays of {@code block} and {@code kernel}, together with
	 * their reversed dimension arrays, to {@code MVDeconFFT.cuda.convolution3DfftCUDAInPlace}.
	 */
	private static void convolveInPlaceCUDA( final Img< FloatType > block, final Img< FloatType > kernel, final int deviceId )
	{
		// both storage arrays are fetched before any size is computed
		final float[] blockData = floatStorage( block );
		final float[] kernelData = floatStorage( kernel );

		MVDeconFFT.cuda.convolution3DfftCUDAInPlace(
				blockData, getCUDACoordinates( CUDAOutput.getImgSizeInt( block ) ),
				kernelData, getCUDACoordinates( CUDAOutput.getImgSizeInt( kernel ) ),
				deviceId );
	}

	/**
	 * Returns the float array currently backing {@code img}. The image has to be an
	 * {@link ArrayImg} whose data access is a {@link FloatArray}; otherwise the casts fail.
	 */
	@SuppressWarnings("unchecked")
	private static float[] floatStorage( final Img< FloatType > img )
	{
		return ((FloatArray)((ArrayImg< FloatType, ? > )img).update( null ) ).getCurrentStorageArray();
	}

	/**
	 * Returns a new array holding the entries of {@code c} in reverse order.
	 * NOTE(review): presumably the native CUDA code expects the dimensions in the
	 * opposite order to ImgLib2 - confirm against the CUDA bindings.
	 */
	private final static int[] getCUDACoordinates( final int[] c )
	{
		final int n = c.length;
		final int[] reversed = new int[ n ];

		for ( int src = 0, dst = n - 1; src < n; ++src, --dst )
			reversed[ dst ] = c[ src ];

		return reversed;
	}
}