MVDeconFFT.java example

Explorer
SPIM_Registration-master
- src
  - main
    - java
/*-
 * #%L
 * Fiji distribution of ImageJ for the life sciences.
 * %%
 * Copyright (C) 2007 - 2017 Fiji developers.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 2 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/gpl-2.0.html>.
 * #L%
 */
package spim.process.fusion.deconvolution;

import java.util.ArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;

import mpicbg.spim.io.IOFunctions;
import mpicbg.spim.postprocessing.deconvolution2.LRInput;
import net.imglib2.Cursor;
import net.imglib2.RandomAccessibleInterval;
import net.imglib2.algorithm.fft2.FFTConvolution;
import net.imglib2.exception.IncompatibleTypeException;
import net.imglib2.img.Img;
import net.imglib2.img.ImgFactory;
import net.imglib2.img.array.ArrayImg;
import net.imglib2.img.array.ArrayImgFactory;
import net.imglib2.type.numeric.complex.ComplexFloatType;
import net.imglib2.type.numeric.real.FloatType;
import net.imglib2.util.Util;
import net.imglib2.view.Views;
import spim.Threads;
import spim.process.cuda.Block;
import spim.process.cuda.BlockGeneratorFixedSizePrecise;
import spim.process.cuda.CUDAFourierConvolution;

public class MVDeconFFT
{
	public static enum PSFTYPE { OPTIMIZATION_II, OPTIMIZATION_I, EFFICIENT_BAYESIAN, INDEPENDENT };
	
	public static CUDAFourierConvolution cuda = null;
	
	private RandomAccessibleInterval< FloatType > image, weight;
	private ArrayImg< FloatType, ? > kernel1, kernel2;
	private FFTConvolution< FloatType > fftConvolution1, fftConvolution2;
	protected int numViews = 0;

	final protected ExecutorService service;
	PSFTYPE iterationType;
	ArrayList< MVDeconFFT > views;

	final int n;

	final boolean useBlocks, useCUDA, useCPU;
	final int[] blockSize, deviceList;
	final int device0, numDevices;
	final Block[] blocks;
	final boolean saveMemory;

	// the imgfactory used to instantiate the blocks and compute the FFTs, must be ArrayImg for CUDA
	private ImgFactory< FloatType > blockFactory;
	private ImgFactory< ComplexFloatType > fftFactory;

	/**
	 * Used to determine if the Convolutions already have been computed for the current iteration
	 */
	int i = -1;

	public MVDeconFFT(
			final RandomAccessibleInterval< FloatType > image,
			final RandomAccessibleInterval< FloatType > weight,
			final ArrayImg< FloatType, ? > kernel,
			final ImgFactory< FloatType > blockFactory,
			final int[] deviceList, final boolean useBlocks,
			final int[] blockSize, final boolean saveMemory )
	{
		this.image = image;
		this.kernel1 = kernel;
		this.weight = weight;
		this.n = image.numDimensions();
		this.service = FFTConvolution.createExecutorService( Threads.numThreads() );
		this.blockFactory = blockFactory;
		try
		{
			this.fftFactory = blockFactory.imgFactory( new ComplexFloatType() );
		}
		catch (IncompatibleTypeException e)
		{
			e.printStackTrace();
			throw new RuntimeException( "Cannot cast ImgFactory for ComplexFloatType." );
		}

		this.deviceList = deviceList;
		this.device0 = deviceList[ 0 ];
		this.numDevices = deviceList.length;
		this.saveMemory = saveMemory;

		// figure out if we need GPU and/or CPU
		boolean anyGPU = false;
		boolean anyCPU = false;
		
		for ( final int i : deviceList )
		{
			if ( i >= 0 )
				anyGPU = true;
			else if ( i == -1 )
				anyCPU = true;
		}

		this.useCUDA = anyGPU;
		this.useCPU = anyCPU;

		if ( useBlocks )
		{
			this.useBlocks = true;

			// define the blocksize so that it is one single block
			this.blockSize = new int[ n ];

			for ( int d = 0; d < this.blockSize.length; ++d )
				this.blockSize[ d ] = blockSize[ d ];

			final long[] imgSize = new long[ n ];
			final long[] kernelSize = new long[ n ];

			image.dimensions( imgSize );
			kernel.dimensions( kernelSize );

			final BlockGeneratorFixedSizePrecise blockGenerator = new BlockGeneratorFixedSizePrecise( Util.int2long( this.blockSize ) );
			this.blocks = blockGenerator.divideIntoBlocks( imgSize, kernelSize );

			IOFunctions.println( "Number of blocks: " + this.blocks.length );
		}
		else if ( this.useCUDA ) // and no blocks, i.e. one big block
		{
			this.useBlocks = true;

			// define the blocksize so that it is one single block
			this.blockSize = new int[ n ];

			for ( int d = 0; d < this.blockSize.length; ++d )
				this.blockSize[ d ] = (int)image.dimension( d ) + (int)kernel.dimension( d ) - 1;

			final long[] imgSize = new long[ n ];
			final long[] kernelSize = new long[ n ];

			image.dimensions( imgSize );
			kernel.dimensions( kernelSize );

			final BlockGeneratorFixedSizePrecise blockGenerator = new BlockGeneratorFixedSizePrecise( Util.int2long( this.blockSize ) );
			this.blocks = blockGenerator.divideIntoBlocks( imgSize, kernelSize );

			IOFunctions.println( "Number of blocks: " + this.blocks.length + " (1 single block for CUDA processing)." );

		}
		else
		{
			this.blocks = null;
			this.blockSize = null;
			this.useBlocks = false;
		}
	}

	/**
	 * @param numViews - the number of views in the acquisition, determines the exponential of the kernel
	 */
	protected void setNumViews( final int numViews ) { this.numViews = numViews; }
	
	/**
	 * This method is called once all views are added to the {@link LRInput}
	 * @throws IncompatibleTypeException 
	 */
	protected void init( final PSFTYPE iterationType, final ArrayList< MVDeconFFT > views ) throws IncompatibleTypeException
	{		
		// normalize kernel so that sum of all pixels == 1
		AdjustInput.normImg( kernel1 );

		this.iterationType = iterationType;
		this.views = views;

		if ( numViews == 0 )
		{
			System.out.println( "Warning, numViews was not set." );
			numViews = 1;
		}
		
		if ( numViews == 1 || iterationType == PSFTYPE.INDEPENDENT )
		{
			// compute the inverted kernel (switch dimensions)
			this.kernel2 = computeInvertedKernel( this.kernel1 );
		}
		else if ( iterationType == PSFTYPE.EFFICIENT_BAYESIAN )
		{
			// compute the compound kernel P_v^compound of the efficient bayesian multi-view deconvolution
			// for the current view \phi_v(x_v)
			//
			// P_v^compound = P_v^{*} prod{w \in W_v} P_v^{*} \ast P_w \ast P_w^{*}
			
			// we first get P_v^{*} -> {*} refers to the inverted coordinates
			final ArrayImg< FloatType, ? > tmp = computeInvertedKernel( this.kernel1.copy() );

			// now for each view: w \in W_v
			for ( final MVDeconFFT view : views )
			{
				if ( view != this )
				{
					// convolve first P_v^{*} with P_w
					Img< FloatType > input = computeInvertedKernel( this.kernel1 );
					Img< FloatType > kernel = view.kernel1;
					Img< FloatType > output = input.factory().create( input, input.firstElement() );

					final FFTConvolution< FloatType > conv1 = new FFTConvolution< FloatType >(
							Views.extendZero( input ),
							input,
							Views.extendZero( kernel ),
							kernel,
							output,
							new ArrayImgFactory< ComplexFloatType >() );

					conv1.setExecutorService( service );
					conv1.setKeepImgFFT( false );
					conv1.convolve();

					// and now convolve the result with P_w^{*}
					input = output;
					kernel = computeInvertedKernel( view.kernel1 );
					output = input.factory().create( input, input.firstElement() );
					final FFTConvolution< FloatType > conv2 = new FFTConvolution< FloatType >(
							Views.extendZero( input ),
							input,
							Views.extendZero( kernel ),
							kernel,
							output,
							new ArrayImgFactory< ComplexFloatType >() );

					conv2.setExecutorService( service );
					conv2.setKeepImgFFT( false );
					conv2.convolve();

					// multiply the result with P_v^{*} yielding the compound kernel
					final Cursor< FloatType > cursor = tmp.cursor();
					for ( final FloatType t : output )
					{
						cursor.fwd();
						cursor.get().set( t.get() * cursor.get().get() );
					}
				}
			}

			// norm the compound kernel
			AdjustInput.normImg( tmp );

			// set it as kernel2 of the deconvolution
			this.kernel2 = ( tmp );
		}
		else if ( iterationType == PSFTYPE.OPTIMIZATION_I )
		{
			// compute the simplified compound kernel P_v^compound of the efficient bayesian multi-view deconvolution
			// for the current view \phi_v(x_v)
			//
			// P_v^compound = P_v^{*} prod{w \in W_v} P_v^{*} \ast P_w

			// we first get P_v^{*} -> {*} refers to the inverted coordinates
			final ArrayImg< FloatType, ? > tmp = ( this.kernel1.copy() );

			// now for each view: w \in W_v
			for ( final MVDeconFFT view : views )
			{
				if ( view != this )
				{
					Img< FloatType > input = this.kernel1;
					Img< FloatType > kernel = computeInvertedKernel( view.kernel1 );
					Img< FloatType > output = input.factory().create( input, input.firstElement() );

					final FFTConvolution< FloatType > conv = new FFTConvolution< FloatType >(
							Views.extendZero( input ),
							input,
							Views.extendZero( kernel ),
							kernel,
							output,
							new ArrayImgFactory< ComplexFloatType >() );

					conv.setExecutorService( service );
					conv.setKeepImgFFT( false );
					conv.convolve();

					// multiply with the kernel
					final Cursor< FloatType > cursor = tmp.cursor();
					for ( final FloatType t : output )
					{
						cursor.fwd();
						cursor.get().set( t.get() * cursor.get().get() );
					}
				}
			}

			// norm the compound kernel
			AdjustInput.normImg( tmp );

			// compute the inverted kernel
			this.kernel2 = computeInvertedKernel( tmp );
		}
		else //if ( iterationType == PSFTYPE.OPTIMIZATION_II )
		{
			// compute the squared kernel and its inverse
			final ArrayImg< FloatType, ? > exponentialKernel = computeExponentialKernel( this.kernel1, numViews );

			// norm the squared kernel
			AdjustInput.normImg( exponentialKernel );

			// compute the inverted squared kernel
			this.kernel2 = computeInvertedKernel( exponentialKernel );	
		}
	}

	public static ArrayImg< FloatType, ? > computeExponentialKernel( final ArrayImg< FloatType, ? > kernel, final int numViews )
	{
		final ArrayImg< FloatType, ? > exponentialKernel = kernel.copy();

		for ( final FloatType f : exponentialKernel )
			f.set( pow( f.get(), numViews ) );

		return exponentialKernel;
	}

	public static ArrayImg< FloatType, ? > computeInvertedKernel( final ArrayImg< FloatType, ? > kernel )
	{
		final ArrayImg< FloatType, ? > invKernel = kernel.copy();

		for ( int d = 0; d < invKernel.numDimensions(); ++d )
			Mirror.mirror( invKernel, d, Threads.numThreads() );

		return invKernel;
	}

	final private static float pow( final float value, final int power )
	{
		float result = value;

		for ( int i = 1; i < power; ++i )
			result *= value;

		return result;
	}

	public void setImage( final Img< FloatType > image )
	{
		this.image = image;
		setCurrentIteration( -1 );
	}

	public void setWeight( final Img< FloatType > weight ) { this.weight = weight; }

	public void setKernel( final ArrayImg< FloatType, ? > kernel ) throws IncompatibleTypeException 
	{
		this.kernel1 = kernel;
		init( iterationType, views );
		setCurrentIteration( -1 );
	}

	public RandomAccessibleInterval< FloatType > getImage() { return image; }
	public RandomAccessibleInterval< FloatType > getWeight() { return weight; }
	public ArrayImg< FloatType, ? > getKernel1() { return kernel1; }
	public ArrayImg< FloatType, ? > getKernel2() { return kernel2; }
	
	public void setCurrentIteration( final int i ) { this.i = i; }
	public int getCurrentIteration() { return i; }

	/**
	 * convolves the image with kernel1
	 * 
	 * @param image - the image to convolve with
	 */
	public void convolve1( final Img< FloatType > image, final Img< FloatType > result )
	{
		if ( useCPU )
		{
			if ( useBlocks )
			{
				final Img< FloatType > block = blockFactory.create( blockSize, new FloatType() );

				if ( this.fftConvolution1 == null )
				{
					this.fftConvolution1 = new FFTConvolution< FloatType >( block, this.kernel1 );
					this.fftConvolution1.setComputeComplexConjugate( false );
					this.fftConvolution1.setExecutorService( service );
					this.fftConvolution1.setKeepImgFFT( false );
				}

				for ( int i = 0; i < blocks.length; ++i )
					MVDeconFFTThreads.convolve1BlockCPU( blocks[ i ], image, result, block, fftConvolution1, i );

				if ( saveMemory )
				{
					this.fftConvolution1 = null;
					System.gc();
				}

				return;
			}
			else
			{
				if ( this.fftConvolution1 == null )
				{
					this.fftConvolution1 = new FFTConvolution< FloatType >( this.image, this.kernel1, fftFactory );
					this.fftConvolution1.setComputeComplexConjugate( false );
					this.fftConvolution1.setExecutorService( service );
					this.fftConvolution1.setKeepImgFFT( false );
				}

				//IJ.log( "Using CPU only to compute as one block ... " );
				long time = System.currentTimeMillis();
				final FFTConvolution< FloatType > fftConv = fftConvolution1;
				fftConv.setImg( image );
				fftConv.setOutput( result );
				fftConv.convolve();
				System.out.println( " image: compute " + (System.currentTimeMillis() - time) );

				if ( saveMemory )
				{
					this.fftConvolution1 = null;
					System.gc();
				}

				return;
			}
		}
		else if ( useCUDA && numDevices == 1 )
		{
			final Img< FloatType > block = blockFactory.create( blockSize, new FloatType() );

			for ( int i = 0; i < blocks.length; ++i )
				MVDeconFFTThreads.convolve1BlockCUDA( blocks[ i ], device0, image, result, block, kernel1, i );

			return;
		}
		else
		{
			final AtomicInteger ai = new AtomicInteger();
			final Thread[] threads = new Thread[ deviceList.length ];

			for ( int i = 0; i < deviceList.length; ++i )
				threads[ i ] = MVDeconFFTThreads.getCUDAThread1( ai, blockFactory, blocks, blockSize, image, result, deviceList[ i ], kernel1 );

			for ( int ithread = 0; ithread < threads.length; ++ithread )
				threads[ ithread ].start();

			try
			{
				for ( int ithread = 0; ithread < threads.length; ++ithread )
					threads[ ithread ].join();
			}
			catch (InterruptedException ie)
			{
				throw new RuntimeException(ie);
			}

			return;
		}
	}

	/**
	 * convolves the image with kernel2 (inverted kernel1)
	 * 
	 * @param image - the image to convolve with
	 */
	public void convolve2( final Img< FloatType > image, final Img< FloatType > result )
	{
		if ( useCPU )
		{
			if ( useBlocks )
			{
				final Img< FloatType > block = blockFactory.create( blockSize, new FloatType() );

				if ( this.fftConvolution2 == null )
				{
					this.fftConvolution2 = new FFTConvolution< FloatType >( block, this.kernel2 );
					this.fftConvolution2.setComputeComplexConjugate( false );
					this.fftConvolution2.setExecutorService( service );
					this.fftConvolution2.setKeepImgFFT( false );
				}

				for ( int i = 0; i < blocks.length; ++i )
					MVDeconFFTThreads.convolve2BlockCPU( blocks[ i ], image, result, block, fftConvolution2 );

				if ( saveMemory )
				{
					this.fftConvolution2 = null;
					System.gc();
				}

				return;
			}
			else
			{
				if ( this.fftConvolution2 == null )
				{
					this.fftConvolution2 = new FFTConvolution< FloatType >( this.image, this.kernel2, fftFactory );
					this.fftConvolution2.setComputeComplexConjugate( false );
					this.fftConvolution2.setExecutorService( service );
					this.fftConvolution2.setKeepImgFFT( false );
				}

				final FFTConvolution< FloatType > fftConv = fftConvolution2;
				fftConv.setImg( Views.extendValue( image, new FloatType( 1.0f ) ), image );  // ratio outside of the deconvolved space (psi) is 1
				fftConv.setOutput( result );
				fftConv.convolve();

				if ( saveMemory )
				{
					this.fftConvolution2 = null;
					System.gc();
				}

				return;
			}
		}
		else if ( useCUDA && numDevices == 1 )
		{
			final Img< FloatType > block = blockFactory.create( blockSize, new FloatType() );

			for ( int i = 0; i < blocks.length; ++i )
				MVDeconFFTThreads.convolve2BlockCUDA( blocks[ i ], device0, image, result, block, kernel2 );

			return;
		}
		else
		{
			final AtomicInteger ai = new AtomicInteger();
			final Thread[] threads = new Thread[ deviceList.length ];

			for ( int i = 0; i < deviceList.length; ++i )
				threads[ i ] = MVDeconFFTThreads.getCUDAThread2( ai, blockFactory, blocks, blockSize, image, result, deviceList[ i ], kernel2 );

			for ( int ithread = 0; ithread < threads.length; ++ithread )
				threads[ ithread ].start();

			try
			{
				for ( int ithread = 0; ithread < threads.length; ++ithread )
					threads[ ithread ].join();
			}
			catch (InterruptedException ie)
			{
				throw new RuntimeException(ie);
			}

			return;
		}
	}
}