/*
* OpenPixi - Open Particle-In-Cell (PIC) Simulator
* Copyright (C) 2012 OpenPixi.org
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openpixi.pixi.physics;
import com.nativelibs4java.opencl.*;
import com.nativelibs4java.opencl.CLMem.Usage;
import com.nativelibs4java.opencl.util.*;
import com.nativelibs4java.util.*;
import java.io.File;
import java.io.FileNotFoundException;
import org.bridj.Pointer;
import static org.bridj.Pointer.*;
import static java.lang.Math.*;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.ByteOrder;
import java.util.ArrayList;
import org.openpixi.pixi.physics.collision.algorithms.CollisionAlgorithm;
import org.openpixi.pixi.physics.collision.detectors.Detector;
import org.openpixi.pixi.physics.fields.PoissonSolver;
import org.openpixi.pixi.physics.force.CombinedForce;
import org.openpixi.pixi.physics.force.Force;
import org.openpixi.pixi.physics.force.SimpleGridForce;
import org.openpixi.pixi.physics.grid.Grid;
import org.openpixi.pixi.physics.grid.Interpolation;
import org.openpixi.pixi.physics.grid.LocalInterpolation;
import org.openpixi.pixi.physics.movement.ParticleMover;
import org.openpixi.pixi.physics.movement.boundary.ParticleBoundaries;
import org.openpixi.pixi.physics.movement.boundary.SimpleParticleBoundaries;
import org.openpixi.pixi.physics.particles.Particle;
import org.openpixi.pixi.physics.util.DoubleBox;
public class ParallelSimulationCL {
/**
* Timestep
*/
public double tstep;
/**
* Width of simulated area
*/
private double width;
/**
* Height of simulated area
*/
private double height;
private double speedOfLight;
/**
* Number of iterations in the non-interactive simulation.
*/
private int iterations;
/**
* Contains all Particle2D objects
*/
public ArrayList<Particle> particles;
public CombinedForce f;
private ParticleMover mover;
/**
* Grid for dynamic field calculation
*/
public Grid grid;
public Detector detector;
public CollisionAlgorithm collisionalgorithm;
/**
* Particle list to be passed to the kernel
*/
private Pointer<Double> inParticles;
/**
* Force passed to the kernel
*/
private Pointer<Double> inForce;
/**
* Grid cells passed to the kernel
*/
private Pointer<Double> inCells;
/**
* Boundaries passed to the kernel
*/
private Pointer<Integer> inBoundaries;
/**
* Number of Particle attributes
*/
private int P_SIZE = 22;
/**
* Number of Force attributes
*/
private int F_SIZE = 10;
/**
* Number of Cell attributes
*/
private int C_SIZE = 8;
/**
* Grid interpolator algorithm name
*/
private String OCLGridInterpolator;
/**
* Particle solver algorithm name
*/
private String OCLParticleSolver;
private int writeCells = 0;
/**
* We can turn on or off the effect of the grid on particles by adding or
* removing this force from the total force.
*/
private SimpleGridForce gridForce = new SimpleGridForce();
private boolean usingGridForce = false;
private ParticleGridInitializer particleGridInitializer = new ParticleGridInitializer();
private Interpolation interpolation;
/**
* solver for the electrostatic poisson equation
*/
private PoissonSolver poisolver;
public Interpolation getInterpolation() {
return interpolation;
}
public double getWidth() {
return width;
}
public double getHeight() {
return height;
}
public double getSpeedOfLight() {
return speedOfLight;
}
public ParticleMover getParticleMover() {
return mover;
}
/**
* Constructor for non distributed simulation.
*/
public ParallelSimulationCL(Settings settings) {
tstep = settings.getTimeStep();
width = settings.getSimulationWidth();
height = settings.getSimulationHeight();
speedOfLight = settings.getSpeedOfLight();
iterations = settings.getIterations();
// TODO make particles a generic list
particles = (ArrayList<Particle>) settings.getParticles();
f = settings.getForce();
ParticleBoundaries particleBoundaries = new SimpleParticleBoundaries(
new DoubleBox(tstep, width, tstep, height),
settings.getParticleBoundary());
mover = new ParticleMover(
settings.getParticleSolver(),
particleBoundaries,
settings.getParticleIterator());
grid = new Grid(settings);
if (settings.useGrid()) {
turnGridForceOn();
} else {
turnGridForceOff();
}
poisolver = settings.getPoissonSolver();
interpolation = new LocalInterpolation(
settings.getInterpolator(), settings.getParticleIterator());
particleGridInitializer.initialize(interpolation, poisolver, particles, grid);
detector = settings.getCollisionDetector();
collisionalgorithm = settings.getCollisionAlgorithm();
OCLParticleSolver = settings.getOCLParticleSolver();
OCLGridInterpolator = settings.getOCLGridInterpolator();
writeCells = settings.getWriteToFile();
prepareAllParticles();
}
public void turnGridForceOn() {
if (!usingGridForce) {
f.add(gridForce);
usingGridForce = true;
}
}
public void turnGridForceOff() {
if (usingGridForce) {
f.remove(gridForce);
usingGridForce = false;
}
}
public void particlePush() {
mover.push(particles, f, tstep);
}
public void prepareAllParticles() {
mover.prepare(particles, f, tstep);
}
public void completeAllParticles() {
mover.complete(particles, f, tstep);
}
/**
* Converts arrays of Objects objects into arrays of Doubles so they can be
* passed to the OpenCL kernel
*/
public void hostToKernelConversion(int particlesSize, int forceSize, int cellsSize, int boundSize, ByteOrder byteOrder) {
int k = 0;
inParticles = allocateDoubles(particlesSize).order(byteOrder);
inForce = allocateDoubles(forceSize).order(byteOrder);
inCells = allocateDoubles(cellsSize).order(byteOrder);
inBoundaries = allocateInts(boundSize).order(byteOrder);
for (int i = 0; i < particlesSize; i += P_SIZE) {
inParticles.set(i + 0, particles.get(k).getX());
inParticles.set(i + 1, particles.get(k).getY());
inParticles.set(i + 2, particles.get(k).getRadius());
inParticles.set(i + 3, particles.get(k).getVx());
inParticles.set(i + 4, particles.get(k).getVy());
inParticles.set(i + 5, particles.get(k).getAx());
inParticles.set(i + 6, particles.get(k).getAy());
inParticles.set(i + 7, particles.get(k).getMass());
inParticles.set(i + 8, particles.get(k).getCharge());
inParticles.set(i + 9, particles.get(k).getPrevX());
inParticles.set(i + 10, particles.get(k).getPrevY());
inParticles.set(i + 11, particles.get(k).getEx());
inParticles.set(i + 12, particles.get(k).getEy());
inParticles.set(i + 13, particles.get(k).getBz());
inParticles.set(i + 14, particles.get(k).getPrevPositionComponentForceX());
inParticles.set(i + 15, particles.get(k).getPrevPositionComponentForceY());
inParticles.set(i + 16, particles.get(k).getPrevTangentVelocityComponentOfForceX());
inParticles.set(i + 17, particles.get(k).getPrevTangentVelocityComponentOfForceY());
inParticles.set(i + 18, particles.get(k).getPrevNormalVelocityComponentOfForceX());
inParticles.set(i + 19, particles.get(k).getPrevNormalVelocityComponentOfForceY());
inParticles.set(i + 20, particles.get(k).getPrevBz());
inParticles.set(i + 21, particles.get(k++).getPrevLinearDragCoefficient());
}
k = 0;
for (int i = 0; i < forceSize; i += F_SIZE) {
inForce.set(i + 0, f.getForceX(particles.get(k)));
inForce.set(i + 1, f.getForceY(particles.get(k)));
inForce.set(i + 2, f.getPositionComponentofForceX(particles.get(k)));
inForce.set(i + 3, f.getPositionComponentofForceY(particles.get(k)));
inForce.set(i + 4, f.getTangentVelocityComponentOfForceX(particles.get(k)));
inForce.set(i + 5, f.getTangentVelocityComponentOfForceY(particles.get(k)));
inForce.set(i + 6, f.getNormalVelocityComponentofForceX(particles.get(k)));
inForce.set(i + 7, f.getNormalVelocityComponentofForceX(particles.get(k)));
inForce.set(i + 8, f.getBz(particles.get(k)));
inForce.set(i + 9, f.getLinearDragCoefficient(particles.get(k++)));
}
k = 0;
int numCellsX = grid.getNumCellsXTotal();
int numCellsY = grid.getNumCellsYTotal();
int c = 0;
for (int i = 0; i < cellsSize; i++) {
inCells.set(i, 0.0);
}
for (int i = 0; i < numCellsX; i++) {
for (int j = 0; j < numCellsY; j++) {
inCells.set(c + 0, grid.getCells()[i][j].getJx());
inCells.set(c + 1, grid.getCells()[i][j].getJy());
inCells.set(c + 2, grid.getCells()[i][j].getRho());
inCells.set(c + 3, grid.getCells()[i][j].getPhi());
inCells.set(c + 4, grid.getCells()[i][j].getEx());
inCells.set(c + 5, grid.getCells()[i][j].getEy());
inCells.set(c + 6, grid.getCells()[i][j].getBz());
inCells.set(c + 7, grid.getCells()[i][j].getBzo());
c += C_SIZE;
}
}
k = 0;
for (int i = 0; i < numCellsX * numCellsY; i++) {
inBoundaries.set(k++, grid.parallelBoundariesArray[i]);
}
}
/**
* Write the results to a txt file
*/
public void writeToFile() throws FileNotFoundException {
PrintWriter pw = new PrintWriter(new File("particles_ocl.txt"));
for (int i = 0; i < particles.size(); i++) {
pw.write(particles.get(i).getX() + "\n");
pw.write(particles.get(i).getY() + "\n");
pw.write(particles.get(i).getRadius() + "\n");
pw.write(particles.get(i).getVx() + "\n");
pw.write(particles.get(i).getVy() + "\n");
pw.write(particles.get(i).getAx() + "\n");
pw.write(particles.get(i).getAy() + "\n");
pw.write(particles.get(i).getMass() + "\n");
pw.write(particles.get(i).getCharge() + "\n");
pw.write(particles.get(i).getPrevX() + "\n");
pw.write(particles.get(i).getPrevY() + "\n");
pw.write(particles.get(i).getEx() + "\n");
pw.write(particles.get(i).getEy() + "\n");
pw.write(particles.get(i).getBz() + "\n");
pw.write(particles.get(i).getPrevPositionComponentForceX() + "\n");
pw.write(particles.get(i).getPrevPositionComponentForceY() + "\n");
pw.write(particles.get(i).getPrevTangentVelocityComponentOfForceX() + "\n");
pw.write(particles.get(i).getPrevTangentVelocityComponentOfForceY() + "\n");
pw.write(particles.get(i).getPrevNormalVelocityComponentOfForceX() + "\n");
pw.write(particles.get(i).getPrevNormalVelocityComponentOfForceY() + "\n");
pw.write(particles.get(i).getPrevBz() + "\n");
pw.write(particles.get(i).getPrevLinearDragCoefficient() + "\n");
}
pw.close();
}
/**
* Runs the entire simulation
*/
public void runParallelSimulation() throws IOException, InterruptedException {
int particlesSize = particles.size() * P_SIZE;
int forceSize = particles.size() * F_SIZE;
int cellsSize = grid.getNumCellsXTotal() * grid.getNumCellsYTotal() * C_SIZE;
int boundSize = grid.getNumCellsXTotal() * grid.getNumCellsYTotal();
CLContext context = JavaCL.createBestContext();
CLQueue queue = context.createDefaultQueue();
ByteOrder byteOrder = context.getByteOrder();
long workGroupSize = context.getDevices()[0].getMaxWorkGroupSize() - 1;
hostToKernelConversion(particlesSize, forceSize, cellsSize, boundSize, byteOrder);
// Create an OpenCL input buffer :
CLBuffer<Double> inPar = context.createDoubleBuffer(Usage.InputOutput, inParticles);
CLBuffer<Double> inFor = context.createDoubleBuffer(Usage.Input, inForce);
CLBuffer<Double> inCel = context.createDoubleBuffer(Usage.InputOutput, inCells);
CLBuffer<Integer> inBound = context.createIntBuffer(Usage.InputOutput, inBoundaries);
//call the kernel
SimulationKernel kernels = new SimulationKernel(context);
int n = (int) workGroupSize;
int[] globalSizes = new int[]{n};
int[] localSizes = new int[]{n};
if (OCLParticleSolver.equalsIgnoreCase("Boris") && OCLGridInterpolator.equalsIgnoreCase("Cloud In Cell")) {
borisCloudInCell(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Boris Damped") && OCLGridInterpolator.equalsIgnoreCase("Cloud In Cell")) {
borisDampedCloudInCell(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Empty Solver") && OCLGridInterpolator.equalsIgnoreCase("Cloud In Cell")) {
emptySolverCloudInCell(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Euler") && OCLGridInterpolator.equalsIgnoreCase("Cloud In Cell")) {
eulerCloudInCell(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Euler Richardson") && OCLGridInterpolator.equalsIgnoreCase("Cloud In Cell")) {
eulerRichardsonCloudInCell(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Leap Frog") && OCLGridInterpolator.equalsIgnoreCase("Cloud In Cell")) {
leapFrogCloudInCell(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Leap Frog Damped") && OCLGridInterpolator.equalsIgnoreCase("Cloud In Cell")) {
leapFrogDampedCloudInCell(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Leap Frog Half Step") && OCLGridInterpolator.equalsIgnoreCase("Cloud In Cell")) {
leapFrogHalfStepCloudInCell(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("SemiImplicit Euler") && OCLGridInterpolator.equalsIgnoreCase("Cloud In Cell")) {
semiImplicitEulerCloudInCell(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Boris") && OCLGridInterpolator.equalsIgnoreCase("Charge Conserving CIC")) {
borisCIC(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Boris Damped") && OCLGridInterpolator.equalsIgnoreCase("Charge Conserving CIC")) {
borisDampedCIC(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Empty Solver") && OCLGridInterpolator.equalsIgnoreCase("Charge Conserving CIC")) {
emptySolverCIC(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Euler") && OCLGridInterpolator.equalsIgnoreCase("Charge Conserving CIC")) {
eulerCIC(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Euler Richardson") && OCLGridInterpolator.equalsIgnoreCase("Charge Conserving CIC")) {
eulerRichardsonCIC(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Leap Frog") && OCLGridInterpolator.equalsIgnoreCase("Charge Conserving CIC")) {
leapFrogCIC(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Leap Frog Damped") && OCLGridInterpolator.equalsIgnoreCase("Charge Conserving CIC")) {
leapFrogDampedCIC(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Leap Frog Half Step") && OCLGridInterpolator.equalsIgnoreCase("Charge Conserving CIC")) {
leapFrogHalfStepCIC(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("SemiImplicit Euler") && OCLGridInterpolator.equalsIgnoreCase("Charge Conserving CIC")) {
semiImplicitEulerCIC(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else if (OCLParticleSolver.equalsIgnoreCase("Boris Profile")) {
borisProfile(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
} else { //default: boris solver, charge conserving cic interpolator
borisCIC(kernels, queue, inPar, inCel, inBound, n, globalSizes, localSizes, workGroupSize);
}
//get cells
if (writeCells == 1) {
Pointer<Double> outCel = inCel.read(queue, null);
PrintWriter pw = new PrintWriter(new File("cells_ocl.txt"));
for (int i = 0; i < grid.getNumCellsXTotal() * grid.getNumCellsYTotal() * C_SIZE; i += C_SIZE) {
pw.write(outCel.get(i + 0) + "\n");
pw.write(outCel.get(i + 1) + "\n");
pw.write(outCel.get(i + 2) + "\n");
pw.write(outCel.get(i + 3) + "\n");
pw.write(outCel.get(i + 4) + "\n");
pw.write(outCel.get(i + 5) + "\n");
pw.write(outCel.get(i + 6) + "\n");
pw.write(outCel.get(i + 7) + "\n");
}
pw.close();
}
}
void borisCIC(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_boris(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.charge_conserving_CIC(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void borisDampedCIC(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_boris_damped(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.charge_conserving_CIC(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void emptySolverCIC(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
// while(processedParticles < particles.size()){
// pushEvt = kernels.particle_push_boris(queue, inPar, tstep, n, particles.size(), processedParticles,
// width, height, globalSizes, localSizes);
// processedParticles += workGroupSize;
// }
// processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.charge_conserving_CIC(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void eulerCIC(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_euler(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.charge_conserving_CIC(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void eulerRichardsonCIC(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_euler_richardson(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.charge_conserving_CIC(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void leapFrogCIC(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_leap_frog(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.charge_conserving_CIC(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void leapFrogDampedCIC(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_leap_frog_damped(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.charge_conserving_CIC(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void leapFrogHalfStepCIC(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_leap_frog_half_step(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.charge_conserving_CIC(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void semiImplicitEulerCIC(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_semiimplicit_euler(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.charge_conserving_CIC(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void borisCloudInCell(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_boris(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.cloud_in_cell(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void borisDampedCloudInCell(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_boris_damped(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.cloud_in_cell(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void emptySolverCloudInCell(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
// while(processedParticles < particles.size()){
// pushEvt = kernels.particle_push_boris(queue, inPar, tstep, n, particles.size(), processedParticles,
// width, height, globalSizes, localSizes);
// processedParticles += workGroupSize;
// }
// processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.cloud_in_cell(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void eulerCloudInCell(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_euler(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.cloud_in_cell(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void eulerRichardsonCloudInCell(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_euler_richardson(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.cloud_in_cell(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void leapFrogCloudInCell(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_leap_frog(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.cloud_in_cell(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void leapFrogDampedCloudInCell(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_leap_frog_damped(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.cloud_in_cell(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void leapFrogHalfStepCloudInCell(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_leap_frog_half_step(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.cloud_in_cell(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void semiImplicitEulerCloudInCell(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_semiimplicit_euler(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
//interpolate to grid
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.cloud_in_cell(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
//update grid
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
//interpolate to particle
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
}
void borisProfile(SimulationKernel kernels, CLQueue queue, CLBuffer<Double> inPar, CLBuffer<Double> inCel,
CLBuffer<Integer> inBound, int n, int[] globalSizes, int[] localSizes, long workGroupSize) {
CLEvent pushEvt = null;
CLEvent resetEvt = null;
CLEvent interpEvt = null;
CLEvent storeEvt = null;
CLEvent solveEEvt = null;
CLEvent solveBEvt = null;
CLEvent partInterpEvt = null;
long t1, t2;
long pushTime = 0, gridInterpTime = 0, updateTime = 0, partInterpTime = 0;
int processedParticles = 0;
for (int i = 0; i < iterations; i++) {
//particlePush
t1 = System.currentTimeMillis();
while (processedParticles < particles.size()) {
pushEvt = kernels.particle_push_boris(queue, inPar, tstep, n, particles.size(), processedParticles,
width, height, globalSizes, localSizes);
processedParticles += workGroupSize;
}
processedParticles = 0;
pushEvt.waitFor();
t2 = System.currentTimeMillis();
pushTime += (t2 - t1);
//interpolate to grid
t1 = System.currentTimeMillis();
resetEvt = kernels.reset_current(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, pushEvt);
while (processedParticles < particles.size()) {
interpEvt = kernels.charge_conserving_CIC(queue, inPar, inCel, inBound, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, resetEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
interpEvt.waitFor();
t2 = System.currentTimeMillis();
gridInterpTime += (t2 - t1);
//update grid
t1 = System.currentTimeMillis();
storeEvt = kernels.store_fields(queue, inCel, n,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
globalSizes, localSizes, interpEvt);
solveEEvt = kernels.solve_for_e(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, storeEvt);
solveBEvt = kernels.solve_for_b(queue, inCel, inBound, tstep, n, grid.getNumCellsX(), grid.getNumCellsY(),
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveEEvt);
solveBEvt.waitFor();
t2 = System.currentTimeMillis();
updateTime += (t2 - t1);
//interpolate to particle
t1 = System.currentTimeMillis();
while (processedParticles < particles.size()) {
partInterpEvt = kernels.particle_interpolation(queue, inPar, inCel, tstep, n, particles.size(), processedParticles,
grid.getNumCellsXTotal(), grid.getNumCellsYTotal(),
grid.getCellWidth(), grid.getCellHeight(), globalSizes, localSizes, solveBEvt);
processedParticles += workGroupSize;
}
processedParticles = 0;
partInterpEvt.waitFor();
t2 = System.currentTimeMillis();
partInterpTime += (t2 - t1);
}
//get output
Pointer<Double> outPar = inPar.read(queue, partInterpEvt);
for (int i = 0; i < particles.size() * P_SIZE; i += P_SIZE) {
particles.get(i / P_SIZE).setX(outPar.get(i + 0));
particles.get(i / P_SIZE).setY(outPar.get(i + 1));
particles.get(i / P_SIZE).setRadius(outPar.get(i + 2));
particles.get(i / P_SIZE).setVx(outPar.get(i + 3));
particles.get(i / P_SIZE).setVy(outPar.get(i + 4));
particles.get(i / P_SIZE).setAx(outPar.get(i + 5));
particles.get(i / P_SIZE).setAy(outPar.get(i + 6));
particles.get(i / P_SIZE).setMass(outPar.get(i + 7));
particles.get(i / P_SIZE).setCharge(outPar.get(i + 8));
particles.get(i / P_SIZE).setPrevX(outPar.get(i + 9));
particles.get(i / P_SIZE).setPrevY(outPar.get(i + 10));
particles.get(i / P_SIZE).setEx(outPar.get(i + 11));
particles.get(i / P_SIZE).setEy(outPar.get(i + 12));
particles.get(i / P_SIZE).setBz(outPar.get(i + 13));
particles.get(i / P_SIZE).setPrevPositionComponentForceX(outPar.get(i + 14));
particles.get(i / P_SIZE).setPrevPositionComponentForceY(outPar.get(i + 15));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceX(outPar.get(i + 16));
particles.get(i / P_SIZE).setPrevTangentVelocityComponentOfForceY(outPar.get(i + 17));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceX(outPar.get(i + 18));
particles.get(i / P_SIZE).setPrevNormalVelocityComponentOfForceY(outPar.get(i + 19));
particles.get(i / P_SIZE).setPrevBz(outPar.get(i + 20));
particles.get(i / P_SIZE).setPrevLinearDragCoefficient(outPar.get(i + 21));
}
System.out.println("Particle push: " + pushTime + "ms");
System.out.println("Grid Interpolation: " + gridInterpTime + " ms");
System.out.println("Grid update: " + updateTime + "ms");
System.out.println("Particle interpolation: " + partInterpTime + "ms");
}
}