package jcuda.jcublas.ops;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.nd4j.jita.allocator.enums.AllocationStatus;
import org.nd4j.jita.allocator.impl.AtomicAllocator;
import org.nd4j.jita.conf.Configuration;
import org.nd4j.jita.conf.CudaEnvironment;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.jcublas.context.CudaContext;
import static org.junit.Assert.assertEquals;
/**
* @author raver119@gmail.com
*/
@Ignore
public class CudaBroadcastTests {
@Before
public void setUp() {
CudaEnvironment.getInstance().getConfiguration()
.setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
.setFirstMemory(AllocationStatus.DEVICE)
.setMaximumBlockSize(64)
.setMaximumGridSize(128)
.enableDebug(true);
System.out.println("Init called");
}
@Test
public void testPinnedAddiRowVector() throws Exception {
// simple way to stop test if we're not on CUDA backend here
assertEquals("JcublasLevel1", Nd4j.getBlasWrapper().level1().getClass().getSimpleName());
for (int iter = 0; iter < 100; iter++) {
INDArray array1 = Nd4j.zeros(15, 15);
for (int y = 0; y < 15; y++) {
for (int x = 0; x < 15; x++) {
assertEquals("Failed on iteration: ["+iter+"], y.x: ["+y+"."+x+"]", 0.0f, array1.getRow(y).getFloat(x), 0.01);
}
}
INDArray array2 = Nd4j.create(new float[]{2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f});
for (int i = 0; i < 30; i++) {
array1.addiRowVector(array2);
}
//System.out.println("Array1: " + array1);
//System.out.println("Array2: " + array2);
for (int y = 0; y < 15; y++) {
for (int x = 0; x < 15; x++) {
assertEquals("Failed on iteration: ["+iter+"], y.x: ["+y+"."+x+"]", 60.0f, array1.getRow(y).getFloat(x), 0.01);
}
}
}
}
@Test
public void testPinnedSubiRowVector() throws Exception {
// simple way to stop test if we're not on CUDA backend here
INDArray array1 = Nd4j.zeros(1500,150);
INDArray array2 = Nd4j.linspace(1,150,150);
AtomicAllocator.getInstance().getPointer(array1, (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext());
AtomicAllocator.getInstance().getPointer(array2, (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext());
long time1 = System.currentTimeMillis();
array1.subiRowVector(array2);
long time2 = System.currentTimeMillis();
System.out.println("Execution time: " + (time2 - time1));
// System.out.println("Array1: " + array1);
// System.out.println("Array2: " + array2);
assertEquals(-1.0f, array1.getRow(0).getFloat(0), 0.01);
assertEquals(-3.0f, array1.getRow(0).getFloat(2), 0.01);
assertEquals(-10.0f, array1.getRow(0).getFloat(9), 0.01);
}
@Test
public void testPinnedSubiColumnVector2() throws Exception {
// simple way to stop test if we're not on CUDA backend here
INDArray array1 = Nd4j.zeros(1500,150);
INDArray array2 = Nd4j.linspace(1,1500,1500).reshape(1500,1);
array1.subiColumnVector(array2);
// System.out.println("Array1: " + array1);
// System.out.println("Array2: " + array2);
assertEquals(-1.0f, array1.getRow(0).getFloat(0), 0.01);
assertEquals(-1.0f, array1.getRow(0).getFloat(0), 0.01);
assertEquals(-301.0f, array1.getRow(300).getFloat(0), 0.01);
assertEquals(-1500.0f, array1.getRow(1499).getFloat(0), 0.01);
}
@Test
public void testPinnedSubiRowVector2() throws Exception {
// simple way to stop test if we're not on CUDA backend here
INDArray array1 = Nd4j.zeros(1500,150);
INDArray array2 = Nd4j.linspace(1,1500,1500).reshape(1500,1);
array1.subiRowVector(array2);
System.out.println("Array1: " + array1.shapeInfoDataBuffer());
System.out.println("Array2: " + array2.shapeInfoDataBuffer());
assertEquals(-1.0f, array1.getRow(0).getFloat(0), 0.01);
assertEquals(-1.0f, array1.getRow(0).getFloat(0), 0.01);
assertEquals(-301.0f, array1.getRow(300).getFloat(0), 0.01);
assertEquals(-1500.0f, array1.getRow(1499).getFloat(0), 0.01);
}
@Test
public void testPinnedRSubiRowVector() throws Exception {
// simple way to stop test if we're not on CUDA backend here
assertEquals("JcublasLevel1", Nd4j.getBlasWrapper().level1().getClass().getSimpleName());
INDArray array1 = Nd4j.create(15,15);
INDArray array2 = Nd4j.create(new float[]{2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f});
array1.rsubiRowVector(array2);
System.out.println("Array1: " + array1);
//System.out.println("Array2: " + array2);
assertEquals(2.0f, array1.getRow(0).getFloat(0), 0.01);
assertEquals(2.0f, array1.getRow(1).getFloat(0), 0.01);
assertEquals(2.0f, array1.getRow(3).getFloat(3), 0.01);
}
@Test
public void testPinnedSubiColumnVector() throws Exception {
// simple way to stop test if we're not on CUDA backend here
INDArray array1 = Nd4j.zeros(150,3);
INDArray array2 = Nd4j.linspace(1, 150, 150).reshape(150,1);
array1.subiColumnVector(array2);
System.out.println("Array1: " + array1);
System.out.println("Array2: " + array2);
assertEquals(-1.0f, array1.getRow(0).getFloat(0), 0.01);
assertEquals(-2.0f, array1.getRow(1).getFloat(0), 0.01);
assertEquals(-3.0f, array1.getRow(2).getFloat(0), 0.01);
}
@Test
public void testPinnedMulRowVector() throws Exception {
// simple way to stop test if we're not on CUDA backend here
assertEquals("JcublasLevel1", Nd4j.getBlasWrapper().level1().getClass().getSimpleName());
INDArray array1 = Nd4j.zeros(15,15);
array1.putRow(0, Nd4j.create(new float[]{2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}));
array1.putRow(1, Nd4j.create(new float[]{2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}));
INDArray array2 = Nd4j.create(new float[]{2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f});
array1.muliRowVector(array2);
System.out.println("Array1: " + array1);
System.out.println("Array2: " + array2);
assertEquals(4.0f, array1.getRow(0).getFloat(0), 0.01);
}
@Test
public void testPinnedDivRowVector() throws Exception {
// simple way to stop test if we're not on CUDA backend here
assertEquals("JcublasLevel1", Nd4j.getBlasWrapper().level1().getClass().getSimpleName());
INDArray array1 = Nd4j.zeros(15,15);
array1.putRow(0, Nd4j.create(new float[]{2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}));
array1.putRow(1, Nd4j.create(new float[]{2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}));
INDArray array2 = Nd4j.create(new float[]{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
array1.diviRowVector(array2);
System.out.println("Array1: " + array1);
System.out.println("Array2: " + array2);
assertEquals(2.0f, array1.getRow(0).getFloat(0), 0.01);
}
@Test
public void testPinnedRDivRowVector() throws Exception {
// simple way to stop test if we're not on CUDA backend here
assertEquals("JcublasLevel1", Nd4j.getBlasWrapper().level1().getClass().getSimpleName());
INDArray array1 = Nd4j.zeros(15,15);
array1.putRow(0, Nd4j.create(new float[]{2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}));
array1.putRow(1, Nd4j.create(new float[]{2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}));
INDArray array2 = Nd4j.create(new float[]{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
array1.rdiviRowVector(array2);
System.out.println("Array1: " + array1);
System.out.println("Array2: " + array2);
assertEquals(0.5f, array1.getRow(0).getFloat(0), 0.01);
}
@Test
public void execBroadcastOp() throws Exception {
INDArray array = Nd4j.ones(1024, 1024);
INDArray arrayRow = Nd4j.linspace(1, 1024, 1024);
float sum = (float) array.sumNumber().doubleValue();
array.addiRowVector(arrayRow);
long time1 = System.nanoTime();
for (int x = 0; x < 1000; x++) {
array.addiRowVector(arrayRow);
}
long time2 = System.nanoTime();
System.out.println("Execution time: " + ((time2 - time1) / 1000));
assertEquals(1002, array.getFloat(0), 0.1f);
assertEquals(2003, array.getFloat(1), 0.1f);
}
@Test
public void execBroadcastOpTimed2() throws Exception {
Nd4j.create(1);
System.out.println("A ----------------");
INDArray array = Nd4j.zeros(2048, 1024);
System.out.println("0 ----------------");
INDArray arrayRow = Nd4j.ones(1024);
System.out.println("1 ----------------");
float sum = (float) array.sumNumber().doubleValue();
float sum2 = (float) arrayRow.sumNumber().doubleValue();
System.out.println("2 ----------------");
long time1 = System.nanoTime();
for (int x = 0; x < 1000; x++) {
array.addiRowVector(arrayRow);
}
long time2 = System.nanoTime();
System.out.println("Execution time: " + ((time2 - time1) / 1000));
for (int x = 0; x < array.rows(); x++) {
INDArray row = array.getRow(x);
for (int y = 0; y < array.columns(); y++) {
assertEquals("Failed on x.y: ["+x+"."+y+"]",1000f, row.getFloat(y), 0.01);
}
}
}
}