OpenCL4JavaBenchmarkTest.java example

Explorer

JavaCL-master
- Blas
  - src
    - main
      - java
        com
        nativelibs4java
        opencl
        blas
        CLDefaultMatrix2D.java
        CLEvents.java
        CLKernels.java
        CLMatrix2D.java
        CLMatrixUtils.java
        ujmp
        CLDenseDoubleMatrix2D.java
        CLDenseDoubleMatrix2DFactory.java
        CLDenseFloatMatrix2D.java
        CLDenseFloatMatrix2DFactory.java
        CLDenseMatrix2DImpl.java
        CLMatrixBenchmark.java
        CLWrappedMatrix2D.java
        DirectNIODenseDoubleMatrix2D.java
        DirectNIODenseDoubleMatrix2DFactory.java
        MatrixUtils.java
        package-info.java
    - test
      - java
        com
        nativelibs4java
        opencl
        blas
        CLMatrixUtilsTest.java
        ujmp
        PerformanceTest.java
        UJMPOpenCLTest.java
- Contributions
  - Kazo Csaba
    - BinaryKernelTest - issue 30.java
- Core
  - src
    - main
      - java
        com
        ochafik
        util
        string
        StringUtils.java
      - velocity
        com
        nativelibs4java
        opencl
        ByteOrderHack.java
        CLAbstractEntity.java
        CLAbstractUserProgram.java
        CLBuffer.java
        CLBuildException.java
        CLContext.java
        CLDevice.java
        CLEvent.java
        CLException.java
        CLImage.java
        CLImage2D.java
        CLImage3D.java
        CLImageFormat.java
        CLInfoGetter.java
        CLKernel.java
        CLMem.java
        CLPlatform.java
        CLProgram.java
        CLQueue.java
        CLSampler.java
        CLUserEvent.java
        ImageIOUtils.java
        InfoName.java
        JavaCL.java
        LocalSize.java
        PlatformUtils.java
        ReusablePointer.java
        ReusablePointers.java
        package-info.java
    - test
      - java
        com
        nativelibs4java
        opencl
        AbstractCommon.java
        BinaryKernelTest.java
        BufferReadTest.java
        BufferTest.java
        ByteOrderHackTest.java
        CLPlatformTest.java
        CLTestUtils.java
        DeviceTest.java
        EventTest.java
        ImageTest.java
        InfoGettersTest.java
        JOGLTest.java
        KernelTest.java
        OpenCL4JavaBasicTest.java
        OpenCL4JavaBenchmarkTest.java
        OverheadTest.java
        StressTest.java
        SweatTest.java
        test
        BenchmarkUtils.java
- Demos
  - src
    - main
      - java
        com
        nativelibs4java
        opencl
        demos
        SetupUtils.java
        hardware
        HardwareReport.java
        mandelbrot
        MandelbrotDemo.java
        random
        ParallelRandomDemo.java
        sobelfilter
        SobelFilterDemo.java
        vectoradd
        VectorAdd.java
    - test
      - java
        com
        nativelibs4java
        opencl
        demos
        SetupUtilsTest.java
        TestReport.java
- Generator
  - src
    - main
      - java
        com
        nativelibs4java
        opencl
        generator
        JavaCLGenerator.java
- InteractiveImageDemo
  - src
    - main
      - java
        com
        nativelibs4java
        opencl
        demos
        interactiveimage
        InteractiveImageDemo.java
        Utils.java
- JavaCL
  - src
    - main
      - java
        com
        nativelibs4java
        opencl
        OSGiBundleActivator.java
        util
        Fun1.java
        Fun2.java
        LinearAlgebraUtils.java
        OpenCLType.java
        ParallelMath.java
        ParallelRandom.java
        Primitive.java
        ReductionUtils.java
        Transformer.java
        fft
        AbstractDFT.java
        AbstractFFTPow2.java
        DoubleDFT.java
        DoubleFFTPow2.java
        FloatDFT.java
        FloatFFTPow2.java
        package-info.java
        package-info.java
    - test
      - java
        com
        nativelibs4java
        opencl
        generator
        GeneratorTest.java
        util
        DiscreteFourierTransformTest.java
        ParallelRandomTest.java
        ReductionTest.java
- MavenPlugin
  - src
    - main
      - java
        com
        nativelibs4java
        opencl
        generator
        JavaCLGeneratorMojo.java
- NumericalBenchmark
  - src
    - main
      - java
        fft
        FFTBench.java
- OpenCL4Java
  - src
    - main
      - java
        com
        nativelibs4java
        opencl
        library
        IOpenCLLibrary.java
        OpenCLLibrary.java
        OpenGLContextUtils.java
        cl_buffer_region.java
        cl_image_desc.java
        cl_image_format.java
        package-info.java
        proxy
        AbstractOpenCLImplementation.java
        PointerUtils.java
        ProxiedOpenCLImplementation.java
- OpenGLDemos
  - src
    - main
      - java
        com
        nativelibs4java
        opencl
        demos
        JavaCLSettingsPanel.java
        particles
        ParticlesDemo.java
- Tutorials
  - DFT
    - src
      - main
        java
        tutorial
        DFT.java
        DFT2.java
  - Simple
    - src
      - main
        resources
        archetype-resources
        src
        main
        java
        JavaCLTutorial1.java
        JavaCLTutorial2.java
        JavaCLTutorial3.java

package com.nativelibs4java.opencl;

import static com.nativelibs4java.opencl.CLTestUtils.avgError;
import static com.nativelibs4java.opencl.CLTestUtils.fillBuffersWithSomeDatad;
import static com.nativelibs4java.opencl.CLTestUtils.fillBuffersWithSomeDataf;
import static com.nativelibs4java.test.BenchmarkUtils.gc;
import static com.nativelibs4java.util.NIOUtils.directBytes;

import org.bridj.Pointer;
import static org.bridj.Pointer.*;
import java.util.Arrays;

import java.nio.FloatBuffer;
import java.nio.DoubleBuffer;

import org.junit.BeforeClass;
import org.junit.Test;

import com.nativelibs4java.opencl.CLTestUtils.Action2;
import com.nativelibs4java.opencl.CLTestUtils.ExecResult;

//import com.nativelibs4java.scalacl.*;
/// @see http://ati.amd.com/technology/streamcomputing/intro_opencl.html#simple
public class OpenCL4JavaBenchmarkTest {
    static final boolean warmup = true;

    /**
     * Times the pure-Java single-precision implementation of
     * {@code output[i] = a[i] * sin(b[i])}.
     *
     * @param loops    number of timed passes over the data
     * @param dataSize number of elements in each array
     * @return the output buffer together with the elapsed nanoseconds divided by
     *         {@code loops * dataSize}
     */
    static ExecResult<FloatBuffer> testJava_float_aSinB(int loops, int dataSize) throws CLBuildException {

        FloatBuffer aBuffer = FloatBuffer.allocate(dataSize);
        FloatBuffer bBuffer = FloatBuffer.allocate(dataSize);
        FloatBuffer outputBuffer = FloatBuffer.allocate(dataSize);
        // The loops below work on the arrays that back the heap buffers allocated above.
        float[] a = aBuffer.array(), b = bBuffer.array(), output = outputBuffer.array();

        if (warmup) {
            // Never warm up on more elements than the arrays hold: dataSize may be below 100.
            int warmupSize = Math.min(100, dataSize);
            System.out.print("Warming up Java operations...");
            for (int i = 0; i < 3000; i++) {
                java_aSinB(a, b, output, warmupSize);
            }
            System.out.println();
        }

        fillBuffersWithSomeDataf(aBuffer, bBuffer);
        gc();

        long start = System.nanoTime();
        for (int i = 0; i < loops; i++) {
            java_aSinB(a, b, output, dataSize);
        }
        long time = System.nanoTime() - start;
        System.out.println("Java operations : " + time + "ns");
        // The cast to double keeps the divisor from overflowing int and yields a fractional unit time.
        return new ExecResult<FloatBuffer>(outputBuffer, time / (loops * (double) dataSize));
    }

    /**
     * Times the pure-Java double-precision implementation of
     * {@code output[i] = a[i] * sin(b[i])}.
     *
     * @param loops    number of timed passes over the data
     * @param dataSize number of elements in each array
     * @return the output buffer together with the elapsed nanoseconds divided by
     *         {@code loops * dataSize}
     */
    static ExecResult<DoubleBuffer> testJava_double_aSinB(int loops, int dataSize) throws CLBuildException {

        DoubleBuffer aBuffer = DoubleBuffer.allocate(dataSize);
        DoubleBuffer bBuffer = DoubleBuffer.allocate(dataSize);
        DoubleBuffer outputBuffer = DoubleBuffer.allocate(dataSize);
        // The loops below work on the arrays that back the heap buffers allocated above.
        double[] a = aBuffer.array(), b = bBuffer.array(), output = outputBuffer.array();

        if (warmup) {
            // Never warm up on more elements than the arrays hold: dataSize may be below 100.
            int warmupSize = Math.min(100, dataSize);
            System.out.print("Warming up Java operations...");
            for (int i = 0; i < 3000; i++) {
                java_aSinB(a, b, output, warmupSize);
            }
            System.out.println();
        }

        fillBuffersWithSomeDatad(aBuffer, bBuffer);
        gc();

        long start = System.nanoTime();
        for (int i = 0; i < loops; i++) {
            java_aSinB(a, b, output, dataSize);
        }
        long time = System.nanoTime() - start;
        System.out.println("Java operations : " + time + "ns");
        // The cast to double keeps the divisor from overflowing int and yields a fractional unit time.
        return new ExecResult<DoubleBuffer>(outputBuffer, time / (loops * (double) dataSize));
    }

    /**
     * Single-precision entry point of the OpenCL benchmark: runs
     * {@link #testOpenCL_aSinB} with {@code Prim.Float} and re-types the raw byte
     * result as a float pointer.
     */
    static ExecResult<Pointer<Float>> testOpenCL_float_aSinB(CLContext context, int loops, int dataSize, boolean hostInOpenCL) throws CLBuildException {
        // Fills both input buffers after viewing the raw bytes as floats.
        Action2<Pointer<Byte>, Pointer<Byte>> floatFiller = new Action2<Pointer<Byte>, Pointer<Byte>>() {

            public void call(Pointer<Byte> a, Pointer<Byte> b) {
                Pointer<Float> floatsA = a.as(Float.class);
                Pointer<Float> floatsB = b.as(Float.class);
                fillBuffersWithSomeDataf(floatsA, floatsB);
            }
        };

        ExecResult<Pointer<Byte>> rawResult = testOpenCL_aSinB(context, Prim.Float, loops, dataSize, hostInOpenCL, floatFiller);
        Pointer<Float> typedOutput = rawResult.buffer.as(Float.class);
        return new ExecResult<Pointer<Float>>(typedOutput, rawResult.unitTimeNano);
    }

    /**
     * Double-precision entry point of the OpenCL benchmark: runs
     * {@link #testOpenCL_aSinB} with {@code Prim.Double} and re-types the raw byte
     * result as a double pointer.
     */
    static ExecResult<Pointer<Double>> testOpenCL_double_aSinB(CLContext context, int loops, int dataSize, boolean hostInOpenCL) throws CLBuildException {
        // Fills both input buffers after viewing the raw bytes as doubles.
        Action2<Pointer<Byte>, Pointer<Byte>> doubleFiller = new Action2<Pointer<Byte>, Pointer<Byte>>() {

            public void call(Pointer<Byte> a, Pointer<Byte> b) {
                Pointer<Double> doublesA = a.as(Double.class);
                Pointer<Double> doublesB = b.as(Double.class);
                fillBuffersWithSomeDatad(doublesA, doublesB);
            }
        };

        ExecResult<Pointer<Byte>> rawResult = testOpenCL_aSinB(context, Prim.Double, loops, dataSize, hostInOpenCL, doubleFiller);
        Pointer<Double> typedOutput = rawResult.buffer.as(Double.class);
        return new ExecResult<Pointer<Double>>(typedOutput, rawResult.unitTimeNano);
    }

    /**
     * Builds the {@code aSinB} kernel for {@code nativePrim}, enqueues it {@code loops}
     * times over {@code dataSize} work items and returns the output bytes together
     * with the elapsed nanoseconds divided by {@code loops * dataSize}.
     *
     * @param context                 OpenCL context used to build the kernel and create the queue
     * @param nativePrim              element type; its {@code sizeof()} gives the bytes per element
     * @param loops                   number of timed kernel enqueues
     * @param dataSize                number of elements, also used as the global work size
     * @param hostInOpenCL            when {@code true} the buffers are created by size and accessed
     *                                through map/unmap; when {@code false} they are created from
     *                                host pointers allocated here
     * @param fillBuffersWithSomeData callback that fills the two input pointers
     * @return the output bytes and the per-element time in nanoseconds
     */
    static ExecResult<Pointer<Byte>> testOpenCL_aSinB(CLContext context, Prim nativePrim, int loops, int dataSize, boolean hostInOpenCL, Action2<Pointer<Byte>, Pointer<Byte>> fillBuffersWithSomeData) throws CLBuildException {

        CLKernel kernel = setupASinB(nativePrim, context);
        CLQueue queue = context.createDefaultQueue();

        // All buffers are byte-typed, so every size and copy below must be expressed in bytes.
        final int byteCount = dataSize * nativePrim.sizeof();

        Pointer<Byte> input1 = null, input2 = null, output = null;
        CLBuffer<Byte> memIn1, memIn2, memOut;
        if (hostInOpenCL) {
            memIn1 = kernel.program.context.createBuffer(CLMem.Usage.Input, Byte.class, byteCount);
            memIn2 = kernel.program.context.createBuffer(CLMem.Usage.Input, Byte.class, byteCount);
            memOut = kernel.program.context.createBuffer(CLMem.Usage.Output, Byte.class, byteCount);
        } else {
            input1 = allocateBytes(byteCount).order(context.getByteOrder());
            input2 = allocateBytes(byteCount).order(context.getByteOrder());
            output = allocateBytes(byteCount).order(context.getByteOrder());

            // NOTE(review): the trailing 'false' is presumably the "copy" flag (buffer shares the host pointer) - confirm.
            memIn1 = kernel.program.context.createBuffer(CLMem.Usage.Input, input1, false);
            memIn2 = kernel.program.context.createBuffer(CLMem.Usage.Input, input2, false);
            memOut = kernel.program.context.createBuffer(CLMem.Usage.Output, output, false);
        }
        kernel.setArgs(memIn1, memIn2, memOut);

        if (warmup) {
            for (int i = 0; i < 3000; i++) {
                kernel.enqueueNDRange(queue, new int[]{dataSize});
            }
            queue.finish();
        }

        // Fill the inputs only after the warm-up; OpenCL-created buffers are filled through a write mapping.
        if (hostInOpenCL) {
            input1 = memIn1.map(queue, CLMem.MapFlags.Write);
            input2 = memIn2.map(queue, CLMem.MapFlags.Write);
        }
        fillBuffersWithSomeData.call(input1, input2);
        if (hostInOpenCL) {
            memIn1.unmap(queue, input1);
            memIn2.unmap(queue, input2);
        }
        queue.finish();
        gc();

        //if (dataSize < workItemSize) {
        //    System.err.println("dataSize = " + dataSize + " is lower than max workItemSize for first dim = " + workItemSize + " !!!");
        //    workItemSize = 1;
        //}

        long start = System.nanoTime();
        for (int i = 0; i < loops; i++) {
            kernel.enqueueNDRange(queue, new int[]{dataSize});//, new int[]{workItemSize});
        }
        // Wait for all enqueued kernels so the measured interval covers their execution.
        queue.finish();
        long time = System.nanoTime() - start;

        //System.out.println("OpenCL operations(" + target + ") : " + time + "ns");
        if (hostInOpenCL) {
            // Copy the OpenCL-hosted array back to RAM
            output = memOut.map(queue, CLMem.MapFlags.Read);
            // System.out.println("memOut.map = " + Long.toHexString(output.getPeer()));
            //queue.finish();
            Pointer<Byte> b = allocateBytes(byteCount).order(context.getByteOrder());
            // The pointer is byte-typed: copy byteCount elements, not dataSize, or only the
            // first 1/sizeof of the results would reach the returned buffer.
            output.copyTo(b, byteCount);
            memOut.unmap(queue, output);
            output = b;
        }
        return new ExecResult<Pointer<Byte>>(output, time / (loops * (double) dataSize));
    }

    /**
     * Generates and builds the OpenCL program containing the {@code aSinB} kernel,
     * which computes {@code output[i] = a[i] * sin(b[i])} for element type
     * {@code nativeType}.
     *
     * @param nativeType element type; its {@code toString()} is spliced into the source as
     *                   the OpenCL type name, and {@code Prim.Double} also enables
     *                   {@code cl_khr_fp64}
     * @param context    context in which the program is created and built
     * @return the {@code aSinB} kernel of the freshly built program
     * @throws CLBuildException declared for build failures of the generated source
     */
    static CLKernel setupASinB(Prim nativeType, CLContext context) throws CLBuildException {
        String src = "\n"
                //+ "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
                + "#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n"
                + (nativeType == Prim.Double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" : "")
                + "__kernel void aSinB(                                                  \n"
                + "   __global const " + nativeType + "* a,                                       \n"
                + "   __global const " + nativeType + "* b,                                       \n"
                + "   __global " + nativeType + "* output)                                        \n"
                + "{                                                                 \n"
                + "   int i = get_global_id(0);                                      \n"
                // Locals use the element type: hard-coding float here made the double kernel
                // truncate its inputs and evaluate sin() in single precision.
                + "   " + nativeType + " ai = a[i], bi = b[i];                                    \n"
                + "   output[i] = ai * sin(bi);// + atan2(ai, bi);                     \n"
                + "}                                                                 \n";

        CLProgram program = context.createProgram(src).build();
        return program.createKernel("aSinB");
    }

    /**
     * Reference single-precision implementation: stores {@code a[i] * (float) sin(b[i])}
     * into {@code output[i]} for the first {@code dataSize} indices.
     */
    public static void java_aSinB(float[] a, float[] b, float[] output, int dataSize) throws CLBuildException {
        int index = 0;
        while (index < dataSize) {
            final float factor = a[index];
            // Math.sin works in double; narrow to float before multiplying.
            final float sine = (float) Math.sin(b[index]);
            output[index] = factor * sine;// + (float)Math.atan2(ai, bi);
            index++;
        }
    }

    /**
     * Reference double-precision implementation: stores {@code a[i] * sin(b[i])}
     * into {@code output[i]} for the first {@code dataSize} indices.
     */
    public static void java_aSinB(double[] a, double[] b, double[] output, int dataSize) throws CLBuildException {
        int index = 0;
        while (index < dataSize) {
            final double factor = a[index];
            final double sine = Math.sin(b[index]);
            output[index] = factor * sine;// + Math.atan2(ai, bi);
            index++;
        }
    }

    /**
     * Primitive element types of the benchmark, each paired with its size in bytes.
     * {@link #toString()} yields the lower-case constant name.
     */
    enum Prim {

        Float(4), Double(8), Int(4), Short(2), Half(2);
        /** Size of one element of this type, in bytes. */
        final int sizeof;

        Prim(int sizeof) {
            this.sizeof = sizeof;
        }

        /** @return the size of one element of this type, in bytes */
        public int sizeof() {
            return sizeof;
        }

        /**
         * @return the constant name in lower case, e.g. {@code "float"}
         */
        @Override
        public String toString() {
            // Locale.ROOT keeps the result stable: with a Turkish default locale the plain
            // toLowerCase() would turn "Int" into "ınt" (dotless i).
            return name().toLowerCase(java.util.Locale.ROOT);
        }
    }

    /**
     * Runs the {@code a * sin(b)} benchmark in pure Java and in OpenCL (buffers hosted
     * by OpenCL and buffers hosted in RAM) and prints timings and average relative errors.
     * The double-precision part only runs when the context reports double support;
     * the float part always runs.
     */
    @Test
    public void testBenchmark() {
        /*for (String s : new String[] {
        "C:\\Program Files (x86)\\ATI Stream\\bin\\x86_64\\OpenCL.dll",
        "C:\\Program Files (x86)\\ATI Stream\\bin\\x86\\OpenCL.dll",
        "C:\\Program Files\\ATI Stream\\bin\\x86\\OpenCL.dll"
        })
        if ((f = new File(s)).exists())
        break;

        if (f.exists())
        System.setProperty("OpenCL.library", f.toString());
        //sss */
        try {
            System.out.println("#\n# " + OpenCL4JavaBenchmarkTest.class.getName() + "\n#");
            System.out.println("java.vm.name = " + System.getProperty("java.vm.name"));
            System.out.println("java.vm.version = " + System.getProperty("java.runtime.version"));
            CLPlatform[] platforms = JavaCL.listPlatforms();
            System.out.println("Found platforms : " + Arrays.asList(platforms));
            CLPlatform platform = platforms[0];
            String v = platform.getVendor();
            System.out.println("Platform Vendor: " + v);
            // A much smaller data set is used when the first platform is AMD's.
            boolean isAMD = v.equals("Advanced Micro Devices, Inc.");
            int loops = 10;
            int dataSize = isAMD ? 1024 : 1024 * 1024;

            CLContext context = JavaCL.createBestContext();
            boolean hasDoubleSupport = context.isDoubleSupported();

            if (!hasDoubleSupport) {
                System.out.println("OpenCL context does not support double precision computations : skipping second part of the\ttest");
            } else {
                System.out.println("#\n# [Double Operations]\n#");
                ExecResult<DoubleBuffer> nsByJavaOp = testJava_double_aSinB(loops, dataSize);
                ExecResult<Pointer<Double>> nsByCLHostedOp = testOpenCL_double_aSinB(context, loops, dataSize, true);
                ExecResult<Pointer<Double>> nsByNativeHostedCLOp = testOpenCL_double_aSinB(context, loops, dataSize, false);
                double errCLHosted = avgError(nsByJavaOp.buffer, nsByCLHostedOp.buffer, dataSize);
                double errNativeHosted = avgError(nsByJavaOp.buffer, nsByNativeHostedCLOp.buffer, dataSize);

                printReport(nsByJavaOp, nsByCLHostedOp, nsByNativeHostedCLOp, errCLHosted, errNativeHosted);
            }

            System.out.println("#\n# [Float Operations]\n#");
            ExecResult<FloatBuffer> floatJavaOp = testJava_float_aSinB(loops, dataSize);
            ExecResult<Pointer<Float>> floatCLHostedOp = testOpenCL_float_aSinB(context, loops, dataSize, true);
            ExecResult<Pointer<Float>> floatNativeHostedCLOp = testOpenCL_float_aSinB(context, loops, dataSize, false);
            double floatErrCLHosted = avgError(floatJavaOp.buffer, floatCLHostedOp.buffer, dataSize);
            double floatErrNativeHosted = avgError(floatJavaOp.buffer, floatNativeHostedCLOp.buffer, dataSize);

            /*for (int i = 0; i < 10; i++) {
            System.out.print("i\t = " + i + ",\t");
            System.out.print("buf \t=" + nsByJavaOp.buffer.get(i) +",\t");
            System.out.print("nat \t=" + nsByNativeHostedCLOp.buffer.get(i) +",\t");
            System.out.print("ocl \t=" + nsByCLHostedOp.buffer.get(i) +",\t");
            System.out.println();
            }*/
            printReport(floatJavaOp, floatCLHostedOp, floatNativeHostedCLOp, floatErrCLHosted, floatErrNativeHosted);

        } catch (CLBuildException e) {
            // A kernel that does not build must fail the test rather than be logged and reported as a pass.
            throw new RuntimeException("OpenCL kernel build failed during benchmark", e);
        }
    }

    /**
     * Prints the average relative errors and the per-element timings of one benchmark
     * section, including the OpenCL/Java speed ratios in both directions.
     */
    private static void printReport(ExecResult<?> javaOp, ExecResult<?> clHostedOp, ExecResult<?> nativeHostedOp, double errCLHosted, double errNativeHosted) {
        System.out.println(" Avg relative error (hosted in CL) = " + errCLHosted);
        System.out.println("Avg relative error (hosted in RAM) = " + errNativeHosted);
        System.out.println();

        System.out.println("                  java op\t= " + javaOp.unitTimeNano + " ns");
        System.out.println();
        System.out.println(" opencl (hosted in CL) op\t= " + clHostedOp.unitTimeNano + " ns");
        System.out.println("    times slower than Java = " + (clHostedOp.unitTimeNano / javaOp.unitTimeNano));
        System.out.println("    times faster than Java = " + (javaOp.unitTimeNano / clHostedOp.unitTimeNano));
        System.out.println();
        System.out.println("opencl (hosted in RAM) op\t= " + nativeHostedOp.unitTimeNano + " ns");
        System.out.println("    times slower than Java = " + (nativeHostedOp.unitTimeNano / javaOp.unitTimeNano));
        System.out.println("    times faster than Java = " + (javaOp.unitTimeNano / nativeHostedOp.unitTimeNano));
    }
}