package weka.core.matrix;
/*
* Matrix Multiplication
* JCublas - Java bindings for CUBLAS, the NVIDIA CUDA BLAS library,
* to be used with JCuda <br />
* http://www.jcuda.org
*/
/**
* @authors
* {tengel,andrea}@inf.ufsm.br,
* Luiz-Angelo.Steffenel@univ-reims.fr,
* Manuele.Kirsch-Pinheiro@univ-paris1.fr
*/
import static jcuda.jcublas.JCublas2.*;
import static jcuda.runtime.JCuda.*;
import jcuda.*;
import jcuda.jcublas.cublasHandle;
import static jcuda.jcublas.cublasOperation.CUBLAS_OP_N;
/**
* This class performs a BLAS 'dgemm' operation, i.e. for computing the matrix <br />
* C = alpha * A * B + beta * C <br /> using double precision
*
* if we set alpha= 1 and beta = 0, we have the matrix multiplication.
*/
public class JCublasMatrixMultiplication {
/**
* Implementation of dgemm using JCublas
*/
public static Matrix times(Matrix MA, Matrix MB) {
double alpha = 1.0f;
double beta = 0.0f;
/// C(m,n) = A(m,k) x B(k,n)
int m = MA.getRowDimension();
int k = MA.getColumnDimension();
int n = MB.getColumnDimension();
double[] A = MA.toArray();
double[] B = MB.toArray();
double[] C = new double[m * n];
// Create a CUBLAS handle
cublasHandle handle = new cublasHandle();
cublasCreate(handle);
// Allocate memory on the device
Pointer d_A = new Pointer();
Pointer d_B = new Pointer();
Pointer d_C = new Pointer();
cudaMalloc(d_A, A.length * Sizeof.DOUBLE);
cudaMalloc(d_B, B.length * Sizeof.DOUBLE);
cudaMalloc(d_C, C.length * Sizeof.DOUBLE);
// Copy the memory from the host to the device
cublasSetVector(A.length, Sizeof.DOUBLE, Pointer.to(A), 1, d_A, 1);
cublasSetVector(B.length, Sizeof.DOUBLE, Pointer.to(B), 1, d_B, 1);
cublasSetVector(C.length, Sizeof.DOUBLE, Pointer.to(C), 1, d_C, 1);
// Execute dgemm
Pointer pAlpha = Pointer.to(new double[]{alpha});
Pointer pBeta = Pointer.to(new double[]{beta});
cublasDgemm(handle, CUBLAS_OP_N,CUBLAS_OP_N, n, m, k, pAlpha, d_B, n, d_A, k, pBeta, d_C, n);
// Copy the result from the device to the host
cublasGetVector(m * n, Sizeof.DOUBLE, d_C, 1, Pointer.to(C), 1);
// Clean up
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_C);
cublasDestroy(handle);
return new Matrix(arrayToMatrix(C, m, n), m, n);
}
static double[][] arrayToMatrix(double[] v, int nlin, int ncol) {
double[][] m = new double[nlin][ncol];
for (int i = 0; i < nlin; i++) {
System.arraycopy(v, i * ncol, m[i], 0, ncol);
}
return m;
}
}