/*-
*
 * * Copyright 2015 Skymind, Inc.
* *
* * Licensed under the Apache License, Version 2.0 (the "License");
* * you may not use this file except in compliance with the License.
* * You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
*
*
*/
package org.nd4j.linalg.jcublas;
import org.nd4j.jita.allocator.enums.CudaConstants;
import org.nd4j.jita.allocator.impl.AllocationPoint;
import org.nd4j.jita.allocator.impl.AtomicAllocator;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.buffer.FloatBuffer;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.BaseNDArray;
import org.nd4j.linalg.api.ndarray.BaseNDArrayProxy;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.executioner.GridExecutioner;
import org.nd4j.linalg.exception.ND4JIllegalStateException;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.jcublas.context.CudaContext;
import org.nd4j.nativeblas.NativeOpsHolder;
import java.util.List;
/**
 *
 * CUDA backend implementation of {@link BaseNDArray}, backed by cuBLAS and the
 * JITA allocator, which keeps host and device buffers in sync.
 *
* Created by mjk on 8/23/14.
*
* @author mjk
* @author Adam Gibson
* @author raver119@gmail.com
*/
public class JCublasNDArray extends BaseNDArray {
public JCublasNDArray(double[][] data) {
super(data);
}
public JCublasNDArray(double[][] data, char ordering) {
super(data, ordering);
}
public JCublasNDArray(int[] shape, DataBuffer buffer) {
super(shape, buffer);
}
/**
* Create this JCublasNDArray with the given data and shape and 0 offset
*
* @param data the data to use
* @param shape the shape of the JCublasNDArray
 * @param ordering the ordering of the JCublasNDArray ('c' or 'f')
*/
public JCublasNDArray(float[] data, int[] shape, char ordering) {
super(data, shape, ordering);
}
/**
* @param data the data to use
* @param shape the shape of the JCublasNDArray
* @param offset the desired offset
* @param ordering the ordering of the JCublasNDArray
*/
public JCublasNDArray(float[] data, int[] shape, int offset, char ordering) {
super(data, shape, offset, ordering);
}
/**
 * Construct a JCublasNDArray of the specified shape
* with an empty data array
*
* @param shape the shape of the JCublasNDArray
* @param stride the stride of the JCublasNDArray
* @param offset the desired offset
* @param ordering the ordering of the JCublasNDArray
*/
public JCublasNDArray(int[] shape, int[] stride, int offset, char ordering) {
super(shape, stride, offset, ordering);
}
/**
 * Construct a JCublasNDArray of the specified shape, with optional initialization
*
* @param shape the shape of the JCublasNDArray
* @param stride the stride of the JCublasNDArray
* @param offset the desired offset
* @param ordering the ordering of the JCublasNDArray
 * @param initialize whether to initialize the underlying data buffer (true) or leave it uninitialized (false)
*/
public JCublasNDArray(int[] shape, int[] stride, int offset, char ordering, boolean initialize) {
super(shape, stride, offset, ordering, initialize);
}
/**
* Create the JCublasNDArray with
* the specified shape and stride and an offset of 0
*
* @param shape the shape of the JCublasNDArray
* @param stride the stride of the JCublasNDArray
* @param ordering the ordering of the JCublasNDArray
*/
public JCublasNDArray(int[] shape, int[] stride, char ordering) {
super(shape, stride, ordering);
}
public JCublasNDArray(int[] shape, int offset, char ordering) {
super(shape, offset, ordering);
}
public JCublasNDArray(int[] shape) {
super(shape);
}
/**
 * Creates a new <i>n</i> times <i>m</i> <tt>JCublasNDArray</tt>.
*
* @param newRows the number of rows (<i>n</i>) of the new matrix.
* @param newColumns the number of columns (<i>m</i>) of the new matrix.
 * @param ordering the ordering of the matrix ('c' or 'f')
*/
public JCublasNDArray(int newRows, int newColumns, char ordering) {
super(newRows, newColumns, ordering);
}
/**
 * Create a JCublasNDArray from the specified slices.
 * This merges the data from each slice into one
 * JCublasNDArray, which then takes the specified shape.
*
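 * A minimal sketch (assumes java.util.Arrays and standard Nd4j factory methods):
 * <pre>{@code
 * List<INDArray> slices = Arrays.asList(Nd4j.ones(2, 2), Nd4j.zeros(2, 2), Nd4j.ones(2, 2));
 * INDArray merged = new JCublasNDArray(slices, new int[] {3, 2, 2}, 'c'); // 3 slices of 2x2
 * }</pre>
 *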
* @param slices the slices to merge
* @param shape the shape of the JCublasNDArray
 * @param ordering the ordering of the JCublasNDArray ('c' or 'f')
*/
public JCublasNDArray(List<INDArray> slices, int[] shape, char ordering) {
super(slices, shape, ordering);
}
/**
 * Create a JCublasNDArray from the specified slices.
 * This merges the data from each slice into one
 * JCublasNDArray, which then takes the specified shape.
*
* @param slices the slices to merge
* @param shape the shape of the JCublasNDArray
 * @param stride the stride of the JCublasNDArray
 * @param ordering the ordering of the JCublasNDArray ('c' or 'f')
*/
public JCublasNDArray(List<INDArray> slices, int[] shape, int[] stride, char ordering) {
super(slices, shape, stride, ordering);
}
public JCublasNDArray(float[] data, int[] shape, int[] stride, char ordering) {
super(data, shape, stride, ordering);
}
public JCublasNDArray(float[] data, int[] shape, int[] stride, int offset, char ordering) {
super(data, shape, stride, offset, ordering);
}
public JCublasNDArray(DataBuffer data, int[] shape, int[] stride, int offset) {
super(data, shape, stride, offset);
}
public JCublasNDArray(int[] data, int[] shape, int[] strides) {
super(data, shape, strides);
}
public JCublasNDArray(DataBuffer data, int[] shape) {
super(data, shape);
}
public JCublasNDArray(DataBuffer buffer, int[] shape, int offset) {
super(buffer, shape, offset);
}
/**
* Create this JCublasNDArray with the given data and shape and 0 offset
*
* @param data the data to use
* @param shape the shape of the JCublasNDArray
*/
public JCublasNDArray(float[] data, int[] shape) {
super(data, shape);
}
public JCublasNDArray(float[] data, int[] shape, int offset) {
super(data, shape, offset);
}
/**
 * Construct a JCublasNDArray of the specified shape
* with an empty data array
*
* @param shape the shape of the JCublasNDArray
* @param stride the stride of the JCublasNDArray
* @param offset the desired offset
*/
public JCublasNDArray(int[] shape, int[] stride, int offset) {
super(shape, stride, offset);
}
/**
* Create the JCublasNDArray with
* the specified shape and stride and an offset of 0
*
* @param shape the shape of the JCublasNDArray
* @param stride the stride of the JCublasNDArray
*/
public JCublasNDArray(int[] shape, int[] stride) {
super(shape, stride);
}
public JCublasNDArray(int[] shape, int offset) {
super(shape, offset);
}
public JCublasNDArray(int[] shape, char ordering) {
super(shape, ordering);
}
/**
 * Creates a new <i>n</i> times <i>m</i> <tt>JCublasNDArray</tt>.
*
* @param newRows the number of rows (<i>n</i>) of the new matrix.
* @param newColumns the number of columns (<i>m</i>) of the new matrix.
*/
public JCublasNDArray(int newRows, int newColumns) {
super(newRows, newColumns);
}
/**
 * Create a JCublasNDArray from the specified slices.
 * This merges the data from each slice into one
 * JCublasNDArray, which then takes the specified shape.
*
* @param slices the slices to merge
* @param shape the shape of the JCublasNDArray
*/
public JCublasNDArray(List<INDArray> slices, int[] shape) {
super(slices, shape);
}
/**
 * Create a JCublasNDArray from the specified slices.
 * This merges the data from each slice into one
 * JCublasNDArray, which then takes the specified shape.
*
* @param slices the slices to merge
* @param shape the shape of the JCublasNDArray
 * @param stride the stride of the JCublasNDArray
*/
public JCublasNDArray(List<INDArray> slices, int[] shape, int[] stride) {
super(slices, shape, stride);
}
public JCublasNDArray(float[] data, int[] shape, int[] stride) {
super(data, shape, stride);
}
public JCublasNDArray(float[] data, int[] shape, int[] stride, int offset) {
super(data, shape, stride, offset);
}
public JCublasNDArray(float[] data) {
super(data);
}
    /**
     * Copy constructor: builds a new array with the same shape as the source and a
     * duplicate of its data. (The previous version duplicated the freshly allocated
     * empty array instead of the source.)
     *
     * @param other the JCublasNDArray to copy
     */
    public JCublasNDArray(JCublasNDArray other) {
        this(new int[] {other.rows, other.columns});
        this.data = other.dup().data();
    }
public JCublasNDArray(double[] data, int[] shape, int[] stride, int offset) {
super(data, shape, stride, offset);
}
public JCublasNDArray(float[][] floats) {
super(floats);
}
public JCublasNDArray(float[][] data, char ordering) {
super(data, ordering);
}
public JCublasNDArray(DataBuffer buffer, int[] shape, int offset, char ordering) {
super(buffer, shape, offset, ordering);
}
public JCublasNDArray() {}
public JCublasNDArray(DataBuffer buffer) {
super(buffer);
}
public JCublasNDArray(DataBuffer buffer, int[] shape, int[] stride, int offset, char ordering) {
super(buffer, shape, stride, offset, ordering);
}
public JCublasNDArray(float[] data, char order) {
super(data, order);
}
public JCublasNDArray(FloatBuffer floatBuffer, char order) {
super(floatBuffer, order);
}
public JCublasNDArray(DataBuffer buffer, int[] shape, int[] strides) {
super(buffer, shape, strides);
}
public JCublasNDArray(double[] data, int[] shape, char ordering) {
super(data, shape, ordering);
}
public JCublasNDArray(double[] data, int[] shape, int[] stride, int offset, char ordering) {
super(data, shape, stride, offset, ordering);
}
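    /**
     * Duplicates this array. If the array is compressed and its ordering matches the
     * default order, the compressed buffer is cloned directly; otherwise this falls
     * back to the base implementation.
     *
     * A minimal usage sketch (assumes standard Nd4j factory methods):
     * <pre>{@code
     * INDArray a = Nd4j.linspace(1, 6, 6).reshape(2, 3);
     * INDArray b = a.dup();   // deep copy with an independent buffer
     * b.putScalar(0, 42.0);   // does not affect a
     * }</pre>
     */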
@Override
public INDArray dup() {
if (this.isCompressed() && this.ordering() == Nd4j.order().charValue()) {
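            // Compressed array with matching ordering: clone the compressed buffer
            // directly and keep the result flagged as compressed.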
INDArray ret = Nd4j.createArrayFromShapeBuffer(data().dup(), this.shapeInfoDataBuffer());
ret.markAsCompressed(true);
return ret;
}
        /*
            Special case for CUDA: if this is not a view and the ordering/strides match,
            duplication could be done with a direct device-side copy. That path is kept
            below for reference; for now we fall through to super.dup().
         */
/*
if (!isView() && ordering() == Nd4j.order() && Shape.strideDescendingCAscendingF(this)) {
AtomicAllocator allocator = AtomicAllocator.getInstance();
INDArray array = Nd4j.createUninitialized(shape(), ordering());
CudaContext context = allocator.getFlowController().prepareAction(array, this);
Configuration configuration = CudaEnvironment.getInstance().getConfiguration();
if (configuration.getMemoryModel() == Configuration.MemoryModel.IMMEDIATE && configuration.getFirstMemory() == AllocationStatus.DEVICE) {
// log.info("Path 0");
allocator.memcpyDevice(array.data(), allocator.getPointer(this.data, context), this.data.length() * this.data().getElementSize(), 0, context);
} else if (configuration.getMemoryModel() == Configuration.MemoryModel.DELAYED || configuration.getFirstMemory() == AllocationStatus.HOST) {
AllocationPoint pointSrc = allocator.getAllocationPoint(this);
AllocationPoint pointDst = allocator.getAllocationPoint(array);
if (pointSrc.getAllocationStatus() == AllocationStatus.HOST) {
// log.info("Path A");
NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getPointers().getHostPointer(), pointSrc.getPointers().getHostPointer(), length * data.getElementSize(), CudaConstants.cudaMemcpyHostToHost, context.getOldStream());
} else {
// log.info("Path B. SRC dId: [{}], DST dId: [{}], cId: [{}]", pointSrc.getDeviceId(), pointDst.getDeviceId(), allocator.getDeviceId());
// this code branch is possible only with DELAYED memoryModel and src point being allocated on device
if (pointDst.getAllocationStatus() != AllocationStatus.DEVICE) {
allocator.getMemoryHandler().alloc(AllocationStatus.DEVICE, pointDst, pointDst.getShape(), false);
}
NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getPointers().getDevicePointer(), pointSrc.getPointers().getHostPointer(), length * data.getElementSize(), CudaConstants.cudaMemcpyHostToDevice, context.getOldStream());
}
}
allocator.getFlowController().registerAction(context, array, this);
return array;
        } else */ return super.dup();
}
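    /**
     * Duplicates this array with the requested ordering. As with {@link #dup()},
     * compressed arrays whose ordering already matches are cloned buffer-to-buffer.
     *
     * A minimal sketch:
     * <pre>{@code
     * INDArray c = Nd4j.create(2, 3); // default ordering
     * INDArray f = c.dup('f');        // same values, Fortran ('f') ordering
     * }</pre>
     */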
@Override
public INDArray dup(char order) {
if (this.isCompressed() && this.ordering() == order) {
INDArray ret = Nd4j.createArrayFromShapeBuffer(data().dup(), this.shapeInfoDataBuffer());
ret.markAsCompressed(true);
return ret;
}
/*
if (!isView() && ordering() == order && Shape.strideDescendingCAscendingF(this)) {
AtomicAllocator allocator = AtomicAllocator.getInstance();
INDArray array = Nd4j.createUninitialized(shape(), order);
CudaContext context = allocator.getFlowController().prepareAction(array, this);
Configuration configuration = CudaEnvironment.getInstance().getConfiguration();
if (configuration.getMemoryModel() == Configuration.MemoryModel.IMMEDIATE && configuration.getFirstMemory() == AllocationStatus.DEVICE) {
allocator.memcpyDevice(array.data(), allocator.getPointer(this.data, context), this.data.length() * this.data().getElementSize(), 0, context);
} else if (configuration.getMemoryModel() == Configuration.MemoryModel.DELAYED || configuration.getFirstMemory() == AllocationStatus.HOST) {
AllocationPoint pointSrc = allocator.getAllocationPoint(this);
AllocationPoint pointDst = allocator.getAllocationPoint(array);
if (pointSrc.getAllocationStatus() == AllocationStatus.HOST) {
NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getPointers().getHostPointer(), pointSrc.getPointers().getHostPointer(), length * data.getElementSize(), CudaConstants.cudaMemcpyHostToHost, context.getOldStream());
} else {
// this code branch is possible only with DELAYED memoryModel and src point being allocated on device
if (pointDst.getAllocationStatus() != AllocationStatus.DEVICE) {
allocator.getMemoryHandler().alloc(AllocationStatus.DEVICE, pointDst, pointDst.getShape(), false);
}
NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getPointers().getDevicePointer(), pointSrc.getPointers().getDevicePointer(), length * data.getElementSize(), CudaConstants.cudaMemcpyHostToDevice, context.getOldStream());
}
}
allocator.getFlowController().registerAction(context, array, this);
return array;
        } else */ return super.dup(order);
}
@Override
public boolean equals(Object o) {
//if (o != null) AtomicAllocator.getInstance().synchronizeHostData((INDArray) o);
//AtomicAllocator.getInstance().synchronizeHostData(this);
return super.equals(o);
}
/**
 * Generates a string representation of the matrix.
*/
@Override
public String toString() {
return super.toString();
}
/**
 * Replaces this array's shape information buffer.
 *
 * PLEASE NOTE: Never use this method unless you absolutely have to.
 *
 * @param buffer the new shape information buffer
*/
public void setShapeInfoDataBuffer(DataBuffer buffer) {
this.shapeInformation = buffer;
this.javaShapeInformation = shapeInformation.asInt();
}
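    // Serialization proxy: when this array is serialized, it is replaced by a
    // BaseNDArrayProxy holding the data and shape in a backend-independent form.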
private Object writeReplace() throws java.io.ObjectStreamException {
return new BaseNDArrayProxy(this);
}
@Override
public INDArray permutei(int... rearrange) {
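        // If ops are queued on a GridExecutioner, flush them first so nothing executes
        // against the pre-permute shape information once it is changed in place.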
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
return super.permutei(rearrange);
}
/**
 * This method does a direct array copy. It cannot be used on views or arrays with mixed orders.
 *
 * PLEASE NOTE: NEVER USE THIS METHOD UNLESS YOU ARE 100% CLEAR ABOUT WHAT IT DOES.
*
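 * A hedged sketch of safe usage (non-view array, single ordering; factory methods are standard Nd4j API):
 * <pre>{@code
 * INDArray src = Nd4j.create(new float[] {1, 2, 3, 4}, new int[] {2, 2});
 * INDArray copy = src.unsafeDuplication(); // raw device-side memcpy, no view support
 * }</pre>
 *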
 * @return a full copy of this array, produced via a raw device-side copy
*/
@Override
public synchronized INDArray unsafeDuplication() {
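        // Allocate an uninitialized target with the same shape and ordering, then fill
        // it with a single device-side memcpy; views and mixed orders are not handled.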
INDArray ret = Nd4j.createUninitialized(this.shape(), this.ordering());
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
AtomicAllocator allocator = AtomicAllocator.getInstance();
CudaContext context = (CudaContext) allocator.getDeviceContext().getContext();
allocator.memcpyDevice(ret.data(), allocator.getAllocationPoint(this.data).getDevicePointer(),
this.data.length() * this.data().getElementSize(), 0, context);
context.syncOldStream();
return ret;
}
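    /**
     * Leverages this array into the workspace with the given id, copying the data
     * (device-side when possible) into a buffer owned by that workspace. If this array
     * is detached, the target workspace does not exist, or the data already belongs to
     * it, this array is returned unchanged.
     *
     * A hedged sketch (the workspace ids are illustrative only):
     * <pre>{@code
     * try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace("WS_INNER")) {
     *     INDArray tmp = Nd4j.rand(3, 3);
     *     INDArray kept = tmp.leverageTo("WS_OUTER"); // copy now owned by WS_OUTER
     * }
     * }</pre>
     *
     * @param id the id of the target workspace
     */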
@Override
public INDArray leverageTo(String id) {
if (!isAttached()) {
// log.info("Skipping detached");
return this;
}
if (!Nd4j.getWorkspaceManager().checkIfWorkspaceExists(id)) {
// log.info("Skipping non-existent");
return this;
}
MemoryWorkspace current = Nd4j.getMemoryManager().getCurrentWorkspace();
MemoryWorkspace target = Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(id);
if (current == target) {
// log.info("Skipping equals A");
return this;
}
if (this.data.getParentWorkspace() == target) {
// log.info("Skipping equals B");
return this;
}
Nd4j.getMemoryManager().setCurrentWorkspace(target);
// log.info("Leveraging...");
INDArray copy = null;
if (!this.isView()) {
//if (1 < 0) {
Nd4j.getExecutioner().commit();
DataBuffer buffer = Nd4j.createBuffer(this.lengthLong(), false);
AllocationPoint pointDst = AtomicAllocator.getInstance().getAllocationPoint(buffer);
AllocationPoint pointSrc = AtomicAllocator.getInstance().getAllocationPoint(this.data);
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(pointDst, pointSrc);
/*
if (NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(pointDst.getDevicePointer(), 0, 1, 0, context.getOldStream()) == 0)
throw new ND4JIllegalStateException("memsetAsync 1 failed");
context.syncOldStream();
if (NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(pointSrc.getDevicePointer(), 0, 1, 0, context.getOldStream()) == 0)
throw new ND4JIllegalStateException("memsetAsync 2 failed");
context.syncOldStream();
*/
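            // Copy from wherever the source is current: device-to-device if the device
            // side is up to date, otherwise host-to-device.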
if (pointSrc.isActualOnDeviceSide()) {
if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getDevicePointer(), this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType()), CudaConstants.cudaMemcpyDeviceToDevice, context.getOldStream()) == 0)
throw new ND4JIllegalStateException("memcpyAsync failed");
} else {
if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getHostPointer(), this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType()), CudaConstants.cudaMemcpyHostToDevice, context.getOldStream()) == 0)
throw new ND4JIllegalStateException("memcpyAsync failed");
}
context.syncOldStream();
copy = Nd4j.createArrayFromShapeBuffer(buffer, this.shapeInfoDataBuffer());
// tag buffer as valid on device side
pointDst.tickHostRead();
pointDst.tickDeviceWrite();
AtomicAllocator.getInstance().getFlowController().registerAction(context, pointDst, pointSrc);
} else {
copy = this.dup(this.ordering());
Nd4j.getExecutioner().commit();
}
Nd4j.getMemoryManager().setCurrentWorkspace(current);
return copy;
}
/**
 * This method pulls this INDArray into the current Workspace.
 *
 * PLEASE NOTE: if there's no current Workspace, this INDArray is returned as is.
*
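 * A minimal sketch (the workspace id "LOOP" is illustrative):
 * <pre>{@code
 * INDArray a = Nd4j.rand(4, 4); // created outside any workspace
 * try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace("LOOP")) {
 *     INDArray b = a.migrate(); // b is now backed by workspace "LOOP"
 * }
 * }</pre>
 *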
 * @return a copy of this array backed by the current Workspace, or this array itself if no Workspace is open
*/
@Override
public INDArray migrate() {
MemoryWorkspace current = Nd4j.getMemoryManager().getCurrentWorkspace();
if (current == null)
return this;
INDArray copy = null;
if (!this.isView()) {
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
DataBuffer buffer = Nd4j.createBuffer(this.lengthLong(), false);
AllocationPoint pointDst = AtomicAllocator.getInstance().getAllocationPoint(buffer);
AllocationPoint pointSrc = AtomicAllocator.getInstance().getAllocationPoint(this.data);
// CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(pointDst, pointSrc);
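            // Same copy-path selection as leverageTo(): device-to-device when the source
            // is current on the device, otherwise host-to-device.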
if (pointSrc.isActualOnDeviceSide()) {
if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getDevicePointer(), this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType()), CudaConstants.cudaMemcpyDeviceToDevice, context.getOldStream()) == 0)
throw new ND4JIllegalStateException("memcpyAsync failed");
} else {
if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getHostPointer(), this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType()), CudaConstants.cudaMemcpyHostToDevice, context.getOldStream()) == 0)
throw new ND4JIllegalStateException("memcpyAsync failed");
}
context.syncOldStream();
if (pointDst.getDeviceId() != Nd4j.getMemoryManager().getCurrentWorkspace().getDeviceId()) {
//log.info("Swapping [{}] -> [{}]", pointDst.getDeviceId(), Nd4j.getMemoryManager().getCurrentWorkspace().getDeviceId());
pointDst.setDeviceId(Nd4j.getMemoryManager().getCurrentWorkspace().getDeviceId());
}
copy = Nd4j.createArrayFromShapeBuffer(buffer, this.shapeInfoDataBuffer());
// tag buffer as valid on device side
pointDst.tickHostRead();
pointDst.tickDeviceWrite();
AtomicAllocator.getInstance().getFlowController().registerAction(context, pointDst, pointSrc);
} else {
copy = this.dup(this.ordering());
}
return copy;
}
}