package org.deeplearning4j.gradientcheck;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.layers.convolution.ConvolutionHelper;
import org.deeplearning4j.nn.layers.convolution.CudnnConvolutionHelper;
import org.deeplearning4j.nn.layers.normalization.BatchNormalizationHelper;
import org.deeplearning4j.nn.layers.normalization.CudnnBatchNormalizationHelper;
import org.deeplearning4j.nn.layers.normalization.CudnnLocalResponseNormalizationHelper;
import org.deeplearning4j.nn.layers.normalization.LocalResponseNormalizationHelper;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.buffer.util.DataTypeUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.lang.reflect.Field;
import java.util.Random;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
/**
 * Gradient checks for cuDNN-backed layers (convolution, batch normalization, local response
 * normalization): verifies that the analytic gradients computed via the cuDNN helpers match
 * numerically computed (finite difference) gradients.
 *
 * Created by Alex on 09/09/2016.
 */
public class CuDNNGradientChecks {
private static final boolean PRINT_RESULTS = true;
private static final boolean RETURN_ON_FIRST_FAILURE = false;
private static final double DEFAULT_EPS = 1e-5;
private static final double DEFAULT_MAX_REL_ERROR = 1e-2;
private static final double DEFAULT_MIN_ABS_ERROR = 1e-6;
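    // Run all gradient checks in double precision: float precision is generally too inaccurate
    // for reliable finite-difference gradient comparisons.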
static {
DataTypeUtil.setDTypeForContext(DataBuffer.Type.DOUBLE);
}
@Test
public void testConvolutional() throws Exception {
        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) whether to test at random initialization or after some learning (i.e., 'characteristic mode of operation')
        // (c) loss function (with specified output activations)
String[] activFns = {"sigmoid", "tanh"};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
int[] minibatchSizes = {1, 4};
int width = 6;
int height = 6;
int inputDepth = 2;
int nOut = 3;
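        // The 'helper' field is private; access it via reflection to verify that the
        // cuDNN-backed implementation is actually in use for each convolution layer.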
Field f = org.deeplearning4j.nn.layers.convolution.ConvolutionLayer.class.getDeclaredField("helper");
f.setAccessible(true);
Random r = new Random(12345);
for (String afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(new int[] {minibatchSize, inputDepth, height, width});
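                    // Random one-hot labels: one randomly selected class per example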
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
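                    // No updater and no regularization: the gradient check compares the raw
                    // backprop gradients against numerical gradients.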
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().regularization(false)
.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-1, 1))
.updater(Updater.NONE).seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(1, 1).nOut(3)
.activation(afn).build())
.layer(1, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(0, 0).nOut(3)
.activation(afn).build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutional(height, width, inputDepth)).pretrain(false)
.backprop(true);
MultiLayerConfiguration conf = builder.build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c0 =
(org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(0);
ConvolutionHelper ch0 = (ConvolutionHelper) f.get(c0);
assertTrue(ch0 instanceof CudnnConvolutionHelper);
org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c1 =
(org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(1);
ConvolutionHelper ch1 = (ConvolutionHelper) f.get(c1);
assertTrue(ch1 instanceof CudnnConvolutionHelper);
//-------------------------------
//For debugging/comparison to no-cudnn case: set helper field to null
// f.set(c0, null);
// f.set(c1, null);
// assertNull(f.get(c0));
// assertNull(f.get(c1));
//-------------------------------
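                    // Resolve the current test method's name (for assertion messages) via an
                    // anonymous class and its enclosing method.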
String name = new Object() {}.getClass().getEnclosingMethod().getName();
if (doLearningFirst) {
//Run a number of iterations of learning
mln.setInput(input);
mln.setLabels(labels);
mln.computeGradientAndScore();
double scoreBefore = mln.score();
for (int j = 0; j < 10; j++)
mln.fit(input, labels);
mln.computeGradientAndScore();
double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if the score doesn't decrease (i.e., if learning failed)
                        String msg = name + " - score did not (sufficiently) decrease during learning - activationFn="
                                        + afn + ", doLearningFirst=" + doLearningFirst + " (scoreBefore=" + scoreBefore
                                        + ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
}
if (PRINT_RESULTS) {
System.out.println(name + " - activationFn=" + afn + ", doLearningFirst=" + doLearningFirst);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
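                    // Compare analytic (backprop, via cuDNN) gradients against numerical
                    // central-difference gradients for all parameters.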
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
}
}
}
}
@Test
public void testBatchNormCnn() throws Exception {
        //Note: as of cuDNN 5.1, cuDNN batch normalization supports 4d activations only (per the cuDNN API reference documentation)
Nd4j.getRandom().setSeed(12345);
int minibatch = 10;
int depth = 1;
int hw = 4;
int nOut = 4;
INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
INDArray labels = Nd4j.zeros(minibatch, nOut);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0)
.regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
.activation(Activation.IDENTITY).build())
.layer(1, new BatchNormalization.Builder().build())
.layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build())
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutional(hw, hw, depth)).pretrain(false).backprop(true);
MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
mln.init();
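        // Verify via reflection that the batch normalization layer is backed by the cuDNN helper.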
Field f = org.deeplearning4j.nn.layers.normalization.BatchNormalization.class.getDeclaredField("helper");
f.setAccessible(true);
org.deeplearning4j.nn.layers.normalization.BatchNormalization b =
(org.deeplearning4j.nn.layers.normalization.BatchNormalization) mln.getLayer(1);
BatchNormalizationHelper bn = (BatchNormalizationHelper) f.get(b);
assertTrue(bn instanceof CudnnBatchNormalizationHelper);
//-------------------------------
//For debugging/comparison to no-cudnn case: set helper field to null
// f.set(b, null);
// assertNull(f.get(b));
//-------------------------------
if (PRINT_RESULTS) {
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
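        // Gradient check of the cuDNN batch normalization forward/backward pass.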
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
}
@Test
public void testLRN() throws Exception {
Nd4j.getRandom().setSeed(12345);
int minibatch = 10;
int depth = 6;
int hw = 5;
int nOut = 4;
INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
INDArray labels = Nd4j.zeros(minibatch, nOut);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0)
.regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new ConvolutionLayer.Builder().nOut(6).kernelSize(2, 2).stride(1, 1)
.activation(Activation.TANH).build())
.layer(1, new LocalResponseNormalization.Builder().build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutional(hw, hw, depth)).pretrain(false).backprop(true);
MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
mln.init();
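        // Verify via reflection that the LRN layer is backed by the cuDNN helper.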
Field f = org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization.class
.getDeclaredField("helper");
f.setAccessible(true);
org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization l =
(org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization) mln.getLayer(1);
LocalResponseNormalizationHelper lrn = (LocalResponseNormalizationHelper) f.get(l);
assertTrue(lrn instanceof CudnnLocalResponseNormalizationHelper);
//-------------------------------
//For debugging/comparison to no-cudnn case: set helper field to null
// f.set(l, null);
// assertNull(f.get(l));
//-------------------------------
if (PRINT_RESULTS) {
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
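        // Gradient check of the cuDNN local response normalization forward/backward pass.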
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
}
}