package org.deeplearning4j.gradientcheck;
import org.deeplearning4j.nn.api.*;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.variational.*;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.impl.ActivationIdentity;
import org.nd4j.linalg.activations.impl.ActivationTanH;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.buffer.util.DataTypeUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.random.impl.BernoulliDistribution;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
import org.nd4j.linalg.lossfunctions.impl.LossMAE;
import org.nd4j.linalg.lossfunctions.impl.LossMSE;
import java.util.Arrays;
import static org.junit.Assert.assertTrue;
/**
 * Gradient checks for the variational autoencoder layer: use as an MLP after pretraining,
 * unsupervised (pretraining) gradients, the various reconstruction distributions, and
 * pretraining with multiple samples of the latent variable.
 *
 * @author Alex Black
 */
public class VaeGradientCheckTests {
private static final boolean PRINT_RESULTS = true;
private static final boolean RETURN_ON_FIRST_FAILURE = false;
private static final double DEFAULT_EPS = 1e-6;
private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;
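    //Gradient checks require double precision: 32-bit floats are generally too imprecise for the finite-difference comparison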
static {
DataTypeUtil.setDTypeForContext(DataBuffer.Type.DOUBLE);
}
@Test
public void testVaeAsMLP() {
        //Post pre-training, a VAE can be used as an MLP by taking the mean value of p(z|x) as the layer output
        //This gradient check tests that supervised forward/backward pass
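        //For example (illustrative sketch only, not executed in this test): after fitting, the VAE layer's
        //feed-forward activations are the mean of p(z|x), which the OutputLayer below consumes directly:
        //  List<INDArray> acts = mln.feedForward(input);
        //  INDArray meanZ = acts.get(1); //activations of layer 0 (the VAE) == mean of p(z|x)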
        String[] activFns = {"identity", "tanh"}; //relu, hardtanh etc. are omitted: their non-differentiable points can cause spurious gradient check failures
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
        String[] outputActivations = {"softmax", "tanh"}; //lossFunctions[i] is paired with outputActivations[i]
        //Regularization settings are applied pairwise: l2vals[k] with l1vals[k], biasL2[k] and biasL1[k]
double[] l2vals = {0.4, 0.0, 0.4, 0.4};
double[] l1vals = {0.0, 0.0, 0.5, 0.0};
double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
int[][] encoderLayerSizes = new int[][] {{5}, {5, 6}};
int[][] decoderLayerSizes = new int[][] {{6}, {7, 8}};
Nd4j.getRandom().setSeed(12345);
for (int minibatch : new int[] {1, 5}) {
INDArray input = Nd4j.rand(minibatch, 4);
INDArray labels = Nd4j.create(minibatch, 3);
for (int i = 0; i < minibatch; i++) {
                labels.putScalar(i, i % 3, 1.0); //one-hot class labels
}
for (int ls = 0; ls < encoderLayerSizes.length; ls++) {
int[] encoderSizes = encoderLayerSizes[ls];
int[] decoderSizes = decoderLayerSizes[ls];
for (String afn : activFns) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunction lf = lossFunctions[i];
String outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder().regularization(true).l2(l2).l1(l1)
.l2Bias(biasL2[k]).l1Bias(biasL1[k])
.optimizationAlgo(
OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.learningRate(1.0).seed(12345L).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(4)
.nOut(3).encoderLayerSizes(encoderSizes)
.decoderLayerSizes(decoderSizes)
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
.activation(afn).updater(
Updater.SGD)
.build())
.layer(1, new OutputLayer.Builder(lf)
.activation(outputActivation).nIn(3).nOut(3)
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
.updater(Updater.SGD).build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
String msg = "testVaeAsMLP() - activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", encLayerSizes = "
+ Arrays.toString(encoderSizes) + ", decLayerSizes = "
+ Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input,
labels);
assertTrue(msg, gradOK);
}
}
}
}
}
}
@Test
public void testVaePretrain() {
        String[] activFns = {"identity", "identity", "tanh", "tanh"}; //relu, hardtanh etc. are omitted: their non-differentiable points can cause spurious gradient check failures
String[] pzxAfns = {"identity", "tanh", "identity", "tanh"};
String[] pxzAfns = {"tanh", "identity", "tanh", "identity"};
        //Regularization settings are applied pairwise: l2vals[j] with l1vals[j], biasL2[j] and biasL1[j]
double[] l2vals = {0.4, 0.0, 0.4, 0.4};
double[] l1vals = {0.0, 0.0, 0.5, 0.0};
double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
int[][] encoderLayerSizes = new int[][] {{5}, {5, 6}};
int[][] decoderLayerSizes = new int[][] {{6}, {7, 8}};
Nd4j.getRandom().setSeed(12345);
for (int minibatch : new int[] {1, 5}) {
INDArray features = Nd4j.rand(minibatch, 4);
for (int ls = 0; ls < encoderLayerSizes.length; ls++) {
int[] encoderSizes = encoderLayerSizes[ls];
int[] decoderSizes = decoderLayerSizes[ls];
for (int j = 0; j < activFns.length; j++) {
String afn = activFns[j];
String pzxAfn = pzxAfns[j];
String pxzAfn = pxzAfns[j];
                    double l2 = l2vals[j]; //Ideally we'd test the full Cartesian product of l1/l2 values and activation functions, but that takes too long...
double l1 = l1vals[j];
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(l2)
.l1(l1).l2Bias(biasL2[j]).l1Bias(biasL1[j])
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.learningRate(1.0).seed(12345L).weightInit(WeightInit.XAVIER).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3)
.encoderLayerSizes(encoderSizes).decoderLayerSizes(decoderSizes)
.pzxActivationFunction(pzxAfn)
.reconstructionDistribution(
new GaussianReconstructionDistribution(pxzAfn))
.activation(afn).updater(Updater.SGD).build())
.pretrain(true).backprop(false).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
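                    //Initialize the flattened gradient view arrays: needed before gradient-checking the layer directly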
mln.initGradientsView();
org.deeplearning4j.nn.api.Layer layer = mln.getLayer(0);
String msg = "testVaePretrain() - activationFn=" + afn + ", p(z|x) afn = " + pzxAfn
+ ", p(x|z) afn = " + pxzAfn + ", encLayerSizes = " + Arrays.toString(encoderSizes)
+ ", decLayerSizes = " + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1="
+ l1;
if (PRINT_RESULTS) {
System.out.println(msg);
for (int l = 0; l < mln.getnLayers(); l++)
System.out.println("Layer " + l + " # params: " + mln.getLayer(l).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradientsPretrainLayer(layer, DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE, features, 12345);
assertTrue(msg, gradOK);
}
}
}
}
@Test
public void testVaePretrainReconstructionDistributions() {
int inOutSize = 6;
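        //6 columns: chosen so the composite distribution below can split them into three blocks of 2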
ReconstructionDistribution[] reconstructionDistributions =
new ReconstructionDistribution[] {new GaussianReconstructionDistribution(Activation.IDENTITY),
new GaussianReconstructionDistribution(Activation.TANH),
new BernoulliReconstructionDistribution(Activation.SIGMOID),
new CompositeReconstructionDistribution.Builder()
.addDistribution(2,
new GaussianReconstructionDistribution(
Activation.IDENTITY))
.addDistribution(2, new BernoulliReconstructionDistribution())
.addDistribution(2,
new GaussianReconstructionDistribution(
Activation.TANH))
.build(),
                                        new ExponentialReconstructionDistribution(Activation.IDENTITY),
                                        new ExponentialReconstructionDistribution(Activation.TANH),
new LossFunctionWrapper(new ActivationTanH(), new LossMSE()),
new LossFunctionWrapper(new ActivationIdentity(), new LossMAE())};
Nd4j.getRandom().setSeed(12345);
for (int minibatch : new int[] {1, 5}) {
for (int i = 0; i < reconstructionDistributions.length; i++) {
INDArray data;
switch (i) {
case 0: //Gaussian + identity
case 1: //Gaussian + tanh
data = Nd4j.rand(minibatch, inOutSize);
break;
case 2: //Bernoulli
data = Nd4j.create(minibatch, inOutSize);
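                        //Fill with iid Bernoulli(0.5) samples: the Bernoulli reconstruction distribution expects binary {0,1} data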
Nd4j.getExecutioner().exec(new BernoulliDistribution(data, 0.5), Nd4j.getRandom());
break;
case 3: //Composite
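                        //Columns 0-1: Gaussian (identity); columns 2-3: Bernoulli; columns 4-5: Gaussian (tanh), matching the composite distribution above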
data = Nd4j.create(minibatch, inOutSize);
data.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 2)).assign(Nd4j.rand(minibatch, 2));
Nd4j.getExecutioner()
.exec(new BernoulliDistribution(
data.get(NDArrayIndex.all(), NDArrayIndex.interval(2, 4)), 0.5),
Nd4j.getRandom());
data.get(NDArrayIndex.all(), NDArrayIndex.interval(4, 6)).assign(Nd4j.rand(minibatch, 2));
break;
                    case 4: //Exponential + identity
                    case 5: //Exponential + tanh
                        data = Nd4j.rand(minibatch, inOutSize);
                        break;
                    case 6: //LossFunctionWrapper: tanh + MSE
                    case 7: //LossFunctionWrapper: identity + MAE
                        data = Nd4j.randn(minibatch, inOutSize);
                        break;
                    default:
                        throw new RuntimeException("Invalid reconstruction distribution index: " + i);
}
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.3)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1.0)
.seed(12345L).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
.list().layer(0,
new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3)
.encoderLayerSizes(5).decoderLayerSizes(6)
.pzxActivationFunction(Activation.TANH)
.reconstructionDistribution(
reconstructionDistributions[i])
.activation(Activation.TANH).updater(Updater.SGD)
.build())
.pretrain(true).backprop(false).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
mln.initGradientsView();
org.deeplearning4j.nn.api.Layer layer = mln.getLayer(0);
String msg = "testVaePretrainReconstructionDistributions() - " + reconstructionDistributions[i];
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradientsPretrainLayer(layer, DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
data, 12345);
assertTrue(msg, gradOK);
}
}
}
@Test
public void testVaePretrainMultipleSamples() {
Nd4j.getRandom().setSeed(12345);
for (int minibatch : new int[] {1, 5}) {
for (int numSamples : new int[] {1, 10}) {
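                //numSamples: number of Monte Carlo samples of the latent variable z drawn per data point during pretraining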
INDArray features = Nd4j.rand(minibatch, 4);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.3)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1.0)
.seed(12345L).weightInit(WeightInit.XAVIER).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(5, 6)
.decoderLayerSizes(7, 8).pzxActivationFunction(Activation.TANH)
.reconstructionDistribution(
new GaussianReconstructionDistribution(Activation.TANH))
.numSamples(numSamples).activation(Activation.TANH).updater(Updater.SGD)
.build())
.pretrain(true).backprop(false).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
mln.initGradientsView();
org.deeplearning4j.nn.api.Layer layer = mln.getLayer(0);
String msg = "testVaePretrainMultipleSamples() - numSamples = " + numSamples;
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradientsPretrainLayer(layer, DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
features, 12345);
assertTrue(msg, gradOK);
}
}
}
}