/*- * * * Copyright 2016 Skymind,Inc. * * * * Licensed under the Apache License, Version 2.0 (the "License"); * * you may not use this file except in compliance with the License. * * You may obtain a copy of the License at * * * * http://www.apache.org/licenses/LICENSE-2.0 * * * * Unless required by applicable law or agreed to in writing, software * * distributed under the License is distributed on an "AS IS" BASIS, * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * * limitations under the License. * */ package org.deeplearning4j.nn.graph.vertex.impl; import org.deeplearning4j.berkeley.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.transforms.Or; import org.nd4j.linalg.factory.Nd4j; /** * Adds the ability to reshape and flatten the tensor in the computation graph. This is the equivalent * of calling {@code .reshape(new int[]{})} on the input array to the vertex and passing the new shape * to the next layer. ReshapeVertex also ensures the shape is valid for the backward pass. * * @author Justin Long (crockpotveggies) */ public class ReshapeVertex extends BaseGraphVertex { private int[] newShape; public ReshapeVertex(ComputationGraph graph, String name, int vertexIndex, int[] newShape) { this(graph, name, vertexIndex, null, null, newShape); } public ReshapeVertex(ComputationGraph graph, String name, int vertexIndex, VertexIndices[] inputVertices, VertexIndices[] outputVertices, int[] newShape) { super(graph, name, vertexIndex, inputVertices, outputVertices); this.newShape = newShape; } @Override public boolean hasLayer() { return false; } @Override public boolean isOutputVertex() { return false; } @Override public Layer getLayer() { return null; } @Override public INDArray doForward(boolean training) { if (!canDoForward()) throw new IllegalStateException("Cannot do forward pass: inputs not set"); if (inputs.length > 1) throw new IllegalStateException("Reshape vertex requires a single input."); return inputs[0].reshape(inputs[0].ordering(), newShape); } @Override public Pair<Gradient, INDArray[]> doBackward(boolean tbptt) { if (!canDoBackward()) throw new IllegalStateException("Cannot do backward pass: errors not set"); INDArray[] out = new INDArray[1]; out[0] = epsilon.reshape(inputs[0].ordering(), inputs[0].shape()); return new Pair<>(null, out); } @Override public void setBackpropGradientsViewArray(INDArray backpropGradientsViewArray) { if (backpropGradientsViewArray != null) throw new RuntimeException("Vertex does not have gradients; gradients view array cannot be set here"); } @Override public Pair<INDArray, MaskState> feedForwardMaskArrays(INDArray[] maskArrays, MaskState currentMaskState, int minibatchSize) { if (maskArrays == null) { return new Pair<>(null, currentMaskState); } //Most common case: all or none. //If there's only *some* mask arrays: assume the others (missing) are equivalent to all 1s //And for handling multiple masks: best strategy seems to be an OR operation //i.e., output is 1 if any of the input are 1s //Which means: if any masks are missing, output null (equivalent to no mask, or all steps present) //Otherwise do an element-wise OR operation for (INDArray arr : maskArrays) { if (arr == null) { return new Pair<>(null, currentMaskState); } } //At this point: all present. Do OR operation if (maskArrays.length == 1) { return new Pair<>(maskArrays[0], currentMaskState); } else { INDArray ret = maskArrays[0].dup(maskArrays[0].ordering()); Nd4j.getExecutioner().exec(new Or(maskArrays[0], maskArrays[1], ret)); for (int i = 2; i < maskArrays.length; i++) { Nd4j.getExecutioner().exec(new Or(maskArrays[i], ret, ret)); } return new Pair<>(ret, currentMaskState); } } @Override public String toString() { return "ReshapeVertex(id=" + this.getVertexIndex() + ",name=\"" + this.getVertexName() + "\",shape=" + newShape.toString() + ")"; } }