/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.hops;

import java.util.ArrayList;

import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.hops.Hop.MultiThreadedHop;
import org.apache.sysml.lops.ConvolutionTransform;
import org.apache.sysml.lops.ConvolutionTransform.OperationTypes;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.lops.LopsException;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.ConvolutionParameters;

public class ConvolutionOp extends Hop implements MultiThreadedHop
{
    private Hop.ConvOp op;
    private int _maxNumThreads = -1; //-1 for unlimited

    private ConvolutionOp() {
        //default constructor for clone
    }

    public ConvolutionOp(String l, DataType dt, ValueType vt, ConvOp o, ArrayList<Hop> inp) {
        super(l, dt, vt);
        op = o;

        for( int i=0; i < inp.size(); i++ ) {
            Hop in = inp.get(i);
            getInput().add(i, in);
            in.getParent().add(this);
        }

        //compute unknown dims and nnz
        refreshSizeInformation();
    }
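    /*
     * Construction sketch (hypothetical, for illustration only; the variable
     * names are not part of this class): a DIRECT_CONV2D HOP receives the image
     * and filter HOPs followed by scalar HOPs for strides, paddings, input
     * shape, and filter shape -- 14 inputs in total, see getNumExpectedInputs():
     *
     *   ArrayList<Hop> in = new ArrayList<Hop>();
     *   in.add(image); in.add(filter);               // data inputs
     *   in.add(strideH); in.add(strideW);            // stride1, stride2
     *   in.add(padH); in.add(padW);                  // padding1, padding2
     *   in.add(n); in.add(c); in.add(h); in.add(w);  // input_shape1..4
     *   in.add(k); in.add(c2); in.add(r); in.add(s); // filter_shape1..4
     *   Hop conv2d = new ConvolutionOp("tmp", DataType.MATRIX, ValueType.DOUBLE,
     *       ConvOp.DIRECT_CONV2D, in);
     */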
    public ConvOp getOp() {
        return op;
    }

    @Override
    public String getOpString() {
        return "" + HopsConv2Lops.get(op);
    }

    private boolean isEligibleForSpark() {
        // return (op == ConvOp.DIRECT_CONV2D || op == ConvOp.MAX_POOLING) ? true : false;
        return false;
    }

    @Override
    public Lop constructLops() throws HopsException, LopsException {
        //return already created lops
        if( getLops() != null )
            return getLops();

        ExecType et = optFindExecType();
        ArrayList<Hop> inputs = getInput();
        switch( op )
        {
            case MAX_POOLING:
            case MAX_POOLING_BACKWARD:
            case DIRECT_CONV2D:
            case DIRECT_CONV2D_BACKWARD_DATA:
            case DIRECT_CONV2D_BACKWARD_FILTER:
            case BIAS_ADD:
            case BIAS_MULTIPLY:
            {
                if( et == ExecType.CP || et == ExecType.GPU ) {
                    setLops(constructConvolutionLops(et, inputs));
                    break;
                }
                else {
                    throw new HopsException("Unimplemented ConvolutionOp for execution type: " + et.name());
                }
                // break;
            }
            default:
                throw new HopsException("Unsupported lops construction for operation type '"+op+"'.");
        }

        //add reblock/checkpoint lops if necessary
        constructAndSetLopsDataFlowProperties();

        return getLops();
    }

    public void setOp(ConvOp op) {
        this.op = op;
    }

    private int getNumExpectedInputs() {
        switch(op) {
            case MAX_POOLING_BACKWARD:
            case DIRECT_CONV2D:
            case DIRECT_CONV2D_BACKWARD_FILTER:
            case DIRECT_CONV2D_BACKWARD_DATA:
                return 14;
            case BIAS_ADD:
            case BIAS_MULTIPLY:
                return 2;
            default:
                return 13;
        }
    }

    private boolean isInputReLU(Hop input) {
        return input instanceof UnaryOp && ((UnaryOp) input).getOp() == OpOp1.SELP;
    }

    private boolean isInputConv2d(Hop input) {
        return input instanceof ConvolutionOp && ((ConvolutionOp) input).getOp() == ConvOp.DIRECT_CONV2D;
    }
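    /**
     * Constructs the ConvolutionTransform lop for the given execution type.
     * For the CP backend (with operator fusion enabled), relu followed by
     * max_pooling is fused into RELU_MAX_POOLING (analogously for the backward
     * pass), and conv2d followed by bias_add is fused into
     * DIRECT_CONV2D_BIAS_ADD. The first data input (and, for fused bias_add,
     * the bias) is wired directly; the remaining scalar inputs (strides,
     * paddings, input/filter shapes) are appended in order.
     */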
    public Lop constructConvolutionLops(ExecType et, ArrayList<Hop> inputs) throws HopsException, LopsException {
        if( inputs.size() != getNumExpectedInputs() )
            throw new HopsException("Incorrect number of inputs for " + op.name());

        Lop in = null;
        Lop in2 = null;
        ArrayList<Hop> inputs1 = inputs;
        int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
        OperationTypes lopOp = HopsConv2Lops.get(op);

        // RELU_MAX_POOLING and RELU_MAX_POOLING_BACKWARD are extremely useful for the CP backend
        // because they avoid unnecessary sparse-to-dense-to-sparse conversions.
        // For other backends, these fused operators are not necessary, as they merely
        // eliminate an additional relu operator.
        if(OptimizerUtils.ALLOW_OPERATOR_FUSION && et == ExecType.CP && op == ConvOp.MAX_POOLING && isInputReLU(inputs.get(0))) {
            in = inputs.get(0).getInput().get(0).constructLops();
            lopOp = OperationTypes.RELU_MAX_POOLING;
        }
        else if(OptimizerUtils.ALLOW_OPERATOR_FUSION && et == ExecType.CP && op == ConvOp.MAX_POOLING_BACKWARD && isInputReLU(inputs.get(0))) {
            in = inputs.get(0).getInput().get(0).constructLops();
            lopOp = OperationTypes.RELU_MAX_POOLING_BACKWARD;
        }
        else if(OptimizerUtils.ALLOW_OPERATOR_FUSION && op == ConvOp.BIAS_ADD && isInputConv2d(inputs.get(0))) {
            lopOp = OperationTypes.DIRECT_CONV2D_BIAS_ADD;

            // the first lop is the image
            in = inputs.get(0).getInput().get(0).constructLops();

            // the second lop is the bias
            in2 = inputs.get(1).constructLops();

            // use the inputs from conv2d rather than those of bias_add
            inputs1 = inputs.get(0).getInput();
        }
        else {
            in = inputs.get(0).constructLops();
        }

        //TODO: inserting a reblock requires knowing the number of columns a priori
        //ConvolutionTransform transform1 = new ConvolutionTransform(addReblockIfNecessary(et, lopOp, in), lopOp, getDataType(), getValueType(), et, k);
        //setReblockedOutputDimension(et, transform1);
        ConvolutionTransform transform1 = new ConvolutionTransform(in, lopOp, getDataType(), getValueType(), et, k);
        setOutputDimensions(transform1);
        setLineNumbers(transform1);
        in.addOutput(transform1);

        if( in2 != null ) {
            transform1.addInput(in2);
            in2.addOutput(transform1);
        }

        // stride1, stride2, padding1, padding2,
        // input_shape1, input_shape2, input_shape3, input_shape4,
        // filter_shape1, filter_shape2, filter_shape3, filter_shape4
        for( int i=1; i < inputs1.size(); i++ ) {
            Lop ltmp = inputs1.get(i).constructLops();
            transform1.addInput(ltmp);
            ltmp.addOutput(transform1);
        }

        transform1.setLevel(); //force order of added lops
        return transform1;
    }

    @Override
    protected double computeOutputMemEstimate( long dim1, long dim2, long nnz ) {
        double sparsity = 1.0;
        return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, sparsity);
    }

    @Override
    protected double computeIntermediateMemEstimate( long dim1, long dim2, long nnz ) {
        //default: no intermediate memory requirements
        return 0;
    }
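    // Output sizes below follow standard convolution arithmetic, with P and Q
    // computed by ConvolutionParameters as
    //   P = (H + 2*pad_h - R) / stride_h + 1,  Q = (W + 2*pad_w - S) / stride_w + 1,
    // so conv2d produces an [N x K*P*Q] matrix and max_pooling an [N x C*P*Q]
    // matrix in the row-major NCHW layout used here.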
    @Override
    protected long[] inferOutputCharacteristics( MemoTable memo ) {
        // [numRows, numCols, NNZ]
        long[] ret = new long[3];

        if(op == ConvOp.BIAS_ADD || op == ConvOp.BIAS_MULTIPLY) {
            MatrixCharacteristics[] mc = memo.getAllInputStats(getInput());
            ret[0] = mc[0].rowsKnown() ? mc[0].getRows() : -1;
            ret[1] = mc[0].colsKnown() ? mc[0].getCols() : -1;
            ret[2] = -1;
            return (ret[0]>0 && ret[1]>0) ? ret : null;
        }

        ConvolutionParameters params;
        try {
            params = parseInput();
        } catch (DMLRuntimeException e) {
            throw new RuntimeException(e);
        }

        switch(op) {
            case MAX_POOLING: {
                // input
                long N = getInput().get(0)._dim1;
                ret[0] = N;
                ret[1] = getExtractedVal(params.C, params.P, params.Q);
                ret[2] = -1;
                break;
            }
            case DIRECT_CONV2D: {
                // input, filter
                long N = getInput().get(0)._dim1;
                ret[0] = N;
                ret[1] = getExtractedVal(params.K, params.P, params.Q);
                ret[2] = -1;
                break;
            }
            case DIRECT_CONV2D_BACKWARD_FILTER: {
                // input, dout
                ret[0] = params.K;
                ret[1] = getExtractedVal(params.C, params.R, params.S);
                ret[2] = -1;
                break;
            }
            case MAX_POOLING_BACKWARD: {
                // input, dout
                ret[0] = getInput().get(0)._dim1;
                ret[1] = getInput().get(0)._dim2;
                ret[2] = -1;
                break;
            }
            case DIRECT_CONV2D_BACKWARD_DATA: {
                // filter, dout
                long N = getInput().get(1)._dim1;
                ret[0] = N;
                ret[1] = getExtractedVal(params.C, params.H, params.W);
                ret[2] = -1;
                break;
            }
            default:
                throw new RuntimeException("Unsupported op:" + op.name());
        }

        if(LOG.isDebugEnabled() && (ret[0] <= 0 || ret[1] <= 0)) {
            LOG.debug("Unknown dimensions for ConvolutionOp in inferOutputCharacteristics:" + op.name() + " " + ret[0] + " " + ret[1] +
                    " img_dim=[" + params.N + " " + params.C + " " + params.H + " " + params.W + "]" +
                    " filter_dim=[" + params.K + " " + params.C + " " + params.R + " " + params.S + "]" +
                    " output_feature_map=[" + params.P + " " + params.Q + "] stride=[" + params.stride_h + " " + params.stride_w + "]" +
                    " pad=[" + params.pad_h + " " + params.pad_w + "]");
        }

        //safe return (create entry only if at least dims known)
        return (ret[0]>0 && ret[1]>0) ? ret : null;
    }

    @Override
    public boolean allowsAllExecTypes() {
        return true;
    }

    @Override
    protected ExecType optFindExecType() throws HopsException {
        checkAndSetForcedPlatform();

        ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR;

        if( _etypeForced != null ) {
            _etype = findGPUExecTypeByMemEstimate(_etypeForced);
        }
        else {
            if ( OptimizerUtils.isMemoryBasedOptLevel() ) {
                _etype = findGPUExecTypeByMemEstimate(findExecTypeByMemEstimate());
            }
            else {
                _etype = REMOTE;
            }

            //check for valid CP dimensions and matrix size
            checkAndSetInvalidCPDimsAndSize();
        }

        // TODO: Fix this after adding the remaining spark instructions
        _etype = (!isEligibleForSpark() && _etype == REMOTE) ? ExecType.CP : _etype;

        //mark for recompile (forever)
        if( ConfigurationManager.isDynamicRecompilation() && !dimsKnown(true) && _etype==REMOTE )
            setRequiresRecompile();

        return _etype;
    }
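    // Mapping of scalar inputs to ConvolutionParameters for the 14-input ops
    // (conv2d and its backward variants); for the 13-input pooling ops all
    // indices are shifted left by one:
    //   inputs 2-3  -> stride_h, stride_w;  inputs 4-5 -> pad_h, pad_w;
    //   inputs 6-9  -> input shape [N, C, H, W];
    //   inputs 10-13 -> filter shape [K, C, R, S]
    //   (filter_shape2 == C is redundant and therefore skipped below).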
    // stride1, stride2, padding1, padding2,
    // input_shape1, input_shape2, input_shape3, input_shape4,
    // filter_shape1, filter_shape2, filter_shape3, filter_shape4
    ConvolutionParameters parseInput() throws DMLRuntimeException {
        ConvolutionParameters params = null;
        if(op == ConvOp.MAX_POOLING_BACKWARD
                || op == ConvOp.DIRECT_CONV2D
                || op == ConvOp.DIRECT_CONV2D_BACKWARD_FILTER
                || op == ConvOp.DIRECT_CONV2D_BACKWARD_DATA) {
            params = new ConvolutionParameters(
                    computeSizeInformation(getInput().get(6)),
                    computeSizeInformation(getInput().get(7)),
                    computeSizeInformation(getInput().get(8)),
                    computeSizeInformation(getInput().get(9)),
                    computeSizeInformation(getInput().get(10)),
                    computeSizeInformation(getInput().get(12)),
                    computeSizeInformation(getInput().get(13)),
                    computeSizeInformation(getInput().get(2)),
                    computeSizeInformation(getInput().get(3)),
                    computeSizeInformation(getInput().get(4)),
                    computeSizeInformation(getInput().get(5)),
                    _maxNumThreads);
        }
        else {
            params = new ConvolutionParameters(
                    computeSizeInformation(getInput().get(5)),
                    computeSizeInformation(getInput().get(6)),
                    computeSizeInformation(getInput().get(7)),
                    computeSizeInformation(getInput().get(8)),
                    computeSizeInformation(getInput().get(9)),
                    computeSizeInformation(getInput().get(11)),
                    computeSizeInformation(getInput().get(12)),
                    computeSizeInformation(getInput().get(1)),
                    computeSizeInformation(getInput().get(2)),
                    computeSizeInformation(getInput().get(3)),
                    computeSizeInformation(getInput().get(4)),
                    _maxNumThreads);
        }
        return params;
    }

    public static long getExtractedVal(long val1, long val2, long val3) {
        if(val1 == -1 || val2 == -1 || val3 == -1) {
            return -1;
        }
        return val1*val2*val3;
    }

    @Override
    public void refreshSizeInformation() {
        if(op == ConvOp.BIAS_ADD || op == ConvOp.BIAS_MULTIPLY) {
            Hop input1 = getInput().get(0);
            setDim1(input1.getDim1());
            setDim2(input1.getDim2());
            return;
        }

        ConvolutionParameters params;
        try {
            params = parseInput();
        } catch (DMLRuntimeException e) {
            throw new RuntimeException(e);
        }

        switch(op) {
            case MAX_POOLING: {
                // input
                long N = getInput().get(0)._dim1;
                _dim1 = N;
                _dim2 = getExtractedVal(params.C, params.P, params.Q);
                _nnz = -1; // cannot infer stats
                break;
            }
            case MAX_POOLING_BACKWARD: {
                // input, dout
                _dim1 = getInput().get(0)._dim1;
                _dim2 = getInput().get(0)._dim2;
                _nnz = -1;
                break;
            }
            case DIRECT_CONV2D: {
                // input, filter
                long N = getInput().get(0)._dim1;
                _dim1 = N;
                _dim2 = getExtractedVal(params.K, params.P, params.Q);
                _nnz = -1; // cannot infer stats
                break;
            }
            case DIRECT_CONV2D_BACKWARD_DATA: {
                // filter, dout
                long N = getInput().get(1)._dim1;
                _dim1 = N;
                _dim2 = getExtractedVal(params.C, params.H, params.W);
                _nnz = -1; // cannot infer stats
                break;
            }
            case DIRECT_CONV2D_BACKWARD_FILTER: {
                // input, dout
                _dim1 = params.K;
                _dim2 = getExtractedVal(params.C, params.R, params.S);
                _nnz = -1; // cannot infer stats
                break;
            }
            default:
                throw new RuntimeException("The sizes are not refreshed for " + op.name());
        }

        if(LOG.isDebugEnabled() && (_dim1 <= 0 || _dim2 <= 0)) {
            LOG.debug("Unknown dimensions for ConvolutionOp in refreshSizeInformation:" + op.name() + " " + _dim1 + " " + _dim2 +
                    " img_dim=[" + params.N + " " + params.C + " " + params.H + " " + params.W + "]" +
                    " filter_dim=[" + params.K + " " + params.C + " " + params.R + " " + params.S + "]" +
                    " output_feature_map=[" + params.P + " " + params.Q + "] stride=[" + params.stride_h + " " + params.stride_w + "]" +
                    " pad=[" + params.pad_h + " " + params.pad_w + "]");
        }
    }
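    // Worked sizing example (illustration only): max_pooling over an input with
    // N=64, C=3, H=W=32, a 2x2 pooling window, stride 2, and no padding yields
    // P = (32 - 2)/2 + 1 = 16 and Q = 16, i.e. an output of dimensions
    // [64 x 3*16*16] = [64 x 768], matching _dim2 = getExtractedVal(C, P, Q).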
    @Override
    public Object clone() throws CloneNotSupportedException {
        ConvolutionOp ret = new ConvolutionOp();

        //copy generic attributes
        ret.clone(this, false);

        //copy specific attributes
        ret.op = op;
        ret._maxNumThreads = _maxNumThreads;

        return ret;
    }

    @Override
    public boolean compare( Hop that ) {
        if( !(that instanceof ConvolutionOp) )
            return false;

        ConvolutionOp that2 = (ConvolutionOp)that;
        boolean ret = (op == that2.op)
                && (getInput().size()==that.getInput().size())
                && (_maxNumThreads == that2._maxNumThreads);

        //compare all children
        if( ret ) //sizes matched
            for( int i=0; i<_input.size(); i++ )
                ret &= getInput().get(i) == that2.getInput().get(i);

        return ret;
    }

    @Override
    public void setMaxNumThreads( int k ) {
        _maxNumThreads = k;
    }

    @Override
    public int getMaxNumThreads() {
        return _maxNumThreads;
    }
}