/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.hops.cost;

import java.util.ArrayList;
import java.util.HashSet;

import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.lops.DataGen;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.MapMult;
import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.lops.MMTSJ.MMTSJType;
import org.apache.sysml.lops.compile.JobType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
import org.apache.sysml.runtime.controlprogram.caching.LazyWriteBuffer;
import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import org.apache.sysml.runtime.instructions.CPInstructionParser;
import org.apache.sysml.runtime.instructions.Instruction;
import org.apache.sysml.runtime.instructions.InstructionUtils;
import org.apache.sysml.runtime.instructions.MRInstructionParser;
import org.apache.sysml.runtime.instructions.MRJobInstruction;
import org.apache.sysml.runtime.instructions.cp.CPInstruction;
import org.apache.sysml.runtime.instructions.cp.CPInstruction.CPINSTRUCTION_TYPE;
import org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction;
import org.apache.sysml.runtime.instructions.cp.VariableCPInstruction;
import org.apache.sysml.runtime.instructions.mr.BinaryMRInstructionBase;
import org.apache.sysml.runtime.instructions.mr.CM_N_COVInstruction;
import org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction;
import org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction;
import org.apache.sysml.runtime.instructions.mr.IDistributedCacheConsumer;
import org.apache.sysml.runtime.instructions.mr.MMTSJMRInstruction;
import org.apache.sysml.runtime.instructions.mr.MRInstruction;
import org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction;
import org.apache.sysml.runtime.instructions.mr.PickByCountInstruction;
import org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction;
import org.apache.sysml.runtime.instructions.mr.TernaryInstruction;
import org.apache.sysml.runtime.instructions.mr.UnaryMRInstructionBase;
import org.apache.sysml.runtime.instructions.mr.MRInstruction.MRINSTRUCTION_TYPE;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput;
import org.apache.sysml.runtime.matrix.operators.CMOperator;
import org.apache.sysml.runtime.matrix.operators.CMOperator.AggregateOperationTypes;
import org.apache.sysml.yarn.ropt.MRJobResourceInstruction;
import org.apache.sysml.yarn.ropt.YarnClusterAnalyzer;

public class CostEstimatorStaticRuntime extends CostEstimator
{
    //time-conversion
    private static final long DEFAULT_FLOPS = 2L * 1024 * 1024 * 1024; //2GFLOPS
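    //Illustrative back-of-envelope (added, not in the original source): at the assumed
    //2 GFLOPS, an operation estimated at ~2e9 FLOP (e.g., a dense 1000x1000 matrix
    //multiply at ~2*n^3 FLOP) maps to roughly 1 second of single-threaded compute time.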
    //private static final long UNKNOWN_TIME = -1;

    //floating point operations
    private static final double DEFAULT_NFLOP_NOOP = 10;
    private static final double DEFAULT_NFLOP_UNKNOWN = 1;
    private static final double DEFAULT_NFLOP_CP = 1;
    private static final double DEFAULT_NFLOP_TEXT_IO = 350;

    //MR job latency
    private static final double DEFAULT_MR_JOB_LATENCY_LOCAL = 2;
    private static final double DEFAULT_MR_JOB_LATENCY_REMOTE = 20;
    private static final double DEFAULT_MR_TASK_LATENCY_LOCAL = 0.001;
    private static final double DEFAULT_MR_TASK_LATENCY_REMOTE = 1.5;

    //IO READ throughput
    private static final double DEFAULT_MBS_FSREAD_BINARYBLOCK_DENSE = 200;
    private static final double DEFAULT_MBS_FSREAD_BINARYBLOCK_SPARSE = 100;
    private static final double DEFAULT_MBS_HDFSREAD_BINARYBLOCK_DENSE = 150;
    private static final double DEFAULT_MBS_HDFSREAD_BINARYBLOCK_SPARSE = 75;

    //IO WRITE throughput
    private static final double DEFAULT_MBS_FSWRITE_BINARYBLOCK_DENSE = 150;
    private static final double DEFAULT_MBS_FSWRITE_BINARYBLOCK_SPARSE = 75;
    private static final double DEFAULT_MBS_HDFSWRITE_BINARYBLOCK_DENSE = 120;
    private static final double DEFAULT_MBS_HDFSWRITE_BINARYBLOCK_SPARSE = 60;
    private static final double DEFAULT_MBS_HDFSWRITE_TEXT_DENSE = 40;
    private static final double DEFAULT_MBS_HDFSWRITE_TEXT_SPARSE = 30;

    @Override
    @SuppressWarnings("unused")
    protected double getCPInstTimeEstimate( Instruction inst, VarStats[] vs, String[] args )
        throws DMLRuntimeException
    {
        CPInstruction cpinst = (CPInstruction)inst;

        //load time into mem
        double ltime = 0;
        if( !vs[0]._inmem ) {
            ltime += getHDFSReadTime( vs[0]._rlen, vs[0]._clen, vs[0].getSparsity() );
            //eviction costs
            if( CacheableData.CACHING_WRITE_CACHE_ON_READ
                && LazyWriteBuffer.getWriteBufferSize() < MatrixBlock.estimateSizeOnDisk(vs[0]._rlen, vs[0]._clen,
                    (long)((vs[0]._nnz<0) ? vs[0]._rlen*vs[0]._clen : vs[0]._nnz)) )
            {
                ltime += Math.abs( getFSWriteTime( vs[0]._rlen, vs[0]._clen, vs[0].getSparsity() ));
            }
            vs[0]._inmem = true;
        }
        if( !vs[1]._inmem ) {
            ltime += getHDFSReadTime( vs[1]._rlen, vs[1]._clen, vs[1].getSparsity() );
            //eviction costs
            if( CacheableData.CACHING_WRITE_CACHE_ON_READ
                && LazyWriteBuffer.getWriteBufferSize() < MatrixBlock.estimateSizeOnDisk(vs[1]._rlen, vs[1]._clen,
                    (long)((vs[1]._nnz<0) ? vs[1]._rlen*vs[1]._clen : vs[1]._nnz)) )
            {
                ltime += Math.abs( getFSWriteTime( vs[1]._rlen, vs[1]._clen, vs[1].getSparsity()) );
            }
            vs[1]._inmem = true;
        }
        if( LOG.isDebugEnabled() && ltime!=0 ) {
            LOG.debug("Cost["+cpinst.getOpcode()+" - read] = "+ltime);
        }

        //exec time CP instruction
        String opcode = (cpinst instanceof FunctionCallCPInstruction) ?
            InstructionUtils.getOpCode(cpinst.toString()) : cpinst.getOpcode();
        double etime = getInstTimeEstimate(opcode, vs, args, ExecType.CP);

        //write time caching
        double wtime = 0;
        //double wtime = getFSWriteTime( vs[2]._rlen, vs[2]._clen, (vs[2]._nnz<0)? 1.0:(double)vs[2]._nnz/vs[2]._rlen/vs[2]._clen );
        if( inst instanceof VariableCPInstruction
            && ((VariableCPInstruction)inst).getOpcode().equals("write") )
        {
            wtime += getHDFSWriteTime(vs[2]._rlen, vs[2]._clen, vs[2].getSparsity(),
                ((VariableCPInstruction)inst).getInput3().getName() );
        }
        if( LOG.isDebugEnabled() && wtime!=0 ) {
            LOG.debug("Cost["+cpinst.getOpcode()+" - write] = "+wtime);
        }

        //total costs
        double costs = ltime + etime + wtime;
        //if( LOG.isDebugEnabled() )
        //	LOG.debug("Costs CP instruction = "+costs);

        return costs;
    }

    @Override
    protected double getMRJobInstTimeEstimate( Instruction inst, VarStats[] vs, String[] args )
        throws DMLRuntimeException
    {
        MRJobInstruction jinst = (MRJobInstruction) inst;

        //infrastructure properties
        boolean localJob = InfrastructureAnalyzer.isLocalMode();
        int maxPMap = InfrastructureAnalyzer.getRemoteParallelMapTasks();
        int maxPRed = Math.min( InfrastructureAnalyzer.getRemoteParallelReduceTasks(),
            ConfigurationManager.getNumReducers() );
        double blocksize = ((double)InfrastructureAnalyzer.getHDFSBlockSize())/(1024*1024);

        //correction max number of mappers/reducers on yarn clusters
        if( InfrastructureAnalyzer.isYarnEnabled() ) {
            maxPMap = (int)Math.max( maxPMap, YarnClusterAnalyzer.getNumCores() );
            //artificially reduced by factor 2, in order to prefer map-side processing even if smaller degree of parallelism
            maxPRed = (int)Math.max( maxPRed, YarnClusterAnalyzer.getNumCores()/2 /2 );
        }

        //yarn-specific: take degree of parallelism into account
        if( jinst instanceof MRJobResourceInstruction ) {
            int maxTasks = (int)((MRJobResourceInstruction)jinst).getMaxMRTasks();
            maxPMap = Math.min(maxPMap, maxTasks);
            maxPRed = Math.min(maxPRed, maxTasks);
        }

        //job properties
        boolean mapOnly = jinst.isMapOnly();
        String rdInst = jinst.getIv_randInstructions();
        String rrInst = jinst.getIv_recordReaderInstructions();
        String mapInst = jinst.getIv_instructionsInMapper();
        String shfInst = jinst.getIv_shuffleInstructions();
        String aggInst = jinst.getIv_aggInstructions();
        String otherInst = jinst.getIv_otherInstructions();
        byte[] inIx = getInputIndexes( jinst.getInputVars() );
        byte[] retIx = jinst.getIv_resultIndices();
        byte[] mapOutIx = getMapOutputIndexes(inIx, retIx, rdInst, mapInst, shfInst, aggInst, otherInst);
        int numMap = computeNumMapTasks(vs, inIx, blocksize, maxPMap, jinst.getJobType());
        int numPMap = Math.min(numMap, maxPMap);
        int numEPMap = Math.max(Math.min(numMap, maxPMap/2),1); //effective map dop
        int numRed = computeNumReduceTasks( vs, mapOutIx, jinst.getJobType() );
        int numPRed = Math.min(numRed, maxPRed);
        int numEPRed = Math.max(Math.min(numRed, maxPRed/2),1); //effective reduce dop

        LOG.debug("Meta nmap = "+numMap+", nred = "+numRed+"; npmap = "+numPMap
            +", npred = "+numPRed+"; nepmap = "+numEPMap+", nepred = "+numEPRed);

        //step 0: export if inputs in mem
        double exportCosts = 0;
        for( int i=0; i<jinst.getInputVars().length; i++ )
            if( vs[i]._inmem )
                exportCosts += getHDFSWriteTime(vs[i]._rlen, vs[i]._clen, vs[i].getSparsity());

        //step 1: MR job / task latency (normalization by effective dop)
        double jobLatencyCosts = localJob ? DEFAULT_MR_JOB_LATENCY_LOCAL : DEFAULT_MR_JOB_LATENCY_REMOTE;
        double taskLatencyCost = (numMap / numEPMap + numEPRed)
            * (localJob ? DEFAULT_MR_TASK_LATENCY_LOCAL : DEFAULT_MR_TASK_LATENCY_REMOTE);
        double latencyCosts = jobLatencyCosts + taskLatencyCost;
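        //Illustrative (added, not in the original source): for a remote job with
        //numMap=80, numEPMap=10, numEPRed=4, task latency is (80/10 + 4) * 1.5s = 18s,
        //plus the 20s job latency -- latency alone can dominate small MR jobs.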
        //step 2: parallel read of inputs (normalization by effective dop)
        double hdfsReadCosts = 0;
        for( int i=0; i<jinst.getInputVars().length; i++ )
            hdfsReadCosts += getHDFSReadTime(vs[i]._rlen, vs[i]._clen, vs[i].getSparsity());
        hdfsReadCosts /= numEPMap;

        //step 3: parallel MR instructions
        String[] mapperInst = new String[]{rdInst, rrInst, mapInst};
        String[] reducerInst = new String[]{shfInst, aggInst, otherInst};

        //map instructions compute/distcache read (normalization by effective dop)
        double mapDCReadCost = 0; //read through distributed cache
        double mapCosts = 0; //map compute cost
        double shuffleCosts = 0;
        double reduceCosts = 0; //reduce compute costs

        for( String instCat : mapperInst )
            if( instCat != null && instCat.length()>0 ) {
                String[] linst = instCat.split( Lop.INSTRUCTION_DELIMITOR );
                for( String tmp : linst ) {
                    //map compute costs
                    Object[] o = extractMRInstStatistics(tmp, vs);
                    String opcode = InstructionUtils.getOpCode(tmp);
                    mapCosts += getInstTimeEstimate(opcode, (VarStats[])o[0], (String[])o[1], ExecType.MR);
                    //dist cache read costs
                    int dcIndex = getDistcacheIndex(tmp);
                    if( dcIndex >= 0 ) {
                        mapDCReadCost += Math.min(getFSReadTime(vs[dcIndex]._rlen, vs[dcIndex]._clen, vs[dcIndex].getSparsity()),
                            getFSReadTime(DistributedCacheInput.PARTITION_SIZE, 1, 1.0)) //32MB partitions
                            * numMap; //read in each task
                    }
                }
            }
        mapCosts /= numEPMap;
        mapDCReadCost /= numEPMap;

        if( !mapOnly )
        {
            //shuffle costs (normalization by effective map/reduce dop)
            for( int i=0; i<mapOutIx.length; i++ )
            {
                shuffleCosts += ( getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap
                    + getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity())*4 / numEPRed
                    + getFSReadTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPRed );

                //correction of shuffle costs (necessary because the above shuffle does not consider the number of blocks)
                //TODO this is a workaround - we need to address the number of map output blocks in a more systematic way
                for( String instCat : reducerInst )
                    if( instCat != null && instCat.length()>0 ) {
                        String[] linst = instCat.split( Lop.INSTRUCTION_DELIMITOR );
                        for( String tmp : linst ) {
                            if( InstructionUtils.getMRType(tmp)==MRINSTRUCTION_TYPE.Aggregate )
                                shuffleCosts += numMap * getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap
                                    + numPMap * getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap
                                    + numPMap * getFSReadTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPRed;
                        }
                    }
            }

            //reduce instructions compute (normalization by effective dop)
            for( String instCat : reducerInst )
                if( instCat != null && instCat.length()>0 ) {
                    String[] linst = instCat.split( Lop.INSTRUCTION_DELIMITOR );
                    for( String tmp : linst ) {
                        Object[] o = extractMRInstStatistics(tmp, vs);
                        if( InstructionUtils.getMRType(tmp)==MRINSTRUCTION_TYPE.Aggregate )
                            o[1] = new String[]{String.valueOf(numMap)};
                        String opcode = InstructionUtils.getOpCode(tmp);
                        reduceCosts += getInstTimeEstimate(opcode, (VarStats[])o[0], (String[])o[1], ExecType.MR);
                    }
                }
            reduceCosts /= numEPRed;
        }
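        //Note (added, not in the original source): the three per-output shuffle terms
        //above appear to model the map-side spill write, the reduce-side merge (weighted
        //4x as a heuristic), and the reduce-side read of the merged map output.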
        //step 4: parallel write of outputs (normalization by effective dop)
        double hdfsWriteCosts = 0;
        for( int i=0; i<jinst.getOutputVars().length; i++ ) {
            hdfsWriteCosts += getHDFSWriteTime(vs[retIx[i]]._rlen, vs[retIx[i]]._clen, vs[retIx[i]].getSparsity());
        }
        hdfsWriteCosts /= ((mapOnly)? numEPMap : numEPRed);

        //debug output
        if( LOG.isDebugEnabled() ) {
            LOG.debug("Costs Export = "+exportCosts);
            LOG.debug("Costs Latency = "+latencyCosts);
            LOG.debug("Costs HDFS Read = "+hdfsReadCosts);
            LOG.debug("Costs Distcache Read = "+mapDCReadCost);
            LOG.debug("Costs Map Exec = "+mapCosts);
            LOG.debug("Costs Shuffle = "+shuffleCosts);
            LOG.debug("Costs Reduce Exec = "+reduceCosts);
            LOG.debug("Costs HDFS Write = "+hdfsWriteCosts);
        }

        //aggregate individual cost factors
        return exportCosts + latencyCosts + hdfsReadCosts + mapCosts
            + mapDCReadCost + shuffleCosts + reduceCosts + hdfsWriteCosts;
    }

    private Object[] extractMRInstStatistics( String inst, VarStats[] stats )
        throws DMLRuntimeException
    {
        Object[] ret = new Object[2]; //stats, attrs
        VarStats[] vs = new VarStats[3];
        String[] attr = null;

        String[] parts = InstructionUtils.getInstructionParts(inst);
        String opcode = parts[0];

        if( opcode.equals(DataGen.RAND_OPCODE) )
        {
            vs[0] = _unknownStats;
            vs[1] = _unknownStats;
            vs[2] = stats[Integer.parseInt(parts[2])];

            int type = 2;
            //awareness of instruction patching min/max
            if( !parts[7].contains(Lop.VARIABLE_NAME_PLACEHOLDER)
                && !parts[8].contains(Lop.VARIABLE_NAME_PLACEHOLDER) )
            {
                double minValue = Double.parseDouble(parts[7]);
                double maxValue = Double.parseDouble(parts[8]);
                double sparsity = Double.parseDouble(parts[9]);
                if( minValue == 0.0 && maxValue == 0.0 )
                    type = 0;
                else if( sparsity == 1.0 && minValue == maxValue )
                    type = 1;
            }
            attr = new String[]{String.valueOf(type)};
        }
        else if( opcode.equals(DataGen.SEQ_OPCODE) )
        {
            vs[0] = _unknownStats;
            vs[1] = _unknownStats;
            vs[2] = stats[Integer.parseInt(parts[2])];
        }
        else //general case
        {
            String inst2 = replaceInstructionPatch( inst );
            MRInstruction mrinst = MRInstructionParser.parseSingleInstruction(inst2);
            if( mrinst instanceof UnaryMRInstructionBase )
            {
                UnaryMRInstructionBase uinst = (UnaryMRInstructionBase) mrinst;
                vs[0] = uinst.input>=0 ? stats[ uinst.input ] : _unknownStats;
                vs[1] = _unknownStats;
                vs[2] = stats[ uinst.output ];

                if( vs[0] == null ) //scalar input, e.g., print
                    vs[0] = _scalarStats;
                if( vs[2] == null ) //scalar output
                    vs[2] = _scalarStats;

                if( mrinst instanceof MMTSJMRInstruction ) {
                    String type = ((MMTSJMRInstruction)mrinst).getMMTSJType().toString();
                    attr = new String[]{type};
                }
                else if( mrinst instanceof CM_N_COVInstruction ) {
                    if( opcode.equals("cm") )
                        attr = new String[]{parts[parts.length-2]};
                }
                else if( mrinst instanceof GroupedAggregateInstruction ) {
                    if( opcode.equals("groupedagg") ) {
                        AggregateOperationTypes type = CMOperator.getAggOpType(parts[2], parts[3]);
                        attr = new String[]{String.valueOf(type.ordinal())};
                    }
                }
            }
            else if( mrinst instanceof BinaryMRInstructionBase )
            {
                BinaryMRInstructionBase binst = (BinaryMRInstructionBase) mrinst;
                vs[0] = stats[ binst.input1 ];
                vs[1] = stats[ binst.input2 ];
                vs[2] = stats[ binst.output ];

                if( vs[0] == null ) //scalar input
                    vs[0] = _scalarStats;
                if( vs[1] == null ) //scalar input
                    vs[1] = _scalarStats;
                if( vs[2] == null ) //scalar output
                    vs[2] = _scalarStats;

                if( opcode.equals("rmempty") ) {
                    RemoveEmptyMRInstruction rbinst = (RemoveEmptyMRInstruction) mrinst;
                    attr = new String[]{rbinst.isRemoveRows()?"0":"1"};
                }
            }
            else if( mrinst instanceof TernaryInstruction )
            {
                TernaryInstruction tinst = (TernaryInstruction) mrinst;
                vs[0] = stats[ tinst.input1 ];
                vs[1] = stats[ tinst.input2 ];
                vs[2] = stats[ tinst.input3 ];

                if( vs[0] == null ) //scalar input
                    vs[0] = _scalarStats;
                if( vs[1] == null ) //scalar input
                    vs[1] = _scalarStats;
                if( vs[2] == null ) //scalar input
                    vs[2] = _scalarStats;
            }
            else if( mrinst instanceof PickByCountInstruction )
            {
                PickByCountInstruction pinst = (PickByCountInstruction) mrinst;
                vs[0] = stats[ pinst.input1 ];
                vs[2] = stats[ pinst.output ];

                if( vs[0] == null ) //scalar input
                    vs[0] = _scalarStats;
                if( vs[1] == null ) //scalar input
                    vs[1] = _scalarStats;
                if( vs[2] == null ) //scalar input
                    vs[2] = _scalarStats;
            }
            else if( mrinst instanceof MapMultChainInstruction )
            {
                MapMultChainInstruction minst = (MapMultChainInstruction) mrinst;
                vs[0] = stats[ minst.getInput1() ];
                vs[1] = stats[ minst.getInput2() ];
                if( minst.getInput3()>=0 )
                    vs[2] = stats[ minst.getInput3() ];

                if( vs[0] == null ) //scalar input
                    vs[0] = _scalarStats;
                if( vs[1] == null ) //scalar input
                    vs[1] = _scalarStats;
                if( vs[2] == null ) //scalar input
                    vs[2] = _scalarStats;
            }
        }

        //maintain var status (CP output always inmem)
        vs[2]._inmem = true;

        ret[0] = vs;
        ret[1] = attr;

        return ret;
    }

    /////////////////////
    // Utilities       //
    /////////////////////

    private byte[] getInputIndexes(String[] inputVars) {
        byte[] inIx = new byte[inputVars.length];
        for( int i=0; i<inIx.length; i++ )
            inIx[i] = (byte)i;
        return inIx;
    }
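    //Added illustration (not in the original source) for getMapOutputIndexes below:
    //with inputs {0,1}, a map instruction producing index 2, a reduce instruction
    //over {2,3}, and result index 3, the map side touches {0,1,2} and the reduce
    //side {2,3}; their intersection {2} is the map output that must be shuffled.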
    private byte[] getMapOutputIndexes( byte[] inIx, byte[] retIx, String rdInst,
        String mapInst, String shfInst, String aggInst, String otherInst )
        throws DMLRuntimeException
    {
        //note: this is a simplified version of MRJobConfiguration.setUpOutputIndexesForMapper

        //map indices
        HashSet<Byte> ixMap = new HashSet<Byte>();
        for( byte ix : inIx )
            ixMap.add(ix);

        if( rdInst!=null && rdInst.length()>0 ) {
            rdInst = replaceInstructionPatch(rdInst);
            DataGenMRInstruction[] ins = MRInstructionParser.parseDataGenInstructions(rdInst);
            for( DataGenMRInstruction inst : ins )
                for( byte ix : inst.getAllIndexes() )
                    ixMap.add(ix);
        }

        if( mapInst!=null && mapInst.length()>0 ) {
            mapInst = replaceInstructionPatch(mapInst);
            MRInstruction[] ins = MRInstructionParser.parseMixedInstructions(mapInst);
            for( MRInstruction inst : ins )
                for( byte ix : inst.getAllIndexes() )
                    ixMap.add(ix);
        }

        //reduce indices
        HashSet<Byte> ixRed = new HashSet<Byte>();
        for( byte ix : retIx )
            ixRed.add(ix);

        if( shfInst!=null && shfInst.length()>0 ) {
            shfInst = replaceInstructionPatch(shfInst);
            MRInstruction[] ins = MRInstructionParser.parseMixedInstructions(shfInst);
            for( MRInstruction inst : ins )
                for( byte ix : inst.getAllIndexes() )
                    ixRed.add(ix);
        }

        if( aggInst!=null && aggInst.length()>0 ) {
            aggInst = replaceInstructionPatch(aggInst);
            MRInstruction[] ins = MRInstructionParser.parseAggregateInstructions(aggInst);
            for( MRInstruction inst : ins )
                for( byte ix : inst.getAllIndexes() )
                    ixRed.add(ix);
        }

        if( otherInst!=null && otherInst.length()>0 ) {
            otherInst = replaceInstructionPatch(otherInst);
            MRInstruction[] ins = MRInstructionParser.parseMixedInstructions(otherInst);
            for( MRInstruction inst : ins )
                for( byte ix : inst.getAllIndexes() )
                    ixRed.add(ix);
        }

        //intersection of map- and reduce-side indexes
        //(note: retainAll computes the intersection, not the set difference)
        ixMap.retainAll(ixRed);

        //copy result
        byte[] ret = new byte[ixMap.size()];
        int i = 0;
        for( byte ix : ixMap )
            ret[i++] = ix;

        return ret;
    }

    private int computeNumMapTasks( VarStats[] vs, byte[] inputIx, double blocksize, int maxPMap, JobType jobtype )
    {
        //special cases
        if( jobtype == JobType.DATAGEN )
            return maxPMap;

        //input size, num blocks
        double mapInputSize = 0;
        int numBlocks = 0;
        for( int i=0; i<inputIx.length; i++ ) {
            //input size
            mapInputSize += ((double)MatrixBlock.estimateSizeOnDisk(
                (long)vs[inputIx[i]]._rlen, (long)vs[inputIx[i]]._clen, (long)vs[inputIx[i]]._nnz)) / (1024*1024);
            //num blocks
            int lret = (int) Math.ceil((double)vs[inputIx[i]]._rlen/vs[inputIx[i]]._brlen)
                * (int) Math.ceil((double)vs[inputIx[i]]._clen/vs[inputIx[i]]._bclen);
            numBlocks = Math.max(lret, numBlocks);
        }

        return Math.max(1, Math.min( (int)Math.ceil(mapInputSize/blocksize), numBlocks ));
    }

    private int computeNumReduceTasks( VarStats[] vs, byte[] mapOutIx, JobType jobtype )
    {
        int ret = -1;

        //TODO for jobtype==JobType.MMCJ common dim

        switch( jobtype )
        {
            case REBLOCK:
            case CSV_REBLOCK: {
                for( int i=0; i<mapOutIx.length; i++ ) {
                    int lret = (int) Math.ceil((double)vs[mapOutIx[i]]._rlen/vs[mapOutIx[i]]._brlen)
                        * (int) Math.ceil((double)vs[mapOutIx[i]]._clen/vs[mapOutIx[i]]._bclen);
                    ret = Math.max(lret, ret);
                }
                break;
            }
            default: {
                for( int i=0; i<mapOutIx.length; i++ ) {
                    int lret = (int) Math.ceil((double)vs[mapOutIx[i]]._rlen/ConfigurationManager.getBlocksize())
                        * (int) Math.ceil((double)vs[mapOutIx[i]]._clen/ConfigurationManager.getBlocksize());
                    ret = Math.max(lret, ret);
                }
                break;
            }
        }

        return Math.max(1, ret);
    }

    private int getDistcacheIndex(String inst)
        throws DMLRuntimeException
    {
        ArrayList<Byte> indexes = new ArrayList<Byte>();

        if( InstructionUtils.isDistributedCacheUsed(inst) ) {
            MRInstruction mrinst = MRInstructionParser.parseSingleInstruction(inst);
            if( mrinst instanceof IDistributedCacheConsumer )
                ((IDistributedCacheConsumer)mrinst).addDistCacheIndex(inst, indexes);
        }

        if( !indexes.isEmpty() )
            return indexes.get(0);
        else
            return -1;
    }
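    //Illustrative (added, not in the original source): for computeNumMapTasks above,
    //a 2GB binary-block input with a 128MB HDFS block size yields
    //min(ceil(2048/128), numBlocks) = min(16, numBlocks) map tasks, i.e., the
    //matrix block count caps the size-derived task count.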
    /////////////////////
    // I/O Costs       //
    /////////////////////

    /**
     * Returns the estimated read time from HDFS.
     * NOTE: Does not handle unknowns.
     *
     * @param dm number of rows
     * @param dn number of columns
     * @param ds sparsity (fraction of non-zero values)
     * @return estimated HDFS read time
     */
    private double getHDFSReadTime( long dm, long dn, double ds )
    {
        boolean sparse = MatrixBlock.evalSparseFormatOnDisk(dm, dn, (long)(ds*dm*dn));
        double ret = ((double)MatrixBlock.estimateSizeOnDisk((long)dm, (long)dn, (long)(ds*dm*dn))) / (1024*1024);

        if( sparse )
            ret /= DEFAULT_MBS_HDFSREAD_BINARYBLOCK_SPARSE;
        else //dense
            ret /= DEFAULT_MBS_HDFSREAD_BINARYBLOCK_DENSE;

        return ret;
    }

    private double getHDFSWriteTime( long dm, long dn, double ds )
    {
        boolean sparse = MatrixBlock.evalSparseFormatOnDisk(dm, dn, (long)(ds*dm*dn));
        double bytes = (double)MatrixBlock.estimateSizeOnDisk((long)dm, (long)dn, (long)(ds*dm*dn));
        double mbytes = bytes / (1024*1024);

        double ret = -1;
        if( sparse )
            ret = mbytes / DEFAULT_MBS_HDFSWRITE_BINARYBLOCK_SPARSE;
        else //dense
            ret = mbytes / DEFAULT_MBS_HDFSWRITE_BINARYBLOCK_DENSE;

        //if( LOG.isDebugEnabled() )
        //	LOG.debug("Costs[export] = "+ret+"s, "+mbytes+" MB ("+dm+","+dn+","+ds+").");

        return ret;
    }

    private double getHDFSWriteTime( long dm, long dn, double ds, String format )
    {
        boolean sparse = MatrixBlock.evalSparseFormatOnDisk(dm, dn, (long)(ds*dm*dn));
        double bytes = (double)MatrixBlock.estimateSizeOnDisk((long)dm, (long)dn, (long)(ds*dm*dn));
        double mbytes = bytes / (1024*1024);

        double ret = -1;
        if( format.equals("textcell") || format.equals("csv") ) {
            if( sparse )
                ret = mbytes / DEFAULT_MBS_HDFSWRITE_TEXT_SPARSE;
            else //dense
                ret = mbytes / DEFAULT_MBS_HDFSWRITE_TEXT_DENSE;
            ret *= 2.75; //text commonly 2x-3.5x larger than binary
        }
        else {
            if( sparse )
                ret = mbytes / DEFAULT_MBS_HDFSWRITE_BINARYBLOCK_SPARSE;
            else //dense
                ret = mbytes / DEFAULT_MBS_HDFSWRITE_BINARYBLOCK_DENSE;
        }

        //if( LOG.isDebugEnabled() )
        //	LOG.debug("Costs[export] = "+ret+"s, "+mbytes+" MB ("+dm+","+dn+","+ds+").");

        return ret;
    }

    /**
     * Returns the estimated read time from local FS.
     * NOTE: Does not handle unknowns.
     *
     * @param dm number of rows
     * @param dn number of columns
     * @param ds sparsity (fraction of non-zero values)
     * @return estimated local file system read time
     */
    private double getFSReadTime( long dm, long dn, double ds )
    {
        boolean sparse = MatrixBlock.evalSparseFormatOnDisk(dm, dn, (long)(ds*dm*dn));
        double ret = ((double)MatrixBlock.estimateSizeOnDisk((long)dm, (long)dn, (long)(ds*dm*dn))) / (1024*1024);

        if( sparse )
            ret /= DEFAULT_MBS_FSREAD_BINARYBLOCK_SPARSE;
        else //dense
            ret /= DEFAULT_MBS_FSREAD_BINARYBLOCK_DENSE;

        return ret;
    }

    private double getFSWriteTime( long dm, long dn, double ds )
    {
        boolean sparse = MatrixBlock.evalSparseFormatOnDisk(dm, dn, (long)(ds*dm*dn));
        double ret = ((double)MatrixBlock.estimateSizeOnDisk((long)dm, (long)dn, (long)(ds*dm*dn))) / (1024*1024);

        if( sparse )
            ret /= DEFAULT_MBS_FSWRITE_BINARYBLOCK_SPARSE;
        else //dense
            ret /= DEFAULT_MBS_FSWRITE_BINARYBLOCK_DENSE;

        return ret;
    }
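    //Illustrative back-of-envelope (added, not in the original source): a dense
    //10,000 x 1,000 matrix is roughly 1e7 cells * 8 bytes = ~76MB on disk, so
    //getHDFSReadTime estimates ~76/150 = 0.51s and getHDFSWriteTime ~76/120 = 0.64s
    //(exact sizes come from MatrixBlock.estimateSizeOnDisk, incl. block headers).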
    /////////////////////
    // Operation Costs //
    /////////////////////

    private double getInstTimeEstimate(String opcode, VarStats[] vs, String[] args, ExecType et)
        throws DMLRuntimeException
    {
        boolean inMR = (et == ExecType.MR);
        return getInstTimeEstimate(opcode, inMR,
            vs[0]._rlen, vs[0]._clen, (vs[0]._nnz<0)? 1.0 : (double)vs[0]._nnz/vs[0]._rlen/vs[0]._clen,
            vs[1]._rlen, vs[1]._clen, (vs[1]._nnz<0)? 1.0 : (double)vs[1]._nnz/vs[1]._rlen/vs[1]._clen,
            vs[2]._rlen, vs[2]._clen, (vs[2]._nnz<0)? 1.0 : (double)vs[2]._nnz/vs[2]._rlen/vs[2]._clen,
            args);
    }

    /**
     * Returns the estimated instruction execution time, w/o data transfer and single-threaded.
     * For scalars, input dims must be set to 1 before invocation.
     *
     * NOTE: Does not handle unknowns.
     *
     * @param opcode instruction opcode
     * @param inMR whether the instruction executes in an MR job
     * @param d1m rows of input 1
     * @param d1n columns of input 1
     * @param d1s sparsity of input 1
     * @param d2m rows of input 2
     * @param d2n columns of input 2
     * @param d2s sparsity of input 2
     * @param d3m rows of input 3 / output
     * @param d3n columns of input 3 / output
     * @param d3s sparsity of input 3 / output
     * @param args optional instruction-specific arguments
     * @return estimated instruction execution time
     * @throws DMLRuntimeException if DMLRuntimeException occurs
     */
    private double getInstTimeEstimate( String opcode, boolean inMR,
        long d1m, long d1n, double d1s,
        long d2m, long d2n, double d2s,
        long d3m, long d3n, double d3s, String[] args )
        throws DMLRuntimeException
    {
        double nflops = getNFLOP(opcode, inMR, d1m, d1n, d1s, d2m, d2n, d2s, d3m, d3n, d3s, args);
        double time = nflops / DEFAULT_FLOPS;

        if( LOG.isDebugEnabled() )
            LOG.debug("Cost["+opcode+"] = "+time+"s, "+nflops+" flops ("
                +d1m+","+d1n+","+d1s+","+d2m+","+d2n+","+d2s+","+d3m+","+d3n+","+d3s+").");

        return time;
    }

    private double getNFLOP( String optype, boolean inMR,
        long d1m, long d1n, double d1s,
        long d2m, long d2n, double d2s,
        long d3m, long d3n, double d3s, String[] args )
        throws DMLRuntimeException
    {
        //operation costs in FLOP on matrix block level (for CP and MR instructions)
        //(excludes IO and parallelism; assumes known dims for all inputs, outputs)

        boolean leftSparse = MatrixBlock.evalSparseFormatInMemory(d1m, d1n, (long)(d1s*d1m*d1n));
        boolean rightSparse = MatrixBlock.evalSparseFormatInMemory(d2m, d2n, (long)(d2s*d2m*d2n));
        boolean onlyLeft = (d1m>=0 && d1n>=0 && d2m<0 && d2n<0 );
        boolean allExists = (d1m>=0 && d1n>=0 && d2m>=0 && d2n>=0 && d3m>=0 && d3n>=0 );

        //NOTE: all instruction types that are equivalent in CP and MR are only
        //included in CP to prevent redundancy
        CPINSTRUCTION_TYPE cptype = CPInstructionParser.String2CPInstructionType.get(optype);
        if( cptype != null ) //for CP ops and equivalent MR ops
        {
            //general approach: count of floating point *, /, +, -, ^, builtin
            switch(cptype)
            {
                case AggregateBinary: //opcodes: ba+*, cov
                    if( optype.equals("ba+*") ) { //matrix mult
                        //reduction by factor 2 because matrix mult better than
                        //average flop count
                        if( !leftSparse && !rightSparse )
                            return 2 * (d1m * d1n * ((d2n>1)?d1s:1.0) * d2n) /2;
                        else if( !leftSparse && rightSparse )
                            return 2 * (d1m * d1n * d1s * d2n * d2s) /2;
                        else if( leftSparse && !rightSparse )
                            return 2 * (d1m * d1n * d1s * d2n) /2;
                        else //leftSparse && rightSparse
                            return 2 * (d1m * d1n * d1s * d2n * d2s) /2;
                    }
                    else if( optype.equals("cov") ) {
                        //note: output always scalar, d3 used as weights block
                        //if( allExists ), same runtime for 2 and 3 inputs
                        return 23 * d1m; //(11+3*k+)
                    }
                    return 0;

                case MMChain:
                    //reduction by factor 2 because matrix mult better than average flop count
                    //(mmchain essentially two matrix-vector multiplications)
                    if( !leftSparse )
                        return (2+2) * (d1m * d1n) /2;
                    else
                        return (2+2) * (d1m * d1n * d1s) /2;

                case AggregateTernary: //opcodes: tak+*
                    return 6 * d1m * d1n; //2*1(*) + 4 (k+)
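                //Illustrative (added, not in the original source): for a dense
                //1000x1000 %*% 1000x1000 matrix multiply, the ba+* formula above
                //yields 2*(1e3*1e3*1e3)/2 = 1e9 FLOP, i.e., ~0.5s at the assumed 2 GFLOPS.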
                case AggregateUnary: //opcodes: uak+, uark+, uack+, uasqk+, uarsqk+, uacsqk+,
                    // uamean, uarmean, uacmean, uavar, uarvar, uacvar,
                    // uamax, uarmax, uarimax, uacmax, uamin, uarmin, uacmin,
                    // ua+, uar+, uac+, ua*, uatrace, uaktrace,
                    // nrow, ncol, length, cm
                    if( optype.equals("nrow") || optype.equals("ncol") || optype.equals("length") )
                        return DEFAULT_NFLOP_NOOP;
                    else if( optype.equals( "cm" ) ) {
                        double xcm = 1;
                        switch( Integer.parseInt(args[0]) ) {
                            case 0: xcm=1; break; //count
                            case 1: xcm=8; break; //mean
                            case 2: xcm=16; break; //cm2
                            case 3: xcm=31; break; //cm3
                            case 4: xcm=51; break; //cm4
                            case 5: xcm=16; break; //variance
                        }
                        return (leftSparse) ? xcm * (d1m * d1s + 1) : xcm * d1m;
                    }
                    else if( optype.equals("uatrace") || optype.equals("uaktrace") )
                        return 2 * d1m * d1n;
                    else if( optype.equals("ua+") || optype.equals("uar+") || optype.equals("uac+") ) {
                        //sparse safe operations
                        if( !leftSparse ) //dense
                            return d1m * d1n;
                        else //sparse
                            return d1m * d1n * d1s;
                    }
                    else if( optype.equals("uak+") || optype.equals("uark+") || optype.equals("uack+") )
                        return 4 * d1m * d1n; //1*k+
                    else if( optype.equals("uasqk+") || optype.equals("uarsqk+") || optype.equals("uacsqk+") )
                        return 5 * d1m * d1n; //+1 for multiplication to square term
                    else if( optype.equals("uamean") || optype.equals("uarmean") || optype.equals("uacmean") )
                        return 7 * d1m * d1n; //1*k+
                    else if( optype.equals("uavar") || optype.equals("uarvar") || optype.equals("uacvar") )
                        return 14 * d1m * d1n;
                    else if( optype.equals("uamax") || optype.equals("uarmax") || optype.equals("uacmax")
                        || optype.equals("uamin") || optype.equals("uarmin") || optype.equals("uacmin")
                        || optype.equals("uarimax") || optype.equals("ua*") )
                        return d1m * d1n;
                    return 0;

                case ArithmeticBinary: //opcodes: +, -, *, /, ^ (incl. ^2, *2)
                    //note: covers scalar-scalar, scalar-matrix, matrix-matrix
                    if( optype.equals("+") || optype.equals("-") //sparse safe
                        && ( leftSparse || rightSparse ) )
                        return d1m*d1n*d1s + d2m*d2n*d2s;
                    else
                        return d3m*d3n;

                case Ternary: //opcodes: ctable
                    if( optype.equals("ctable") ) {
                        if( leftSparse )
                            return d1m * d1n * d1s; //add
                        else
                            return d1m * d1n;
                    }
                    return 0;

                case BooleanBinary: //opcodes: &&, ||
                    return 1; //always scalar-scalar

                case BooleanUnary: //opcodes: !
                    return 1; //always scalar-scalar

                case Builtin: //opcodes: log
                    //note: covers scalar-scalar, scalar-matrix, matrix-matrix
                    //note: can be unary or binary
                    if( allExists ) //binary
                        return 3 * d3m * d3n;
                    else //unary
                        return d3m * d3n;

                case BuiltinBinary: //opcodes: max, min, solve
                    //note: covers scalar-scalar, scalar-matrix, matrix-matrix
                    if( optype.equals("solve") ) //see also MultiReturnBuiltin
                        return d1m * d1n * d1n; //for 1kx1k ~ 1GFLOP -> 0.5s
                    else //default
                        return d3m * d3n;

                case BuiltinUnary: //opcodes: exp, abs, sin, cos, tan, sign, sqrt, plogp, print, round, sprop, sigmoid
                    //TODO add cost functions for commons math builtins: inverse, cholesky
                    if( optype.equals("print") ) //scalar only
                        return 1;
                    else {
                        double xbu = 1; //default for all ops
                        if( optype.equals("plogp") )
                            xbu = 2;
                        else if( optype.equals("round") )
                            xbu = 4;

                        if( optype.equals("sin") || optype.equals("tan") || optype.equals("round")
                            || optype.equals("abs") || optype.equals("sqrt") || optype.equals("sprop")
                            || optype.equals("sigmoid") || optype.equals("sign") ) //sparse-safe
                        {
                            if( leftSparse ) //sparse
                                return xbu * d1m * d1n * d1s;
                            else //dense
                                return xbu * d1m * d1n;
                        }
                        else
                            return xbu * d1m * d1n;
                    }

                case Reorg: //opcodes: r', rdiag
                case MatrixReshape: //opcodes: rshape
                    if( leftSparse )
                        return d1m * d1n * d1s;
                    else
                        return d1m * d1n;
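                //Illustrative (added, not in the original source): sparse-safe ops
                //scale with nnz; a 1e6 x 1e3 matrix at sparsity 0.01 costs ~1e7 FLOP
                //for sqrt() under the BuiltinUnary model above, instead of the 1e9
                //FLOP of its dense counterpart.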
                case Append: //opcodes: append
                    return DEFAULT_NFLOP_CP *
                        (((leftSparse) ? d1m * d1n * d1s : d1m * d1n )
                        + ((rightSparse) ? d2m * d2n * d2s : d2m * d2n ));

                case RelationalBinary: //opcodes: ==, !=, <, >, <=, >=
                    //note: all relational ops are not sparse-safe
                    return d3m * d3n; //covers all combinations of scalar and matrix

                case File: //opcodes: rm, mv
                    return DEFAULT_NFLOP_NOOP;

                case Variable: //opcodes: assignvar, cpvar, rmvar, rmfilevar, assignvarwithfile,
                    // attachfiletovar, valuepick, iqsize, read, write, createvar, setfilename, castAsMatrix
                    if( optype.equals("write") ) {
                        boolean text = args[0].equals("textcell") || args[0].equals("csv");
                        double xwrite = text ? DEFAULT_NFLOP_TEXT_IO : DEFAULT_NFLOP_CP;

                        if( !leftSparse )
                            return d1m * d1n * xwrite;
                        else
                            return d1m * d1n * d1s * xwrite;
                    }
                    else if( optype.equals("inmem-iqm") )
                        //note: assumes uniform distribution
                        return 2 * d1m //sum of weights
                            + 5 + 0.25d * d1m //scan to lower quantile
                            + 8 * 0.5 * d1m; //scan from lower to upper quantile
                    else
                        return DEFAULT_NFLOP_NOOP;

                case Rand: //opcodes: rand, seq
                    if( optype.equals(DataGen.RAND_OPCODE) ) {
                        int nflopRand = 32; //per random number
                        switch( Integer.parseInt(args[0]) ) {
                            case 0: return DEFAULT_NFLOP_NOOP; //empty matrix
                            case 1: return d3m * d3n * 8; //allocate, arrayfill
                            case 2: { //full rand
                                if( d3s==1.0 )
                                    return d3m * d3n * nflopRand + d3m * d3n * 8; //DENSE gen (incl allocate)
                                else
                                    return (d3s>=MatrixBlock.SPARSITY_TURN_POINT) ?
                                        2 * d3m * d3n * nflopRand + d3m * d3n * 8 : //DENSE gen (incl allocate)
                                        3 * d3m * d3n * d3s * nflopRand + d3m * d3n * d3s * 24; //SPARSE gen (incl allocate)
                            }
                        }
                    }
                    else //seq
                        return d3m * d3n * DEFAULT_NFLOP_CP;

                case StringInit: //sinit
                    return d3m * d3n * DEFAULT_NFLOP_CP;

                case External: //opcodes: extfunct
                    //note: should be invoked independently for multiple outputs
                    return d1m * d1n * d1s * DEFAULT_NFLOP_UNKNOWN;

                case MultiReturnBuiltin: //opcodes: qr, lu, eigen
                    //note: they all have cubic complexity, the scaling factor refers to commons.math
                    double xf = 2; //default, e.g., qr
                    if( optype.equals("eigen") )
                        xf = 32;
                    else if ( optype.equals("lu") )
                        xf = 16;
                    return xf * d1m * d1n * d1n; //for 1kx1k ~ 2GFLOP -> 1s

                case ParameterizedBuiltin: //opcodes: cdf, invcdf, groupedagg, rmempty
                    if( optype.equals("cdf") || optype.equals("invcdf") )
                        return DEFAULT_NFLOP_UNKNOWN; //scalar call to commons.math
                    else if( optype.equals("groupedagg") ) {
                        double xga = 1;
                        switch( Integer.parseInt(args[0]) ) {
                            case 0: xga=4; break; //sum, see uk+
                            case 1: xga=1; break; //count, see cm
                            case 2: xga=8; break; //mean
                            case 3: xga=16; break; //cm2
                            case 4: xga=31; break; //cm3
                            case 5: xga=51; break; //cm4
                            case 6: xga=16; break; //variance
                        }
                        return 2 * d1m + xga * d1m; //scan for min/max, groupedagg
                    }
                    else if( optype.equals("rmempty") ) {
                        switch( Integer.parseInt(args[0]) ) {
                            case 0: //remove rows
                                return ((leftSparse) ? d1m : d1m * Math.ceil(1.0d/d1s)/2)
                                    + DEFAULT_NFLOP_CP * d3m * d2m;
                            case 1: //remove cols
                                return d1n * Math.ceil(1.0d/d1s)/2
                                    + DEFAULT_NFLOP_CP * d3m * d2m;
                        }
                    }
                    return 0;

                case QSort: //opcodes: sort
                    if( optype.equals("sort") ) {
                        //note: mergesort since comparator used
                        double sortCosts = 0;
                        if( onlyLeft )
                            sortCosts = DEFAULT_NFLOP_CP * d1m + d1m;
                        else //w/ weights
                            sortCosts = DEFAULT_NFLOP_CP * ((leftSparse) ? d1m*d1s : d1m);
                        return sortCosts + d1m*(int)(Math.log(d1m)/Math.log(2)) //mergesort
                            + DEFAULT_NFLOP_CP * d1m;
                    }
                    return 0;
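                //Illustrative (added, not in the original source): sorting a
                //1e6-element column via the mergesort model above costs
                //~1e6*log2(1e6) = 2e7 comparison FLOP plus the linear terms,
                //i.e., ~0.01s at 2 GFLOPS.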
                case MatrixIndexing: //opcodes: rangeReIndex, leftIndex
                    if( optype.equals("leftIndex") ) {
                        return DEFAULT_NFLOP_CP * ((leftSparse) ? d1m*d1n*d1s : d1m*d1n)
                            + 2 * DEFAULT_NFLOP_CP * ((rightSparse) ? d2m*d2n*d2s : d2m*d2n );
                    }
                    else if( optype.equals("rangeReIndex") ) {
                        return DEFAULT_NFLOP_CP * ((leftSparse) ? d2m*d2n*d2s : d2m*d2n );
                    }
                    return 0;

                case MMTSJ: //opcodes: tsmm
                    //difference to ba+*: only the upper triangular matrix is computed
                    //reduction by factor 2 because matrix mult better than
                    //average flop count
                    if( MMTSJType.valueOf(args[0]).isLeft() ) { //lefttranspose
                        if( !rightSparse ) //dense
                            return d1m * d1n * d1s * d1n /2;
                        else //sparse
                            return d1m * d1n * d1s * d1n * d1s /2;
                    }
                    else if( onlyLeft ) { //righttranspose
                        if( !leftSparse ) //dense
                            return (double)d1m * d1n * d1m /2;
                        else //sparse
                            return d1m * d1n * d1s //reorg sparse
                                + d1m * d1n * d1s * d1n * d1s /2; //core tsmm
                    }
                    return 0;

                case Partition:
                    return d1m * d1n * d1s + //partitioning costs
                        (inMR ? 0 : //include write cost if in CP
                            getHDFSWriteTime(d1m, d1n, d1s) * DEFAULT_FLOPS);

                case INVALID:
                    return 0;

                default:
                    throw new DMLRuntimeException("CostEstimator: unsupported instruction type: "+optype);
            }
        }

        //if not found in CP instructions
        MRINSTRUCTION_TYPE mrtype = MRInstructionParser.String2MRInstructionType.get(optype);
        if ( mrtype != null ) //for specific MR ops
        {
            switch(mrtype)
            {
                case Aggregate: //opcodes: a+, ak+, asqk+, a*, amax, amin, amean
                    //TODO should be aggregate unary
                    int numMap = Integer.parseInt(args[0]);
                    if( optype.equals("ak+") )
                        return 4 * numMap * d1m * d1n * d1s;
                    else if( optype.equals("asqk+") )
                        return 5 * numMap * d1m * d1n * d1s; //+1 for multiplication to square term
                    else if( optype.equals("avar") )
                        return 14 * numMap * d1m * d1n * d1s;
                    else
                        return numMap * d1m * d1n * d1s;

                case AggregateBinary: //opcodes: cpmm, rmm, mapmult
                    //note: copy from CP costs
                    if( optype.equals("cpmm") || optype.equals("rmm")
                        || optype.equals(MapMult.OPCODE) ) //matrix mult
                    {
                        //reduction by factor 2 because matrix mult better than
                        //average flop count
                        if( !leftSparse && !rightSparse )
                            return 2 * (d1m * d1n * ((d2n>1)?d1s:1.0) * d2n) /2;
                        else if( !leftSparse && rightSparse )
                            return 2 * (d1m * d1n * d1s * d2n * d2s) /2;
                        else if( leftSparse && !rightSparse )
                            return 2 * (d1m * d1n * d1s * d2n) /2;
                        else //leftSparse && rightSparse
                            return 2 * (d1m * d1n * d1s * d2n * d2s) /2;
                    }
                    return 0;

                case MapMultChain: //opcodes: mapmultchain
                    //assume dense input2 and input3
                    return 2 * d1m * d2n * d1n * ((d2n>1)?d1s:1.0) //ba(+*)
                        + d1m * d2n //cellwise b(*)
                        + d1m * d2n //r(t)
                        + 2 * d2n * d1n * d1m * (leftSparse?d1s:1.0) //ba(+*)
                        + d2n * d1n; //r(t)
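                //Illustrative (added, not in the original source): for a mapmultchain
                //pattern such as t(X)%*%(X%*%v) with dense X of 1e6 x 100 and a vector
                //v (d2n=1), the two matrix-vector products dominate:
                //2*1e8 + 2*1e8 = ~4e8 FLOP, i.e., ~0.2s at 2 GFLOPS.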
                case ArithmeticBinary: //opcodes: s-r, so, max, min,
                    // >, >=, <, <=, ==, !=
                    //TODO should be relational
                    //note: all relational ops are not sparse-safe
                    return d3m * d3n; //covers all combinations of scalar and matrix

                case CombineUnary: //opcodes: combineunary
                    return d1m * d1n * d1s;

                case CombineBinary: //opcodes: combinebinary
                    return d1m * d1n * d1s + d2m * d2n * d2s;

                case CombineTernary: //opcodes: combinetertiary
                    return d1m * d1n * d1s + d2m * d2n * d2s + d3m * d3n * d3s;

                case Unary: //opcodes: log, slog, pow
                    //TODO requires opcode consolidation (builtin, arithmetic)
                    //note: covers scalar, matrix, matrix-scalar
                    return d3m * d3n;

                case Ternary: //opcodes: ctabletransform, ctabletransformscalarweight,
                    // ctabletransformhistogram, ctabletransformweightedhistogram
                    //note: copy from cp
                    if( leftSparse )
                        return d1m * d1n * d1s; //add
                    else
                        return d1m * d1n;

                case Quaternary:
                    //TODO pattern-specific costs; requires all 4 inputs
                    return d1m * d1n * d1s * 4;

                case Reblock: //opcodes: rblk
                    return DEFAULT_NFLOP_CP * ((leftSparse) ? d1m*d1n*d1s : d1m*d1n);

                case Replicate: //opcodes: rep
                    return DEFAULT_NFLOP_CP * ((leftSparse) ? d1m*d1n*d1s : d1m*d1n);

                case CM_N_COV: //opcodes: mean
                    double xcm = 8;
                    return (leftSparse) ? xcm * (d1m * d1s + 1) : xcm * d1m;

                case GroupedAggregate: //opcodes: groupedagg
                    //TODO: need to consolidate categories (ParameterizedBuiltin)
                    //copy from CP operation
                    double xga = 1;
                    switch( Integer.parseInt(args[0]) ) {
                        case 0: xga=4; break; //sum, see uk+
                        case 1: xga=1; break; //count, see cm
                        case 2: xga=8; break; //mean
                        case 3: xga=16; break; //cm2
                        case 4: xga=31; break; //cm3
                        case 5: xga=51; break; //cm4
                        case 6: xga=16; break; //variance
                    }
                    return 2 * d1m + xga * d1m; //scan for min/max, groupedagg

                case PickByCount: //opcodes: valuepick, rangepick
                    break; //TODO
                    //String2MRInstructionType.put( "valuepick" , MRINSTRUCTION_TYPE.PickByCount); // for quantile()
                    //String2MRInstructionType.put( "rangepick" , MRINSTRUCTION_TYPE.PickByCount); // for interQuantile()

                case RangeReIndex: //opcodes: rangeReIndex, rangeReIndexForLeft
                    //TODO: requires category consolidation
                    if( optype.equals("rangeReIndex") )
                        return DEFAULT_NFLOP_CP * ((leftSparse) ? d2m*d2n*d2s : d2m*d2n );
                    else //rangeReIndexForLeft
                        return DEFAULT_NFLOP_CP * ((leftSparse) ? d1m*d1n*d1s : d1m*d1n)
                            + DEFAULT_NFLOP_CP * ((rightSparse) ? d2m*d2n*d2s : d2m*d2n );

                case ZeroOut: //opcodes: zeroOut
                    return DEFAULT_NFLOP_CP * ((leftSparse) ? d1m*d1n*d1s : d1m*d1n)
                        + DEFAULT_NFLOP_CP * ((rightSparse) ? d2m*d2n*d2s : d2m*d2n );

                default:
                    return 0;
            }
        }
        else {
            throw new DMLRuntimeException("CostEstimator: unsupported instruction type: "+optype);
        }

        //TODO Parameterized Builtin Functions
        //String2CPFileInstructionType.put( "rmempty" , CPINSTRUCTION_TYPE.ParameterizedBuiltin);

        return -1; //should never come here
    }
}