/**
* (C) Copyright IBM Corp. 2010, 2015
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.ibm.bi.dml.utils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import com.ibm.bi.dml.api.DMLException;
import com.ibm.bi.dml.hops.FunctionOp;
import com.ibm.bi.dml.hops.Hop;
import com.ibm.bi.dml.hops.Hop.VisitStatus;
import com.ibm.bi.dml.hops.HopsException;
import com.ibm.bi.dml.hops.LiteralOp;
import com.ibm.bi.dml.hops.OptimizerUtils;
import com.ibm.bi.dml.hops.globalopt.gdfgraph.GDFLoopNode;
import com.ibm.bi.dml.hops.globalopt.gdfgraph.GDFNode;
import com.ibm.bi.dml.hops.globalopt.gdfgraph.GDFNode.NodeType;
import com.ibm.bi.dml.lops.Lop;
import com.ibm.bi.dml.parser.DMLProgram;
import com.ibm.bi.dml.parser.ForStatement;
import com.ibm.bi.dml.parser.ExternalFunctionStatement;
import com.ibm.bi.dml.parser.ForStatementBlock;
import com.ibm.bi.dml.parser.FunctionStatement;
import com.ibm.bi.dml.parser.FunctionStatementBlock;
import com.ibm.bi.dml.parser.IfStatement;
import com.ibm.bi.dml.parser.IfStatementBlock;
import com.ibm.bi.dml.parser.ParForStatementBlock;
import com.ibm.bi.dml.parser.WhileStatement;
import com.ibm.bi.dml.parser.WhileStatementBlock;
import com.ibm.bi.dml.parser.LanguageException;
import com.ibm.bi.dml.parser.StatementBlock;
import com.ibm.bi.dml.runtime.DMLRuntimeException;
import com.ibm.bi.dml.runtime.controlprogram.ExternalFunctionProgramBlock;
import com.ibm.bi.dml.runtime.controlprogram.ForProgramBlock;
import com.ibm.bi.dml.runtime.controlprogram.FunctionProgramBlock;
import com.ibm.bi.dml.runtime.controlprogram.IfProgramBlock;
import com.ibm.bi.dml.runtime.controlprogram.ParForProgramBlock;
import com.ibm.bi.dml.runtime.controlprogram.Program;
import com.ibm.bi.dml.runtime.controlprogram.ProgramBlock;
import com.ibm.bi.dml.runtime.controlprogram.WhileProgramBlock;
import com.ibm.bi.dml.runtime.controlprogram.context.SparkExecutionContext;
import com.ibm.bi.dml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import com.ibm.bi.dml.runtime.instructions.Instruction;
import com.ibm.bi.dml.runtime.instructions.MRJobInstruction;
import com.ibm.bi.dml.runtime.instructions.cp.CPInstruction;
import com.ibm.bi.dml.runtime.instructions.spark.CSVReblockSPInstruction;
import com.ibm.bi.dml.runtime.instructions.spark.ReblockSPInstruction;
import com.ibm.bi.dml.runtime.instructions.spark.SPInstruction;
import com.ibm.bi.dml.yarn.ropt.YarnClusterAnalyzer;
public class Explain
{
//internal configuration parameters
private static final boolean REPLACE_SPECIAL_CHARACTERS = true;
private static final boolean SHOW_MEM_ABOVE_BUDGET = true;
private static final boolean SHOW_LITERAL_HOPS = false;
private static final boolean SHOW_DATA_DEPENDENCIES = true;
private static final boolean SHOW_DATA_FLOW_PROPERTIES = true;
//different explain levels
public enum ExplainType {
NONE, // explain disabled
HOPS, // explain program and hops
RUNTIME, // explain runtime program (default)
RECOMPILE_HOPS, // explain hops, incl recompile
RECOMPILE_RUNTIME, // explain runtime program, incl recompile
};
public static class ExplainCounts {
public int numCPInst = 0;
public int numJobs = 0;
public int numReblocks = 0;
}
//////////////
// public explain interface
/**
*
* @return
*/
public static String explainMemoryBudget() {
return explainMemoryBudget(new ExplainCounts());
}
/**
*
* @return
*/
public static String explainMemoryBudget(ExplainCounts counts)
{
StringBuilder sb = new StringBuilder();
sb.append( "# Memory Budget local/remote = " );
sb.append( OptimizerUtils.toMB(OptimizerUtils.getLocalMemBudget()) );
sb.append( "MB/" );
if( OptimizerUtils.isSparkExecutionMode() )
{
if( counts.numJobs-counts.numReblocks == 0 )
{
//avoid unnecessary lazy spark context creation on access to memory configurations
sb.append( "?MB/?MB" );
}
else //default
{
sb.append( OptimizerUtils.toMB(SparkExecutionContext.getConfiguredTotalDataMemory()) );
sb.append( "MB/" );
sb.append( OptimizerUtils.toMB(SparkExecutionContext.getBroadcastMemoryBudget()) );
sb.append( "MB" );
}
}
else
{
sb.append( OptimizerUtils.toMB(OptimizerUtils.getRemoteMemBudgetMap()) );
sb.append( "MB/" );
sb.append( OptimizerUtils.toMB(OptimizerUtils.getRemoteMemBudgetReduce()) );
sb.append( "MB" );
}
return sb.toString();
}
/**
*
* @return
*/
public static String explainDegreeOfParallelism()
{
return explainDegreeOfParallelism(new ExplainCounts());
}
/**
*
* @return
*/
public static String explainDegreeOfParallelism(ExplainCounts counts)
{
int lk = InfrastructureAnalyzer.getLocalParallelism();
StringBuilder sb = new StringBuilder();
sb.append( "# Degree of Parallelism (vcores) local/remote = " );
sb.append( lk );
sb.append( "/" );
if( OptimizerUtils.isSparkExecutionMode() ) //SP
{
if( counts.numJobs-counts.numReblocks == 0 )
{
//avoid unnecessary lazy spark context creation on access to memory configurations
sb.append( "?" );
}
else //default
{
int rk = SparkExecutionContext.getDefaultParallelism();
sb.append( rk );
}
}
else //MR
{
int rk = InfrastructureAnalyzer.getRemoteParallelMapTasks();
int rk2 = InfrastructureAnalyzer.getRemoteParallelReduceTasks();
//correction max number of mappers/reducers on yarn clusters
if( InfrastructureAnalyzer.isYarnEnabled() ){
rk = (int)Math.max(rk, YarnClusterAnalyzer.getNumCores());
rk2 = (int)Math.max(rk2, YarnClusterAnalyzer.getNumCores()/2);
}
sb.append( rk );
sb.append( "/" );
sb.append( rk2 );
}
return sb.toString();
}
/**
*
* @param prog
* @param rtprog
* @param type
* @return
* @throws LanguageException
* @throws DMLRuntimeException
* @throws HopsException
*/
public static String explain(DMLProgram prog, Program rtprog, ExplainType type)
throws HopsException, DMLRuntimeException, LanguageException
{
//dispatch to individual explain utils
switch( type ) {
//explain hops with stats
case HOPS:
case RECOMPILE_HOPS:
return explain(prog);
//explain runtime program
case RUNTIME:
case RECOMPILE_RUNTIME:
return explain(rtprog);
case NONE:
//do nothing
}
return null;
}
/**
*
* @param dmlp
* @return
* @throws LanguageException
* @throws HopsException
* @throws DMLRuntimeException
*/
public static String explain(DMLProgram prog)
throws HopsException, DMLRuntimeException, LanguageException
{
StringBuilder sb = new StringBuilder();
//create header
sb.append("\nPROGRAM\n");
// Explain functions (if exists)
boolean firstFunction = true;
for (String namespace : prog.getNamespaces().keySet()){
for (String fname : prog.getFunctionStatementBlocks(namespace).keySet()){
if (firstFunction) {
sb.append("--FUNCTIONS\n");
firstFunction = false;
}
//show function call dag
sb.append("----FUNCTION CALL DAG\n");
sb.append("------MAIN PROGRAM\n");
HashSet<String> fstack = new HashSet<String>();
HashSet<String> lfset = new HashSet<String>();
for( StatementBlock sblk : prog.getStatementBlocks() )
sb.append(explainFunctionCallDag(sblk, fstack, lfset, 3));
//show individual functions
FunctionStatementBlock fsb = prog.getFunctionStatementBlock(namespace, fname);
FunctionStatement fstmt = (FunctionStatement) fsb.getStatement(0);
if (fstmt instanceof ExternalFunctionStatement)
sb.append("----EXTERNAL FUNCTION " + namespace + "::" + fname + "\n");
else {
sb.append("----FUNCTION " + namespace + "::" + fname + " [recompile="+fsb.isRecompileOnce()+"]\n");
for (StatementBlock current : fstmt.getBody())
sb.append(explainStatementBlock(current, 3));
}
}
}
// Explain main program
sb.append("--MAIN PROGRAM\n");
for( StatementBlock sblk : prog.getStatementBlocks() )
sb.append(explainStatementBlock(sblk, 2));
return sb.toString();
}
/**
*
* @param rtprog
* @return
* @throws HopsException
*/
public static String explain( Program rtprog )
throws HopsException
{
//counts number of instructions
boolean sparkExec = OptimizerUtils.isSparkExecutionMode();
ExplainCounts counts = new ExplainCounts();
countCompiledInstructions(rtprog, counts, !sparkExec, true, sparkExec);
StringBuilder sb = new StringBuilder();
//create header
sb.append("\nPROGRAM ( size CP/"+(sparkExec?"SP":"MR")+" = ");
sb.append(counts.numCPInst);
sb.append("/");
sb.append(counts.numJobs);
sb.append(" )\n");
//explain functions (if exists)
Map<String, FunctionProgramBlock> funcMap = rtprog.getFunctionProgramBlocks();
if( funcMap != null && !funcMap.isEmpty() )
{
sb.append("--FUNCTIONS\n");
//show function call dag
if( !rtprog.getProgramBlocks().isEmpty() &&
rtprog.getProgramBlocks().get(0).getStatementBlock() != null )
{
sb.append("----FUNCTION CALL DAG\n");
sb.append("------MAIN PROGRAM\n");
DMLProgram prog = rtprog.getProgramBlocks().get(0).getStatementBlock().getDMLProg();
HashSet<String> fstack = new HashSet<String>();
HashSet<String> lfset = new HashSet<String>();
for( StatementBlock sblk : prog.getStatementBlocks() )
sb.append(explainFunctionCallDag(sblk, fstack, lfset, 3));
}
//show individual functions
for( Entry<String, FunctionProgramBlock> e : funcMap.entrySet() )
{
String fkey = e.getKey();
FunctionProgramBlock fpb = e.getValue();
if( fpb instanceof ExternalFunctionProgramBlock )
sb.append("----EXTERNAL FUNCTION "+fkey+"\n");
else
{
sb.append("----FUNCTION "+fkey+" [recompile="+fpb.isRecompileOnce()+"]\n");
for( ProgramBlock pb : fpb.getChildBlocks() )
sb.append( explainProgramBlock(pb,3) );
}
}
}
//explain main program
sb.append("--MAIN PROGRAM\n");
for( ProgramBlock pb : rtprog.getProgramBlocks() )
sb.append( explainProgramBlock(pb,2) );
return sb.toString();
}
/**
*
* @param pb
* @return
*/
public static String explain( ProgramBlock pb )
{
return explainProgramBlock(pb, 0);
}
/**
*
* @param inst
* @return
*/
public static String explain( ArrayList<Instruction> inst )
{
return explainInstructions(inst, 0);
}
/**
*
* @param inst
* @param level
* @return
*/
public static String explain( ArrayList<Instruction> inst, int level )
{
return explainInstructions(inst, level);
}
/**
*
* @param inst
* @return
*/
public static String explain( Instruction inst )
{
return explainGenericInstruction(inst, 0);
}
/**
*
* @param sb
* @return
* @throws DMLRuntimeException
* @throws HopsException
*/
public static String explain( StatementBlock sb )
throws HopsException, DMLRuntimeException
{
return explainStatementBlock(sb, 0);
}
/**
*
* @param hops
* @return
* @throws DMLRuntimeException
*/
public static String explainHops( ArrayList<Hop> hops )
throws DMLRuntimeException
{
return explainHops(hops, 0);
}
/**
*
* @param hops
* @param level
* @return
* @throws DMLRuntimeException
*/
public static String explainHops( ArrayList<Hop> hops, int level )
throws DMLRuntimeException
{
StringBuilder sb = new StringBuilder();
Hop.resetVisitStatus(hops);
for( Hop hop : hops )
sb.append(explainHop(hop, level));
Hop.resetVisitStatus(hops);
return sb.toString();
}
/**
*
* @param hop
* @return
* @throws DMLRuntimeException
*/
public static String explain( Hop hop )
throws DMLRuntimeException
{
return explain(hop, 0);
}
/**
*
* @param hop
* @return
* @throws DMLRuntimeException
*/
public static String explain( Hop hop, int level )
throws DMLRuntimeException
{
hop.resetVisitStatus();
String ret = explainHop(hop, level);
hop.resetVisitStatus();
return ret;
}
/**
*
* @param gdfnodes
* @return
* @throws DMLRuntimeException
*/
public static String explainGDFNodes( ArrayList<GDFNode> gdfnodes )
throws DMLRuntimeException
{
return explainGDFNodes(gdfnodes, 0);
}
/**
*
* @param gdfnodes
* @param level
* @return
* @throws DMLRuntimeException
*/
public static String explainGDFNodes( ArrayList<GDFNode> gdfnodes, int level )
throws DMLRuntimeException
{
StringBuilder sb = new StringBuilder();
HashSet<Long> memo = new HashSet<Long>();
for( GDFNode gnode : gdfnodes )
sb.append(explainGDFNode(gnode, level, memo));
return sb.toString();
}
/**
* Counts the number of compiled MRJob/Spark instructions in the
* given runtime program.
*
* @param rtprog
* @return
*/
public static ExplainCounts countDistributedOperations( Program rtprog )
{
ExplainCounts counts = new ExplainCounts();
if( OptimizerUtils.isSparkExecutionMode() )
Explain.countCompiledInstructions(rtprog, counts, false, true, true);
else
Explain.countCompiledInstructions(rtprog, counts, true, true, false);
return counts;
}
/**
*
* @param arg
* @return
* @throws DMLException
*/
public static ExplainType parseExplainType( String arg )
throws DMLException
{
ExplainType ret = ExplainType.NONE;
if( arg !=null )
{
if( arg.equalsIgnoreCase("hops") )
ret = ExplainType.HOPS;
else if( arg.equalsIgnoreCase("runtime") )
ret = ExplainType.RUNTIME;
else if( arg.equalsIgnoreCase("recompile_hops") )
ret = ExplainType.RECOMPILE_HOPS;
else if( arg.equalsIgnoreCase("recompile_runtime") )
ret = ExplainType.RECOMPILE_RUNTIME;
else
throw new DMLException("Failed to parse explain type: "+arg+" " +
"(valid types: hops, runtime, recompile_hops, recompile_runtime).");
}
return ret;
}
/**
*
* @param level
* @return
*/
public static String getIdentation( int level ) {
return createOffset(level);
}
//////////////
// internal explain HOPS
private static String explainStatementBlock(StatementBlock sb, int level)
throws HopsException, DMLRuntimeException
{
StringBuilder builder = new StringBuilder();
String offset = createOffset(level);
if (sb instanceof WhileStatementBlock) {
WhileStatementBlock wsb = (WhileStatementBlock) sb;
builder.append(offset);
builder.append("WHILE (lines "+wsb.getBeginLine()+"-"+wsb.getEndLine()+")\n");
builder.append(explainHop(wsb.getPredicateHops(), level+1));
WhileStatement ws = (WhileStatement)sb.getStatement(0);
for (StatementBlock current : ws.getBody())
builder.append(explainStatementBlock(current, level+1));
}
else if (sb instanceof IfStatementBlock) {
IfStatementBlock ifsb = (IfStatementBlock) sb;
builder.append(offset);
builder.append("IF (lines "+ifsb.getBeginLine()+"-"+ifsb.getEndLine()+")\n");
builder.append(explainHop(ifsb.getPredicateHops(), level+1));
IfStatement ifs = (IfStatement) sb.getStatement(0);
for (StatementBlock current : ifs.getIfBody())
builder.append(explainStatementBlock(current, level+1));
if( !ifs.getElseBody().isEmpty() ) {
builder.append(offset);
builder.append("ELSE\n");
}
for (StatementBlock current : ifs.getElseBody())
builder.append(explainStatementBlock(current, level+1));
}
else if (sb instanceof ForStatementBlock) {
ForStatementBlock fsb = (ForStatementBlock) sb;
builder.append(offset);
if (sb instanceof ParForStatementBlock)
builder.append("PARFOR (lines "+fsb.getBeginLine()+"-"+fsb.getEndLine()+")\n");
else
builder.append("FOR (lines "+fsb.getBeginLine()+"-"+fsb.getEndLine()+")\n");
if (fsb.getFromHops() != null)
builder.append(explainHop(fsb.getFromHops(), level+1));
if (fsb.getToHops() != null)
builder.append(explainHop(fsb.getToHops(), level+1));
if (fsb.getIncrementHops() != null)
builder.append(explainHop(fsb.getIncrementHops(), level+1));
ForStatement fs = (ForStatement)sb.getStatement(0);
for (StatementBlock current : fs.getBody())
builder.append(explainStatementBlock(current, level+1));
}
else if (sb instanceof FunctionStatementBlock) {
FunctionStatement fsb = (FunctionStatement) sb.getStatement(0);
for (StatementBlock current : fsb.getBody())
builder.append(explainStatementBlock(current, level+1));
}
else {
// For generic StatementBlock
builder.append(offset);
builder.append("GENERIC (lines "+sb.getBeginLine()+"-"+sb.getEndLine()+") [recompile=" + sb.requiresRecompilation() + "]\n");
ArrayList<Hop> hopsDAG = sb.get_hops();
if( hopsDAG != null && !hopsDAG.isEmpty() ) {
Hop.resetVisitStatus(hopsDAG);
for (Hop hop : hopsDAG)
builder.append(explainHop(hop, level+1));
Hop.resetVisitStatus(hopsDAG);
}
}
return builder.toString();
}
/**
* Do a post-order traverse through the HopDag and explain each Hop
*
* @param hop
* @param level
* @return
* @throws DMLRuntimeException
*/
private static String explainHop(Hop hop, int level)
throws DMLRuntimeException
{
if( hop.getVisited() == VisitStatus.DONE
|| (!SHOW_LITERAL_HOPS && hop instanceof LiteralOp) )
{
return "";
}
StringBuilder sb = new StringBuilder();
String offset = createOffset(level);
for( Hop input : hop.getInput() )
sb.append(explainHop(input, level));
//indentation
sb.append(offset);
//hop id
if( SHOW_DATA_DEPENDENCIES )
sb.append("("+hop.getHopID()+") ");
//operation string
sb.append(hop.getOpString());
//input hop references
if( SHOW_DATA_DEPENDENCIES ) {
StringBuilder childs = new StringBuilder();
childs.append(" (");
boolean childAdded = false;
for( Hop input : hop.getInput() )
if( !(input instanceof LiteralOp) ){
childs.append(childAdded?",":"");
childs.append(input.getHopID());
childAdded = true;
}
childs.append(")");
if( childAdded )
sb.append(childs.toString());
}
//matrix characteristics
sb.append(" [" + hop.getDim1() + ","
+ hop.getDim2() + ","
+ hop.getRowsInBlock() + ","
+ hop.getColsInBlock() + ","
+ hop.getNnz() + "]");
//memory estimates
sb.append(" [" + showMem(hop.getInputMemEstimate(), false) + ","
+ showMem(hop.getIntermediateMemEstimate(), false) + ","
+ showMem(hop.getOutputMemEstimate(), false) + " -> "
+ showMem(hop.getMemEstimate(), true) + "]");
//data flow properties
if( SHOW_DATA_FLOW_PROPERTIES ) {
if( hop.requiresReblock() && hop.requiresCheckpoint() )
sb.append(" [rblk,chkpt]");
else if( hop.requiresReblock() )
sb.append(" [rblk]");
else if( hop.requiresCheckpoint() )
sb.append(" [chkpt]");
}
//exec type
if (hop.getExecType() != null)
sb.append(", " + hop.getExecType());
sb.append('\n');
hop.setVisited(VisitStatus.DONE);
return sb.toString();
}
//////////////
// internal explain GDFNODE
/**
* Do a post-order traverse through the GDFNode DAG and explain each GDFNode.
* Note: nodes referring to literalops are suppressed.
*
* @param hop
* @param level
* @return
* @throws DMLRuntimeException
*/
private static String explainGDFNode(GDFNode gnode, int level, HashSet<Long> memo)
throws DMLRuntimeException
{
//basic memoization via memo table since gnode has no visit status
if( memo.contains(gnode.getID()) ||
gnode.getNodeType()==NodeType.HOP_NODE && gnode.getHop() instanceof LiteralOp )
{
return "";
}
StringBuilder sb = new StringBuilder();
String offset = createOffset(level);
for( GDFNode input : gnode.getInputs() )
sb.append(explainGDFNode(input, level, memo));
//indentation
sb.append(offset);
//hop id
String deps = null;
if( SHOW_DATA_DEPENDENCIES ) {
sb.append("("+gnode.getID()+") ");
StringBuilder childs = new StringBuilder();
childs.append(" (");
boolean childAdded = false;
for( GDFNode input : gnode.getInputs() ) {
childs.append(childAdded?",":"");
childs.append(input.getID());
childAdded = true;
}
childs.append(")");
if( childAdded )
deps = childs.toString();
}
//operation string
if( gnode instanceof GDFLoopNode ) //LOOP NODES
{
GDFLoopNode lgnode = (GDFLoopNode) gnode;
String offset2 = createOffset(level+1);
sb.append(lgnode.explain(deps)+"\n"); //loop header
sb.append(offset2+"PRED:\n");
sb.append(explainGDFNode(lgnode.getLoopPredicate(),level+2, memo));
sb.append(offset2+"BODY:\n");
//note: memo table and already done child explain prevents redundancy
for( Entry<String,GDFNode> root : lgnode.getLoopOutputs().entrySet() ) {
sb.append(explainGDFNode(root.getValue(), level+2, memo));
}
}
else //GENERAL CASE (BASIC/CROSSBLOCK NODES)
{
sb.append(gnode.explain(deps));
sb.append('\n');
}
/*
//matrix characteristics
sb.append(" [" + hop.getDim1() + ","
+ hop.getDim2() + ","
+ hop.getRowsInBlock() + ","
+ hop.getColsInBlock() + ","
+ hop.getNnz() + "]");
//memory estimates
sb.append(" [" + showMem(hop.getInputMemEstimate(), false) + ","
+ showMem(hop.getIntermediateMemEstimate(), false) + ","
+ showMem(hop.getOutputMemEstimate(), false) + " -> "
+ showMem(hop.getMemEstimate(), true) + "]");
//exec type
if (hop.getExecType() != null)
sb.append(", " + hop.getExecType());
*/
//memoization
memo.add(gnode.getID());
return sb.toString();
}
//////////////
// internal explain RUNTIME
/**
*
* @param pb
* @param level
* @return
*/
private static String explainProgramBlock( ProgramBlock pb, int level )
{
StringBuilder sb = new StringBuilder();
String offset = createOffset(level);
if (pb instanceof FunctionProgramBlock )
{
FunctionProgramBlock fpb = (FunctionProgramBlock)pb;
for( ProgramBlock pbc : fpb.getChildBlocks() )
sb.append( explainProgramBlock( pbc, level+1) );
}
else if (pb instanceof WhileProgramBlock)
{
WhileProgramBlock wpb = (WhileProgramBlock) pb;
sb.append(offset);
sb.append("WHILE (lines "+wpb.getBeginLine()+"-"+wpb.getEndLine()+")\n");
sb.append(explainInstructions(wpb.getPredicate(), level+1));
for( ProgramBlock pbc : wpb.getChildBlocks() )
sb.append( explainProgramBlock( pbc, level+1) );
}
else if (pb instanceof IfProgramBlock)
{
IfProgramBlock ipb = (IfProgramBlock) pb;
sb.append(offset);
sb.append("IF (lines "+ipb.getBeginLine()+"-"+ipb.getEndLine()+")\n");
sb.append(explainInstructions(ipb.getPredicate(), level+1));
for( ProgramBlock pbc : ipb.getChildBlocksIfBody() )
sb.append( explainProgramBlock( pbc, level+1) );
if( !ipb.getChildBlocksElseBody().isEmpty() )
{
sb.append(offset);
sb.append("ELSE\n");
for( ProgramBlock pbc : ipb.getChildBlocksElseBody() )
sb.append( explainProgramBlock( pbc, level+1) );
}
}
else if (pb instanceof ForProgramBlock) //incl parfor
{
ForProgramBlock fpb = (ForProgramBlock) pb;
sb.append(offset);
if( pb instanceof ParForProgramBlock )
sb.append("PARFOR (lines "+fpb.getBeginLine()+"-"+fpb.getEndLine()+")\n");
else
sb.append("FOR (lines "+fpb.getBeginLine()+"-"+fpb.getEndLine()+")\n");
sb.append(explainInstructions(fpb.getFromInstructions(), level+1));
sb.append(explainInstructions(fpb.getToInstructions(), level+1));
sb.append(explainInstructions(fpb.getIncrementInstructions(), level+1));
for( ProgramBlock pbc : fpb.getChildBlocks() )
sb.append( explainProgramBlock( pbc, level+1) );
}
else
{
sb.append(offset);
if( pb.getStatementBlock()!=null )
sb.append("GENERIC (lines "+pb.getBeginLine()+"-"+pb.getEndLine()+") [recompile="+pb.getStatementBlock().requiresRecompilation()+"]\n");
else
sb.append("GENERIC (lines "+pb.getBeginLine()+"-"+pb.getEndLine()+") \n");
sb.append(explainInstructions(pb.getInstructions(), level+1));
}
return sb.toString();
}
/**
*
* @param instSet
* @param level
* @return
*/
private static String explainInstructions( ArrayList<Instruction> instSet, int level )
{
StringBuilder sb = new StringBuilder();
String offsetInst = createOffset(level);
for( Instruction inst : instSet )
{
String tmp = explainGenericInstruction(inst, level);
sb.append( offsetInst );
sb.append( tmp );
sb.append( '\n' );
}
return sb.toString();
}
/**
*
* @param inst
* @return
*/
private static String explainGenericInstruction( Instruction inst, int level )
{
String tmp = null;
if( inst instanceof MRJobInstruction )
tmp = explainMRJobInstruction((MRJobInstruction)inst, level+1);
else if ( inst instanceof SPInstruction || inst instanceof CPInstruction)
tmp = inst.toString();
if( REPLACE_SPECIAL_CHARACTERS ){
tmp = tmp.replaceAll(Lop.OPERAND_DELIMITOR, " ");
tmp = tmp.replaceAll(Lop.DATATYPE_PREFIX, ".");
tmp = tmp.replaceAll(Lop.INSTRUCTION_DELIMITOR, ", ");
}
return tmp;
}
/**
*
* @param inst
* @param level
* @return
*/
private static String explainMRJobInstruction( MRJobInstruction inst, int level )
{
String instruction = "MR-Job[\n";
String offset = createOffset(level+1);
instruction += offset+" jobtype = " + inst.getJobType() + " \n";
instruction += offset+" input labels = " + Arrays.toString(inst.getInputVars()) + " \n";
instruction += offset+" recReader inst = " + inst.getIv_recordReaderInstructions() + " \n";
instruction += offset+" rand inst = " + inst.getIv_randInstructions() + " \n";
instruction += offset+" mapper inst = " + inst.getIv_instructionsInMapper() + " \n";
instruction += offset+" shuffle inst = " + inst.getIv_shuffleInstructions() + " \n";
instruction += offset+" agg inst = " + inst.getIv_aggInstructions() + " \n";
instruction += offset+" other inst = " + inst.getIv_otherInstructions() + " \n";
instruction += offset+" output labels = " + Arrays.toString(inst.getOutputVars()) + " \n";
instruction += offset+" result indices = " + inst.getString(inst.getIv_resultIndices()) + " \n";
//instruction += offset+"result dims unknown " + getString(iv_resultDimsUnknown) + " \n";
instruction += offset+" num reducers = " + inst.getIv_numReducers() + " \n";
instruction += offset+" replication = " + inst.getIv_replication() + " ]";
//instruction += offset+"]\n";
return instruction;
}
/**
*
* @param mem
* @return
*/
@SuppressWarnings("unused")
private static String showMem(double mem, boolean units)
{
if( !SHOW_MEM_ABOVE_BUDGET && mem >= OptimizerUtils.DEFAULT_SIZE )
return "MAX";
return OptimizerUtils.toMB(mem) + (units?"MB":"");
}
/**
*
* @param level
* @return
*/
private static String createOffset( int level )
{
StringBuilder sb = new StringBuilder();
for( int i=0; i<level; i++ )
sb.append("--");
return sb.toString();
}
/**
*
* @param rtprog
* @param counts
* @param MR
* @param CP
* @param SP
*/
private static void countCompiledInstructions( Program rtprog, ExplainCounts counts, boolean MR, boolean CP, boolean SP )
{
//analyze DML-bodied functions
for( FunctionProgramBlock fpb : rtprog.getFunctionProgramBlocks().values() )
countCompiledInstructions( fpb, counts, MR, CP, SP );
//analyze main program
for( ProgramBlock pb : rtprog.getProgramBlocks() )
countCompiledInstructions( pb, counts, MR, CP, SP );
}
/**
* Recursively counts the number of compiled MRJob instructions in the
* given runtime program block.
*
* @param pb
* @param counts
* @param MR
* @param CP
* @param SP
*/
private static void countCompiledInstructions(ProgramBlock pb, ExplainCounts counts, boolean MR, boolean CP, boolean SP)
{
if (pb instanceof WhileProgramBlock)
{
WhileProgramBlock tmp = (WhileProgramBlock)pb;
countCompiledInstructions(tmp.getPredicate(), counts, MR, CP, SP);
for (ProgramBlock pb2 : tmp.getChildBlocks())
countCompiledInstructions(pb2, counts, MR, CP, SP);
}
else if (pb instanceof IfProgramBlock)
{
IfProgramBlock tmp = (IfProgramBlock)pb;
countCompiledInstructions(tmp.getPredicate(), counts, MR, CP, SP);
for( ProgramBlock pb2 : tmp.getChildBlocksIfBody() )
countCompiledInstructions(pb2, counts, MR, CP, SP);
for( ProgramBlock pb2 : tmp.getChildBlocksElseBody() )
countCompiledInstructions(pb2, counts, MR, CP, SP);
}
else if (pb instanceof ForProgramBlock) //includes ParFORProgramBlock
{
ForProgramBlock tmp = (ForProgramBlock)pb;
countCompiledInstructions(tmp.getFromInstructions(), counts, MR, CP, SP);
countCompiledInstructions(tmp.getToInstructions(), counts, MR, CP, SP);
countCompiledInstructions(tmp.getIncrementInstructions(), counts, MR, CP, SP);
for( ProgramBlock pb2 : tmp.getChildBlocks() )
countCompiledInstructions(pb2, counts, MR, CP, SP);
//additional parfor jobs counted during runtime
}
else if ( pb instanceof FunctionProgramBlock ) //includes ExternalFunctionProgramBlock and ExternalFunctionProgramBlockCP
{
FunctionProgramBlock fpb = (FunctionProgramBlock)pb;
for( ProgramBlock pb2 : fpb.getChildBlocks() )
countCompiledInstructions(pb2, counts, MR, CP, SP);
}
else
{
countCompiledInstructions(pb.getInstructions(), counts, MR, CP, SP);
}
}
/**
*
* @param instSet
* @param counts
* @param MR
* @param CP
* @param SP
* @return
*/
private static int countCompiledInstructions( ArrayList<Instruction> instSet, ExplainCounts counts, boolean MR, boolean CP, boolean SP )
{
int ret = 0;
for( Instruction inst : instSet )
{
if( MR && inst instanceof MRJobInstruction )
counts.numJobs++;
else if( SP && inst instanceof CPInstruction )
counts.numCPInst++;
else if( SP && inst instanceof SPInstruction )
counts.numJobs++;
//keep track of reblocks (in order to prevent unnecessary spark context creation)
if( SP && (inst instanceof CSVReblockSPInstruction || inst instanceof ReblockSPInstruction) )
counts.numReblocks++;
}
return ret;
}
/**
*
* @param sb
* @param fstack
* @param lfset
* @param level
* @return
* @throws HopsException
*/
private static String explainFunctionCallDag(StatementBlock sb, HashSet<String> fstack, HashSet<String> lfset, int level)
throws HopsException
{
StringBuilder builder = new StringBuilder();
if (sb instanceof WhileStatementBlock) {
WhileStatement ws = (WhileStatement)sb.getStatement(0);
for (StatementBlock current : ws.getBody())
builder.append(explainFunctionCallDag(current, fstack, lfset, level));
}
else if (sb instanceof IfStatementBlock) {
IfStatement ifs = (IfStatement) sb.getStatement(0);
for (StatementBlock current : ifs.getIfBody())
builder.append(explainFunctionCallDag(current, fstack, lfset, level));
for (StatementBlock current : ifs.getElseBody())
builder.append(explainFunctionCallDag(current, fstack, lfset, level));
}
else if (sb instanceof ForStatementBlock) {
ForStatement fs = (ForStatement)sb.getStatement(0);
for (StatementBlock current : fs.getBody())
builder.append(explainFunctionCallDag(current, fstack, lfset, level));
}
else if (sb instanceof FunctionStatementBlock) {
FunctionStatement fsb = (FunctionStatement) sb.getStatement(0);
for (StatementBlock current : fsb.getBody())
builder.append(explainFunctionCallDag(current, fstack, lfset, level));
}
else {
// For generic StatementBlock
ArrayList<Hop> hopsDAG = sb.get_hops();
if( hopsDAG != null && !hopsDAG.isEmpty() ) {
//function ops can only occur as root nodes of the dag
for( Hop h : hopsDAG )
if( h instanceof FunctionOp ){
FunctionOp fop = (FunctionOp) h;
String fkey = DMLProgram.constructFunctionKey(fop.getFunctionNamespace(), fop.getFunctionName());
//prevent redundant call edges
if( !lfset.contains(fkey) )
{
//recursively explain function call dag
if( !fstack.contains(fkey) ) {
fstack.add(fkey);
String offset = createOffset(level);
builder.append(offset + "--" + fkey + "\n");
FunctionStatementBlock fsb = sb.getDMLProg()
.getFunctionStatementBlock(fop.getFunctionNamespace(), fop.getFunctionName());
FunctionStatement fs = (FunctionStatement) fsb.getStatement(0);
HashSet<String> lfset2 = new HashSet<String>();
for( StatementBlock csb : fs.getBody() )
builder.append(explainFunctionCallDag(csb, fstack, lfset2, level+1));
fstack.remove(fkey);
}
//recursive function call
else {
String offset = createOffset(level);
builder.append(offset + "-->" + fkey + " (recursive)\n");
}
//mark as visited for current function call context
lfset.add( fkey );
}
}
}
}
return builder.toString();
}
}