package com.linkedin.cubert.app;
import com.linkedin.cubert.analyzer.physical.*;
import com.linkedin.cubert.plan.physical.ExecutorService;
import com.linkedin.cubert.plan.physical.PhysicalParser;
import com.linkedin.cubert.utils.ExecutionConfig;
import com.linkedin.cubert.utils.JsonUtils;
import com.linkedin.cubert.utils.RewriteUtils;
import org.apache.commons.cli.ParseException;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.node.ArrayNode;
import org.codehaus.jackson.node.ObjectNode;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Actual executor for Cubert CMR scripts.
*
* @author Mani Parkhe
*/
public class CmrExecutor
{
private static String CUBERT_PROP_IDENTIFIER = "param.";
private String program;
private JsonNode physicalPlan;
public CmrExecutor()
{
}
public void run(ExecutionSpec spec, Properties props)
throws Exception
{
program = spec.program;
// Step 1: Preprocess the file
if (props != null)
preprocess(props);
// command line option: if -s is set, show the code and exit
if (spec.preprocessOnly)
{
System.out.println(program);
return;
}
try
{
// Step 2: Translate to physical json plan
parse();
if (spec.parseOnly)
return;
// Step 3: compile plan
compile(spec.debugMode);
if (spec.compileOnly)
return;
}
catch (Exception e)
{
if (e instanceof PlanRewriteException)
{
System.err.println(e.getMessage());
if (spec.debugMode)
e.printStackTrace(System.err);
}
System.err.println("\nCannot parse cubert script. Exiting.");
throw e;
}
finally
{
if (spec.printJson)
printPhysical();
}
if (spec.describe)
{
new DescribePlan().describe(physicalPlan);
return;
}
if (physicalPlan != null)
{
((ObjectNode) physicalPlan).put("profileMode", spec.profileMode);
}
ExecutionConfig.getInstance().setParallelExec(spec.parallel);
// Prepare for execution. Add paths to distributed cache
if (spec.libJars != null)
{
ArrayNode libjars = (ArrayNode) physicalPlan.get("libjars");
for (String jarPath : spec.libJars)
{
libjars.add(jarPath);
}
}
// Step 4: execute jobs
execute(spec.jobs);
}
private void preprocess(Properties props)
throws
InstantiationException,
IllegalAccessException,
ClassNotFoundException,
IOException,
InterruptedException,
ParseException,
ScriptException
{
// get the list of macro variables in the script
Matcher matcher = Pattern.compile("\\$[a-zA-Z_][a-zA-Z0-9_]*").matcher(program);
Set<String> variables = new HashSet<String>();
while (matcher.find())
{
String key = matcher.group(0);
key = key.substring(1, key.length()); // remove the leading $
variables.add(key);
}
// check if the script has javascript
matcher = Pattern.compile("<javascript>(.*?)</javascript>", Pattern.DOTALL)
.matcher(program);
boolean scriptFound = false;
StringBuilder scriptBuilder = new StringBuilder();
StringBuffer sb = new StringBuffer();
while (matcher.find())
{
scriptFound = true;
scriptBuilder.append(matcher.group(1));
matcher.appendReplacement(sb, "");
}
matcher.appendTail(sb);
// if it does, then execute the javascript
if (scriptFound)
{
program = sb.toString();
String javascript = scriptBuilder.toString();
// get the java script engine
ScriptEngine engine =
(new ScriptEngineManager()).getEngineByName("JavaScript");
// introduce the base properties within the javascript engine
for (Object key : props.keySet())
{
String escapedKey = ((String) key).replaceAll("[^A-Za-z0-9]", "");
String cmd = String.format("var %s = \"%s\";",
escapedKey,
props.getProperty((String) key));
engine.eval(cmd);
}
// execute the java script
engine.eval(javascript);
// go over all macros variables and see if they are defined in the java script
Properties scriptProps = new Properties();
for (String variable : variables)
{
// see if it is defined in the javascript
String snippet = String.format("typeof %s !== 'undefined'", variable);
if ((Boolean) engine.eval(snippet))
{
String value = engine.eval(variable).toString();
scriptProps.put(variable, value);
}
}
// replace the variables found in the script
if (scriptProps.size() > 0)
replaceVariables(scriptProps);
}
// The macro variables that are not already defined in the props, may be defined
// as environment variables
boolean needCubertTemp = false;
for (String variable : variables)
{
if (!props.containsKey(variable))
{
String value = System.getenv(variable);
if (value != null)
props.put(variable, value);
else if (!needCubertTemp && variable.equals("CUBERT_TEMP"))
needCubertTemp = true;
}
}
if (needCubertTemp)
{
Random rand = new Random();
long suffix = 1;
while (suffix < 100000L)
suffix = Math.abs(rand.nextLong());
String cubertTemp = String.format("CUBERT_TEMP__%d", suffix);
System.out.println(
"variable CUBERT_TEMP not defined. Using value '" + cubertTemp + "'");
props.put("CUBERT_TEMP", cubertTemp);
}
// Substitute the base variable
if (props != null && props.size() > 0)
replaceVariables(props);
// Substitute backticks
substituteBackticks();
}
private void replaceVariables(Properties props)
{
List<Object> keys = new ArrayList<Object>(props.keySet());
Collections.sort(keys, new Comparator<Object>()
{
@Override public int compare(Object o1, Object o2)
{
String s1 = (String) o1;
String s2 = (String) o2;
return s2.length() - s1.length();
}
});
for (Object key : keys)
{
program = program.replaceAll("\\$" + key,
Matcher.quoteReplacement(((String) props.get(key))));
}
}
private void substituteBackticks()
throws IOException, InterruptedException
{
Pattern pattern = Pattern.compile("`([^`]+)`");
Matcher m = pattern.matcher(program);
StringBuffer sb = new StringBuffer();
while (m.find())
{
final String cmdString = m.group(1);
String[] cmdArgs = new String[3];
cmdArgs[0] = "bash";
cmdArgs[1] = "-c";
cmdArgs[2] = "exec " + cmdString;
final Process p = Runtime.getRuntime().exec(cmdArgs);
p.waitFor();
final InputStream inputStream = p.getInputStream();
byte[] buf = new byte[inputStream.available()];
inputStream.read(buf); // TODO: read fully
String repl = new String(buf).trim().replace("$", "\\$");
m.appendReplacement(sb, repl);
}
m.appendTail(sb);
program = sb.toString();
}
/**
* Parse the program and convert script into physical plan (json)
*
* @throws IOException
* @throws java.text.ParseException
*/
private void parse()
throws IOException, java.text.ParseException
{
physicalPlan = PhysicalParser.parseProgram(program);
}
/**
* Compile and Rewrite physical plan.
*
* @param debugMode
* @throws IOException
* @throws InstantiationException
* @throws IllegalAccessException
*/
private void compile(boolean debugMode)
throws IOException, InstantiationException, IllegalAccessException
{
VariableNameUsed nameUsedVisitor = new VariableNameUsed();
new PhysicalPlanWalker(physicalPlan, nameUsedVisitor).walk();
Set<String> namesUsed = nameUsedVisitor.getUsedNames();
List<Class<? extends PlanRewriter>> rewriters =
new ArrayList<Class<? extends PlanRewriter>>();
rewriters.add(ShuffleRewriter.class);
rewriters.add(CachedFileAnalyzer.class);
rewriters.add(DependencyAnalyzer.class);
rewriters.add(OverwriteAnalyzer.class);
rewriters.add(BlockgenLineageAnalyzer.class);
rewriters.add(SemanticAnalyzer.class);
rewriters.add(SummaryRewriter.class);
rewriters.add(OverwriteAnalyzer.class);
rewriters.add(BlockgenLineageAnalyzer.class);
rewriters.add(DependencyAnalyzer.class);
rewriters.add(SemanticAnalyzer.class);
HashSet<Class<? extends PlanRewriter>> visitedRewriters =
new HashSet<Class<? extends PlanRewriter>>();
for (Class<? extends PlanRewriter> rewriterClass : rewriters)
{
boolean revisit = (visitedRewriters.contains(rewriterClass) ? true : false);
// revisit only performed, in the event of actual summary compile
if (revisit && !RewriteUtils.hasSummaryRewrite((ObjectNode) physicalPlan))
continue;
physicalPlan = rewriterClass.newInstance()
.rewrite(physicalPlan,
namesUsed,
debugMode,
revisit);
visitedRewriters.add(rewriterClass);
if (debugMode)
{
if (rewriterClass == SummaryRewriter.class
&& RewriteUtils.hasSummaryRewrite((ObjectNode) physicalPlan))
{
System.out.println("Physical plan after summary compile");
this.printPhysical();
}
if (rewriterClass == SemanticAnalyzer.class && revisit)
{
System.out.println("Physical plan after semantic analyzer visit ");
this.printPhysical();
}
}
if (physicalPlan == null)
return;
}
}
@SuppressWarnings("deprecation") private void printPhysical()
throws IOException
{
System.out.println(new ObjectMapper().defaultPrettyPrintingWriter()
.writeValueAsString(physicalPlan));
}
private void execute(List<String> jobs)
throws
IOException,
InterruptedException,
ClassNotFoundException,
InstantiationException,
IllegalAccessException
{
ExecutorService executorService = new ExecutorService(physicalPlan);
if (jobs == null)
{
executorService.execute();
return;
}
// else
Set<Integer> jobsToRun = new TreeSet<Integer>();
for (String idStr : jobs)
jobsToRun.add(getJobId(idStr));
for (int id : jobsToRun)
executorService.execute(id);
}
private int getJobId(String idStr)
{
int id;
try
{
id = Integer.parseInt(idStr);
}
catch (NumberFormatException e)
{
Map<Integer, String> matchedJobs = new HashMap<Integer, String>();
String jobToRun = idStr;
id = 0;
for (JsonNode job : physicalPlan.get("jobs"))
{
String jobName = JsonUtils.getText(job, "name");
if (jobName.contains(jobToRun))
{
matchedJobs.put(id, jobName);
}
id++;
}
if (matchedJobs.isEmpty())
{
throw new IllegalStateException(
"ERROR: There is no job that matches [" + jobToRun + "]");
}
if (matchedJobs.size() > 1)
{
System.err.println(
"ERROR: There are more than one jobs that matches [" + jobToRun
+ "]:");
for (Map.Entry<Integer, String> entry : matchedJobs.entrySet())
System.err.println(String.format("\t[%d] %s",
entry.getKey(),
entry.getValue()));
throw new IllegalStateException();
}
id = matchedJobs.keySet().iterator().next();
}
return id;
}
}