/* (c) 2014 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package com.linkedin.cubert.plan.physical;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.JsonNodeFactory;
import com.linkedin.cubert.utils.print;
/**
 * Generates the execution plan of a DAG of dependencies, using topological sort.
 *
 * Dependencies are added via the {@code addNode} method, and the plan is obtained via the
 * {@code getSerialPlan} method.
 * <p>
 * The class also exposes an incremental scheduling view of the same graph:
 * {@code getReadyJobs}, {@code setJobToScheduled}, {@code setJobToFinished} and
 * {@code hasUnfinishedJobs}. Both views share the per-node {@code color} field, and
 * {@code getSerialPlan} overwrites it (every node ends up BLACK), so the two views must
 * not be interleaved on the same instance.
 *
 * @author Maneesh Varshney
 *
 */
public class DependencyGraph
{
    /**
     * Per-node state. During {@code getSerialPlan}: WHITE = not visited, GRAY = on the
     * current DFS path, BLACK = fully visited. For scheduling: WHITE = not scheduled,
     * GRAY = scheduled, BLACK = finished.
     */
    static enum Color
    {
        WHITE, GRAY, BLACK
    }

    /** A single node of the graph: its name, payload, parent names and derived children. */
    static final class GraphNode
    {
        final String name;
        final JsonNode json;
        final List<String> parents = new ArrayList<String>();
        // Derived from the parents lists of all nodes; rebuilt by setChildren().
        final List<GraphNode> children = new ArrayList<GraphNode>();
        Color color = Color.WHITE;

        /**
         * @param name    unique name of the node
         * @param parents names of the nodes this node depends on; {@code null} means none
         * @param json    payload returned in plans for this node
         */
        GraphNode(String name, List<String> parents, JsonNode json)
        {
            this.name = name;
            this.json = json;
            // Defensive copy so later changes to the caller's list do not alter the graph.
            if (parents != null)
            {
                this.parents.addAll(parents);
            }
        }
    }

    private final Map<String, GraphNode> graphNodes = new HashMap<String, GraphNode>();

    /**
     * Registers a node in the graph. Parents may be registered after the node that
     * references them; they are only resolved by {@link #setChildren()}.
     *
     * @param name    unique name of the node
     * @param parents names of the nodes this node depends on; {@code null} means none
     * @param json    payload returned in plans for this node
     * @throws IllegalArgumentException if a node with the same name already exists
     */
    public void addNode(String name, List<String> parents, JsonNode json)
    {
        if (graphNodes.containsKey(name))
        {
            throw new IllegalArgumentException("Node [" + name + "] already exists");
        }
        graphNodes.put(name, new GraphNode(name, parents, json));
    }

    /**
     * Computes a topological ordering of the graph (parents before children).
     * <p>
     * Side effect: node colors are reset and then all set to BLACK, which discards any
     * state recorded through {@code setJobToScheduled}/{@code setJobToFinished}.
     *
     * @return the payloads of all nodes, ordered so that every node appears after all of
     *         its parents
     * @throws IllegalStateException if the graph has a cyclic dependency, or a node
     *                               references a parent that was never added
     */
    public List<JsonNode> getSerialPlan()
    {
        List<JsonNode> plan = new ArrayList<JsonNode>();
        // Names of nodes that have parents and have not been visited yet.
        Set<String> whiteNodes = new HashSet<String>();
        // Names of nodes without parents; these are the DFS roots.
        List<String> inputNodes = new ArrayList<String>();

        setChildren();

        // initialize state
        for (GraphNode node : graphNodes.values())
        {
            if (node.parents.isEmpty())
            {
                inputNodes.add(node.name);
            }
            else
            {
                whiteNodes.add(node.name);
            }
            node.color = Color.WHITE;
        }

        // first visit all input nodes
        for (String inputNode : inputNodes)
        {
            visit(inputNode, whiteNodes, plan);
        }

        // Anything still white is unreachable from the inputs; visiting it either
        // completes the plan or runs into the cycle that made it unreachable.
        while (!whiteNodes.isEmpty())
        {
            String next = whiteNodes.iterator().next();
            visit(next, whiteNodes, plan);
        }

        // visit() appends in DFS post-order (children first); reverse to get parents first.
        Collections.reverse(plan);
        return plan;
    }

    /**
     * Depth-first visit of a node and everything downstream of it, appending payloads to
     * {@code plan} in post-order.
     *
     * @param inputNode  name of the node to visit
     * @param whiteNodes set of not-yet-visited names; the node is removed once finished
     * @param plan       accumulator for the post-order payload list
     * @throws IllegalStateException if the node is already on the current DFS path
     */
    private void visit(String inputNode, Set<String> whiteNodes, List<JsonNode> plan)
    {
        GraphNode node = graphNodes.get(inputNode);

        // Reaching a GRAY node means we looped back onto the current DFS path.
        if (node.color == Color.GRAY)
        {
            throw new IllegalStateException("Cannot create plan for graph with cyclic dependency");
        }

        if (node.color == Color.WHITE)
        {
            node.color = Color.GRAY;
            for (GraphNode child : node.children)
            {
                visit(child.name, whiteNodes, plan);
            }
            node.color = Color.BLACK;
            whiteNodes.remove(node.name);
            plan.add(node.json);
        }
    }

    /**
     * Rebuilds the {@code children} list of every node from the {@code parents} lists.
     *
     * @throws IllegalStateException if a node names a parent that was never added
     */
    public void setChildren()
    {
        // clear out children list first
        for (GraphNode node : graphNodes.values())
        {
            node.children.clear();
        }

        for (GraphNode node : graphNodes.values())
        {
            for (String parent : node.parents)
            {
                requireParent(node, parent).children.add(node);
            }
        }
    }

    /**
     * @return {@code true} if at least one node is not yet BLACK (finished)
     */
    public boolean hasUnfinishedJobs()
    {
        for (GraphNode node : graphNodes.values())
        {
            if (node.color != Color.BLACK)
            {
                return true;
            }
        }
        return false;
    }

    /**
     * @return the payloads of all nodes that are unscheduled (WHITE) and whose parents
     *         are all finished (BLACK)
     * @throws IllegalStateException if a node names a parent that was never added
     */
    public List<JsonNode> getReadyJobs()
    {
        List<JsonNode> readyJobs = new ArrayList<JsonNode>();
        for (GraphNode node : graphNodes.values())
        {
            if (readyForScheduling(node))
            {
                readyJobs.add(node.json);
            }
        }
        return readyJobs;
    }

    /** A node is ready when it is WHITE and every one of its parents is BLACK. */
    private boolean readyForScheduling(GraphNode node)
    {
        if (node.color != Color.WHITE)
        {
            return false;
        }

        for (String parent : node.parents)
        {
            if (requireParent(node, parent).color != Color.BLACK)
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Marks a job as scheduled (GRAY).
     *
     * @param job payload whose {@code "name"} field identifies the node
     * @throws IllegalArgumentException if the job has no usable name or is not in the graph
     */
    public void setJobToScheduled(JsonNode job)
    {
        findJob(job).color = Color.GRAY;
    }

    /**
     * Marks a job as finished (BLACK).
     *
     * @param job payload whose {@code "name"} field identifies the node
     * @throws IllegalArgumentException if the job has no usable name or is not in the graph
     */
    public void setJobToFinished(JsonNode job)
    {
        findJob(job).color = Color.BLACK;
    }

    /**
     * Renders, for each requested job, the list of jobs it depends on.
     *
     * @param jobNames names of the jobs to describe, in output order
     * @return a multi-line, human readable description
     * @throws IllegalArgumentException if a name does not belong to the graph
     */
    public String prettyPrint(List<String> jobNames)
    {
        StringBuilder out = new StringBuilder("\nDependency graph\n");
        for (String name : jobNames)
        {
            GraphNode node = graphNodes.get(name);
            if (node == null)
            {
                throw new IllegalArgumentException("Unknown job [" + name + "]");
            }

            out.append(name);
            if (node.parents.isEmpty())
            {
                out.append(" is independent\n");
                continue;
            }

            out.append(" depends on ");
            String separator = "";
            for (String parentName : node.parents)
            {
                out.append(separator).append(parentName);
                separator = ", ";
            }
            out.append("\n");
        }
        return out.toString();
    }

    /** Resolves a parent name, failing with a descriptive error if it was never added. */
    private GraphNode requireParent(GraphNode node, String parent)
    {
        GraphNode parentNode = graphNodes.get(parent);
        if (parentNode == null)
        {
            throw new IllegalStateException("Node [" + node.name
                    + "] depends on unknown parent [" + parent + "]");
        }
        return parentNode;
    }

    /** Looks up the graph node identified by the {@code "name"} field of a job payload. */
    private GraphNode findJob(JsonNode job)
    {
        if (job == null)
        {
            throw new IllegalArgumentException("Job must not be null");
        }

        JsonNode nameNode = job.get("name");
        String jobName = (nameNode == null) ? null : nameNode.getTextValue();
        if (jobName == null)
        {
            throw new IllegalArgumentException("Job has no textual [name] field: " + job);
        }

        GraphNode node = graphNodes.get(jobName);
        if (node == null)
        {
            throw new IllegalArgumentException("Unknown job [" + jobName + "]");
        }
        return node;
    }

    /** Small demo: builds a sample graph and prints its serial plan. */
    public static void main(String[] args)
    {
        DependencyGraph g = new DependencyGraph();
        JsonNodeFactory nc = JsonNodeFactory.instance;

        JsonNode a = nc.numberNode(1);
        JsonNode b = nc.numberNode(2);
        JsonNode c = nc.numberNode(3);
        JsonNode d = nc.numberNode(4);
        JsonNode e = nc.numberNode(5);
        JsonNode f = nc.numberNode(6);
        JsonNode h = nc.numberNode(7);
        JsonNode i = nc.numberNode(8);

        g.addNode("input", null, a);
        g.addNode("loaddict", null, b);
        g.addNode("second", null, c);
        g.addNode("encode", Arrays.asList("input", "loaddict"), d);
        g.addNode("groupby", Arrays.asList("encode"), e);
        g.addNode("filter", Arrays.asList("groupby"), f);
        g.addNode("join", Arrays.asList("filter", "second"), h);
        g.addNode("shuffle", Arrays.asList("join"), i);

        System.out.println(g.getSerialPlan());
    }
}