/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.nephele.jobgraph; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.Stack; import java.util.Vector; import eu.stratosphere.configuration.Configuration; import eu.stratosphere.core.fs.FSDataInputStream; import eu.stratosphere.core.fs.FileStatus; import eu.stratosphere.core.fs.FileSystem; import eu.stratosphere.core.fs.Path; import eu.stratosphere.core.io.IOReadableWritable; import eu.stratosphere.core.io.StringRecord; import eu.stratosphere.nephele.execution.librarycache.LibraryCacheManager; import eu.stratosphere.util.ClassUtils; /** * A job graph represents an entire job in Nephele. A job graph must consists at least of one job vertex * and must be acyclic. * */ public class JobGraph implements IOReadableWritable { /** * List of input vertices included in this job graph. */ private Map<JobVertexID, AbstractJobInputVertex> inputVertices = new HashMap<JobVertexID, AbstractJobInputVertex>(); /** * List of output vertices included in this job graph. */ private Map<JobVertexID, AbstractJobOutputVertex> outputVertices = new HashMap<JobVertexID, AbstractJobOutputVertex>(); /** * List of task vertices included in this job graph. */ private Map<JobVertexID, JobTaskVertex> taskVertices = new HashMap<JobVertexID, JobTaskVertex>(); /** * ID of this job. */ private JobID jobID; /** * Name of this job. */ private String jobName; /** * The job configuration attached to this job. */ private Configuration jobConfiguration = new Configuration(); /** * The configuration which should be applied to the task managers involved in processing this job. */ private final Configuration taskManagerConfiguration = new Configuration(); /** * List of JAR files required to run this job. */ private final ArrayList<Path> userJars = new ArrayList<Path>(); /** * Size of the buffer to be allocated for transferring attached files. */ private static final int BUFFERSIZE = 8192; /** * Constructs a new job graph with a random job ID. */ public JobGraph() { this.jobID = new JobID(); } /** * Constructs a new job graph with the given name and a random job ID. * * @param jobName * the name for this job graph */ public JobGraph(final String jobName) { this(); this.jobName = jobName; } /** * Returns the name assigned to the job graph. * * @return the name assigned to the job graph */ public String getName() { return this.jobName; } /** * Returns the configuration object for this job if it is set. * * @return the configuration object for this job, or <code>null</code> if it is not set */ public Configuration getJobConfiguration() { return this.jobConfiguration; } /** * Returns the configuration object distributed among the task managers * before they start processing this job. * * @return the configuration object for the task managers, or <code>null</code> if it is not set */ public Configuration getTaskmanagerConfiguration() { return this.taskManagerConfiguration; } /** * Adds a new input vertex to the job graph if it is not already included. * * @param inputVertex * the new input vertex to be added */ public void addVertex(final AbstractJobInputVertex inputVertex) { if (!inputVertices.containsKey(inputVertex.getID())) { inputVertices.put(inputVertex.getID(), inputVertex); } } /** * Adds a new task vertex to the job graph if it is not already included. * * @param taskVertex * the new task vertex to be added */ public void addVertex(final JobTaskVertex taskVertex) { if (!taskVertices.containsKey(taskVertex.getID())) { taskVertices.put(taskVertex.getID(), taskVertex); } } /** * Adds a new output vertex to the job graph if it is not already included. * * @param outputVertex * the new output vertex to be added */ public void addVertex(final AbstractJobOutputVertex outputVertex) { if (!outputVertices.containsKey(outputVertex.getID())) { outputVertices.put(outputVertex.getID(), outputVertex); } } /** * Returns the number of input vertices registered with the job graph. * * @return the number of input vertices registered with the job graph */ public int getNumberOfInputVertices() { return this.inputVertices.size(); } /** * Returns the number of output vertices registered with the job graph. * * @return the number of output vertices registered with the job graph */ public int getNumberOfOutputVertices() { return this.outputVertices.size(); } /** * Returns the number of task vertices registered with the job graph. * * @return the number of task vertices registered with the job graph */ public int getNumberOfTaskVertices() { return this.taskVertices.size(); } /** * Returns an iterator to iterate all input vertices registered with the job graph. * * @return an iterator to iterate all input vertices registered with the job graph */ public Iterator<AbstractJobInputVertex> getInputVertices() { final Collection<AbstractJobInputVertex> coll = this.inputVertices.values(); return coll.iterator(); } /** * Returns an iterator to iterate all output vertices registered with the job graph. * * @return an iterator to iterate all output vertices registered with the job graph */ public Iterator<AbstractJobOutputVertex> getOutputVertices() { final Collection<AbstractJobOutputVertex> coll = this.outputVertices.values(); return coll.iterator(); } /** * Returns an iterator to iterate all task vertices registered with the job graph. * * @return an iterator to iterate all task vertices registered with the job graph */ public Iterator<JobTaskVertex> getTaskVertices() { final Collection<JobTaskVertex> coll = this.taskVertices.values(); return coll.iterator(); } /** * Returns the number of all job vertices registered with this job graph. * * @return the number of all job vertices registered with this job graph */ public int getNumberOfVertices() { return this.inputVertices.size() + this.outputVertices.size() + this.taskVertices.size(); } /** * Returns an array of all job vertices than can be reached when traversing the job graph from the input vertices. * * @return an array of all job vertices than can be reached when traversing the job graph from the input vertices */ public AbstractJobVertex[] getAllReachableJobVertices() { final Vector<AbstractJobVertex> collector = new Vector<AbstractJobVertex>(); collectVertices(null, collector); return collector.toArray(new AbstractJobVertex[0]); } /** * Returns an array of all job vertices that are registered with the job graph. The order in which the vertices * appear in the list is not defined. * * @return an array of all job vertices that are registered with the job graph */ public AbstractJobVertex[] getAllJobVertices() { int i = 0; final AbstractJobVertex[] vertices = new AbstractJobVertex[inputVertices.size() + outputVertices.size() + taskVertices.size()]; final Iterator<AbstractJobInputVertex> iv = getInputVertices(); while (iv.hasNext()) { vertices[i++] = iv.next(); } final Iterator<AbstractJobOutputVertex> ov = getOutputVertices(); while (ov.hasNext()) { vertices[i++] = ov.next(); } final Iterator<JobTaskVertex> tv = getTaskVertices(); while (tv.hasNext()) { vertices[i++] = tv.next(); } return vertices; } /** * Auxiliary method to collect all vertices which are reachable from the input vertices. * * @param jv * the currently considered job vertex * @param collector * a temporary list to store the vertices that have already been visisted */ private void collectVertices(final AbstractJobVertex jv, final List<AbstractJobVertex> collector) { if (jv == null) { final Iterator<AbstractJobInputVertex> iter = getInputVertices(); while (iter.hasNext()) { collectVertices(iter.next(), collector); } } else { if (!collector.contains(jv)) { collector.add(jv); } else { return; } for (int i = 0; i < jv.getNumberOfForwardConnections(); i++) { collectVertices(jv.getForwardConnection(i).getConnectedVertex(), collector); } } } /** * Returns the ID of the job. * * @return the ID of the job */ public JobID getJobID() { return this.jobID; } /** * Searches for a vertex with a matching ID and returns it. * * @param id * the ID of the vertex to search for * @return the vertex with the matching ID or <code>null</code> if no vertex with such ID could be found */ public AbstractJobVertex findVertexByID(final JobVertexID id) { if (this.inputVertices.containsKey(id)) { return this.inputVertices.get(id); } if (this.outputVertices.containsKey(id)) { return this.outputVertices.get(id); } if (this.taskVertices.containsKey(id)) { return this.taskVertices.get(id); } return null; } /** * Checks if the job vertex with the given ID is registered with the job graph. * * @param id * the ID of the vertex to search for * @return <code>true</code> if a vertex with the given ID is registered with the job graph, <code>false</code> * otherwise. */ private boolean includedInJobGraph(final JobVertexID id) { if (this.inputVertices.containsKey(id)) { return true; } if (this.outputVertices.containsKey(id)) { return true; } if (this.taskVertices.containsKey(id)) { return true; } return false; } /** * Checks if the job graph is weakly connected. * * @return <code>true</code> if the job graph is weakly connected, otherwise <code>false</code> */ public boolean isWeaklyConnected() { final AbstractJobVertex[] reachable = getAllReachableJobVertices(); final AbstractJobVertex[] all = getAllJobVertices(); // Check if number if reachable vertices matches number of registered vertices if (reachable.length != all.length) { return false; } final HashMap<JobVertexID, AbstractJobVertex> tmp = new HashMap<JobVertexID, AbstractJobVertex>(); for (int i = 0; i < reachable.length; i++) { tmp.put(reachable[i].getID(), reachable[i]); } // Check if all is subset of reachable for (int i = 0; i < all.length; i++) { if (!tmp.containsKey(all[i].getID())) { return false; } } // Check if reachable is a subset of all for (int i = 0; i < reachable.length; i++) { if (!includedInJobGraph(reachable[i].getID())) { return false; } } return true; } /** * Checks if the job graph is acyclic. * * @return <code>true</code> if the job graph is acyclic, <code>false</code> otherwise */ public boolean isAcyclic() { // Tarjan's algorithm to detect strongly connected componenent of a graph final AbstractJobVertex[] reachable = getAllReachableJobVertices(); final HashMap<AbstractJobVertex, Integer> indexMap = new HashMap<AbstractJobVertex, Integer>(); final HashMap<AbstractJobVertex, Integer> lowLinkMap = new HashMap<AbstractJobVertex, Integer>(); final Stack<AbstractJobVertex> stack = new Stack<AbstractJobVertex>(); final Integer index = Integer.valueOf(0); for (int i = 0; i < reachable.length; i++) { if (!indexMap.containsKey(reachable[i])) { if (!tarjan(reachable[i], index, indexMap, lowLinkMap, stack)) { return false; } } } return true; } /** * Auxiliary method implementing Tarjan's algorithm for strongly-connected components to determine whether the job * graph is acyclic. */ private boolean tarjan(final AbstractJobVertex jv, Integer index, final HashMap<AbstractJobVertex, Integer> indexMap, final HashMap<AbstractJobVertex, Integer> lowLinkMap, final Stack<AbstractJobVertex> stack) { indexMap.put(jv, Integer.valueOf(index)); lowLinkMap.put(jv, Integer.valueOf(index)); index = Integer.valueOf(index.intValue() + 1); stack.push(jv); for (int i = 0; i < jv.getNumberOfForwardConnections(); i++) { final AbstractJobVertex jv2 = jv.getForwardConnection(i).getConnectedVertex(); if (!indexMap.containsKey(jv2) || stack.contains(jv2)) { if (!indexMap.containsKey(jv2)) { if (!tarjan(jv2, index, indexMap, lowLinkMap, stack)) { return false; } } if (lowLinkMap.get(jv) > lowLinkMap.get(jv2)) { lowLinkMap.put(jv, Integer.valueOf(lowLinkMap.get(jv2))); } } } if (lowLinkMap.get(jv).equals(indexMap.get(jv))) { int count = 0; while (stack.size() > 0) { final AbstractJobVertex jv2 = stack.pop(); if (jv == jv2) { break; } count++; } if (count > 0) { return false; } } return true; } /** * Checks for all registered job vertices if their in-/out-degree is correct. * * @return <code>null</code> if the in-/out-degree of all vertices is correct or the first job vertex whose * in-/out-degree is incorrect. */ public AbstractJobVertex areVertexDegreesCorrect() { // Check input vertices final Iterator<AbstractJobInputVertex> iter = getInputVertices(); while (iter.hasNext()) { final AbstractJobVertex jv = iter.next(); if (jv.getNumberOfForwardConnections() < 1 || jv.getNumberOfBackwardConnections() > 0) { return jv; } } // Check task vertices final Iterator<JobTaskVertex> iter2 = getTaskVertices(); while (iter2.hasNext()) { final AbstractJobVertex jv = iter2.next(); if (jv.getNumberOfForwardConnections() < 1 || jv.getNumberOfBackwardConnections() < 1) { return jv; } } // Check output vertices final Iterator<AbstractJobOutputVertex> iter3 = getOutputVertices(); while (iter3.hasNext()) { final AbstractJobVertex jv = iter3.next(); if (jv.getNumberOfForwardConnections() > 0 || jv.getNumberOfBackwardConnections() < 1) { return jv; } } return null; } @Override public void read(final DataInput in) throws IOException { // Read job id this.jobID.read(in); // Read the job name this.jobName = StringRecord.readString(in); // Read required jar files readRequiredJarFiles(in); // First read total number of vertices; final int numVertices = in.readInt(); // First, recreate each vertex and add it to reconstructionMap for (int i = 0; i < numVertices; i++) { final String className = StringRecord.readString(in); final JobVertexID id = new JobVertexID(); id.read(in); final String vertexName = StringRecord.readString(in); Class<? extends IOReadableWritable> c; try { c = ClassUtils.getRecordByName(className); } catch (ClassNotFoundException cnfe) { throw new IOException(cnfe.toString()); } // Find constructor Constructor<? extends IOReadableWritable> cst; try { cst = c.getConstructor(String.class, JobVertexID.class, JobGraph.class); } catch (SecurityException e1) { throw new IOException(e1.toString()); } catch (NoSuchMethodException e1) { throw new IOException(e1.toString()); } try { cst.newInstance(vertexName, id, this); } catch (IllegalArgumentException e) { throw new IOException(e.toString()); } catch (InstantiationException e) { throw new IOException(e.toString()); } catch (IllegalAccessException e) { throw new IOException(e.toString()); } catch (InvocationTargetException e) { throw new IOException(e.toString()); } } final JobVertexID tmpID = new JobVertexID(); for (int i = 0; i < numVertices; i++) { AbstractJobVertex jv; tmpID.read(in); if (inputVertices.containsKey(tmpID)) { jv = inputVertices.get(tmpID); } else { if (outputVertices.containsKey(tmpID)) { jv = outputVertices.get(tmpID); } else { if (taskVertices.containsKey(tmpID)) { jv = taskVertices.get(tmpID); } else { throw new IOException("Cannot find vertex with ID " + tmpID + " in any vertex map."); } } } // Read the vertex data jv.read(in); } // Find the class loader for the job final ClassLoader cl = LibraryCacheManager.getClassLoader(this.jobID); if (cl == null) { throw new IOException("Cannot find class loader for job graph " + this.jobID); } // Re-instantiate the job configuration object and read the configuration this.jobConfiguration = new Configuration(cl); this.jobConfiguration.read(in); // Read the task manager configuration this.taskManagerConfiguration.read(in); } @Override public void write(final DataOutput out) throws IOException { // Write job ID this.jobID.write(out); // Write out job name StringRecord.writeString(out, this.jobName); final AbstractJobVertex[] allVertices = this.getAllJobVertices(); // Write out all required jar files writeRequiredJarFiles(out, allVertices); // Write total number of vertices out.writeInt(allVertices.length); // First write out class name and id for every vertex for (int i = 0; i < allVertices.length; i++) { final String className = allVertices[i].getClass().getName(); StringRecord.writeString(out, className); allVertices[i].getID().write(out); StringRecord.writeString(out, allVertices[i].getName()); } // Now write out vertices themselves for (int i = 0; i < allVertices.length; i++) { allVertices[i].getID().write(out); allVertices[i].write(out); } // Write out configuration objects this.jobConfiguration.write(out); this.taskManagerConfiguration.write(out); } /** * Writes the JAR files of all vertices in array <code>jobVertices</code> to the specified output stream. * * @param out * the output stream to write the JAR files to * @param jobVertices * array of job vertices whose required JAR file are to be written to the output stream * @throws IOException * thrown if an error occurs while writing to the stream */ private void writeRequiredJarFiles(final DataOutput out, final AbstractJobVertex[] jobVertices) throws IOException { // Now check if all the collected jar files really exist final FileSystem fs = FileSystem.getLocalFileSystem(); for (int i = 0; i < this.userJars.size(); i++) { if (!fs.exists(this.userJars.get(i))) { throw new IOException("Cannot find jar file " + this.userJars.get(i)); } } // How many jar files follow? out.writeInt(this.userJars.size()); for (int i = 0; i < this.userJars.size(); i++) { final Path jar = this.userJars.get(i); // Write out the actual path jar.write(out); // Write out the length of the file final FileStatus file = fs.getFileStatus(jar); out.writeLong(file.getLen()); // Now write the jar file final FSDataInputStream inStream = fs.open(this.userJars.get(i)); final byte[] buf = new byte[BUFFERSIZE]; int read = inStream.read(buf, 0, buf.length); while (read > 0) { out.write(buf, 0, read); read = inStream.read(buf, 0, buf.length); } } } /** * Reads required JAR files from an input stream and adds them to the * library cache manager. * * @param in * the data stream to read the JAR files from * @throws IOException * thrown if an error occurs while reading the stream */ private void readRequiredJarFiles(final DataInput in) throws IOException { // Do jar files follow; final int numJars = in.readInt(); if (numJars > 0) { for (int i = 0; i < numJars; i++) { final Path p = new Path(); p.read(in); this.userJars.add(p); // Read the size of the jar file final long sizeOfJar = in.readLong(); // Add the jar to the library manager LibraryCacheManager.addLibrary(this.jobID, p, sizeOfJar, in); } } // Register this job with the library cache manager LibraryCacheManager.register(this.jobID, this.userJars.toArray(new Path[0])); } /** * Adds the path of a JAR file required to run the job on a task manager. * * @param jar * path of the JAR file required to run the job on a task manager */ public void addJar(final Path jar) { if (jar == null) { return; } if (!userJars.contains(jar)) { userJars.add(jar); } } /** * Returns a (possibly empty) array of paths to JAR files which are required to run the job on a task manager. * * @return a (possibly empty) array of paths to JAR files which are required to run the job on a task manager */ public Path[] getJars() { return userJars.toArray(new Path[userJars.size()]); } /** * Checks if any vertex of this job graph has an outgoing edge which is set to <code>null</code>. If this is the * case the respective vertex is returned. * * @return the vertex which has an outgoing edge set to <code>null</code> or <code>null</code> if no such vertex * exists */ public AbstractJobVertex findVertexWithNullEdges() { final AbstractJobVertex[] allVertices = getAllJobVertices(); for (int i = 0; i < allVertices.length; i++) { for (int j = 0; j < allVertices[i].getNumberOfForwardConnections(); j++) { if (allVertices[i].getForwardConnection(j) == null) { return allVertices[i]; } } for (int j = 0; j < allVertices[i].getNumberOfBackwardConnections(); j++) { if (allVertices[i].getBackwardConnection(j) == null) { return allVertices[i]; } } } return null; } /** * Checks if the instance dependency chain created with the <code>setVertexToShareInstancesWith</code> method is * acyclic. * * @return <code>true</code> if the dependency chain is acyclic, <code>false</code> otherwise */ public boolean isInstanceDependencyChainAcyclic() { final AbstractJobVertex[] allVertices = this.getAllJobVertices(); final Set<AbstractJobVertex> alreadyVisited = new HashSet<AbstractJobVertex>(); for (AbstractJobVertex vertex : allVertices) { if (alreadyVisited.contains(vertex)) { continue; } AbstractJobVertex vertexToShareInstancesWith = vertex.getVertexToShareInstancesWith(); if (vertexToShareInstancesWith != null) { final Set<AbstractJobVertex> cycleMap = new HashSet<AbstractJobVertex>(); while (vertexToShareInstancesWith != null) { if (cycleMap.contains(vertexToShareInstancesWith)) { return false; } else { alreadyVisited.add(vertexToShareInstancesWith); cycleMap.add(vertexToShareInstancesWith); vertexToShareInstancesWith = vertexToShareInstancesWith.getVertexToShareInstancesWith(); } } } } return true; } }