/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.common.jsonexplain;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.jsonexplain.Vertex.VertexType;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * A single stage of a JSON explain plan.
 *
 * A stage either holds a set of vertices (when its JSON object contains an
 * entry named after the parser's framework, see
 * {@link #extractVertex(JSONObject)}) or a single operator plus plain
 * attributes. Stages are linked to each other through {@link #parentStages}
 * and {@link #childStages}.
 */
public final class Stage {
  // external name is used to show at the console
  String externalName;
  // internal name is used to track the stages
  public final String internalName;
  // the parser this stage was created by; also holds the shared print/inline state
  public final DagJsonParser parser;
  // upstream stages, e.g., root stage
  public final List<Stage> parentStages = new ArrayList<>();
  // downstream stages.
  public final List<Stage> childStages = new ArrayList<>();
  // vertices of this stage keyed by vertex name, in insertion order
  public final Map<String, Vertex> vertexs = new LinkedHashMap<>();
  // plain (non-operator) attributes of this stage, sorted by name
  public final Map<String, String> attrs = new TreeMap<>();
  // for every vertex listed under "Edges:", the connections to its parent vertices
  Map<Vertex, List<Connection>> tezStageDependency;
  // some stage may contain only a single operator, e.g., create table operator,
  // fetch operator.
  Op op;

  /**
   * Creates a stage whose internal and external names both start out as
   * {@code name}.
   *
   * @param name
   *          the stage name
   * @param tezJsonParser
   *          the parser this stage belongs to
   */
  public Stage(String name, DagJsonParser tezJsonParser) {
    super();
    internalName = name;
    externalName = name;
    parser = tezJsonParser;
  }

  /**
   * Links this stage with the stages named under "DEPENDENT STAGES" (they
   * become parents of this stage) and under "CONDITIONAL CHILD TASKS" (they
   * become children of this stage and get a "(CONDITIONAL)" suffix in their
   * external name).
   *
   * @param object
   *          the JSON object describing the dependencies of this stage
   * @param stages
   *          all known stages keyed by their name
   * @throws JSONException
   *           if a value cannot be read as a string, or if a referenced
   *           stage name is not present in {@code stages}
   */
  public void addDependency(JSONObject object, Map<String, Stage> stages) throws JSONException {
    if (object.has("DEPENDENT STAGES")) {
      String names = object.getString("DEPENDENT STAGES");
      for (String name : names.split(",")) {
        Stage parent = findStage(stages, name, "DEPENDENT STAGES");
        this.parentStages.add(parent);
        parent.childStages.add(this);
      }
    }
    if (object.has("CONDITIONAL CHILD TASKS")) {
      String names = object.getString("CONDITIONAL CHILD TASKS");
      this.externalName = this.internalName + "(CONDITIONAL CHILD TASKS: " + names + ")";
      for (String name : names.split(",")) {
        Stage child = findStage(stages, name, "CONDITIONAL CHILD TASKS");
        child.externalName = child.internalName + "(CONDITIONAL)";
        child.parentStages.add(this);
        this.childStages.add(child);
      }
    }
  }

  /**
   * Looks up a stage by its (trimmed) name and fails with a descriptive
   * message instead of a bare NullPointerException when it is unknown.
   */
  private Stage findStage(Map<String, Stage> stages, String rawName, String key)
      throws JSONException {
    String stageName = rawName.trim();
    Stage stage = stages.get(stageName);
    if (stage == null) {
      throw new JSONException("Stage " + this.internalName + " refers to unknown stage '"
          + stageName + "' in " + key);
    }
    return stage;
  }

  /**
   * Populates this stage from its JSON description.
   *
   * If the object contains an entry named after the parser's framework
   * (e.g. "Tez"), the vertices and edges are extracted from it. Otherwise
   * the operator and/or the plain attributes are extracted directly.
   *
   * @param object
   *          the JSON object describing this stage
   * @throws Exception
   *           if the JSON cannot be read as expected or contains an
   *           unsupported value
   */
  public void extractVertex(JSONObject object) throws Exception {
    if (!object.has(this.parser.getFrameworkName())) {
      extractOperatorAndAttrs(object);
      return;
    }

    this.tezStageDependency = new TreeMap<>();
    JSONObject tez = (JSONObject) object.get(this.parser.getFrameworkName());
    JSONObject vertices = tez.getJSONObject("Vertices:");
    JSONObject edges = tez.has("Edges:") ? tez.getJSONObject("Edges:") : null;
    // JSONObject.getNames returns null for an empty object, so an empty
    // "Edges:" entry is handled like a missing one.
    String[] edgeTargets = edges == null ? null : JSONObject.getNames(edges);

    if (edgeTargets != null) {
      // iterate for the first time to get all the vertices
      for (String to : edgeTargets) {
        vertexs.put(to, new Vertex(to, vertices.getJSONObject(to), this, parser));
      }
      // iterate for the second time to get all the vertex dependency
      for (String to : edgeTargets) {
        Object o = edges.get(to);
        Vertex v = vertexs.get(to);
        if (o instanceof JSONObject) {
          // 1 to 1 mapping
          Connection connection = linkToParent(v, (JSONObject) o, vertices);
          this.tezStageDependency.put(v, Arrays.asList(connection));
        } else {
          // 1 to many mapping
          JSONArray from = (JSONArray) o;
          List<Connection> list = new ArrayList<>();
          for (int index = 0; index < from.length(); index++) {
            list.add(linkToParent(v, from.getJSONObject(index), vertices));
          }
          this.tezStageDependency.put(v, list);
        }
      }
    } else {
      String[] vertexNames = JSONObject.getNames(vertices);
      if (vertexNames != null) {
        for (String vertexName : vertexNames) {
          vertexs.put(vertexName,
              new Vertex(vertexName, vertices.getJSONObject(vertexName), this, parser));
        }
      }
    }

    // iterate for the first time to extract opTree in vertex
    for (Vertex v : vertexs.values()) {
      if (v.vertexType == VertexType.MAP || v.vertexType == VertexType.REDUCE) {
        v.extractOpTree();
      }
    }
    // iterate for the second time to rewrite object
    for (Vertex v : vertexs.values()) {
      v.checkMultiReduceOperator(parser.rewriteObject);
    }
  }

  /**
   * Wires vertex {@code v} to the parent named in {@code edge}, creating and
   * registering the parent vertex first if it is not known yet.
   *
   * @return the connection (edge type, parent vertex) to record for {@code v}
   */
  private Connection linkToParent(Vertex v, JSONObject edge, JSONObject vertices)
      throws Exception {
    String parent = edge.getString("parent");
    Vertex parentVertex = vertexs.get(parent);
    if (parentVertex == null) {
      parentVertex = new Vertex(parent, vertices.getJSONObject(parent), this, parser);
      vertexs.put(parent, parentVertex);
    }
    String type = edge.getString("type");
    // for union vertex, we reverse the dependency relationship
    if (!"CONTAINS".equals(type)) {
      v.addDependency(new Connection(type, parentVertex));
      parentVertex.setType(type);
      parentVertex.children.add(v);
    } else {
      parentVertex.addDependency(new Connection(type, v));
      v.children.add(parentVertex);
    }
    return new Connection(type, parentVertex);
  }

  /**
   * Handles a stage without a framework entry: every key containing
   * "Operator" is parsed as the stage operator, every other key with a
   * non-empty value is stored as an attribute.
   */
  private void extractOperatorAndAttrs(JSONObject object) throws Exception {
    String[] names = JSONObject.getNames(object);
    if (names == null) {
      return;
    }
    for (String name : names) {
      if (name.contains("Operator")) {
        this.op = extractOp(name, object.getJSONObject(name));
      } else {
        String value = object.get(name).toString();
        if (!value.isEmpty()) {
          attrs.put(name, value);
        }
      }
    }
  }

  /**
   * Builds the single operator of a stage, e.g. the create table operator or
   * the fetch operator.
   *
   * Printable values become attributes of the operator; empty values are
   * skipped. A non-empty "Processor Tree:" object is turned into an inline
   * vertex; the entries of any other non-empty nested JSON object are
   * flattened into the operator attributes.
   *
   * @param opName
   *          the operator name
   * @param opObj
   *          the JSON object describing the operator
   * @return the operator built from {@code opObj}
   * @throws Exception
   *           if a value is neither printable nor a JSON object
   */
  Op extractOp(String opName, JSONObject opObj) throws Exception {
    Map<String, String> opAttrs = new TreeMap<>();
    Vertex v = null;
    if (opObj.length() > 0) {
      String[] names = JSONObject.getNames(opObj);
      for (String name : names) {
        Object o = opObj.get(name);
        if (isPrintable(o)) {
          // An empty printable value is skipped rather than reported as
          // unsupported, matching how plain stage attributes are handled.
          String value = o.toString();
          if (!value.isEmpty()) {
            opAttrs.put(name, value);
          }
        } else if (o instanceof JSONObject) {
          JSONObject attrObj = (JSONObject) o;
          if (attrObj.length() > 0) {
            if (name.equals("Processor Tree:")) {
              JSONObject object = new JSONObject(new LinkedHashMap<>());
              object.put(name, attrObj);
              v = new Vertex(null, object, this, parser);
              v.extractOpTree();
            } else {
              for (String attrName : JSONObject.getNames(attrObj)) {
                String value = attrObj.get(attrName).toString();
                if (!value.isEmpty()) {
                  opAttrs.put(attrName, value);
                }
              }
            }
          }
        } else {
          throw new Exception("Unsupported object in " + this.internalName);
        }
      }
    }
    Op extracted = new Op(opName, null, null, null, null, opAttrs, null, v, parser);
    if (v != null) {
      parser.addInline(extracted, new Connection(null, v));
    }
    return extracted;
  }

  /**
   * Tells whether a value can be stored as an attribute through its
   * {@code toString()} form.
   *
   * @param val
   *          the value to test, may be null
   * @return true for Boolean, String, Integer, Long, Byte, Float, Double and
   *         Path values, false otherwise
   */
  private boolean isPrintable(Object val) {
    // An object's runtime class is never a primitive type, so only the
    // wrapper and value types need to be checked.
    return val instanceof Boolean || val instanceof String || val instanceof Integer
        || val instanceof Long || val instanceof Byte || val instanceof Float
        || val instanceof Double || val instanceof Path;
  }

  /**
   * Prints this stage, then recursively its parent stages one level deeper.
   * A stage already recorded in the parser's print set is only referenced,
   * not printed again.
   *
   * @param printer
   *          the printer to write to
   * @param indentFlag
   *          the indentation level of the stage name
   * @throws Exception
   *           if printing a vertex, the operator or a parent stage fails
   */
  public void print(Printer printer, int indentFlag) throws Exception {
    // print stagename
    if (parser.printSet.contains(this)) {
      printer.println(DagJsonParser.prefixString(indentFlag)
          + " Please refer to the previous " + externalName);
      return;
    }
    parser.printSet.add(this);
    printer.println(DagJsonParser.prefixString(indentFlag) + externalName);
    // print vertexes
    indentFlag++;
    for (Vertex candidate : this.vertexs.values()) {
      // only vertices that are not inlined and have no children are printed from here
      if (!parser.isInline(candidate) && candidate.children.isEmpty()) {
        candidate.print(printer, indentFlag, null, null);
      }
    }
    if (!attrs.isEmpty()) {
      printer.println(DagJsonParser.prefixString(indentFlag)
          + DagJsonParserUtils.attrsToString(attrs));
    }
    if (op != null) {
      op.print(printer, indentFlag, false);
    }
    indentFlag++;
    // print dependent stages
    for (Stage stage : this.parentStages) {
      stage.print(printer, indentFlag);
    }
  }
}