/* * (c) 2014 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. */ package com.linkedin.cubert.analyzer.physical; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.codehaus.jackson.JsonNode; import org.codehaus.jackson.node.ArrayNode; import org.codehaus.jackson.node.ObjectNode; import com.linkedin.cubert.analyzer.physical.SemanticAnalyzer; import com.linkedin.cubert.analyzer.physical.Lineage.*; import com.linkedin.cubert.block.BlockSchema; import com.linkedin.cubert.block.ColumnType; import com.linkedin.cubert.operator.PostCondition; import com.linkedin.cubert.operator.aggregate.AggregationType; import com.linkedin.cubert.utils.CommonUtils; import com.linkedin.cubert.utils.JsonUtils; import com.linkedin.cubert.utils.Pair; import com.linkedin.cubert.utils.RewriteUtils; public class LineageGraph { public static abstract class LineageGraphVertex { public List<LineageGraphVertex> getEdgeVertices(boolean isForward) { if (isForward) return getChildVertices(); else return getParentVertices(); } public abstract List<LineageGraphVertex> getChildVertices(); public abstract List<LineageGraphVertex> getParentVertices(); public abstract String getNodeType(); @Override public boolean equals(Object obj) { if (this == obj) return true; return false; } } public static interface LineageGraphVisitor { public boolean visit(LineageGraphVertex graphNode); public void finishSubtree(LineageGraphVertex graphNode); } public static void visitLineageGraph(LineageGraphVertex graphNode, LineageGraphVisitor graphVisitor, boolean isForward) { ArrayList<Pair<LineageGraphVertex, LineageGraphVertex>> qnodes = new ArrayList<Pair<LineageGraphVertex, LineageGraphVertex>>(); // hash set of in progress vertices HashSet<LineageGraphVertex> inProgressNodes = new HashSet<LineageGraphVertex>(); qnodes.add(new Pair<LineageGraphVertex, LineageGraphVertex>(null, graphNode)); visitLineageGraph(qnodes, graphVisitor, isForward, inProgressNodes); } private static boolean visitLineageGraph(ArrayList<Pair<LineageGraphVertex, LineageGraphVertex>> qnodes, LineageGraphVisitor graphVisitor, boolean isForward, Set<LineageGraphVertex> inProgressNodes) { if (qnodes.size() == 0) return false; Pair<LineageGraphVertex, LineageGraphVertex> edge = qnodes.remove(0); LineageGraphVertex snode = edge.getSecond(); // add to in progress inProgressNodes.add(snode); if (true) { LineageHelper.trace("Graph Visit Edge\n**************"); LineageHelper.trace("Parent vertex\n**************"); if (edge.getFirst() != null) LineageHelper.trace(edge.getFirst().toString() + "\n**********"); LineageHelper.trace("Child vertex\n*************"); LineageHelper.trace(snode.toString() + "\n**************"); } if (!graphVisitor.visit(snode)) return false; // when the node is a terminal node with no outgoing edges, it is // considered subtree complete. if (snode.getEdgeVertices(isForward) == null) completeSubtree(graphVisitor, snode, isForward, inProgressNodes); for (LineageGraphVertex v : snode.getEdgeVertices(isForward)) { qnodes.add(new Pair<LineageGraphVertex, LineageGraphVertex>(snode, v)); } visitLineageGraph(qnodes, graphVisitor, isForward, inProgressNodes); return true; } private static void completeSubtree(LineageGraphVisitor graphVisitor, LineageGraphVertex completedNode, boolean isForward, Set<LineageGraphVertex> inProgressNodes) { graphVisitor.finishSubtree(completedNode); inProgressNodes.remove(completedNode); for (LineageGraphVertex v : completedNode.getEdgeVertices(!isForward)) checkCompletion(graphVisitor, v, isForward, inProgressNodes); } private static void checkCompletion(LineageGraphVisitor graphVisitor, LineageGraphVertex node, boolean isForward, Set<LineageGraphVertex> inProgressNodes) { // for all outgoing edges, if the end point is no longer preset in // inProgressNodes, the subtree is complete. for (LineageGraphVertex v : node.getEdgeVertices(isForward)){ if (inProgressNodes.contains(v)) return; } completeSubtree(graphVisitor, node, isForward, inProgressNodes); } public static class TerminalNodeTracer implements LineageGraphVisitor { public ArrayList<LineageGraphVertex> matchedNodes = new ArrayList<LineageGraphVertex>(); private List<String> nodeTypes = null; public TerminalNodeTracer(String[] ntypes) { if (ntypes != null) this.nodeTypes = Arrays.asList(ntypes); } public boolean visit(LineageGraphVertex snode) { LineageHelper.trace("Terminal node tracer visiting " + snode.toString()); if (nodeTypes == null || nodeTypes.size() == 0 || this.nodeTypes.indexOf(snode.getNodeType()) != -1) matchedNodes.add(snode); return true; } @Override public void finishSubtree(LineageGraphVertex graphNode) { } } public static class LineagePath implements Cloneable { public ArrayList<LineageGraphVertex> nodes = new ArrayList<LineageGraphVertex>(); public LineagePath clone() { LineagePath result = new LineagePath(); result.nodes.addAll(this.nodes); return result; } public String toString() { return CommonUtils.listAsString(nodes, "\n"); } } public static class PathMatcher implements LineageGraphVisitor { // partial match maintained for each level which is a prefix of the matching // pattern. All possible partial matches are maintained public ArrayList<ArrayList<LineagePath>> partialMatchedPaths; public ArrayList<LineagePath> matchedPaths; public ArrayList<String> pathNodeTypeSequence; public LineageGraphVertex terminalNode = null; public PathMatcher(ArrayList<String> pathNodeTypeSequence, LineageGraphVertex terminalNode) { this.pathNodeTypeSequence = pathNodeTypeSequence; this.partialMatchedPaths = new ArrayList<ArrayList<LineagePath>>(pathNodeTypeSequence.size()); for (int i = 0; i < pathNodeTypeSequence.size(); i++) this.partialMatchedPaths.add(new ArrayList<LineagePath>()); this.matchedPaths = new ArrayList<LineagePath>(); this.terminalNode = terminalNode; } @Override public boolean visit(LineageGraphVertex graphNode) { // for all partial matches, see if the match can be extended into next bucket. LineageHelper.trace("PathMatcher visiting graphNode " + graphNode.toString()); for (int i = 0; i < pathNodeTypeSequence.size(); i++) { if (!graphNode.getNodeType().equals(this.pathNodeTypeSequence.get(i))) continue; // if no partial match at the current level. if ((i) > 0 && partialMatchedPaths.get(i - 1) == null) continue; if (i == 0) { LineagePath p = new LineagePath(); p.nodes.add(graphNode); partialMatchedPaths.get(i).add(p); continue; } for (LineagePath p : partialMatchedPaths.get(i - 1)) { LineagePath pExt = p.clone(); pExt.nodes.add(graphNode); partialMatchedPaths.get(i).add(pExt); if (i == pathNodeTypeSequence.size() - 1 && terminalNode == null) this.matchedPaths.add(pExt); } } // If matching a valid terminalNode, if (terminalNode == null || graphNode != terminalNode) return true; int lb = pathNodeTypeSequence.size() - 1; for (LineagePath p : partialMatchedPaths.get(lb)) { LineagePath pExt = p.clone(); pExt.nodes.add(graphNode); this.matchedPaths.add(pExt); LineageHelper.trace("Found matching lineage path = " + pExt.toString()); } return true; } @Override public void finishSubtree(LineageGraphVertex graphNode) { // all partially matched paths ending at graphNode are deleted. for (int i = 0; i < partialMatchedPaths.size(); i++) { ArrayList<LineagePath> pathList = partialMatchedPaths.get(i); ArrayList<LineagePath> deleteList = new ArrayList<LineagePath>(); for (LineagePath p : pathList) { int li = p.nodes.size() - 1; if (p.nodes.get(li) == graphNode) deleteList.add(p); } partialMatchedPaths.get(i).removeAll(deleteList); } } } public static List<LineageGraphVertex> traceTerminalNodes(LineageGraphVertex startNode, String[] terminalNodeTypes, boolean isForward) { LineageHelper.trace("Tracing terminal Nodes for nodeTypes = " + (terminalNodeTypes != null ? Arrays.toString(terminalNodeTypes) : "EMPTY")); TerminalNodeTracer tracer = new TerminalNodeTracer(terminalNodeTypes); visitLineageGraph(startNode, tracer, isForward); return tracer.matchedNodes; } public static List<LineagePath> traceMatchingPaths(LineageGraphVertex startVertex, ArrayList<String> nodeTypes, LineageGraphVertex terminalVertex, boolean isForward) { PathMatcher pathMatcher = new PathMatcher(nodeTypes, terminalVertex); LineageHelper.trace("Trace matching paths for " + CommonUtils.listAsString(nodeTypes)); visitLineageGraph(startVertex, pathMatcher, isForward); int li = nodeTypes.size() - 1; return pathMatcher.matchedPaths; } public static class PathTracer implements LineageGraphVisitor { private LineageGraphVertex endVertex; private ArrayList<LineagePath> pathList; private LineageGraphVertex startVertex; public PathTracer(LineageGraphVertex startVertex, LineageGraphVertex endVertex) { this.endVertex = endVertex; this.startVertex = startVertex; this.pathList = new ArrayList<LineagePath>(); LineagePath singlePath = new LineagePath(); singlePath.nodes.add(startVertex); this.pathList.add(singlePath); } @Override public boolean visit(LineageGraphVertex graphNode) { for (LineagePath lpath : this.pathList) { int size = lpath.nodes.size(); List<LineageGraphVertex> parentList = graphNode.getParentVertices(); if (CommonUtils.indexOfByRef(parentList, lpath.nodes.get(size - 1)) != -1) lpath.nodes.add(graphNode); } if (this.pathList.size() == 0) { } if (graphNode == endVertex) return false; return true; } @Override public void finishSubtree(LineageGraphVertex graphNode) { } } public static LineagePath tracePath(LineageGraphVertex startVertex, LineageGraphVertex endVertex) { PathTracer pathTracer = new PathTracer(startVertex, endVertex); visitLineageGraph(startVertex, pathTracer, true); for (LineagePath lPath : pathTracer.pathList) { if (lPath.nodes.size() > 0 && lPath.nodes.get(lPath.nodes.size() - 1) == endVertex) return lPath; } return null; } public static List<LineageGraphVertex> traceAllReachable(LineageGraphVertex startNode, boolean isForward) { return traceTerminalNodes(startNode, null, isForward); } }