/* (c) 2014 LinkedIn Corp. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package com.linkedin.cubert.plan.physical;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.hadoop.conf.Configuration;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.node.ArrayNode;
import org.codehaus.jackson.node.ObjectNode;

import com.linkedin.cubert.block.Block;
import com.linkedin.cubert.block.BlockProperties;
import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.block.BufferedTupleOperatorBlock;
import com.linkedin.cubert.block.TupleOperatorBlock;
import com.linkedin.cubert.block.TupleStoreBlock;
import com.linkedin.cubert.functions.builtin.FunctionFactory;
import com.linkedin.cubert.io.rubix.RubixMemoryBlock;
import com.linkedin.cubert.operator.BlockOperator;
import com.linkedin.cubert.operator.OperatorFactory;
import com.linkedin.cubert.operator.OperatorType;
import com.linkedin.cubert.operator.TupleOperator;
import com.linkedin.cubert.utils.JsonUtils;

/**
 * Parses and executes the physical plan operators within a Mapper or a Reducer.
 * <p>
 * The operators are described by a JSON array. Each call to {@link #next()} yields the
 * block registered under the configured output block name, rebuilding the part of the
 * operator chain that sits downstream of the block operator that produced a new block.
 * When any operator carries a {@code "multipassIndex"} field, the chain is split into
 * one operator list per pass and the passes are executed one after another for every
 * block produced by the multipass root block operator.
 *
 * @author Maneesh Varshney
 *
 */
public class PhaseExecutor
{
    /**
     * Book-keeping for a block operator that has been instantiated and may still
     * produce further blocks.
     */
    private static final class BlockOperatorInfo
    {
        /** The instantiated block operator. */
        BlockOperator operator;

        /** Name under which the operator's output block is registered. */
        String name;

        /** Position of the operator in the operator list it was created from. */
        int operatorIndex;

        /** Block most recently produced by the operator. */
        Block currentBlock;

        /**
         * Iterator over the per-pass operator lists; non-null only for the operator
         * flagged as the multipass root.
         */
        Iterator<ArrayNode> multipassChainIterator;

        public BlockOperatorInfo(BlockOperator operator,
                                 String name,
                                 int operatorIndex,
                                 Block currentBlock)
        {
            this.operator = operator;
            this.name = name;
            this.operatorIndex = operatorIndex;
            this.currentBlock = currentBlock;
        }
    }

    /** Operator list currently being executed (one pass of it, in multipass mode). */
    private ArrayNode operatorsJson;

    private final String outputBlockName;
    private final Configuration conf;

    /** One operator list per pass; null when the plan is not multipass. */
    private List<ArrayNode> multipassOperatorsJson;

    /** All blocks created so far, keyed by block name. */
    private final Map<String, Block> blocks = new HashMap<String, Block>();

    /** Block operators that may still produce blocks, in creation order. */
    private final Stack<BlockOperatorInfo> blockOperatorStack =
            new Stack<BlockOperatorInfo>();

    /** True until the first call to {@link #next()}. */
    private boolean firstBlock = true;

    // The two fields below are only valid when profile mode is enabled.
    private PerfProfiler profiler = null;
    private BufferedTupleOperatorBlock previousProfileOutputBlock = null;

    /**
     * Creates the executor and prepares the complete operator chain.
     *
     * @param inputBlockName
     *            name under which {@code inputBlock} is registered
     * @param inputBlock
     *            the initial input block
     * @param outputBlockName
     *            name of the block that {@link #next()} returns
     * @param operatorsJson
     *            JSON array describing the operators
     * @param conf
     *            configuration handed to the block operators; also consulted for
     *            {@code CubertStrings.PROFILE_MODE}
     * @throws IOException
     *             propagated from operator initialization
     * @throws InterruptedException
     *             propagated from operator initialization
     */
    public PhaseExecutor(String inputBlockName,
                         Block inputBlock,
                         String outputBlockName,
                         ArrayNode operatorsJson,
                         Configuration conf) throws IOException,
            InterruptedException
    {
        this.conf = conf;
        this.outputBlockName = outputBlockName;
        this.operatorsJson = operatorsJson;
        blocks.put(inputBlockName, inputBlock);

        if (isMultipass(operatorsJson))
        {
            multipassOperatorsJson = extractMultipassChains(operatorsJson);
            this.operatorsJson = multipassOperatorsJson.get(0);
        }

        // Constant-first comparison: Configuration.get returns null when the property
        // is not set, which must simply mean "profiling disabled".
        boolean profileMode = "true".equals(conf.get(CubertStrings.PROFILE_MODE));
        if (profileMode)
        {
            profiler =
                    multipassOperatorsJson == null
                            ? new PerfProfiler(operatorsJson)
                            : new PerfProfiler(multipassOperatorsJson);
        }

        prepareOperatorChain(0);
    }

    /**
     * Returns the next output block.
     * <p>
     * The first call returns the output block built by the constructor. Later calls
     * look at the block operator on top of the stack: a multipass root first runs its
     * remaining passes; otherwise the operator is asked for its next block and the
     * operators after it are re-created. Exhausted block operators are popped.
     *
     * @return the block registered under the output block name, or {@code null} once
     *         every block operator on the stack is exhausted
     * @throws IOException
     *             propagated from the operators
     * @throws InterruptedException
     *             propagated from the operators
     */
    public Block next() throws IOException,
            InterruptedException
    {
        if (firstBlock)
        {
            firstBlock = false;
            return getOutputBlock();
        }

        while (!blockOperatorStack.isEmpty())
        {
            BlockOperatorInfo info = blockOperatorStack.peek();

            if (info.multipassChainIterator != null)
            {
                // this is the multipass root Block operator
                if (info.multipassChainIterator.hasNext())
                {
                    // run the next pass over the same block
                    this.operatorsJson = info.multipassChainIterator.next();
                    if (profiler != null)
                    {
                        flushProfileCounters();
                        profiler.incPass();
                    }

                    prepareOperatorChain(info.operatorIndex + 1);
                    return getOutputBlock();
                }

                // all passes are done for the current block: go back to the first
                // pass before asking the root operator for its next block
                info.multipassChainIterator = this.multipassOperatorsJson.iterator();
                this.operatorsJson = info.multipassChainIterator.next();
                if (profiler != null)
                {
                    flushProfileCounters();
                    profiler.resetPass();
                }
            }

            Block nextBlock = info.operator.next();
            if (nextBlock == null)
            {
                blockOperatorStack.pop();
            }
            else
            {
                blocks.put(info.name, nextBlock);
                info.currentBlock = nextBlock;
                prepareOperatorChain(info.operatorIndex + 1);
                return getOutputBlock();
            }
        }

        // Inputs are exhausted, flush the profiling results.
        flushProfileCounters();
        return null;
    }

    /**
     * Updates the performance counter of the most recently returned profiled output
     * block, if there is one.
     */
    private void flushProfileCounters()
    {
        if (previousProfileOutputBlock != null)
        {
            previousProfileOutputBlock.updatePerformanceCounter();
        }
    }

    /**
     * Looks up the block registered under the output block name. In profile mode the
     * block is additionally marked as the output block of the profiler, provided it is
     * a {@link BufferedTupleOperatorBlock}; otherwise a warning is printed.
     *
     * @return the current output block
     */
    private Block getOutputBlock()
    {
        Block block = blocks.get(outputBlockName);

        if (profiler != null)
        {
            // Profile mode
            if (!(block instanceof BufferedTupleOperatorBlock))
            {
                System.err.println("WARN: The output block is not a TupleOperatorBlock,"
                        + " profiling result will not be reported.");
            }
            else
            {
                previousProfileOutputBlock = (BufferedTupleOperatorBlock) block;
                previousProfileOutputBlock.setAsOutputBlock(profiler);
            }
        }

        return block;
    }

    /**
     * Instantiates the operators of the current operator list from {@code startIndex}
     * to the end and registers the block each one produces under its
     * {@code "output"} name. JSON nodes without an {@code "operator"} field are
     * skipped.
     *
     * @param startIndex
     *            index of the first operator to (re-)create
     * @throws IOException
     *             propagated from the operators
     * @throws InterruptedException
     *             propagated from the operators
     * @throws IllegalStateException
     *             if a tuple operator has no {@code "input"} field
     */
    void prepareOperatorChain(int startIndex) throws IOException,
            InterruptedException
    {
        int numOperators = operatorsJson.size();

        for (int i = startIndex; i < numOperators; i++)
        {
            JsonNode operatorJson = operatorsJson.get(i);
            if (!operatorJson.has("operator"))
            {
                continue;
            }

            OperatorType type =
                    OperatorType.valueOf(operatorJson.get("operator").getTextValue());
            String name = operatorJson.get("output").getTextValue();

            if (type.isTupleOperator())
            {
                TupleOperator operator =
                        type == OperatorType.USER_DEFINED_TUPLE_OPERATOR
                                ? (TupleOperator) FunctionFactory.createFunctionObject(JsonUtils.getText(operatorJson,
                                                                                                         "class"),
                                                                                       operatorJson.get("constructorArgs"))
                                : OperatorFactory.getTupleOperator(type);

                Map<String, Block> inputBlocks = getInputBlocks(blocks, operatorJson);
                if (inputBlocks == null)
                {
                    // getInputBlocks returns null when the "input" field is absent;
                    // report that explicitly instead of failing on inputBlocks.size()
                    throw new IllegalStateException("Tuple operator producing '" + name
                            + "' has no \"input\" field");
                }

                BlockProperties[] parentProps = new BlockProperties[inputBlocks.size()];
                int idx = 0;
                for (Block parent : inputBlocks.values())
                {
                    parentProps[idx++] = parent.getProperties();
                }

                BlockProperties props =
                        new BlockProperties(name,
                                            new BlockSchema(operatorJson.get("schema")),
                                            parentProps);
                operator.setInput(inputBlocks, operatorJson, props);

                Block block = getTupleOperatorBlock(i, operator, props);
                blocks.put(name, block);
            }
            else if (type == OperatorType.LOAD_BLOCK)
            {
                // LOAD_BLOCK is not pushed on the stack, so next() never asks it for
                // further blocks.
                BlockOperator operator = OperatorFactory.getBlockOperator(type);
                Map<String, Block> inputBlocks = getInputBlocks(blocks, operatorJson);
                operator.setInput(conf, inputBlocks, operatorJson);

                Block block = operator.next();
                blocks.put(name, block);
            }
            else
            {
                BlockOperator operator =
                        type == OperatorType.USER_DEFINED_BLOCK_OPERATOR
                                ? (BlockOperator) FunctionFactory.createFunctionObject(JsonUtils.getText(operatorJson,
                                                                                                         "class"),
                                                                                       operatorJson.get("constructorArgs"))
                                : OperatorFactory.getBlockOperator(type);

                Map<String, Block> inputBlocks = getInputBlocks(blocks, operatorJson);
                operator.setInput(conf, inputBlocks, operatorJson);

                Block block = operator.next();
                blocks.put(name, block);

                BlockOperatorInfo info = new BlockOperatorInfo(operator, name, i, block);
                blockOperatorStack.push(info);

                if (operatorJson.has("isMultipassRoot"))
                {
                    // The first pass is the operator list being prepared right now,
                    // so the iterator is advanced past it.
                    info.multipassChainIterator = this.multipassOperatorsJson.iterator();
                    info.multipassChainIterator.next();
                }
            }
        }
    }

    /**
     * Wraps a tuple operator into a block.
     *
     * @param operatorIndex
     *            index of the operator in the current operator list
     * @param operator
     *            the tuple operator to wrap
     * @param props
     *            properties of the block being created
     * @return a {@link TupleOperatorBlock} when profiling is off, otherwise the block
     *         supplied by the profiler
     */
    public Block getTupleOperatorBlock(int operatorIndex,
                                       TupleOperator operator,
                                       BlockProperties props)
    {
        if (profiler == null)
        {
            return new TupleOperatorBlock(operator, props);
        }

        return profiler.getProfileOperatorBlock(operatorIndex, operator, props);
    }

    /**
     * @return true if any operator carries a {@code "multipassIndex"} field
     */
    private boolean isMultipass(ArrayNode operatorsJson)
    {
        for (JsonNode json : operatorsJson)
        {
            if (json.has("multipassIndex"))
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Splits a multipass operator list into one operator list per pass.
     * <p>
     * As a side effect, the last non-tuple operator that precedes the first multipass
     * operator is tagged with {@code "isMultipassRoot"} in the supplied JSON.
     *
     * @param operatorsJson
     *            the complete operator list
     * @return the per-pass operator lists, indexed by pass number
     * @throws IllegalStateException
     *             if no block operator precedes the multipass operators
     */
    private List<ArrayNode> extractMultipassChains(ArrayNode operatorsJson)
    {
        List<ArrayNode> list = new ArrayList<ArrayNode>();

        // Find the number of passes
        int numPasses = 0;
        for (JsonNode json : operatorsJson)
        {
            if (json.has("multipassIndex"))
            {
                int multipassIndex = json.get("multipassIndex").getIntValue();
                if (multipassIndex + 1 > numPasses)
                {
                    numPasses = multipassIndex + 1;
                }
            }
        }

        // Locate the last BlockOperator that appears before multipass operators.
        // This is the BlockOperator that is the root of the multipass branches.
        JsonNode rootBlockOperator = null;
        for (JsonNode json : operatorsJson)
        {
            if (json.has("multipassIndex"))
            {
                break;
            }

            // Nodes without an "operator" field are ignored by prepareOperatorChain,
            // so they cannot be the root either.
            if (!json.has("operator"))
            {
                continue;
            }

            OperatorType type =
                    OperatorType.valueOf(JsonUtils.getText(json, "operator"));
            if (!type.isTupleOperator())
            {
                rootBlockOperator = json;
            }
        }

        if (rootBlockOperator == null)
        {
            throw new IllegalStateException("No BLOCK operator was found at root on multipass operators");
        }

        ((ObjectNode) rootBlockOperator).put("isMultipassRoot", true);

        // Extract different passes from the chain
        ObjectMapper mapper = new ObjectMapper();
        for (int i = 0; i < numPasses; i++)
        {
            ArrayNode anode = mapper.createArrayNode();

            for (JsonNode json : operatorsJson)
            {
                // if this is a multipass operator, then add it only if it belongs to the
                // current pass
                if (json.has("multipassIndex"))
                {
                    int multipassIndex = json.get("multipassIndex").getIntValue();
                    if (multipassIndex == i)
                    {
                        anode.add(json);
                    }
                }
                // replace the GATHER operator with NO_OP. The input to this operator will
                // correspond to the correct multipass index
                else if (json.has("operator")
                        && "GATHER".equals(JsonUtils.getText(json, "operator")))
                {
                    String[] inputs = JsonUtils.asArray(json, "input");
                    String output = JsonUtils.getText(json, "output");

                    anode.add(JsonUtils.createObjectNode("operator",
                                                         "NO_OP",
                                                         "input",
                                                         inputs[i],
                                                         "output",
                                                         output,
                                                         "schema",
                                                         json.get("schema")));
                }
                // all other operators (including nodes without an "operator" field)
                // are added as is.
                else
                {
                    anode.add(json);
                }
            }

            list.add(anode);
        }

        return list;
    }

    /**
     * Collects the blocks named in the {@code "input"} field of an operator.
     *
     * @param allBlocks
     *            every block known so far, keyed by name
     * @param json
     *            the operator's JSON
     * @return map from input name to block (the value is null for a name missing from
     *         {@code allBlocks}), or {@code null} if the operator has no
     *         {@code "input"} field
     */
    Map<String, Block> getInputBlocks(Map<String, Block> allBlocks, JsonNode json)
    {
        if (!json.has("input"))
        {
            return null;
        }

        Map<String, Block> inputBlocks = new HashMap<String, Block>();
        String[] inputs = JsonUtils.asArray(json.get("input"));
        for (String input : inputs)
        {
            inputBlocks.put(input, allBlocks.get(input));
        }

        return inputBlocks;
    }

    /**
     * Rewinds the current block of the stacked block operators, visiting them in the
     * order they were pushed and stopping at the first one whose index is at or beyond
     * {@code startIndex}. Only {@link TupleStoreBlock} and {@link RubixMemoryBlock}
     * instances are rewound.
     *
     * @param startIndex
     *            operator index at which to stop
     * @throws IOException
     *             propagated from {@link Block#rewind()}
     */
    public void rewindMemoryBlocks(int startIndex) throws IOException
    {
        for (BlockOperatorInfo boi : this.blockOperatorStack)
        {
            if (boi.operatorIndex >= startIndex)
            {
                return;
            }

            if (boi.currentBlock instanceof TupleStoreBlock
                    || boi.currentBlock instanceof RubixMemoryBlock)
            {
                boi.currentBlock.rewind();
            }
        }
    }
}