/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.plan;

import java.util.Arrays;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

/**
 * BaseWork. Base class for any "work" that's being done on the cluster. Items like stats
 * gathering that are commonly used regardless of the type of work live here.
 */
@SuppressWarnings({"serial"})
public abstract class BaseWork extends AbstractOperatorDesc {
  protected static final Logger LOG = LoggerFactory.getLogger(BaseWork.class);

  // dummyOps is a reference to all the HashTableDummy operators in the
  // plan. These have to be separately initialized when we setup a task.
  // Their function is mainly as root ops to give the mapjoin the correct
  // schema info.
  List<HashTableDummyOperator> dummyOps;
  int tag = 0;
  private final List<String> sortColNames = new ArrayList<String>();

  private MapredLocalWork mrLocalWork;

  /** Creates a work item without a name. */
  public BaseWork() {}

  /**
   * Creates a work item with the given name.
   *
   * @param name the name to assign to this work
   */
  public BaseWork(String name) {
    setName(name);
  }

  private boolean gatheringStats;

  private String name;

  /*
   * Vectorization.
   */

  // This will be true if a node was examined by the Vectorizer class.
  protected boolean vectorizationExamined;

  protected boolean vectorizationEnabled;

  protected VectorizedRowBatchCtx vectorizedRowBatchCtx;

  protected boolean useVectorizedInputFileFormat;

  private VectorizerReason notVectorizedReason;

  private boolean groupByVectorOutput;
  private boolean allNative;
  private boolean usesVectorUDFAdaptor;

  protected long vectorizedVertexNum;

  protected boolean llapMode = false;
  protected boolean uberMode = false;

  private int reservedMemoryMB = -1;  // default to -1 means we leave it up to Tez to decide

  // Used for value registry
  private Map<String, RuntimeValuesInfo> inputSourceToRuntimeValuesInfo =
          new HashMap<String, RuntimeValuesInfo>();

  public void setGatheringStats(boolean gatherStats) {
    this.gatheringStats = gatherStats;
  }

  public boolean isGatheringStats() {
    return this.gatheringStats;
  }

  public String getName() {
    return name;
  }

  public void setName(String name) {
    this.name = name;
  }

  /**
   * @return the list of dummy operators; may be null if none were ever set or added
   */
  public List<HashTableDummyOperator> getDummyOps() {
    return dummyOps;
  }

  /**
   * Replaces the list of dummy operators. An info message is logged when a non-empty
   * list is being replaced by a null or empty one.
   *
   * @param dummyOps the new list of dummy operators (stored as-is, not copied)
   */
  public void setDummyOps(List<HashTableDummyOperator> dummyOps) {
    if (this.dummyOps != null && !this.dummyOps.isEmpty()
        && (dummyOps == null || dummyOps.isEmpty())) {
      LOG.info("Removing dummy operators from {} {}", name, this.getClass().getSimpleName());
    }
    this.dummyOps = dummyOps;
  }

  /**
   * Appends a dummy operator, lazily creating the backing list on first use.
   *
   * @param dummyOp the operator to add
   */
  public void addDummyOp(HashTableDummyOperator dummyOp) {
    if (dummyOps == null) {
      dummyOps = new LinkedList<HashTableDummyOperator>();
    }
    dummyOps.add(dummyOp);
  }

  public abstract void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap);

  public abstract Set<Operator<? extends OperatorDesc>> getAllRootOperators();

  public abstract Operator<? extends OperatorDesc> getAnyRootOperator();

  /**
   * Returns every operator reachable from the root operators of this work by following
   * child links.
   *
   * @return an insertion-ordered set of all reachable operators, in the order they are
   *         first popped from the traversal stack
   */
  public Set<Operator<?>> getAllOperators() {

    Set<Operator<?>> returnSet = new LinkedHashSet<Operator<?>>();
    Set<Operator<?>> opSet = getAllRootOperators();
    Stack<Operator<?>> opStack = new Stack<Operator<?>>();

    // add all children
    opStack.addAll(opSet);

    while (!opStack.empty()) {
      Operator<?> op = opStack.pop();
      if (!returnSet.add(op)) {
        // Already expanded via another parent; walking its subtree again would add
        // nothing to the result and only repeats work.
        continue;
      }
      if (op.getChildOperators() != null) {
        opStack.addAll(op.getChildOperators());
      }
    }

    return returnSet;
  }

  /**
   * Returns a set containing all leaf operators from the operator tree in this work.
   * @return a set containing all leaf operators in this operator tree.
   */
  public Set<Operator<? extends OperatorDesc>> getAllLeafOperators() {
    Set<Operator<?>> returnSet = new LinkedHashSet<Operator<?>>();
    // Operators that were already popped once, so shared subtrees are expanded only once.
    Set<Operator<?>> visited = new LinkedHashSet<Operator<?>>();
    Set<Operator<?>> opSet = getAllRootOperators();
    Stack<Operator<?>> opStack = new Stack<Operator<?>>();

    // add all children
    opStack.addAll(opSet);

    while (!opStack.empty()) {
      Operator<?> op = opStack.pop();
      if (!visited.add(op)) {
        continue;
      }
      if (op.getNumChild() == 0) {
        returnSet.add(op);
      }
      if (op.getChildOperators() != null) {
        opStack.addAll(op.getChildOperators());
      }
    }

    return returnSet;
  }

  public void setVectorizedVertexNum(long vectorizedVertexNum) {
    this.vectorizedVertexNum = vectorizedVertexNum;
  }

  public long getVectorizedVertexNum() {
    return vectorizedVertexNum;
  }

  // -----------------------------------------------------------------------------------------------

  public void setVectorizationExamined(boolean vectorizationExamined) {
    this.vectorizationExamined = vectorizationExamined;
  }

  public boolean getVectorizationExamined() {
    return vectorizationExamined;
  }

  public void setVectorizationEnabled(boolean vectorizationEnabled) {
    this.vectorizationEnabled = vectorizationEnabled;
  }

  public boolean getVectorizationEnabled() {
    return vectorizationEnabled;
  }

  /*
   * The vectorization context for creating the VectorizedRowBatch for the node.
   */
  public VectorizedRowBatchCtx getVectorizedRowBatchCtx() {
    return vectorizedRowBatchCtx;
  }

  public void setVectorizedRowBatchCtx(VectorizedRowBatchCtx vectorizedRowBatchCtx) {
    this.vectorizedRowBatchCtx = vectorizedRowBatchCtx;
  }

  public void setNotVectorizedReason(VectorizerReason notVectorizedReason) {
    this.notVectorizedReason = notVectorizedReason;
  }

  public VectorizerReason getNotVectorizedReason() {
    return notVectorizedReason;
  }

  public void setGroupByVectorOutput(boolean groupByVectorOutput) {
    this.groupByVectorOutput = groupByVectorOutput;
  }

  public boolean getGroupByVectorOutput() {
    return groupByVectorOutput;
  }

  public void setUsesVectorUDFAdaptor(boolean usesVectorUDFAdaptor) {
    this.usesVectorUDFAdaptor = usesVectorUDFAdaptor;
  }

  public boolean getUsesVectorUDFAdaptor() {
    return usesVectorUDFAdaptor;
  }

  public void setAllNative(boolean allNative) {
    this.allNative = allNative;
  }

  public boolean getAllNative() {
    return allNative;
  }

  /**
   * Exposes the vectorization-related state of a {@link BaseWork} through
   * {@link Explain}-annotated accessors. Accessors return null when the value does not
   * apply to the current state of the work (see each method for the exact condition).
   */
  public static class BaseExplainVectorization {

    private final BaseWork baseWork;

    public BaseExplainVectorization(BaseWork baseWork) {
      this.baseWork = baseWork;
    }

    /**
     * @return whether vectorization is enabled for the wrapped work
     */
    @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public boolean enabled() {
      return baseWork.getVectorizationEnabled();
    }

    /**
     * @return the work's vector mode, or null when vectorization is not enabled
     */
    @Explain(vectorization = Vectorization.SUMMARY, displayName = "vectorized",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public Boolean vectorized() {
      if (!baseWork.getVectorizationEnabled()) {
        return null;
      }
      return baseWork.getVectorMode();
    }

    /**
     * @return null when vectorization is not enabled or the work is in vector mode;
     *         otherwise the recorded reason as a string, or "Unknown" if none was recorded
     */
    @Explain(vectorization = Vectorization.SUMMARY, displayName = "notVectorizedReason",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public String notVectorizedReason() {
      if (!baseWork.getVectorizationEnabled() || baseWork.getVectorMode()) {
        return null;
      }
      VectorizerReason notVectorizedReason = baseWork.getNotVectorizedReason();
      if (notVectorizedReason == null) {
        return "Unknown";
      }
      return notVectorizedReason.toString();
    }

    /**
     * @return the work's groupByVectorOutput flag, or null when not in vector mode
     */
    @Explain(vectorization = Vectorization.SUMMARY, displayName = "groupByVectorOutput",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public Boolean groupByRowOutputCascade() {
      if (!baseWork.getVectorMode()) {
        return null;
      }
      return baseWork.getGroupByVectorOutput();
    }

    /**
     * @return the work's allNative flag, or null when not in vector mode
     */
    @Explain(vectorization = Vectorization.SUMMARY, displayName = "allNative",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public Boolean nativeVectorized() {
      if (!baseWork.getVectorMode()) {
        return null;
      }
      return baseWork.getAllNative();
    }

    /**
     * @return the work's usesVectorUDFAdaptor flag, or null when not in vector mode
     */
    @Explain(vectorization = Vectorization.SUMMARY, displayName = "usesVectorUDFAdaptor",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public Boolean usesVectorUDFAdaptor() {
      if (!baseWork.getVectorMode()) {
        return null;
      }
      return baseWork.getUsesVectorUDFAdaptor();
    }

    /**
     * Exposes the contents of a {@link VectorizedRowBatchCtx} through
     * {@link Explain}-annotated accessors.
     */
    public static class RowBatchContextExplainVectorization {

      private final VectorizedRowBatchCtx vectorizedRowBatchCtx;

      public RowBatchContextExplainVectorization(VectorizedRowBatchCtx vectorizedRowBatchCtx) {
        this.vectorizedRowBatchCtx = vectorizedRowBatchCtx;
      }

      /**
       * Formats a contiguous range of row columns as "name:type" strings.
       *
       * @param startIndex index of the first row column to include
       * @param count number of row columns to include
       * @return the formatted columns, in index order
       */
      private List<String> getColumns(int startIndex, int count) {
        String[] rowColumnNames = vectorizedRowBatchCtx.getRowColumnNames();
        TypeInfo[] rowColumnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos();
        List<String> result = new ArrayList<String>(count);
        final int end = startIndex + count;
        for (int i = startIndex; i < end; i++) {
          result.add(rowColumnNames[i] + ":" + rowColumnTypeInfos[i]);
        }
        return result;
      }

      /**
       * @return the first {@code getDataColumnCount()} row columns as "name:type" strings
       */
      @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumns",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public List<String> getDataColumns() {
        return getColumns(0, vectorizedRowBatchCtx.getDataColumnCount());
      }

      /**
       * @return the {@code getPartitionColumnCount()} row columns that follow the data
       *         columns, as "name:type" strings
       */
      @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public List<String> getPartitionColumns() {
        return getColumns(vectorizedRowBatchCtx.getDataColumnCount(),
            vectorizedRowBatchCtx.getPartitionColumnCount());
      }

      /**
       * @return the data column numbers rendered with {@link Arrays#toString(int[])},
       *         or null when the context has none
       */
      @Explain(vectorization = Vectorization.DETAIL, displayName = "includeColumns",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public String getDataColumnNums() {
        int[] dataColumnNums = vectorizedRowBatchCtx.getDataColumnNums();
        if (dataColumnNums == null) {
          return null;
        }
        // Format the array that was just null-checked rather than fetching it again.
        return Arrays.toString(dataColumnNums);
      }

      @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumnCount",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public int getDataColumnCount() {
        return vectorizedRowBatchCtx.getDataColumnCount();
      }

      @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumnCount",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public int getPartitionColumnCount() {
        return vectorizedRowBatchCtx.getPartitionColumnCount();
      }

      /**
       * @return a fixed-size list view over the context's scratch column type names
       */
      @Explain(vectorization = Vectorization.DETAIL, displayName = "scratchColumnTypeNames",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public List<String> getScratchColumnTypeNames() {
        return Arrays.asList(vectorizedRowBatchCtx.getScratchColumnTypeNames());
      }

    }

    /**
     * @return an explain wrapper around the work's row batch context, or null when the
     *         work is not in vector mode
     */
    @Explain(vectorization = Vectorization.DETAIL, displayName = "rowBatchContext",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public RowBatchContextExplainVectorization vectorizedRowBatchContext() {
      if (!baseWork.getVectorMode()) {
        return null;
      }
      return new RowBatchContextExplainVectorization(baseWork.getVectorizedRowBatchCtx());
    }
  }

  // -----------------------------------------------------------------------------------------------

  /**
   * @return the mapredLocalWork
   */
  @Explain(displayName = "Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
  public MapredLocalWork getMapRedLocalWork() {
    return mrLocalWork;
  }

  /**
   * @param mapLocalWork
   *          the mapredLocalWork to set
   */
  public void setMapRedLocalWork(final MapredLocalWork mapLocalWork) {
    this.mrLocalWork = mapLocalWork;
  }

  public void setUberMode(boolean uberMode) {
    this.uberMode = uberMode;
  }

  public boolean getUberMode() {
    return uberMode;
  }

  public void setLlapMode(boolean llapMode) {
    this.llapMode = llapMode;
  }

  public boolean getLlapMode() {
    return llapMode;
  }

  public int getReservedMemoryMB() {
    return reservedMemoryMB;
  }

  public void setReservedMemoryMB(int memoryMB) {
    reservedMemoryMB = memoryMB;
  }

  public abstract void configureJobConf(JobConf job);

  public void setTag(int tag) {
    this.tag = tag;
  }

  @Explain(displayName = "tag", explainLevels = { Level.USER })
  public int getTag() {
    return tag;
  }

  /**
   * Appends the given column names to this work's sort columns.
   *
   * @param sortCols the column names to append
   */
  public void addSortCols(List<String> sortCols) {
    this.sortColNames.addAll(sortCols);
  }

  /**
   * @return the live, mutable list of sort column names (not a copy)
   */
  public List<String> getSortCols() {
    return sortColNames;
  }

  /**
   * @return the live, mutable map from input source name to its runtime values info
   */
  public Map<String, RuntimeValuesInfo> getInputSourceToRuntimeValuesInfo() {
    return inputSourceToRuntimeValuesInfo;
  }

  /**
   * Registers (or overwrites) the runtime values info for a single input source.
   *
   * @param workName the key under which the info is stored
   * @param runtimeValuesInfo the info to associate with {@code workName}
   */
  public void setInputSourceToRuntimeValuesInfo(
          String workName, RuntimeValuesInfo runtimeValuesInfo) {
    inputSourceToRuntimeValuesInfo.put(workName, runtimeValuesInfo);
  }
}