/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.physical;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SparkWork;
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
import org.apache.hadoop.hive.ql.stats.fs.FSStatsPublisher;
public class AnnotateRunTimeStatsOptimizer implements PhysicalPlanResolver {
private static final Logger LOG = LoggerFactory.getLogger(AnnotateRunTimeStatsOptimizer.class);
private class AnnotateRunTimeStatsDispatcher implements Dispatcher {
private final PhysicalContext physicalContext;
public AnnotateRunTimeStatsDispatcher(PhysicalContext context, Map<Rule, NodeProcessor> rules) {
super();
physicalContext = context;
}
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
throws SemanticException {
Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
Set<Operator<? extends OperatorDesc>> ops = new HashSet<>();
if (currTask instanceof MapRedTask) {
MapRedTask mr = (MapRedTask) currTask;
ops.addAll(mr.getWork().getAllOperators());
} else if (currTask instanceof TezTask) {
TezWork work = ((TezTask) currTask).getWork();
for (BaseWork w : work.getAllWork()) {
ops.addAll(w.getAllOperators());
}
} else if (currTask instanceof SparkTask) {
SparkWork sparkWork = (SparkWork) currTask.getWork();
for (BaseWork w : sparkWork.getAllWork()) {
ops.addAll(w.getAllOperators());
}
}
setOrAnnotateStats(ops, physicalContext.getParseContext());
return null;
}
}
public static void setOrAnnotateStats(Set<Operator<? extends OperatorDesc>> ops, ParseContext pctx)
throws SemanticException {
for (Operator<? extends OperatorDesc> op : ops) {
if (pctx.getContext().getExplainAnalyze() == AnalyzeState.RUNNING) {
setRuntimeStatsDir(op, pctx);
} else if (pctx.getContext().getExplainAnalyze() == AnalyzeState.ANALYZING) {
annotateRuntimeStats(op, pctx);
} else {
throw new SemanticException("Unexpected stats in AnnotateWithRunTimeStatistics.");
}
}
}
private static void setRuntimeStatsDir(Operator<? extends OperatorDesc> op, ParseContext pctx)
throws SemanticException {
try {
OperatorDesc conf = op.getConf();
if (conf != null) {
LOG.info("setRuntimeStatsDir for " + op.getOperatorId());
String path = new Path(pctx.getContext().getExplainConfig().getExplainRootPath(),
op.getOperatorId()).toString();
StatsPublisher statsPublisher = new FSStatsPublisher();
StatsCollectionContext runtimeStatsContext = new StatsCollectionContext(pctx.getConf());
runtimeStatsContext.setStatsTmpDir(path);
if (!statsPublisher.init(runtimeStatsContext)) {
LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
}
conf.setRuntimeStatsTmpDir(path);
} else {
LOG.debug("skip setRuntimeStatsDir for " + op.getOperatorId()
+ " because OperatorDesc is null");
}
} catch (HiveException e) {
throw new SemanticException(e);
}
}
private static void annotateRuntimeStats(Operator<? extends OperatorDesc> op, ParseContext pctx) {
Long runTimeNumRows = pctx.getContext().getExplainConfig().getOpIdToRuntimeNumRows()
.get(op.getOperatorId());
if (op.getConf() != null && op.getConf().getStatistics() != null && runTimeNumRows != null) {
LOG.info("annotateRuntimeStats for " + op.getOperatorId());
op.getConf().getStatistics().setRunTimeNumRows(runTimeNumRows);
} else {
LOG.debug("skip annotateRuntimeStats for " + op.getOperatorId());
}
}
@Override
public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
Dispatcher disp = new AnnotateRunTimeStatsDispatcher(pctx, opRules);
GraphWalker ogw = new DefaultGraphWalker(disp);
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pctx.getRootTasks());
ogw.startWalking(topNodes, null);
return pctx;
}
public void resolve(Set<Operator<?>> opSet, ParseContext pctx) throws SemanticException {
Set<Operator<?>> ops = getAllOperatorsForSimpleFetch(opSet);
setOrAnnotateStats(ops, pctx);
}
private Set<Operator<?>> getAllOperatorsForSimpleFetch(Set<Operator<?>> opSet) {
Set<Operator<?>> returnSet = new LinkedHashSet<Operator<?>>();
Stack<Operator<?>> opStack = new Stack<Operator<?>>();
// add all children
opStack.addAll(opSet);
while (!opStack.empty()) {
Operator<?> op = opStack.pop();
returnSet.add(op);
if (op.getChildOperators() != null) {
opStack.addAll(op.getChildOperators());
}
}
return returnSet;
}
}