/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.tez.util;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration;
import org.apache.pig.classification.InterfaceAudience;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.mapreduce.hadoop.DeprecatedKeys;
/**
 * Static helpers that carry MapReduce configuration settings over to their
 * Tez equivalents. This class is not instantiable.
 */
@InterfaceAudience.Private
public class MRToTezHelper {

    private static final Log LOG = LogFactory.getLog(MRToTezHelper.class);

    /**
     * MapReduce keys that are copied unchanged by
     * {@link #processMRSettings(Configuration, Configuration)}.
     * Filled once by the static initializer below and never reassigned.
     */
    private static final List<String> mrSettingsToRetain = new ArrayList<String>();

    private MRToTezHelper() {
    }

    static {
        populateMRSettingsToRetain();
    }

    /**
     * Registers the FileInputFormat/FileOutputFormat related keys that must be
     * copied as is.
     */
    private static void populateMRSettingsToRetain() {
        // FileInputFormat
        mrSettingsToRetain.add(FileInputFormat.INPUT_DIR);
        mrSettingsToRetain.add(FileInputFormat.SPLIT_MAXSIZE);
        mrSettingsToRetain.add(FileInputFormat.SPLIT_MINSIZE);
        mrSettingsToRetain.add(FileInputFormat.PATHFILTER_CLASS);
        mrSettingsToRetain.add(FileInputFormat.NUM_INPUT_FILES);
        mrSettingsToRetain.add(FileInputFormat.INPUT_DIR_RECURSIVE);

        // FileOutputFormat
        mrSettingsToRetain.add(MRConfiguration.OUTPUT_BASENAME);
        mrSettingsToRetain.add(FileOutputFormat.COMPRESS);
        mrSettingsToRetain.add(FileOutputFormat.COMPRESS_CODEC);
        mrSettingsToRetain.add(FileOutputFormat.COMPRESS_TYPE);
        mrSettingsToRetain.add(FileOutputFormat.OUTDIR);
        mrSettingsToRetain.add(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER);
    }

    /**
     * Builds a new {@link TezConfiguration} for the DAG application master
     * from a configuration that may still carry MapReduce settings.
     * <p>
     * The returned object is a copy of {@code tezConf}; {@code tezConf} itself
     * is only read. Keys listed in {@link DeprecatedKeys#getMRToDAGParamMap()}
     * are moved to their Tez names on the copy (the MR key is removed, the Tez
     * key is overwritten). The remaining AM settings (launch env, launch
     * command options, queue, memory, vcores, ACLs, max attempts, credentials
     * path) are derived from the MR values with {@code setIfUnset}, so a Tez
     * value already present on the copy is kept.
     *
     * @param tezConf source configuration, read but not modified
     * @return a new configuration with the Tez AM settings populated
     */
    public static TezConfiguration getDAGAMConfFromMRConf(
            Configuration tezConf) {

        // Set Tez parameters based on MR parameters.
        TezConfiguration dagAMConf = new TezConfiguration(tezConf);
        Map<String, String> mrParamToDAGParamMap = DeprecatedKeys
                .getMRToDAGParamMap();

        for (Entry<String, String> entry : mrParamToDAGParamMap.entrySet()) {
            String mrKey = entry.getKey();
            String mrValue = dagAMConf.get(mrKey);
            if (mrValue == null) {
                continue;
            }
            String tezKey = entry.getValue();
            dagAMConf.set(tezKey, mrValue);
            dagAMConf.unset(mrKey);
            if (LOG.isDebugEnabled()) {
                LOG.debug("MR->DAG Translating MR key: " + mrKey
                        + " to Tez key: " + tezKey
                        + " with value " + dagAMConf.get(tezKey));
            }
        }

        // AM launch environment: admin env first, then the user env appended.
        String env = tezConf.get(MRJobConfig.MR_AM_ADMIN_USER_ENV);
        String userEnv = tezConf.get(MRJobConfig.MR_AM_ENV);
        if (userEnv != null) {
            env = (env == null) ? userEnv : env + "," + userEnv;
        }
        if (env != null) {
            dagAMConf.setIfUnset(TezConfiguration.TEZ_AM_LAUNCH_ENV, env);
        }

        dagAMConf.setIfUnset(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS,
                org.apache.tez.mapreduce.hadoop.MRHelpers
                        .getJavaOptsForMRAM(tezConf));

        String queueName = tezConf.get(JobContext.QUEUE_NAME,
                YarnConfiguration.DEFAULT_QUEUE_NAME);
        dagAMConf.setIfUnset(TezConfiguration.TEZ_QUEUE_NAME, queueName);

        int amMemMB = tezConf.getInt(MRJobConfig.MR_AM_VMEM_MB,
                MRJobConfig.DEFAULT_MR_AM_VMEM_MB);
        int amCores = tezConf.getInt(MRJobConfig.MR_AM_CPU_VCORES,
                MRJobConfig.DEFAULT_MR_AM_CPU_VCORES);
        dagAMConf.setIfUnset(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB,
                Integer.toString(amMemMB));
        dagAMConf.setIfUnset(TezConfiguration.TEZ_AM_RESOURCE_CPU_VCORES,
                Integer.toString(amCores));

        dagAMConf.setIfUnset(TezConfiguration.TEZ_AM_VIEW_ACLS,
                tezConf.get(MRJobConfig.JOB_ACL_VIEW_JOB, MRJobConfig.DEFAULT_JOB_ACL_VIEW_JOB));
        dagAMConf.setIfUnset(TezConfiguration.TEZ_AM_MODIFY_ACLS,
                tezConf.get(MRJobConfig.JOB_ACL_MODIFY_JOB, MRJobConfig.DEFAULT_JOB_ACL_MODIFY_JOB));

        // Read from the untouched source conf, like every other MR lookup in
        // this method: the translation loop above removes MR keys from
        // dagAMConf, so an MR value looked up there may already be gone.
        int amMaxAttempts = tezConf.getInt(MRJobConfig.MR_AM_MAX_ATTEMPTS,
                MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS);
        dagAMConf.setIfUnset(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS,
                Integer.toString(amMaxAttempts));

        String credentialsBinary = tezConf.get(MRConfiguration.JOB_CREDENTIALS_BINARY);
        if (credentialsBinary != null) {
            dagAMConf.setIfUnset(TezConfiguration.TEZ_CREDENTIALS_PATH,
                    credentialsBinary);
        }

        //TODO: Strip out all MR settings

        return dagAMConf;
    }

    /**
     * Process the mapreduce configuration settings and
     * - copy as is the still required ones (like those used by FileInputFormat/FileOutputFormat)
     * - convert and set equivalent tez runtime settings
     * - handle compression related settings
     *
     * @param conf Configuration on which the mapreduce settings will have to be transferred
     * @param mrConf Configuration that contains mapreduce settings
     */
    public static void processMRSettings(Configuration conf, Configuration mrConf) {
        for (String mrSetting : mrSettingsToRetain) {
            String value = mrConf.get(mrSetting);
            if (value != null) {
                conf.set(mrSetting, value);
            }
        }
        JobControlCompiler.configureCompression(conf);
        convertMRToTezRuntimeConf(conf, mrConf);
    }

    /**
     * Convert MR settings to Tez settings and set on conf.
     * <p>
     * For every MR key from {@link DeprecatedKeys#getMRToTezRuntimeParamMap()}
     * that has a value in {@code mrConf}, the MR key is removed from
     * {@code conf} and the Tez key is set on {@code conf} unless it already
     * has a value there.
     *
     * @param conf Configuration on which MR equivalent Tez settings should be set
     * @param mrConf Configuration that contains MR settings
     */
    private static void convertMRToTezRuntimeConf(Configuration conf, Configuration mrConf) {
        for (Entry<String, String> dep : DeprecatedKeys.getMRToTezRuntimeParamMap().entrySet()) {
            String mrKey = dep.getKey();
            // Capture the value BEFORE unsetting the key: if conf and mrConf
            // are the same object, the unset below would otherwise discard the
            // value and a null would be logged and passed to setIfUnset.
            String mrValue = mrConf.get(mrKey);
            if (mrValue == null) {
                continue;
            }
            String tezKey = dep.getValue();
            conf.unset(mrKey);
            LOG.info("Setting " + tezKey + " to "
                    + mrValue + " from MR setting "
                    + mrKey);
            // setIfUnset: a Tez value already present on conf is kept.
            conf.setIfUnset(tezKey, mrValue);
        }
    }
}