/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.hive.hcatalog.templeton.tool; import java.io.IOException; import java.security.PrivilegedExceptionAction; import java.util.Arrays; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Tool; import org.apache.hive.hcatalog.common.HCatUtil; import org.apache.hive.hcatalog.templeton.AppConfig; import org.apache.hive.hcatalog.templeton.SecureProxySupport; import org.apache.hive.hcatalog.templeton.UgiFactory; import org.apache.thrift.TException; /** * A Map Reduce job that will start another job. * * We have a single Mapper job that starts a child MR job. The parent * monitors the child child job and ends when the child job exits. In * addition, we * * - write out the parent job id so the caller can record it. * - run a keep alive thread so the job doesn't end. * - Optionally, store the stdout, stderr, and exit value of the child * in hdfs files. * * A note on security. When jobs are submitted through WebHCat that use HCatalog, it means that * metastore access is required. Hive queries, of course, need metastore access. This in turn * requires delegation token to be obtained for metastore in a <em>secure cluster</em>. Since we * can't usually parse the job to find out if it is using metastore, we require 'usehcatalog' * parameter supplied in the REST call. WebHcat takes care of cancelling the token when the job * is complete. */ @InterfaceAudience.Private public class TempletonControllerJob extends Configured implements Tool, JobSubmissionConstants { private static final Logger LOG = LoggerFactory.getLogger(TempletonControllerJob.class); private final boolean secureMetastoreAccess; private final AppConfig appConf; /** * @param secureMetastoreAccess - if true, a delegation token will be created * and added to the job */ public TempletonControllerJob(boolean secureMetastoreAccess, AppConfig conf) { super(new Configuration(conf)); this.secureMetastoreAccess = secureMetastoreAccess; this.appConf = conf; } private Job job = null; public String getSubmittedId() { if (job == null ) { return null; } JobID submittedJobId = job.getJobID(); if (submittedJobId == null) { return null; } else { return submittedJobId.toString(); } } /** * Enqueue the job and print out the job id for later collection. * @see org.apache.hive.hcatalog.templeton.CompleteDelegator */ @Override public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, TException { if(LOG.isDebugEnabled()) { LOG.debug("Preparing to submit job: " + Arrays.toString(args)); } Configuration conf = getConf(); conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args)); String memoryMb = appConf.mapperMemoryMb(); if(memoryMb != null && memoryMb.length() != 0) { conf.set(AppConfig.HADOOP_MAP_MEMORY_MB, memoryMb); } String amMemoryMB = appConf.amMemoryMb(); if (amMemoryMB != null && !amMemoryMB.isEmpty()) { conf.set(AppConfig.HADOOP_MR_AM_MEMORY_MB, amMemoryMB); } String amJavaOpts = appConf.controllerAMChildOpts(); if (amJavaOpts != null && !amJavaOpts.isEmpty()) { conf.set(AppConfig.HADOOP_MR_AM_JAVA_OPTS, amJavaOpts); } String user = UserGroupInformation.getCurrentUser().getShortUserName(); conf.set("user.name", user); job = new Job(conf); job.setJarByClass(LaunchMapper.class); job.setJobName(TempletonControllerJob.class.getSimpleName()); job.setMapperClass(LaunchMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setInputFormatClass(SingleInputFormat.class); NullOutputFormat<NullWritable, NullWritable> of = new NullOutputFormat<NullWritable, NullWritable>(); job.setOutputFormatClass(of.getClass()); job.setNumReduceTasks(0); JobClient jc = new JobClient(new JobConf(job.getConfiguration())); if(UserGroupInformation.isSecurityEnabled()) { Token<DelegationTokenIdentifier> mrdt = jc.getDelegationToken(new Text("mr token")); job.getCredentials().addToken(new Text("mr token"), mrdt); } String metastoreTokenStrForm = addHMSToken(job, user); job.submit(); JobID submittedJobId = job.getJobID(); if(metastoreTokenStrForm != null) { //so that it can be cancelled later from CompleteDelegator DelegationTokenCache.getStringFormTokenCache().storeDelegationToken( submittedJobId.toString(), metastoreTokenStrForm); LOG.debug("Added metastore delegation token for jobId=" + submittedJobId.toString() + " user=" + user); } return 0; } private String addHMSToken(Job job, String user) throws IOException, InterruptedException, TException { if(!secureMetastoreAccess) { return null; } Token<org.apache.hadoop.hive.thrift.DelegationTokenIdentifier> hiveToken = new Token<org.apache.hadoop.hive.thrift.DelegationTokenIdentifier>(); String metastoreTokenStrForm = buildHcatDelegationToken(user); hiveToken.decodeFromUrlString(metastoreTokenStrForm); job.getCredentials().addToken(new Text(SecureProxySupport.HCAT_SERVICE), hiveToken); return metastoreTokenStrForm; } private String buildHcatDelegationToken(String user) throws IOException, InterruptedException, TException { final HiveConf c = new HiveConf(); LOG.debug("Creating hive metastore delegation token for user " + user); final UserGroupInformation ugi = UgiFactory.getUgi(user); UserGroupInformation real = ugi.getRealUser(); return real.doAs(new PrivilegedExceptionAction<String>() { @Override public String run() throws IOException, TException, InterruptedException { final IMetaStoreClient client = HCatUtil.getHiveMetastoreClient(c); return ugi.doAs(new PrivilegedExceptionAction<String>() { @Override public String run() throws IOException, TException, InterruptedException { String u = ugi.getUserName(); return client.getDelegationToken(c.getUser(),u); } }); } }); } }