/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hive.hcatalog.templeton;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import org.apache.commons.exec.ExecuteException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hive.hcatalog.templeton.tool.JobSubmissionConstants;
import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob;
import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;

/**
 * Submit a Pig job.
 *
 * This is the backend of the pig web service.
 */
public class PigDelegator extends LauncherDelegator {
  private static final Logger LOG = LoggerFactory.getLogger(PigDelegator.class);

  public PigDelegator(AppConfig appConf) {
    super(appConf);
  }

  public EnqueueBean run(String user, Map<String, Object> userArgs,
                         String execute, String srcFile,
                         List<String> pigArgs, String otherFiles,
                         String statusdir, String callback,
                         boolean usesHcatalog, String completedUrl,
                         boolean enablelog, Boolean enableJobReconnect)
    throws NotAuthorizedException, BadParam, BusyException, QueueException,
    ExecuteException, IOException, InterruptedException, TooManyRequestsException {
    runAs = user;

    List<String> args = makeArgs(execute, srcFile, pigArgs, otherFiles,
      statusdir, usesHcatalog, completedUrl, enablelog, enableJobReconnect);

    return enqueueController(user, userArgs, callback, args);
  }
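
  // Illustrative sketch only (added commentary, not original code): for a request
  // that ships both the Pig and Hive archives and runs an inline script, the list
  // assembled by makeArgs() below takes roughly this shape. Archive names are
  // made-up examples, and ARCHIVES is assumed to expand to the launcher's
  // "-archives" flag:
  //
  //   [ <makeLauncherArgs(...) output>,
  //     "-archives", "pig.tar.gz,hive.tar.gz",
  //     "-D<HIVE_HOME=...>", "-D<HCAT_HOME=...>", "-D<PIG_OPTS=...>",
  //     "--", <appConf.pigPath()>,
  //     "-D" + TOKEN_FILE_ARG_PLACEHOLDER,
  //     "-D" + MAPREDUCE_JOB_TAGS_ARG_PLACEHOLDER,
  //     <user-supplied pigArgs...>,
  //     "-execute", <script>            // or "-file", <srcFile name>
  //   ]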
  /**
   * @param execute pig query string to be executed
   * @param srcFile pig query file to be executed
   * @param pigArgs pig command line arguments
   * @param otherFiles files to be copied to the map reduce cluster
   * @param statusdir status dir location
   * @param usesHcatalog whether the command uses hcatalog/needs to connect
   *          to the hive metastore server
   * @param completedUrl callback url
   * @param enablelog whether to collect Hadoop job logs into the status dir
   * @param enableJobReconnect whether to reconnect to an already running job
   *          instead of restarting it when the launcher task is retried
   * @return list of arguments
   * @throws BadParam
   * @throws IOException
   * @throws InterruptedException
   */
  private List<String> makeArgs(String execute, String srcFile,
                                List<String> pigArgs, String otherFiles,
                                String statusdir, boolean usesHcatalog,
                                String completedUrl, boolean enablelog,
                                Boolean enableJobReconnect)
    throws BadParam, IOException, InterruptedException {
    ArrayList<String> args = new ArrayList<String>();
    // Check whether the REST request asked to use hcatalog explicitly,
    // or implicitly via the Pig -useHCatalog argument.
    boolean needsMetastoreAccess = usesHcatalog || hasPigArgUseHcat(pigArgs);
    try {
      ArrayList<String> allFiles = new ArrayList<String>();
      if (TempletonUtils.isset(srcFile)) {
        allFiles.add(TempletonUtils.hadoopFsFilename(srcFile, appConf, runAs));
      }
      if (TempletonUtils.isset(otherFiles)) {
        String[] ofs = TempletonUtils.hadoopFsListAsArray(otherFiles, appConf, runAs);
        allFiles.addAll(Arrays.asList(ofs));
      }

      args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl,
        allFiles, enablelog, enableJobReconnect, JobType.PIG));

      boolean shipPigTar = appConf.pigArchive() != null
        && !appConf.pigArchive().equals("");
      boolean shipHiveTar = needsMetastoreAccess && appConf.hiveArchive() != null
        && !appConf.hiveArchive().equals("");
      if (shipPigTar || shipHiveTar) {
        args.add(ARCHIVES);
        StringBuilder archives = new StringBuilder();
        if (shipPigTar) {
          archives.append(appConf.pigArchive());
        }
        if (shipPigTar && shipHiveTar) {
          archives.append(",");
        }
        if (shipHiveTar) {
          archives.append(appConf.hiveArchive());
        }
        args.add(archives.toString());
      }
      if (shipHiveTar) {
        addDef(args, JobSubmissionConstants.PigConstants.HIVE_HOME,
          appConf.get(AppConfig.HIVE_HOME_PATH));
        addDef(args, JobSubmissionConstants.PigConstants.HCAT_HOME,
          appConf.get(AppConfig.HCAT_HOME_PATH));
        // Pig which uses HCat will pass this to HCat so that it can find the metastore
        addDef(args, JobSubmissionConstants.PigConstants.PIG_OPTS,
          appConf.get(AppConfig.HIVE_PROPS_NAME));
      }
      args.add("--");
      args.add(appConf.pigPath());
      // The token file location should be the first argument passed to Pig.
      args.add("-D" + TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER);
      // Add the MapReduce job tag placeholder.
      args.add("-D" + TempletonControllerJob.MAPREDUCE_JOB_TAGS_ARG_PLACEHOLDER);
      args.addAll(pigArgs);
      if (needsMetastoreAccess) {
        addHiveMetaStoreTokenArg();
      }
      if (TempletonUtils.isset(execute)) {
        args.add("-execute");
        args.add(execute);
      } else if (TempletonUtils.isset(srcFile)) {
        args.add("-file");
        args.add(TempletonUtils.hadoopFsPath(srcFile, appConf, runAs).getName());
      }
    } catch (FileNotFoundException e) {
      throw new BadParam(e.getMessage());
    } catch (URISyntaxException e) {
      throw new BadParam(e.getMessage());
    }

    return args;
  }

  /**
   * Check whether the Pig arguments include -useHCatalog;
   * see http://hive.apache.org/docs/hcat_r0.5.0/loadstore.pdf
   */
  private boolean hasPigArgUseHcat(List<String> pigArgs) {
    return pigArgs.contains("-useHCatalog");
  }
}
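
// A minimal usage sketch (illustrative only; in practice this class is driven by
// the Templeton Server REST resource, and the literal values and the EnqueueBean
// field accessed below are assumptions, not guarantees of this class):
//
//   PigDelegator delegator = new PigDelegator(appConf);
//   EnqueueBean bean = delegator.run("bob", userArgs,
//       null,                          // execute: no inline script
//       "/user/bob/wordcount.pig",     // srcFile: script already on HDFS
//       Arrays.asList("-useHCatalog"), // pigArgs: implies metastore access
//       null,                          // otherFiles
//       "/user/bob/status",            // statusdir
//       null,                          // callback
//       false,                         // usesHcatalog (implied by pigArgs here)
//       completedUrl,
//       false,                         // enablelog
//       false);                        // enableJobReconnect
//   // bean.id is assumed to hold the queued job id to poll via the status API.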