/*
 * Copyright [2012-2014] PayPal Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ml.shifu.shifu.pig;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;

import ml.shifu.guagua.hadoop.util.HDPUtils;
import ml.shifu.shifu.container.obj.ModelConfig;
import ml.shifu.shifu.container.obj.RawSourceData.SourceType;
import ml.shifu.shifu.fs.PathFinder;
import ml.shifu.shifu.util.CommonUtils;
import ml.shifu.shifu.util.Environment;

import org.apache.commons.lang.StringUtils;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Singleton helper that builds a {@link PigServer} for a given {@link SourceType} and registers a pig script
 * (from an absolute file path, a classpath resource or an in-memory string) with it.
 */
public class PigExecutor {

    private static final Logger log = LoggerFactory.getLogger(PigExecutor.class);

    /** Class loaded reflectively so that this file has no compile-time dependency on the Tez backend. */
    private static final String TEZ_EXEC_TYPE_CLASS = "org.apache.pig.backend.hadoop.executionengine.tez.TezExecType";

    /** Class probed by {@link #isTezRunnable()} to detect an incomplete Tez runtime on the classpath. */
    private static final String TEZ_PROBE_CLASS = "org.apache.tez.runtime.library.input.OrderedGroupedKVInput";

    private static final PigExecutor instance = new PigExecutor();

    // avoid to create instance, used as singleton
    private PigExecutor() {
    }

    /**
     * Get the pig executor handler
     * 
     * @return - executor handler
     */
    public static PigExecutor getExecutor() {
        return instance;
    }

    /**
     * Submit the pig job with {@link ModelConfig} and pig script.
     * This function doesn't allow customer settings: no additional script parameters are passed.
     * 
     * @param modelConfig
     *            - model configuration
     * @param pigScriptPath
     *            - path of pig script
     * @throws IOException
     *             throw IOException when loading the parameter from {@link ModelConfig}
     */
    public void submitJob(ModelConfig modelConfig, String pigScriptPath) throws IOException {
        submitJob(modelConfig, pigScriptPath, null);
    }

    /**
     * Run the pig script; Local or MapReduce mode is decided by the training source data type in modelConfig
     * ({@code modelConfig.getDataSet().getSource()}).
     * 
     * @param modelConfig
     *            - model configuration
     * @param pigScriptPath
     *            - path of pig script
     * @param paramsMap
     *            - additional parameters for pig script, may be null
     * @throws IOException
     *             throw IOException when loading the parameter from {@link ModelConfig}
     */
    public void submitJob(ModelConfig modelConfig, String pigScriptPath, Map<String, String> paramsMap)
            throws IOException {
        submitJob(modelConfig, pigScriptPath, paramsMap, modelConfig.getDataSet().getSource(), null);
    }

    /**
     * Run the pig script with an explicit source type and no path finder.
     * 
     * @param modelConfig
     *            - model configuration
     * @param pigScriptPath
     *            - path of pig script
     * @param paramsMap
     *            - additional parameters for pig script, may be null
     * @param sourceType
     *            - the mode run pig: pig-local/pig-hdfs
     * @throws IOException
     *             throw IOException when loading the parameter from {@link ModelConfig}
     */
    public void submitJob(ModelConfig modelConfig, String pigScriptPath, Map<String, String> paramsMap,
            SourceType sourceType) throws IOException {
        submitJob(modelConfig, pigScriptPath, paramsMap, sourceType, null);
    }

    /**
     * Run the pig script with an explicit source type and path finder, without extra pig context properties.
     * 
     * @param modelConfig
     *            - model configuration
     * @param pigScriptPath
     *            - path of pig script
     * @param paramsMap
     *            - additional parameters for pig script, may be null
     * @param sourceType
     *            - the mode run pig: pig-local/pig-hdfs
     * @param pathFinder
     *            the path finder handed to {@code CommonUtils.getPigParamMap}, may be null
     * @throws IOException
     *             throw IOException when loading the parameter from {@link ModelConfig}
     */
    public void submitJob(ModelConfig modelConfig, String pigScriptPath, Map<String, String> paramsMap,
            SourceType sourceType, PathFinder pathFinder) throws IOException {
        submitJob(modelConfig, pigScriptPath, paramsMap, sourceType, null, pathFinder);
    }

    /**
     * Run the pig script; Local or MapReduce mode is decided by parameter {@code sourceType}.
     * 
     * @param modelConfig
     *            - model configuration
     * @param pigScriptPath
     *            - path of pig script; an absolute path is read from the file system, any other path is looked up
     *            as a classpath resource
     * @param paramsMap
     *            - additional parameters for pig script, may be null; entries override the generated ones
     * @param sourceType
     *            - the mode run pig: pig-local/pig-hdfs
     * @param confMap
     *            the configuration map instance copied into the pig context properties, may be null
     * @param pathFinder
     *            the path finder handed to {@code CommonUtils.getPigParamMap}
     * @throws IOException
     *             throw IOException when loading the parameter from {@link ModelConfig}, or when a relative
     *             script path cannot be found on the classpath
     */
    public void submitJob(ModelConfig modelConfig, String pigScriptPath, Map<String, String> paramsMap,
            SourceType sourceType, Map<String, String> confMap, PathFinder pathFinder) throws IOException {
        // Run Pig Scripts
        PigServer pigServer = createPigServer(sourceType);
        injectConfiguration(pigServer, confMap);

        Map<String, String> pigParamsMap = CommonUtils.getPigParamMap(modelConfig, sourceType, pathFinder);
        if(paramsMap != null) {
            pigParamsMap.putAll(paramsMap);
        }
        log.debug("Pig submit parameters: {}", pigParamsMap);

        if(new File(pigScriptPath).isAbsolute()) {
            log.info("Pig script absolute path is {}", pigScriptPath);
            pigServer.registerScript(pigScriptPath, pigParamsMap);
        } else {
            log.info("Pig script relative path is {}", pigScriptPath);
            registerClasspathScript(pigServer, pigScriptPath, pigParamsMap);
        }
    }

    /**
     * Register pig statements given directly as a string.
     * 
     * @param sourceType
     *            - the mode run pig: pig-local/pig-hdfs
     * @param pigScripts
     *            - the pig script content
     * @throws IOException
     *             if the pig server cannot be created or the script cannot be registered
     */
    public void submitJob(SourceType sourceType, String pigScripts) throws IOException {
        PigServer pigServer = createPigServer(sourceType);
        // getBytes() encodes with the platform default charset.
        // NOTE(review): kept as-is on purpose; confirm which charset PigServer uses to decode the stream before
        // pinning an explicit one here.
        pigServer.registerScript(new ByteArrayInputStream(pigScripts.getBytes()));
    }

    /**
     * Copy settings into the pig context properties: first the environment entries accepted by
     * {@code CommonUtils.isHadoopConfigurationInjected}, then every entry of {@code confMap} (which therefore
     * wins on duplicate keys).
     */
    private void injectConfiguration(PigServer pigServer, Map<String, String> confMap) {
        for(Map.Entry<Object, Object> entry: Environment.getProperties().entrySet()) {
            if(CommonUtils.isHadoopConfigurationInjected(entry.getKey().toString())) {
                pigServer.getPigContext().getProperties().put(entry.getKey(), entry.getValue());
            }
        }

        if(confMap != null) {
            for(Map.Entry<String, String> entry: confMap.entrySet()) {
                pigServer.getPigContext().getProperties().put(entry.getKey(), entry.getValue());
            }
        }
    }

    /**
     * Register a script that is located on the classpath, failing with a clear message when it is missing and
     * always releasing the resource stream afterwards.
     */
    private void registerClasspathScript(PigServer pigServer, String pigScriptPath, Map<String, String> pigParamsMap)
            throws IOException {
        InputStream scriptStream = PigExecutor.class.getClassLoader().getResourceAsStream(pigScriptPath);
        if(scriptStream == null) {
            // getResourceAsStream signals "not found" with null; fail here instead of deep inside Pig
            throw new IOException("Pig script not found on classpath: " + pigScriptPath);
        }
        try {
            pigServer.registerScript(scriptStream, pigParamsMap);
        } finally {
            closeQuietly(scriptStream);
        }
    }

    /**
     * Close the stream without letting a close failure hide an exception thrown while registering the script.
     */
    private static void closeQuietly(InputStream stream) {
        try {
            stream.close();
        } catch (IOException e) {
            log.debug("Failed to close pig script stream", e);
        }
    }

    /**
     * Create the pig server for the given source type: HDFS runs on Tez (when requested through property
     * {@code shifu.pig.exectype} and usable) or else on MapReduce; every other source type runs in local mode.
     */
    private PigServer createPigServer(SourceType sourceType) throws IOException {
        if(!SourceType.HDFS.equals(sourceType)) {
            log.info("ExecType: LOCAL");
            return new ShifuPigServer(ExecType.LOCAL);
        }

        PigServer pigServer = createTezPigServer();
        if(pigServer == null) {
            // Tez not requested or not usable: fall back to mapreduce
            log.info("Pig ExecType: MAPREDUCE");
            pigServer = new ShifuPigServer(ExecType.MAPREDUCE);
        }

        String hdpVersion = HDPUtils.getHdpVersionForHDP224();
        if(StringUtils.isNotBlank(hdpVersion)) {
            // for hdp 2.2.4, hdp.version should be set and configuration files should be added to container class
            pigServer.getPigContext().getProperties().put("hdp.version", hdpVersion);
            pigServer.getPigContext().addJar(HDPUtils.findContainingFile("hdfs-site.xml"));
            pigServer.getPigContext().addJar(HDPUtils.findContainingFile("core-site.xml"));
            pigServer.getPigContext().addJar(HDPUtils.findContainingFile("mapred-site.xml"));
            pigServer.getPigContext().addJar(HDPUtils.findContainingFile("yarn-site.xml"));
        }
        return pigServer;
    }

    /**
     * Try to create a Tez backed pig server.
     * 
     * @return the server, or null when Tez is not requested, not runnable, or fails to initialize
     */
    private PigServer createTezPigServer() {
        String execType = Environment.getProperty("shifu.pig.exectype", "MAPREDUCE");
        if(!"tez".equalsIgnoreCase(execType) || !isTezRunnable()) {
            return null;
        }

        try {
            Class<?> tezClazz = Class.forName(TEZ_EXEC_TYPE_CLASS);
            PigServer tezServer = new ShifuPigServer((ExecType) tezClazz.newInstance());
            log.info("Pig ExecType: TEZ");
            return tezServer;
        } catch (Throwable t) {
            // Throwable on purpose: missing Tez classes surface as LinkageError, not Exception
            log.warn("Cannot initialize Pig on TEZ, fall back to MAPREDUCE", t);
            return null;
        }
    }

    /**
     * Check if tez version is ok to run. In hdp 2.4.0.2.1.2.0-402, with such error 'NoClassDefFoundError:
     * org/apache/tez/runtime/library/input/OrderedGroupedKVInput'
     * 
     * @return true if the probed Tez class can be loaded
     */
    private boolean isTezRunnable() {
        try {
            Class.forName(TEZ_PROBE_CLASS);
            return true;
        } catch (Throwable t) {
            // Throwable on purpose: the documented failure is a NoClassDefFoundError
            log.debug("Tez is not runnable, class {} cannot be loaded", TEZ_PROBE_CLASS, t);
            return false;
        }
    }
}