/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.yaess.basic;

import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.asakusafw.yaess.core.Blob;
import com.asakusafw.yaess.core.ExecutionContext;
import com.asakusafw.yaess.core.ExecutionMonitor;
import com.asakusafw.yaess.core.ExecutionScript;
import com.asakusafw.yaess.core.ExecutionScriptHandlerBase;
import com.asakusafw.yaess.core.HadoopScript;
import com.asakusafw.yaess.core.HadoopScriptHandler;
import com.asakusafw.yaess.core.Job;
import com.asakusafw.yaess.core.ServiceProfile;
import com.asakusafw.yaess.core.YaessLogger;
import com.asakusafw.yaess.core.util.HadoopScriptUtil;

/**
 * An abstract implementation of process-based {@link HadoopScriptHandler}.
 * This handler just launches a command with the following arguments in its tail:
 * <ol>
 * <li> {@link HadoopScript#getClassName() class name} </li>
 * <li> {@link ExecutionContext#getBatchId() batch-id} </li>
 * <li> {@link ExecutionContext#getFlowId() flow-id} </li>
 * <li> {@link ExecutionContext#getExecutionId() execution-id} </li>
 * <li> {@link ExecutionContext#getArgumentsAsString() batch-arguments} </li>
 * <li> {@link HadoopScript#getHadoopProperties() hadoop properties (with "-D")} </li>
 * </ol>
 * Additionally, the handler launches a cleanup command if {@code hadoop.cleanup} is {@code true}:
 * <ol>
 * <li> {@link #CLEANUP_STAGE_CLASS} </li>
 * <li> {@link ExecutionContext#getBatchId() batch-id} </li>
 * <li> {@link ExecutionContext#getFlowId() flow-id} </li>
 * <li> {@link ExecutionContext#getExecutionId() execution-id} </li>
 * <li> {@link ExecutionContext#getArgumentsAsString() batch-arguments} </li>
 * <li> hadoop properties (with "-D") </li>
 * </ol>
 *
 * <h3> Profile format </h3>
<pre><code>
# &lt;position&gt; = 0, 1, 2, ...
# &lt;prefix command token&gt; can contain "&#64;[position]",
# which will be replaced with the original command tokens (0-origin position)
hadoop = &lt;this class name&gt;
hadoop.env.ASAKUSA_HOME = ${ASAKUSA_HOME}
hadoop.command.&lt;position&gt; = $&lt;prefix command token&gt;
hadoop.cleanup = whether cleanup is enabled
hadoop.env.&lt;key&gt; = $&lt;extra environment variables&gt;
hadoop.prop.&lt;key&gt; = $&lt;extra Hadoop properties&gt;
</code></pre>
 * @since 0.2.3
 * @version 0.5.0
 */
public abstract class ProcessHadoopScriptHandler extends ExecutionScriptHandlerBase implements HadoopScriptHandler {

    static final YaessLogger YSLOG = new YaessBasicLogger(ProcessHadoopScriptHandler.class);

    static final Logger LOG = LoggerFactory.getLogger(ProcessHadoopScriptHandler.class);

    /**
     * The class name of the cleanup stage client.
     * @since 0.4.0
     */
    public static final String CLEANUP_STAGE_CLASS = HadoopScriptUtil.CLEANUP_STAGE_CLASS;
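
    // Putting the "Profile format" section of the class Javadoc into practice,
    // a profile for a concrete subclass might look like the following sketch
    // (the handler class name and all values are illustrative only):
    //
    //   hadoop = com.example.yaess.ExampleHadoopScriptHandler
    //   hadoop.env.ASAKUSA_HOME = /opt/asakusa
    //   hadoop.cleanup = true
    //   hadoop.env.HADOOP_USER_NAME = asakusa
    //   hadoop.prop.com.example.tuning = enabled
    //   # optional launcher prefix; the original command tokens follow in its tail
    //   hadoop.command.0 = /usr/bin/nice
    //   hadoop.command.1 = -n
    //   hadoop.command.2 = 10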

    /**
     * (sub) key name of working directory.
     * @deprecated cleanup is obsolete
     */
    @Deprecated
    public static final String KEY_WORKING_DIRECTORY = "workingDirectory";

    /**
     * (sub) key name of cleanup enabled.
     */
    public static final String KEY_CLEANUP = "cleanup";

    /**
     * The path to the Hadoop execution script (relative to the Asakusa installation home).
     */
    public static final String PATH_EXECUTE = "yaess-hadoop/libexec/hadoop-execute.sh";

    /**
     * Variable name of batch ID.
     */
    public static final String VAR_BATCH_ID = "batch_id";

    /**
     * Variable name of flow ID.
     */
    public static final String VAR_FLOW_ID = "flow_id";

    /**
     * Variable name of execution ID.
     */
    public static final String VAR_EXECUTION_ID = "execution_id";

    private volatile ServiceProfile<?> currentProfile;

    private volatile List<String> commandPrefix;

    private boolean cleanup;

    @Override
    protected final void doConfigure(
            ServiceProfile<?> profile,
            Map<String, String> desiredProperties,
            Map<String, String> desiredEnvironmentVariables) throws InterruptedException, IOException {
        this.currentProfile = profile;
        this.commandPrefix = extractCommand(profile, ProcessUtil.PREFIX_COMMAND);
        this.cleanup = extractBoolean(profile, KEY_CLEANUP, true);
        checkCleanupConfigurations(profile);
        configureExtension(profile);
    }

    private void checkCleanupConfigurations(ServiceProfile<?> profile) throws IOException {
        assert profile != null;
        String workingDirectory = profile.getConfiguration().get(KEY_WORKING_DIRECTORY);
        if (workingDirectory != null) {
            YSLOG.warn("W10001",
                    profile.getPrefix(),
                    KEY_WORKING_DIRECTORY,
                    KEY_CLEANUP);
        }
        List<String> cleanupPrefix = extractCommand(profile, ProcessUtil.PREFIX_CLEANUP);
        if (cleanupPrefix.isEmpty() == false) {
            YSLOG.warn("W10001",
                    profile.getPrefix(),
                    ProcessUtil.PREFIX_CLEANUP + "*",
                    KEY_CLEANUP);
        }
    }

    private List<String> extractCommand(ServiceProfile<?> profile, String prefix) throws IOException {
        try {
            return ProcessUtil.extractCommandLineTokens(
                    prefix,
                    profile.getConfiguration(),
                    profile.getContext().getContextParameters());
        } catch (IllegalArgumentException e) {
            throw new IOException(MessageFormat.format(
                    "Failed to resolve command line tokens ({0})",
                    profile.getPrefix() + '.' + prefix + '*'), e);
        }
    }

    private boolean extractBoolean(ServiceProfile<?> profile, String key, boolean defaultValue) throws IOException {
        assert profile != null;
        assert key != null;
        String string = profile.getConfiguration(key, false, true);
        if (string == null) {
            return defaultValue;
        }
        string = string.trim();
        if (string.isEmpty()) {
            return defaultValue;
        }
        try {
            return Boolean.parseBoolean(string);
        } catch (RuntimeException e) {
            throw new IOException(MessageFormat.format(
                    "Failed to resolve boolean value ({0}={1})",
                    profile.getPrefix() + '.' + key,
                    string), e);
        }
    }

    /**
     * Configures this handler internally (extension point).
     * @param profile the profile of this service
     * @throws InterruptedException if interrupted in configuration
     * @throws IOException if failed to configure this service
     */
    protected abstract void configureExtension(ServiceProfile<?> profile) throws InterruptedException, IOException;

    /**
     * Returns the command executor for this handler (extension point).
     * @return the command executor
     */
    protected abstract ProcessExecutor getCommandExecutor();
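
    // A concrete subclass supplies the two extension points declared above.
    // A minimal sketch follows (all names are hypothetical, and any further
    // abstract members inherited from the framework base classes are omitted):
    //
    //   public class ExampleHadoopScriptHandler extends ProcessHadoopScriptHandler {
    //
    //       @Override
    //       protected void configureExtension(ServiceProfile<?> profile) throws IOException {
    //           // no additional configuration required by this sketch
    //       }
    //
    //       @Override
    //       protected ProcessExecutor getCommandExecutor() {
    //           // ExampleProcessExecutor is a hypothetical ProcessExecutor
    //           // implementation, e.g. one backed by java.lang.ProcessBuilder
    //           return new ExampleProcessExecutor();
    //       }
    //   }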

    @Override
    public final void execute(
            ExecutionMonitor monitor,
            ExecutionContext context,
            HadoopScript script) throws InterruptedException, IOException {
        monitor.open(1);
        try {
            execute0(monitor, context, script);
        } finally {
            monitor.close();
        }
    }

    @Override
    public void cleanUp(
            ExecutionMonitor monitor,
            ExecutionContext context) throws InterruptedException, IOException {
        monitor.open(1);
        try {
            if (cleanup) {
                YSLOG.info("I51001",
                        context.getBatchId(),
                        context.getFlowId(),
                        context.getExecutionId(),
                        getHandlerId());
                HadoopScript script = new HadoopScript(
                        context.getPhase().getSymbol(),
                        Collections.emptySet(),
                        CLEANUP_STAGE_CLASS,
                        Collections.emptyMap(),
                        Collections.emptyMap());
                execute0(monitor, context, script);
            } else {
                YSLOG.info("I51002",
                        context.getBatchId(),
                        context.getFlowId(),
                        context.getExecutionId(),
                        getHandlerId());
            }
        } finally {
            monitor.close();
        }
    }

    private void execute0(
            ExecutionMonitor monitor,
            ExecutionContext context,
            HadoopScript script) throws InterruptedException, IOException {
        assert monitor != null;
        assert context != null;
        assert script != null;
        Map<String, String> env = buildEnvironmentVariables(context, script);
        LOG.debug("env: {}", env);

        List<String> original = buildExecutionCommand(context, script);
        List<String> command;
        try {
            command = ProcessUtil.buildCommand(commandPrefix, original, Collections.emptyList());
        } catch (IllegalArgumentException e) {
            throw new IOException(MessageFormat.format(
                    "Failed to build command: "
                    + "{6} (batch={0}, flow={1}, phase={2}, stage={4}, execution={3})",
                    context.getBatchId(),
                    context.getFlowId(),
                    context.getPhase(),
                    context.getExecutionId(),
                    script.getId(),
                    currentProfile.getPrefix(),
                    original), e);
        }
        LOG.debug("command: {}", command);

        Map<String, Blob> extensions = BlobUtil.getExtensions(context, script);
        LOG.debug("extensions: {}", extensions);

        monitor.checkCancelled();
        ProcessExecutor executor = getCommandExecutor();
        int exit = executor.execute(context, command, env, extensions, monitor.getOutput());
        if (exit == 0) {
            return;
        }
        throw new ExitCodeException(MessageFormat.format(
                "Unexpected exit code from Hadoop job: "
                + "code={5} (batch={0}, flow={1}, phase={2}, stage={4}, execution={3})",
                context.getBatchId(),
                context.getFlowId(),
                context.getPhase(),
                context.getExecutionId(),
                script.getId(),
                String.valueOf(exit)), exit);
    }

    private Map<String, String> buildEnvironmentVariables(
            ExecutionContext context,
            ExecutionScript script) throws InterruptedException, IOException {
        assert script != null;
        Map<String, String> env = new HashMap<>();
        env.putAll(getEnvironmentVariables(context, script));
        env.putAll(context.getEnvironmentVariables());
        env.putAll(script.getEnvironmentVariables());
        return env;
    }

    private List<String> buildExecutionCommand(
            ExecutionContext context,
            HadoopScript script) throws IOException, InterruptedException {
        assert context != null;
        assert script != null;
        List<String> command = new ArrayList<>();
        command.add(getCommand(context, PATH_EXECUTE, script));
        command.add(script.getClassName());
        command.add(context.getBatchId());
        command.add(context.getFlowId());
        command.add(context.getExecutionId());
        command.add(context.getArgumentsAsString());

        Map<String, String> props = buildHadoopProperties(context, script);
        for (Map.Entry<String, String> entry : props.entrySet()) {
            command.add("-D");
            command.add(MessageFormat.format("{0}={1}",
                    entry.getKey(),
                    entry.getValue()));
        }
        return command;
    }
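
    // buildExecutionCommand(...) above assembles a token list of roughly the
    // following shape (one argv token per item; values are illustrative, and
    // the tracking-id entry is added in buildHadoopProperties(...) below
    // under the HadoopScriptUtil.PROP_TRACKING_ID key):
    //
    //   <ASAKUSA_HOME>/yaess-hadoop/libexec/hadoop-execute.sh \
    //       com.example.batch.stage.StageClient \
    //       example.batch example.flow execution-0001 date=2017-01-01 \
    //       -D <PROP_TRACKING_ID>=<tracking-id> \
    //       -D com.example.tuning=enabled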

    private Map<String, String> buildHadoopProperties(
            ExecutionContext context,
            HadoopScript script) throws InterruptedException, IOException {
        assert context != null;
        assert script != null;
        Map<String, String> props = new TreeMap<>();
        props.putAll(getProperties(context, script));
        props.putAll(script.getHadoopProperties());
        props.put(HadoopScriptUtil.PROP_TRACKING_ID, Job.computeTrackingId(context, script));
        return props;
    }

    private String getCommand(
            ExecutionContext context,
            String command,
            HadoopScript script) throws IOException, InterruptedException {
        assert command != null;
        Map<String, String> variables;
        if (script != null) {
            variables = buildEnvironmentVariables(context, script);
        } else {
            variables = getEnvironmentVariables(context, null);
        }
        String home = variables.get(ExecutionScript.ENV_ASAKUSA_HOME);
        if (home == null) {
            throw new IOException(MessageFormat.format(
                    "Asakusa installation path is not known: {0}",
                    currentProfile.getPrefix() + '.' + KEY_ENV_PREFIX + ExecutionScript.ENV_ASAKUSA_HOME));
        }
        if (home.endsWith(getPathSegmentSeparator())) {
            return home + command;
        } else {
            return home + getPathSegmentSeparator() + command;
        }
    }

    /**
     * Returns the path segment separator.
     * @return the path segment separator string
     */
    protected String getPathSegmentSeparator() {
        return "/";
    }
}
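
// Note: getPathSegmentSeparator() is an extension point, too. A subclass that
// launches the command on a platform with a different path convention could
// override it, for example (a sketch; not part of the framework):
//
//   @Override
//   protected String getPathSegmentSeparator() {
//       return "\\"; // hypothetical: compose Windows-style remote paths
//   }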