/** * Copyright (c) 2010 Yahoo! Inc. All rights reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. See accompanying LICENSE file. */ package org.apache.oozie.command.wf; import java.io.IOException; import java.io.StringReader; import java.net.URI; import java.net.URISyntaxException; import java.util.Date; import java.util.Properties; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.oozie.DagELFunctions; import org.apache.oozie.WorkflowActionBean; import org.apache.oozie.WorkflowJobBean; import org.apache.oozie.action.ActionExecutor; import org.apache.oozie.client.WorkflowAction; import org.apache.oozie.client.WorkflowJob; import org.apache.oozie.command.CommandException; import org.apache.oozie.service.CallbackService; import org.apache.oozie.service.ELService; import org.apache.oozie.service.HadoopAccessorException; import org.apache.oozie.service.HadoopAccessorService; import org.apache.oozie.service.Services; import org.apache.oozie.store.StoreException; import org.apache.oozie.store.WorkflowStore; import org.apache.oozie.util.ELEvaluator; import org.apache.oozie.util.Instrumentation; import org.apache.oozie.util.XConfiguration; import org.apache.oozie.util.XLog; import org.apache.oozie.workflow.WorkflowException; import org.apache.oozie.workflow.WorkflowInstance; import org.apache.oozie.workflow.lite.LiteWorkflowInstance; /** * Base class for Action execution commands. Provides common functionality to handle different types of errors while * attempting to start or end an action. */ public abstract class ActionCommand<T> extends WorkflowCommand<Void> { private static final String INSTRUMENTATION_GROUP = "action.executors"; protected static final String INSTR_FAILED_JOBS_COUNTER = "failed"; protected static final String RECOVERY_ID_SEPARATOR = "@"; public ActionCommand(String name, String type, int priority) { super(name, type, priority, XLog.STD); } /** * Takes care of Transient failures. Sets the action status to retry and increments the retry count if not enough * attempts have been made. Otherwise returns false. * * @param context the execution context. * @param executor the executor instance being used. * @param status the status to be set for the action. * @return true if the action is scheduled for another retry. false if the number of retries has exceeded the * maximum number of configured retries. * @throws StoreException * @throws org.apache.oozie.command.CommandException */ protected boolean handleTransient(ActionExecutor.Context context, ActionExecutor executor, WorkflowAction.Status status) throws StoreException, CommandException { XLog.getLog(getClass()).debug("Attempting to retry"); ActionExecutorContext aContext = (ActionExecutorContext) context; WorkflowActionBean action = (WorkflowActionBean) aContext.getAction(); incrActionErrorCounter(action.getType(), "transient", 1); int actionRetryCount = action.getRetries(); if (actionRetryCount >= executor.getMaxRetries()) { XLog.getLog(getClass()).warn("Exceeded max retry count [{0}]. Suspending Job", executor.getMaxRetries()); return false; } else { action.setStatus(status); action.setPending(); action.incRetries(); long retryDelayMillis = executor.getRetryInterval() * 1000; action.setPendingAge(new Date(System.currentTimeMillis() + retryDelayMillis)); XLog.getLog(getClass()).info("Next Retry, Attempt Number [{0}] in [{1}] milliseconds", actionRetryCount + 1, retryDelayMillis); queueCallable(this, retryDelayMillis); return true; } } /** * Takes care of non transient failures. The job is suspended, and the state of the action is changed to *MANUAL * and set pending flag of action to false * * @param store WorkflowStore * @param context the execution context. * @param executor the executor instance being used. * @param status the status to be set for the action. * @throws StoreException * @throws CommandException */ protected void handleNonTransient(WorkflowStore store, ActionExecutor.Context context, ActionExecutor executor, WorkflowAction.Status status) throws StoreException, CommandException { ActionExecutorContext aContext = (ActionExecutorContext) context; WorkflowActionBean action = (WorkflowActionBean) aContext.getAction(); incrActionErrorCounter(action.getType(), "nontransient", 1); WorkflowJobBean workflow = (WorkflowJobBean) context.getWorkflow(); String id = workflow.getId(); action.setStatus(status); action.resetPendingOnly(); XLog.getLog(getClass()).warn("Suspending Workflow Job id=" + id); try { SuspendCommand.suspendJob(store, workflow, id, action.getId()); } catch (WorkflowException e) { throw new CommandException(e); } } /** * Takes care of errors. </p> For errors while attempting to start the action, the job state is updated and an * {@link ActionEndCommand} is queued. </p> For errors while attempting to end the action, the job state is updated. * </p> * * @param context the execution context. * @param executor the executor instance being used. * @param message * @param isStart whether the error was generated while starting or ending an action. * @param status the status to be set for the action. * @throws org.apache.oozie.command.CommandException */ protected void handleError(ActionExecutor.Context context, ActionExecutor executor, String message, boolean isStart, WorkflowAction.Status status) throws CommandException { XLog.getLog(getClass()).warn("Setting Action Status to [{0}]", status); ActionExecutorContext aContext = (ActionExecutorContext) context; WorkflowActionBean action = (WorkflowActionBean) aContext.getAction(); incrActionErrorCounter(action.getType(), "error", 1); action.setPending(); if (isStart) { action.setExecutionData(message, null); queueCallable(new ActionEndCommand(action.getId(), action.getType())); } else { action.setEndData(status, WorkflowAction.Status.ERROR.toString()); } } public void failJob(ActionExecutor.Context context) throws CommandException { ActionExecutorContext aContext = (ActionExecutorContext) context; WorkflowActionBean action = (WorkflowActionBean) aContext.getAction(); incrActionErrorCounter(action.getType(), "failed", 1); WorkflowJobBean workflow = (WorkflowJobBean) context.getWorkflow(); XLog.getLog(getClass()).warn("Failing Job due to failed action [{0}]", action.getName()); try { workflow.getWorkflowInstance().fail(action.getName()); WorkflowInstance wfInstance = workflow.getWorkflowInstance(); ((LiteWorkflowInstance) wfInstance).setStatus(WorkflowInstance.Status.FAILED); workflow.setWorkflowInstance(wfInstance); workflow.setStatus(WorkflowJob.Status.FAILED); action.setStatus(WorkflowAction.Status.FAILED); action.resetPending(); queueCallable(new NotificationCommand(workflow, action)); queueCallable(new KillCommand(workflow.getId())); incrJobCounter(INSTR_FAILED_JOBS_COUNTER, 1); } catch (WorkflowException ex) { throw new CommandException(ex); } } private void incrActionErrorCounter(String type, String error, int count) { getInstrumentation().incr(INSTRUMENTATION_GROUP, type + "#ex." + error, count); } protected void incrActionCounter(String type, int count) { getInstrumentation().incr(INSTRUMENTATION_GROUP, type + "#" + getName(), count); } protected void addActionCron(String type, Instrumentation.Cron cron) { getInstrumentation().addCron(INSTRUMENTATION_GROUP, type + "#" + getName(), cron); } public static class ActionExecutorContext implements ActionExecutor.Context { private WorkflowJobBean workflow; private Configuration protoConf; private WorkflowActionBean action; private boolean isRetry; private boolean started; private boolean ended; private boolean executed; public ActionExecutorContext(WorkflowJobBean workflow, WorkflowActionBean action, boolean isRetry) { this.workflow = workflow; this.action = action; this.isRetry = isRetry; try { protoConf = new XConfiguration(new StringReader(workflow.getProtoActionConf())); } catch (IOException ex) { throw new RuntimeException("It should not happen", ex); } } public String getCallbackUrl(String externalStatusVar) { return Services.get().get(CallbackService.class).createCallBackUrl(action.getId(), externalStatusVar); } public Configuration getProtoActionConf() { return protoConf; } public WorkflowJob getWorkflow() { return workflow; } public WorkflowAction getAction() { return action; } public ELEvaluator getELEvaluator() { ELEvaluator evaluator = Services.get().get(ELService.class).createEvaluator("workflow"); DagELFunctions.configureEvaluator(evaluator, workflow, action); return evaluator; } public void setVar(String name, String value) { name = action.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + name; WorkflowInstance wfInstance = workflow.getWorkflowInstance(); wfInstance.setVar(name, value); //workflow.getWorkflowInstance().setVar(name, value); workflow.setWorkflowInstance(wfInstance); } public String getVar(String name) { name = action.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + name; return workflow.getWorkflowInstance().getVar(name); } public void setStartData(String externalId, String trackerUri, String consoleUrl) { action.setStartData(externalId, trackerUri, consoleUrl); started = true; } public void setExecutionData(String externalStatus, Properties actionData) { action.setExecutionData(externalStatus, actionData); executed = true; } public void setEndData(WorkflowAction.Status status, String signalValue) { action.setEndData(status, signalValue); ended = true; } public boolean isRetry() { return isRetry; } /** * Returns whether setStartData has been called or not. * * @return true if start completion info has been set. */ public boolean isStarted() { return started; } /** * Returns whether setExecutionData has been called or not. * * @return true if execution completion info has been set, otherwise false. */ public boolean isExecuted() { return executed; } /** * Returns whether setEndData has been called or not. * * @return true if end completion info has been set. */ public boolean isEnded() { return ended; } public void setExternalStatus(String externalStatus) { action.setExternalStatus(externalStatus); } @Override public String getRecoveryId() { return action.getId() + RECOVERY_ID_SEPARATOR + workflow.getRun(); } /* (non-Javadoc) * @see org.apache.oozie.action.ActionExecutor.Context#getActionDir() */ public Path getActionDir() throws HadoopAccessorException, IOException, URISyntaxException { String name = getWorkflow().getId() + "/" + action.getName() + "--" + action.getType(); FileSystem fs = getAppFileSystem(); String actionDirPath = Services.get().getSystemId() + "/" + name; Path fqActionDir = new Path(fs.getHomeDirectory(), actionDirPath); return fqActionDir; } /* (non-Javadoc) * @see org.apache.oozie.action.ActionExecutor.Context#getAppFileSystem() */ public FileSystem getAppFileSystem() throws HadoopAccessorException, IOException, URISyntaxException { WorkflowJob workflow = getWorkflow(); XConfiguration jobConf = new XConfiguration(new StringReader(workflow.getConf())); Configuration fsConf = new Configuration(); XConfiguration.copy(jobConf, fsConf); return Services.get().get(HadoopAccessorService.class).createFileSystem(workflow.getUser(), workflow.getGroup(), new URI(getWorkflow().getAppPath()), fsConf); } @Override public void setErrorInfo(String str, String exMsg) { action.setErrorInfo(str, exMsg); } } }