/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.falcon.workflow;
import org.apache.commons.cli.*;
import org.apache.commons.lang.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.logging.LogMover;
import org.apache.falcon.messaging.EntityInstanceMessage.ARG;
import org.apache.falcon.messaging.MessageProducer;
import org.apache.falcon.metadata.LineageArgs;
import org.apache.falcon.metadata.LineageRecorder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * Utility called by the Oozie workflow engine, from the parent workflow, after the user
 * workflow has executed.
 *
 * <p>A single invocation performs the following steps in order: sends the user JMS
 * notification, records lineage metadata (only when the reported status is
 * {@code SUCCEEDED}), moves the job logs, sends the Falcon JMS notification and finally
 * deletes the file named by the {@code logFile} argument.
 */
public class FalconPostProcessing extends Configured implements Tool {
    private static final Logger LOG = LoggerFactory.getLogger(FalconPostProcessing.class);

    /** Arguments forwarded unchanged, in this order, at the start of every JMS producer invocation. */
    private static final Arg[] COMMON_MESSAGE_ARGS = {
        Arg.CLUSTER, Arg.ENTITY_TYPE, Arg.ENTITY_NAME, Arg.NOMINAL_TIME, Arg.OPERATION,
        Arg.WORKFLOW_ID, Arg.RUN_ID, Arg.STATUS, Arg.TIMESTAMP,
    };

    /** Arguments forwarded unchanged, in this order, after the topic and broker settings. */
    private static final Arg[] FEED_MESSAGE_ARGS = {
        Arg.BRKR_TTL, Arg.FEED_NAMES, Arg.FEED_INSTANCE_PATHS, Arg.LOG_FILE,
    };

    /** Options that must be present on the command line. */
    private static final Arg[] REQUIRED_ARGS = {
        Arg.CLUSTER, Arg.ENTITY_TYPE, Arg.ENTITY_NAME, Arg.NOMINAL_TIME, Arg.OPERATION,
        Arg.WORKFLOW_ID, Arg.RUN_ID, Arg.STATUS, Arg.TIMESTAMP,
        Arg.BRKR_IMPL_CLASS, Arg.BRKR_URL, Arg.USER_BRKR_IMPL_CLASS, Arg.USER_BRKR_URL, Arg.BRKR_TTL,
        Arg.FEED_NAMES, Arg.FEED_INSTANCE_PATHS, Arg.LOG_FILE,
        Arg.WF_ENGINE_URL, Arg.USER_SUBFLOW_ID, Arg.LOG_DIR, Arg.WORKFLOW_USER,
    };

    /** Options that may be omitted from the command line. */
    private static final Arg[] OPTIONAL_ARGS = {
        Arg.USER_WORKFLOW_NAME, Arg.USER_WORKFLOW_VERSION, Arg.USER_WORKFLOW_ENGINE,
        Arg.INPUT_FEED_NAMES, Arg.INPUT_FEED_PATHS,
    };

    /**
     * Args that the utility understands.
     */
    public enum Arg {
        CLUSTER("cluster", "name of the current cluster"),
        ENTITY_TYPE("entityType", "type of the entity"),
        ENTITY_NAME("entityName", "name of the entity"),
        NOMINAL_TIME("nominalTime", "instance time"),
        OPERATION("operation", "operation like generate, delete, replicate"),
        WORKFLOW_ID("workflowId", "current workflow-id of the instance"),
        RUN_ID("runId", "current run-id of the instance"),
        STATUS("status", "status of the user workflow instance"),
        TIMESTAMP("timeStamp", "current timestamp"),
        TOPIC_NAME("topicName", "name of the topic to be used to send JMS message"),
        BRKR_IMPL_CLASS("brokerImplClass", "falcon message broker Implementation class"),
        BRKR_URL("brokerUrl", "falcon message broker url"),
        USER_BRKR_IMPL_CLASS("userBrokerImplClass", "user broker Impl class"),
        USER_BRKR_URL("userBrokerUrl", "user broker url"),
        BRKR_TTL("brokerTTL", "time to live for broker message in sec"),
        FEED_NAMES("feedNames", "name of the feeds which are generated/replicated/deleted"),
        FEED_INSTANCE_PATHS("feedInstancePaths", "comma separated feed instance paths"),
        LOG_FILE("logFile", "log file path where feeds to be deleted are recorded"),
        WF_ENGINE_URL("workflowEngineUrl", "url of workflow engine server, ex:oozie"),
        USER_SUBFLOW_ID("subflowId", "external id of user workflow"),
        USER_WORKFLOW_ENGINE("userWorkflowEngine", "user workflow engine type"),
        USER_WORKFLOW_NAME("userWorkflowName", "user workflow name"),
        USER_WORKFLOW_VERSION("userWorkflowVersion", "user workflow version"),
        LOG_DIR("logDir", "log dir where job logs are copied"),
        WORKFLOW_USER("workflowUser", "user who owns the feed instance (partition)"),
        INPUT_FEED_NAMES("falconInputFeeds", "name of the feeds which are used as inputs"),
        INPUT_FEED_PATHS("falconInputPaths", "comma separated input feed instance paths");

        private final String name;
        private final String description;

        Arg(String name, String description) {
            this.name = name;
            this.description = description;
        }

        /**
         * Builds a new command-line option for this argument.
         *
         * @return a fresh {@link Option} that takes a value and carries this argument's
         *         name and description
         */
        public Option getOption() {
            return new Option(this.name, true, this.description);
        }

        /**
         * @return the option name as it appears on the command line, without the leading dash
         */
        public String getOptionName() {
            return this.name;
        }

        /**
         * Looks this argument up in a parsed command line.
         *
         * @param cmd parsed command line
         * @return whatever {@code cmd} holds for this option name
         */
        public String getOptionValue(CommandLine cmd) {
            return cmd.getOptionValue(this.name);
        }
    }

    /**
     * Command-line entry point; runs this tool through {@link ToolRunner} with a new
     * {@link Configuration}. The returned exit code is not inspected: {@link #run(String[])}
     * returns 0 on success and reports failures by throwing.
     *
     * @param args command-line arguments, see {@link Arg}
     * @throws Exception if any post-processing step fails
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new FalconPostProcessing(), args);
    }

    /**
     * Executes all post-processing steps for one workflow instance.
     *
     * @param args command-line arguments, see {@link Arg}
     * @return always 0; failures are reported by exception
     * @throws Exception if the arguments cannot be parsed, if a step throws, or if the
     *                   log file exists but cannot be deleted
     */
    @Override
    public int run(String[] args) throws Exception {
        CommandLine cmd = getCommand(args);

        LOG.info("Sending user message {}", cmd);
        invokeUserMessageProducer(cmd);

        if ("SUCCEEDED".equals(Arg.STATUS.getOptionValue(cmd))) {
            LOG.info("Recording lineage for {}", cmd);
            recordLineageMetadata(cmd);
        }

        //LogMover doesn't throw exception, a failed log mover will not fail the user workflow
        LOG.info("Moving logs {}", cmd);
        invokeLogProducer(cmd);

        LOG.info("Sending falcon message {}", cmd);
        invokeFalconMessageProducer(cmd);

        deleteInstancePathsFile(cmd);
        return 0;
    }

    /**
     * Deletes the instancePaths file named by the {@code logFile} argument, if it exists.
     *
     * @param cmd parsed command line
     * @throws Exception if the file system cannot be reached or the existing file cannot be deleted
     */
    private void deleteInstancePathsFile(CommandLine cmd) throws Exception {
        Path logFile = new Path(cmd.getOptionValue(ARG.logFile.getArgName()));

        // Prefer the configuration injected by ToolRunner so that generic options are honoured.
        Configuration conf = getConf();
        if (conf == null) {
            // run() was called directly rather than through ToolRunner; nothing was injected.
            conf = new Configuration();
        }

        FileSystem fs = logFile.getFileSystem(conf);
        if (fs.exists(logFile) && !fs.delete(logFile, true)) {
            throw new FalconException("failed to delete " + logFile);
        }
    }

    /**
     * Sends the user notification: invokes {@link MessageProducer} with the topic
     * {@code FALCON.<entityName>} and the user broker settings.
     *
     * @param cmd parsed command line
     * @throws Exception if the message producer fails
     */
    private void invokeUserMessageProducer(CommandLine cmd) throws Exception {
        List<String> args = new ArrayList<String>();
        addArgs(args, cmd, COMMON_MESSAGE_ARGS);

        //special args for user JMS message producer
        addFixedArg(args, Arg.TOPIC_NAME, "FALCON." + Arg.ENTITY_NAME.getOptionValue(cmd)); //user topic
        //note, the user broker impl class arg name to MessageProducer is brokerImplClass
        addFixedArg(args, Arg.BRKR_IMPL_CLASS, Arg.USER_BRKR_IMPL_CLASS.getOptionValue(cmd));
        addFixedArg(args, Arg.BRKR_URL, Arg.USER_BRKR_URL.getOptionValue(cmd));

        addArgs(args, cmd, FEED_MESSAGE_ARGS);

        MessageProducer.main(args.toArray(new String[0]));
    }

    /**
     * Sends the Falcon notification: invokes {@link MessageProducer} with the topic
     * {@code FALCON.ENTITY.TOPIC} and the Falcon broker settings, additionally passing
     * the workflow user and the log directory.
     *
     * @param cmd parsed command line
     * @throws Exception if the message producer fails
     */
    private void invokeFalconMessageProducer(CommandLine cmd) throws Exception {
        List<String> args = new ArrayList<String>();
        addArgs(args, cmd, COMMON_MESSAGE_ARGS);

        //special args Falcon JMS message producer
        addFixedArg(args, Arg.TOPIC_NAME, "FALCON.ENTITY.TOPIC");
        addFixedArg(args, Arg.BRKR_IMPL_CLASS, Arg.BRKR_IMPL_CLASS.getOptionValue(cmd));
        addFixedArg(args, Arg.BRKR_URL, Arg.BRKR_URL.getOptionValue(cmd));

        addArgs(args, cmd, FEED_MESSAGE_ARGS);
        addArgs(args, cmd, Arg.WORKFLOW_USER, Arg.LOG_DIR);

        MessageProducer.main(args.toArray(new String[0]));
    }

    /**
     * Invokes {@link LogMover} to copy the job logs. Does nothing except log a message
     * when Hadoop security is enabled.
     *
     * @param cmd parsed command line
     * @throws Exception if the log mover throws
     */
    private void invokeLogProducer(CommandLine cmd) throws Exception {
        // todo: need to move this out to Falcon in-process
        if (UserGroupInformation.isSecurityEnabled()) {
            LOG.info("Unable to move logs as security is enabled.");
            return;
        }

        List<String> args = new ArrayList<String>();
        addArgs(args, cmd,
                Arg.WF_ENGINE_URL, Arg.ENTITY_TYPE, Arg.USER_SUBFLOW_ID, Arg.USER_WORKFLOW_ENGINE,
                Arg.RUN_ID, Arg.LOG_DIR, Arg.STATUS);

        LogMover.main(args.toArray(new String[0]));
    }

    /**
     * Invokes {@link LineageRecorder} with every {@link LineageArgs} option that has a
     * non-empty value on the command line.
     *
     * @param cmd parsed command line
     * @throws Exception if the lineage recorder fails
     */
    private void recordLineageMetadata(CommandLine cmd) throws Exception {
        List<String> args = new ArrayList<String>();
        for (LineageArgs arg : LineageArgs.values()) {
            String value = arg.getOptionValue(cmd);
            if (StringUtils.isNotEmpty(value)) {
                args.add("-" + arg.getOptionName());
                args.add(value);
            }
        }

        LineageRecorder.main(args.toArray(new String[0]));
    }

    /**
     * Forwards each of the given arguments, in order, via
     * {@link #addArg(List, CommandLine, Arg)}.
     *
     * @param args   argument list being built
     * @param cmd    parsed command line
     * @param wanted arguments to forward
     */
    private void addArgs(List<String> args, CommandLine cmd, Arg... wanted) {
        for (Arg arg : wanted) {
            addArg(args, cmd, arg);
        }
    }

    /**
     * Appends {@code -<name> <value>} for the given argument, but only when the command
     * line holds a non-empty value for it.
     *
     * @param args argument list being built
     * @param cmd  parsed command line
     * @param arg  argument to forward
     */
    private void addArg(List<String> args, CommandLine cmd, Arg arg) {
        String value = arg.getOptionValue(cmd);
        if (StringUtils.isNotEmpty(value)) {
            args.add("-" + arg.getOptionName());
            args.add(value);
        }
    }

    /**
     * Appends {@code -<name> <value>} unconditionally, using an explicitly supplied value
     * instead of the one on the command line.
     *
     * @param args  argument list being built
     * @param arg   argument whose option name is used
     * @param value value to pass for that option
     */
    private static void addFixedArg(List<String> args, Arg arg, String value) {
        args.add("-" + arg.getOptionName());
        args.add(value);
    }

    /**
     * Parses the raw arguments against the options this utility accepts.
     * {@link Arg#TOPIC_NAME} is not registered here; the topic names are chosen by this
     * class when it invokes the message producer.
     *
     * @param arguments raw command-line arguments
     * @return the parsed command line
     * @throws ParseException if the arguments do not satisfy the registered options
     */
    private static CommandLine getCommand(String[] arguments)
        throws ParseException {

        Options options = new Options();
        for (Arg arg : REQUIRED_ARGS) {
            addOption(options, arg);
        }
        for (Arg arg : OPTIONAL_ARGS) {
            addOption(options, arg, false);
        }

        return new GnuParser().parse(options, arguments);
    }

    /**
     * Registers the argument as a required option.
     *
     * @param options option set being built
     * @param arg     argument to register
     */
    private static void addOption(Options options, Arg arg) {
        addOption(options, arg, true);
    }

    /**
     * Registers the argument as an option with the given required flag.
     *
     * @param options    option set being built
     * @param arg        argument to register
     * @param isRequired whether the option must be present on the command line
     */
    private static void addOption(Options options, Arg arg, boolean isRequired) {
        Option option = arg.getOption();
        option.setRequired(isRequired);
        options.addOption(option);
    }
}