/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.falcon.oozie;
import org.apache.commons.io.IOUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.Tag;
import org.apache.falcon.entity.ClusterHelper;
import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.v0.Entity;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.hadoop.HadoopClientFactory;
import org.apache.falcon.oozie.feed.FSReplicationWorkflowBuilder;
import org.apache.falcon.oozie.feed.FeedRetentionWorkflowBuilder;
import org.apache.falcon.oozie.feed.HCatReplicationWorkflowBuilder;
import org.apache.falcon.oozie.process.HiveProcessWorkflowBuilder;
import org.apache.falcon.oozie.process.OozieProcessWorkflowBuilder;
import org.apache.falcon.oozie.process.PigProcessWorkflowBuilder;
import org.apache.falcon.oozie.workflow.ACTION;
import org.apache.falcon.oozie.workflow.CREDENTIAL;
import org.apache.falcon.oozie.workflow.CREDENTIALS;
import org.apache.falcon.oozie.workflow.END;
import org.apache.falcon.oozie.workflow.KILL;
import org.apache.falcon.oozie.workflow.START;
import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
import org.apache.falcon.security.SecurityUtil;
import org.apache.falcon.util.OozieUtils;
import org.apache.falcon.util.RuntimeProperties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
/**
* Base class for building orchestration workflow in oozie.
* @param <T>
*/
public abstract class OozieOrchestrationWorkflowBuilder<T extends Entity> extends OozieEntityBuilder<T> {
protected static final String HIVE_CREDENTIAL_NAME = "falconHiveAuth";
protected static final String USER_ACTION_NAME = "user-action";
protected static final String PREPROCESS_ACTION_NAME = "pre-processing";
protected static final String SUCCESS_POSTPROCESS_ACTION_NAME = "succeeded-post-processing";
protected static final String FAIL_POSTPROCESS_ACTION_NAME = "failed-post-processing";
protected static final String OK_ACTION_NAME = "end";
protected static final String FAIL_ACTION_NAME = "fail";
private static final String POSTPROCESS_TEMPLATE = "/action/post-process.xml";
private static final String PREPROCESS_TEMPLATE = "/action/pre-process.xml";
public static final Set<String> FALCON_ACTIONS = new HashSet<String>(Arrays.asList(
new String[]{PREPROCESS_ACTION_NAME, SUCCESS_POSTPROCESS_ACTION_NAME, FAIL_POSTPROCESS_ACTION_NAME, }));
private final Tag lifecycle;
public OozieOrchestrationWorkflowBuilder(T entity, Tag lifecycle) {
super(entity);
this.lifecycle = lifecycle;
}
public static final OozieOrchestrationWorkflowBuilder get(Entity entity, Cluster cluster, Tag lifecycle)
throws FalconException {
switch (entity.getEntityType()) {
case FEED:
Feed feed = (Feed) entity;
switch (lifecycle) {
case RETENTION:
return new FeedRetentionWorkflowBuilder(feed);
case REPLICATION:
boolean isTable = EntityUtil.isTableStorageType(cluster, feed);
if (isTable) {
return new HCatReplicationWorkflowBuilder(feed);
} else {
return new FSReplicationWorkflowBuilder(feed);
}
default:
throw new IllegalArgumentException("Unhandled type " + entity.getEntityType()
+ ", lifecycle " + lifecycle);
}
case PROCESS:
Process process = (Process) entity;
switch (process.getWorkflow().getEngine()) {
case PIG:
return new PigProcessWorkflowBuilder(process);
case OOZIE:
return new OozieProcessWorkflowBuilder(process);
case HIVE:
return new HiveProcessWorkflowBuilder(process);
default:
break;
}
default:
}
throw new IllegalArgumentException("Unhandled type " + entity.getEntityType() + ", lifecycle " + lifecycle);
}
protected void addTransition(ACTION action, String ok, String fail) {
action.getOk().setTo(ok);
action.getError().setTo(fail);
}
protected void decorateWorkflow(WORKFLOWAPP wf, String name, String startAction) {
wf.setName(name);
wf.setStart(new START());
wf.getStart().setTo(startAction);
wf.setEnd(new END());
wf.getEnd().setName(OK_ACTION_NAME);
KILL kill = new KILL();
kill.setName(FAIL_ACTION_NAME);
kill.setMessage("Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]");
wf.getDecisionOrForkOrJoin().add(kill);
}
protected ACTION getSuccessPostProcessAction() throws FalconException {
ACTION action = unmarshalAction(POSTPROCESS_TEMPLATE);
decorateWithOozieRetries(action);
return action;
}
protected ACTION getFailPostProcessAction() throws FalconException {
ACTION action = unmarshalAction(POSTPROCESS_TEMPLATE);
decorateWithOozieRetries(action);
action.setName(FAIL_POSTPROCESS_ACTION_NAME);
return action;
}
protected ACTION getPreProcessingAction(boolean isTableStorageType, Tag tag) throws FalconException {
ACTION action = unmarshalAction(PREPROCESS_TEMPLATE);
decorateWithOozieRetries(action);
if (isTableStorageType) {
// adds hive-site.xml in actions classpath
action.getJava().setJobXml("${wf:appPath()}/conf/hive-site.xml");
}
List<String> args = action.getJava().getArg();
args.add("-out");
if (tag == Tag.REPLICATION) {
args.add("${logDir}/latedata/${nominalTime}/${srcClusterName}");
} else {
args.add("${logDir}/latedata/${nominalTime}");
}
return action;
}
protected Path marshal(Cluster cluster, WORKFLOWAPP workflow, Path outPath) throws FalconException {
return marshal(cluster, new org.apache.falcon.oozie.workflow.ObjectFactory().createWorkflowApp(workflow),
OozieUtils.WORKFLOW_JAXB_CONTEXT, new Path(outPath, "workflow.xml"));
}
protected WORKFLOWAPP unmarshal(String template) throws FalconException {
return unmarshal(template, OozieUtils.WORKFLOW_JAXB_CONTEXT, WORKFLOWAPP.class);
}
protected ACTION unmarshalAction(String template) throws FalconException {
return unmarshal(template, OozieUtils.ACTION_JAXB_CONTEXT, ACTION.class);
}
protected boolean shouldPreProcess() throws FalconException {
if (EntityUtil.getLateProcess(entity) == null || EntityUtil.getLateProcess(entity).getLateInputs() == null
|| EntityUtil.getLateProcess(entity).getLateInputs().size() == 0) {
return false;
}
return true;
}
protected void addLibExtensionsToWorkflow(Cluster cluster, WORKFLOWAPP wf, Tag tag) throws FalconException {
String libext = ClusterHelper.getLocation(cluster, "working") + "/libext";
FileSystem fs = HadoopClientFactory.get().createFileSystem(ClusterHelper.getConfiguration(cluster));
try {
addExtensionJars(fs, new Path(libext), wf);
addExtensionJars(fs, new Path(libext, entity.getEntityType().name()), wf);
if (tag != null) {
addExtensionJars(fs, new Path(libext, entity.getEntityType().name() + "/" + tag.name().toLowerCase()),
wf);
}
} catch (IOException e) {
throw new FalconException(e);
}
}
private void addExtensionJars(FileSystem fs, Path path, WORKFLOWAPP wf) throws IOException {
FileStatus[] libs = null;
try {
libs = fs.listStatus(path);
} catch (FileNotFoundException ignore) {
//Ok if the libext is not configured
}
if (libs == null) {
return;
}
for (FileStatus lib : libs) {
if (lib.isDir()) {
continue;
}
for (Object obj : wf.getDecisionOrForkOrJoin()) {
if (!(obj instanceof ACTION)) {
continue;
}
ACTION action = (ACTION) obj;
List<String> files = null;
if (action.getJava() != null) {
files = action.getJava().getFile();
} else if (action.getPig() != null) {
files = action.getPig().getFile();
} else if (action.getMapReduce() != null) {
files = action.getMapReduce().getFile();
}
if (files != null) {
files.add(lib.getPath().toString());
}
}
}
}
// creates hive-site.xml configuration in conf dir for the given cluster on the same cluster.
protected void createHiveConfiguration(Cluster cluster, Path workflowPath, String prefix) throws FalconException {
Configuration hiveConf = getHiveCredentialsAsConf(cluster);
try {
Configuration conf = ClusterHelper.getConfiguration(cluster);
FileSystem fs = HadoopClientFactory.get().createFileSystem(conf);
// create hive conf to stagingDir
Path confPath = new Path(workflowPath + "/conf");
persistHiveConfiguration(fs, confPath, hiveConf, prefix);
} catch (IOException e) {
throw new FalconException("Unable to create create hive site", e);
}
}
private void persistHiveConfiguration(FileSystem fs, Path confPath, Configuration hiveConf, String prefix)
throws IOException {
OutputStream out = null;
try {
out = fs.create(new Path(confPath, prefix + "hive-site.xml"));
hiveConf.writeXml(out);
} finally {
IOUtils.closeQuietly(out);
}
}
/**
* This is only necessary if table is involved and is secure mode.
*
* @param workflowApp workflow xml
* @param cluster cluster entity
*/
protected void addHCatalogCredentials(WORKFLOWAPP workflowApp, Cluster cluster, String credentialName) {
CREDENTIALS credentials = workflowApp.getCredentials();
if (credentials == null) {
credentials = new CREDENTIALS();
}
credentials.getCredential().add(createHCatalogCredential(cluster, credentialName));
// add credential for workflow
workflowApp.setCredentials(credentials);
}
/**
* This is only necessary if table is involved and is secure mode.
*
* @param workflowApp workflow xml
* @param cluster cluster entity
*/
protected void addHCatalogCredentials(WORKFLOWAPP workflowApp, Cluster cluster, String credentialName,
Set<String> actions) {
addHCatalogCredentials(workflowApp, cluster, credentialName);
// add credential to each action
for (Object object : workflowApp.getDecisionOrForkOrJoin()) {
if (!(object instanceof ACTION)) {
continue;
}
ACTION action = (ACTION) object;
String actionName = action.getName();
if (actions.contains(actionName)) {
action.setCred(credentialName);
}
}
}
/**
* This is only necessary if table is involved and is secure mode.
*
* @param cluster cluster entity
* @param credentialName credential name
* @return CREDENTIALS object
*/
private CREDENTIAL createHCatalogCredential(Cluster cluster, String credentialName) {
final String metaStoreUrl = ClusterHelper.getRegistryEndPoint(cluster);
CREDENTIAL credential = new CREDENTIAL();
credential.setName(credentialName);
credential.setType("hcat");
credential.getProperty().add(createProperty("hcat.metastore.uri", metaStoreUrl));
credential.getProperty().add(createProperty("hcat.metastore.principal",
ClusterHelper.getPropertyValue(cluster, SecurityUtil.HIVE_METASTORE_PRINCIPAL)));
return credential;
}
private CREDENTIAL.Property createProperty(String name, String value) {
CREDENTIAL.Property property = new CREDENTIAL.Property();
property.setName(name);
property.setValue(value);
return property;
}
protected void decorateWithOozieRetries(ACTION action) {
Properties props = RuntimeProperties.get();
action.setRetryMax(props.getProperty("falcon.parentworkflow.retry.max", "3"));
action.setRetryInterval(props.getProperty("falcon.parentworkflow.retry.interval.secs", "1"));
}
}