/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.aws; import java.io.File; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Optional; import com.google.common.base.Splitter; import gobblin.annotation.Alpha; import gobblin.cluster.GobblinClusterConfigurationKeys; import gobblin.util.JvmUtils; import static gobblin.aws.GobblinAWSUtils.encodeBase64; /** * Class to generate script for launching Gobblin cluster master and workers via cloud-init * on EC2 instance boot up. * * @author Abhishek Tiwari */ @Alpha public class CloudInitScriptBuilder { private static final Logger LOGGER = LoggerFactory.getLogger(CloudInitScriptBuilder.class); private static final Splitter SPLITTER = Splitter.on(",").trimResults().omitEmptyStrings(); private static final String STDOUT = "stdout"; private static final String STDERR = "stderr"; private static final String NFS_SHARE_ALL_IPS = "*"; private static final String NFS_SHARE_DEFAULT_OPTS = "rw,sync,no_subtree_check,fsid=1,no_root_squash"; private static final String NFS_CONF_FILE = "/etc/exports"; private static final String NFS_SERVER_INSTALL_CMD = "yum install nfs-utils nfs-utils-lib"; private static final String NFS_SERVER_START_CMD = "/etc/init.d/nfs start"; private static final String NFS_EXPORT_FS_CMD = "exportfs -a"; private static final String NFS_TYPE_4 = "nfs4"; /*** * This method generates the script that would be executed by cloud-init module in EC2 instance * upon boot up for {@link GobblinAWSClusterManager}. * * This will generate cloud init shell script that does the following: * 1. Mount NFS Server (TODO: To be replaced with EFS soon) * 2. Create all prerequisite directories * 3. Download cluster configuration from S3 * 4. Download Gobblin application jars from S3 (TODO: To be replaced via baked in jars in custom Gobblin AMI) * 5. Download Gobblin custom jars from S3 * 6. Launch {@link GobblinAWSClusterManager} java application * 7. TODO: Add cron that watches the {@link GobblinAWSClusterManager} application and restarts it if it dies * * @param clusterName Name of the cluster * @param nfsParentDir Directory within which NFS directory should be created and mounted * @param sinkLogRootDir Log sink root directory * @param awsConfDir Directory to save downloaded Gobblin cluster configuration files * @param appWorkDir Gobblin application work directory * @param masterS3ConfUri S3 URI to download cluster configuration files from * @param masterS3ConfFiles Comma separated list of configuration files to download from masterS3ConfUri * @param masterS3JarsUri S3 URI to download Gobblin jar files from * @param masterS3JarsFiles Comma separated list of jar files to download from masterS3JarUri * @param masterJarsDir Directory to save downloaded Gobblin jar files * @param masterJvmMemory Xmx memory setting for Gobblin master java application * @param masterJvmArgs JVM arguments for Gobblin master application * @param gobblinVersion Optional Gobblin version * @return Cloud-init script to launch {@link GobblinAWSClusterManager} */ public static String buildClusterMasterCommand(String clusterName, String nfsParentDir, String sinkLogRootDir, String awsConfDir, String appWorkDir, String masterS3ConfUri, String masterS3ConfFiles, String masterS3JarsUri, String masterS3JarsFiles, String masterJarsDir, String masterJvmMemory, Optional<String> masterJvmArgs, Optional<String> gobblinVersion) { final StringBuilder cloudInitCmds = new StringBuilder().append("#!/bin/bash").append("\n"); final String clusterMasterClassName = GobblinAWSClusterManager.class.getSimpleName(); // Create NFS server // TODO: Replace with EFS (it went into GA on 6/30/2016) // Note: Until EFS availability, ClusterMaster is SPOF because we loose NFS when it's relaunched / replaced // .. this can be worked around, but would be an un-necessary work final String nfsDir = nfsParentDir + clusterName; final String nfsShareDirCmd = String.format("echo '%s %s(%s)' | tee --append %s", nfsDir, NFS_SHARE_ALL_IPS, NFS_SHARE_DEFAULT_OPTS, NFS_CONF_FILE); cloudInitCmds.append("mkdir -p ").append(nfsDir).append(File.separator).append("1").append("\n"); cloudInitCmds.append(NFS_SERVER_INSTALL_CMD).append("\n"); cloudInitCmds.append(nfsShareDirCmd).append("\n"); cloudInitCmds.append(NFS_SERVER_START_CMD).append("\n"); cloudInitCmds.append(NFS_EXPORT_FS_CMD).append("\n"); // Create various directories cloudInitCmds.append("mkdir -p ").append(sinkLogRootDir).append("\n"); cloudInitCmds.append("chown -R ec2-user:ec2-user /home/ec2-user/*").append("\n"); // Setup short variables to save cloud-init script space if (gobblinVersion.isPresent()) { cloudInitCmds.append("vr=").append(gobblinVersion.get()).append("\n"); } cloudInitCmds.append("cgS3=").append(masterS3ConfUri).append("\n"); cloudInitCmds.append("cg=").append(awsConfDir).append("\n"); cloudInitCmds.append("jrS3=").append(masterS3JarsUri).append("\n"); cloudInitCmds.append("jr=").append(masterJarsDir).append("\n"); // Download configurations from S3 final StringBuilder classpath = new StringBuilder(); final List<String> awsConfs = SPLITTER.splitToList(masterS3ConfFiles); for (String awsConf : awsConfs) { cloudInitCmds.append(String.format("wget -P \"${cg}\" \"${cgS3}\"%s", awsConf)).append("\n"); } classpath.append(awsConfDir); // Download jars from S3 // TODO: Eventually limit only custom user jars to pulled from S3, load rest from AMI final List<String> awsJars = SPLITTER.splitToList(masterS3JarsFiles); for (String awsJar : awsJars) { cloudInitCmds.append(String.format("wget -P \"${jr}\" \"${jrS3}\"%s", awsJar)).append("\n"); } classpath.append(":").append(masterJarsDir).append("*"); // TODO: Add cron that brings back master if it dies // Launch Gobblin Cluster Master final StringBuilder launchGobblinClusterMasterCmd = new StringBuilder() .append("java") .append(" -cp ").append(classpath) .append(" -Xmx").append(masterJvmMemory) .append(" ").append(JvmUtils.formatJvmArguments(masterJvmArgs)) .append(" ").append(GobblinAWSClusterManager.class.getName()) .append(" --").append(GobblinClusterConfigurationKeys.APPLICATION_NAME_OPTION_NAME) .append(" ").append(clusterName) .append(" --").append(GobblinAWSConfigurationKeys.APP_WORK_DIR) .append(" ").append(appWorkDir) .append(" 1>").append(sinkLogRootDir) .append(clusterMasterClassName).append(".") .append("master").append(".") .append(CloudInitScriptBuilder.STDOUT) .append(" 2>").append(sinkLogRootDir) .append(clusterMasterClassName).append(".") .append("master").append(".") .append(CloudInitScriptBuilder.STDERR); cloudInitCmds.append(launchGobblinClusterMasterCmd).append("\n"); final String cloudInitScript = cloudInitCmds.toString(); LOGGER.info("Cloud-init script for master node: " + cloudInitScript); return encodeBase64(cloudInitScript); } /*** * This method generates the script that would be executed by cloud-init module in EC2 instance * upon boot up for {@link GobblinAWSTaskRunner}. * * This will generate cloud init shell script that does the following: * 1. Mount NFS volume (TODO: To be replaced with EFS soon) * 2. Create all prerequisite directories * 3. Download cluster configuration from S3 * 4. Download Gobblin application jars from S3 (TODO: To be replaced via baked in jars in custom Gobblin AMI) * 5. Download Gobblin custom jars from S3 * 6. Launch {@link GobblinAWSTaskRunner} java application * 7. TODO: Add cron that watches the {@link GobblinAWSTaskRunner} application and restarts it if it dies * * @param clusterName Name of the cluster * @param nfsParentDir Directory within which NFS directory should be created and mounted * @param sinkLogRootDir Log sink root directory * @param awsConfDir Directory to save downloaded Gobblin cluster configuration files * @param appWorkDir Gobblin application work directory * @param masterPublicIp IP of Gobblin cluster worker * @param workerS3ConfUri S3 URI to download cluster configuration files from * @param workerS3ConfFiles Comma separated list of configuration files to download from workerS3ConfUri * @param workerS3JarsUri S3 URI to download Gobblin jar files from * @param workerS3JarsFiles Comma separated list of jar files to download from workerS3JarUri * @param workerJarsDir Directory to save downloaded Gobblin jar files * @param workerJvmMemory Xmx memory setting for Gobblin worker java application * @param workerJvmArgs JVM arguments for Gobblin worker application * @param gobblinVersion Optional Gobblin version * @return Cloud-init script to launch {@link GobblinAWSTaskRunner} */ public static String buildClusterWorkerCommand(String clusterName, String nfsParentDir, String sinkLogRootDir, String awsConfDir, String appWorkDir, String masterPublicIp, String workerS3ConfUri, String workerS3ConfFiles, String workerS3JarsUri, String workerS3JarsFiles, String workerJarsDir, String workerJvmMemory, Optional<String> workerJvmArgs, Optional<String> gobblinVersion) { final StringBuilder cloudInitCmds = new StringBuilder().append("#!/bin/bash").append("\n"); final String clusterWorkerClassName = GobblinAWSTaskRunner.class.getSimpleName(); // Connect to NFS server // TODO: Replace with EFS (it went into GA on 6/30/2016) final String nfsDir = nfsParentDir + clusterName; final String nfsMountCmd = String.format("mount -t %s %s:%s %s", NFS_TYPE_4, masterPublicIp, nfsDir, nfsDir); cloudInitCmds.append("mkdir -p ").append(nfsDir).append("\n"); cloudInitCmds.append(nfsMountCmd).append("\n"); // Create various other directories cloudInitCmds.append("mkdir -p ").append(sinkLogRootDir).append("\n"); cloudInitCmds.append("chown -R ec2-user:ec2-user /home/ec2-user/*").append("\n"); // Setup short variables to save cloud-init script space if (gobblinVersion.isPresent()) { cloudInitCmds.append("vr=").append(gobblinVersion.get()).append("\n"); } cloudInitCmds.append("cg0=").append(workerS3ConfUri).append("\n"); cloudInitCmds.append("cg=").append(awsConfDir).append("\n"); cloudInitCmds.append("jr0=").append(workerS3JarsUri).append("\n"); cloudInitCmds.append("jr=").append(workerJarsDir).append("\n"); // Download configurations from S3 final StringBuilder classpath = new StringBuilder(); final List<String> awsConfs = SPLITTER.splitToList(workerS3ConfFiles); for (String awsConf : awsConfs) { cloudInitCmds.append(String.format("wget -P \"${cg}\" \"${cg0}\"%s", awsConf)).append("\n"); } classpath.append(awsConfDir); // Download jars from S3 // TODO: Limit only custom user jars to pulled from S3, load rest from AMI final List<String> awsJars = SPLITTER.splitToList(workerS3JarsFiles); for (String awsJar : awsJars) { cloudInitCmds.append(String.format("wget -P \"${jr}\" \"${jr0}\"%s", awsJar)).append("\n"); } classpath.append(":").append(workerJarsDir).append("*"); // Get a random Helix instance name cloudInitCmds.append("pi=`curl http://169.254.169.254/latest/meta-data/local-ipv4`").append("\n"); // TODO: Add cron that brings back worker if it dies // Launch Gobblin Worker final StringBuilder launchGobblinClusterWorkerCmd = new StringBuilder() .append("java") .append(" -cp ").append(classpath) .append(" -Xmx").append(workerJvmMemory) .append(" ").append(JvmUtils.formatJvmArguments(workerJvmArgs)) .append(" ").append(GobblinAWSTaskRunner.class.getName()) .append(" --").append(GobblinClusterConfigurationKeys.APPLICATION_NAME_OPTION_NAME) .append(" ").append(clusterName) .append(" --").append(GobblinClusterConfigurationKeys.HELIX_INSTANCE_NAME_OPTION_NAME) .append(" ").append("$pi") .append(" --").append(GobblinAWSConfigurationKeys.APP_WORK_DIR) .append(" ").append(appWorkDir) .append(" 1>").append(sinkLogRootDir) .append(clusterWorkerClassName).append(".") .append("$pi").append(".") .append(CloudInitScriptBuilder.STDOUT) .append(" 2>").append(sinkLogRootDir) .append(clusterWorkerClassName).append(".") .append("$pi").append(".") .append(CloudInitScriptBuilder.STDERR); cloudInitCmds.append(launchGobblinClusterWorkerCmd); final String cloudInitScript = cloudInitCmds.toString(); LOGGER.info("Cloud-init script for worker node: " + cloudInitScript); return encodeBase64(cloudInitScript); } }