/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.yarn;

import alluxio.Configuration;
import alluxio.Constants;
import alluxio.PropertyKey;
import alluxio.util.FormatUtils;
import alluxio.util.io.PathUtils;
import alluxio.util.network.NetworkAddressUtils;
import alluxio.yarn.YarnUtils.YarnContainerType;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.client.api.NMClient;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync.CallbackHandler;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.util.Records;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.security.PrivilegedExceptionAction;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

import javax.annotation.concurrent.NotThreadSafe;
import javax.ws.rs.HttpMethod;

/**
 * Actual owner of Alluxio running on Yarn. The YARN ResourceManager will launch this
 * ApplicationMaster on an allocated container. The ApplicationMaster communicates with the YARN
 * cluster, and handles application execution. It performs operations asynchronously.
 */
@NotThreadSafe
public final class ApplicationMaster implements AMRMClientAsync.CallbackHandler {
  private static final Logger LOG = LoggerFactory.getLogger(ApplicationMaster.class);

  /**
   * Resources needed by the master and worker containers. Yarn will copy these to the container
   * before running the container's command.
   */
  private static final List<String> LOCAL_RESOURCE_NAMES =
      Lists.newArrayList(YarnUtils.ALLUXIO_TARBALL, YarnUtils.ALLUXIO_SETUP_SCRIPT);

  /* Parameters sent from Client. */
  private final int mMasterCpu;
  private final int mWorkerCpu;
  private final int mMasterMemInMB;
  private final int mWorkerMemInMB;
  private final int mRamdiskMemInMB;
  private final int mNumWorkers;
  private final String mMasterAddress;
  private final int mMaxWorkersPerHost;
  private final String mResourcePath;

  private final YarnConfiguration mYarnConf = new YarnConfiguration();
  /** The count starts at 1, then becomes 0 when the application is done. */
  private final CountDownLatch mApplicationDoneLatch;
  /** Client to talk to Resource Manager. */
  private final AMRMClientAsync<ContainerRequest> mRMClient;
  /** Client to talk to Node Manager. */
  private final NMClient mNMClient;
  /** Client Resource Manager Service. */
  private final YarnClient mYarnClient;
  /** Network address of the container allocated for Alluxio master. */
  private String mMasterContainerNetAddress;

  /**
   * Allocator for the container type currently being requested (first the master, then the
   * workers). Volatile because it is written by {@link #requestAndLaunchContainers()} and read
   * from the {@link #onContainersAllocated(List)} callback.
   */
  private volatile ContainerAllocator mContainerAllocator;

  /**
   * A factory which creates an AMRMClientAsync with a heartbeat interval and callback handler.
   */
  public interface AMRMClientAsyncFactory {
    /**
     * @param heartbeatMs the interval at which to send heartbeats to the resource manager
     * @param handler a handler for callbacks from the resource manager
     * @return a client for making requests to the resource manager
     */
    AMRMClientAsync<ContainerRequest> createAMRMClientAsync(int heartbeatMs,
        CallbackHandler handler);
  }

  /** Security tokens for HDFS. Only populated by {@link #start()} when security is enabled. */
  private ByteBuffer mAllTokens;

  /**
   * Convenience constructor which uses the default Alluxio configuration.
   *
   * @param numWorkers the number of workers to launch
   * @param masterAddress the address at which to start the Alluxio master
   * @param resourcePath an hdfs path shared by all yarn nodes which can be used to share resources
   */
  public ApplicationMaster(int numWorkers, String masterAddress, String resourcePath) {
    this(numWorkers, masterAddress, resourcePath, YarnClient.createYarnClient(),
        NMClient.createNMClient(), new AMRMClientAsyncFactory() {
          @Override
          public AMRMClientAsync<ContainerRequest> createAMRMClientAsync(int heartbeatMs,
              CallbackHandler handler) {
            return AMRMClientAsync.createAMRMClientAsync(heartbeatMs, handler);
          }
        });
  }

  /**
   * Constructs an {@link ApplicationMaster}.
   *
   * Clients will be initialized and started during the {@link #start()} method.
   *
   * @param numWorkers the number of workers to launch
   * @param masterAddress the address at which to start the Alluxio master
   * @param resourcePath an hdfs path shared by all yarn nodes which can be used to share resources
   * @param yarnClient the client to use for communicating with Yarn
   * @param nMClient the client to use for communicating with the node manager
   * @param amrmFactory a factory for creating an {@link AMRMClientAsync}
   */
  public ApplicationMaster(int numWorkers, String masterAddress, String resourcePath,
      YarnClient yarnClient, NMClient nMClient, AMRMClientAsyncFactory amrmFactory) {
    mMasterCpu = Configuration.getInt(PropertyKey.INTEGRATION_MASTER_RESOURCE_CPU);
    mMasterMemInMB =
        (int) (Configuration.getBytes(PropertyKey.INTEGRATION_MASTER_RESOURCE_MEM)
            / Constants.MB);
    mWorkerCpu = Configuration.getInt(PropertyKey.INTEGRATION_WORKER_RESOURCE_CPU);
    // TODO(binfan): request worker container and ramdisk container separately
    // memory for running worker
    mWorkerMemInMB =
        (int) (Configuration.getBytes(PropertyKey.INTEGRATION_WORKER_RESOURCE_MEM)
            / Constants.MB);
    // memory for running ramdisk
    mRamdiskMemInMB =
        (int) (Configuration.getBytes(PropertyKey.WORKER_MEMORY_SIZE) / Constants.MB);
    mMaxWorkersPerHost = Configuration.getInt(PropertyKey.INTEGRATION_YARN_WORKERS_PER_HOST_MAX);
    mNumWorkers = numWorkers;
    mMasterAddress = masterAddress;
    mResourcePath = resourcePath;
    mApplicationDoneLatch = new CountDownLatch(1);
    mYarnClient = yarnClient;
    mNMClient = nMClient;
    // Heartbeat to the resource manager every 500ms.
    mRMClient = amrmFactory.createAMRMClientAsync(500, this);
  }

  /**
   * Entry point of the application master process. Parses the command line and runs the
   * application master, as the user named by the {@code ALLUXIO_USER} environment variable when
   * security is enabled. Any failure is logged and terminates the JVM with exit status 1.
   *
   * @param args Command line arguments to launch application master
   */
  public static void main(String[] args) {
    Options options = new Options();
    options.addOption("num_workers", true, "Number of Alluxio workers to launch. Default 1");
    options.addOption("master_address", true, "(Required) Address to run Alluxio master");
    options.addOption("resource_path", true,
        "(Required) HDFS path containing the Application Master");

    try {
      LOG.info("Starting Application Master with args {}", Arrays.toString(args));
      final CommandLine cliParser = new GnuParser().parse(options, args);

      YarnConfiguration conf = new YarnConfiguration();
      UserGroupInformation.setConfiguration(conf);
      if (UserGroupInformation.isSecurityEnabled()) {
        String user = System.getenv("ALLUXIO_USER");
        UserGroupInformation ugi = UserGroupInformation.createRemoteUser(user);
        // Carry the tokens of the launching user over to the user we are about to run as.
        for (Token<?> token : UserGroupInformation.getCurrentUser().getTokens()) {
          ugi.addToken(token);
        }
        LOG.info("UserGroupInformation: {}", ugi);
        ugi.doAs(new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws Exception {
            runApplicationMaster(cliParser);
            return null;
          }
        });
      } else {
        runApplicationMaster(cliParser);
      }
    } catch (Exception e) {
      LOG.error("Error running Application Master", e);
      System.exit(1);
    }
  }

  /**
   * Run the application master.
   *
   * @param cliParser client arguments parser
   * @throws IllegalArgumentException if a required option is missing
   * @throws Exception if the application master fails to start, launch containers or shut down
   */
  private static void runApplicationMaster(final CommandLine cliParser) throws Exception {
    int numWorkers = Integer.parseInt(cliParser.getOptionValue("num_workers", "1"));
    String masterAddress = cliParser.getOptionValue("master_address");
    String resourcePath = cliParser.getOptionValue("resource_path");
    // Both options are documented as required; fail fast instead of passing null downstream.
    if (masterAddress == null) {
      throw new IllegalArgumentException("Missing required option: master_address");
    }
    if (resourcePath == null) {
      throw new IllegalArgumentException("Missing required option: resource_path");
    }

    ApplicationMaster applicationMaster =
        new ApplicationMaster(numWorkers, masterAddress, resourcePath);
    applicationMaster.start();
    applicationMaster.requestAndLaunchContainers();
    applicationMaster.waitForShutdown();
    applicationMaster.stop();
  }

  /**
   * Hands every newly allocated container to the currently active container allocator.
   *
   * @param containers the containers allocated by the resource manager
   */
  @Override
  public void onContainersAllocated(List<Container> containers) {
    for (Container container : containers) {
      mContainerAllocator.allocateContainer(container);
    }
  }

  /**
   * Logs the exit status of every completed container.
   *
   * @param statuses the statuses of the completed containers
   */
  @Override
  public void onContainersCompleted(List<ContainerStatus> statuses) {
    for (ContainerStatus status : statuses) {
      // Releasing worker containers because we already have workers on their host will generate a
      // callback to this method, so we use debug instead of error.
      if (status.getExitStatus() == ContainerExitStatus.ABORTED) {
        LOG.debug("Aborted container {}", status.getContainerId());
      } else {
        LOG.error("Container {} completed with exit status {}", status.getContainerId(),
            status.getExitStatus());
      }
    }
  }

  /** Node updates are ignored. */
  @Override
  public void onNodesUpdated(List<NodeReport> updated) {}

  /** Releases {@link #waitForShutdown()} so that the application master can stop. */
  @Override
  public void onShutdownRequest() {
    mApplicationDoneLatch.countDown();
  }

  /**
   * Logs an error reported by the resource manager.
   *
   * @param t the reported error
   */
  @Override
  public void onError(Throwable t) {
    LOG.error("Error reported by resource manager", t);
  }

  /**
   * @return the application progress, which is always reported as 0
   */
  @Override
  public float getProgress() {
    return 0;
  }

  /**
   * Starts the application master: captures the security tokens to pass on to containers (when
   * security is enabled), starts the Yarn clients and registers with the resource manager.
   *
   * @throws IOException if the credentials cannot be read or registration fails
   * @throws YarnException if registration with the resource manager fails
   */
  public void start() throws IOException, YarnException {
    if (UserGroupInformation.isSecurityEnabled()) {
      Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
      // Remove the AM -> RM token so that containers cannot access it. This must happen before
      // the credentials are serialized, otherwise the token would still be part of the buffer
      // that is handed to every launched container.
      Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
      while (iter.hasNext()) {
        Token<?> token = iter.next();
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
          iter.remove();
        }
      }
      DataOutputBuffer credentialsBuffer = new DataOutputBuffer();
      credentials.writeTokenStorageToStream(credentialsBuffer);
      mAllTokens = ByteBuffer.wrap(credentialsBuffer.getData(), 0, credentialsBuffer.getLength());
    }
    mNMClient.init(mYarnConf);
    mNMClient.start();

    mRMClient.init(mYarnConf);
    mRMClient.start();

    mYarnClient.init(mYarnConf);
    mYarnClient.start();

    // Register with ResourceManager
    String hostname = NetworkAddressUtils.getLocalHostName();
    mRMClient.registerApplicationMaster(hostname, 0 /* port */, "" /* tracking url */);
    LOG.info("ApplicationMaster registered");
  }

  /**
   * Submits requests for containers until the master and all workers are launched. If a master
   * is already answering at the configured master address, no master container is requested and
   * the workers are pointed at the existing master.
   *
   * @throws Exception if resolving the master address or allocating containers fails
   */
  public void requestAndLaunchContainers() throws Exception {
    if (masterExists()) {
      InetAddress address = InetAddress.getByName(mMasterAddress);
      mMasterContainerNetAddress = address.getHostAddress();
      LOG.info("Found master already running on {}", mMasterAddress);
    } else {
      LOG.info("Configuring master container request.");
      Resource masterResource = Records.newRecord(Resource.class);
      masterResource.setMemory(mMasterMemInMB);
      masterResource.setVirtualCores(mMasterCpu);
      mContainerAllocator = new ContainerAllocator("master", 1, 1, masterResource, mYarnClient,
          mRMClient, mMasterAddress);
      List<Container> masterContainers = mContainerAllocator.allocateContainers();
      launchMasterContainer(Iterables.getOnlyElement(masterContainers));
    }

    // A worker container must hold both the worker process and its ramdisk.
    Resource workerResource = Records.newRecord(Resource.class);
    workerResource.setMemory(mWorkerMemInMB + mRamdiskMemInMB);
    workerResource.setVirtualCores(mWorkerCpu);
    mContainerAllocator = new ContainerAllocator("worker", mNumWorkers, mMaxWorkersPerHost,
        workerResource, mYarnClient, mRMClient);
    List<Container> workerContainers = mContainerAllocator.allocateContainers();
    for (Container container : workerContainers) {
      launchWorkerContainer(container);
    }
    LOG.info("Master and workers are launched");
  }

  /**
   * Blocks until a shutdown is requested through {@link #onShutdownRequest()}.
   *
   * @throws InterruptedException if interrupted while awaiting shutdown
   */
  public void waitForShutdown() throws InterruptedException {
    mApplicationDoneLatch.await();
  }

  /**
   * Shuts down the application master, unregistering it from Yarn and stopping its clients.
   * A failure to unregister is logged and does not prevent the clients from being stopped.
   */
  public void stop() {
    try {
      mRMClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", "");
    } catch (YarnException e) {
      LOG.error("Failed to unregister application", e);
    } catch (IOException e) {
      LOG.error("Failed to unregister application", e);
    }
    mRMClient.stop();
    // TODO(andrew): Think about whether we should stop mNMClient here
    mYarnClient.stop();
  }

  /**
   * Launches the Alluxio master in the given container and records the host of that container
   * as the master address for the workers. A launch failure is logged and not propagated.
   *
   * @param container the container allocated for the master
   */
  private void launchMasterContainer(Container container) {
    String command = YarnUtils.buildCommand(YarnContainerType.ALLUXIO_MASTER);
    try {
      ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
      ctx.setCommands(Lists.newArrayList(command));
      ctx.setLocalResources(setupLocalResources(mResourcePath));
      ctx.setEnvironment(setupMasterEnvironment());
      if (UserGroupInformation.isSecurityEnabled()) {
        // Each container gets its own view of the shared token buffer.
        ctx.setTokens(mAllTokens.duplicate());
      }
      LOG.info("Launching container {} for Alluxio master on {} with master command: {}",
          container.getId(), container.getNodeHttpAddress(), command);
      mNMClient.startContainer(container, ctx);
      String containerUri = container.getNodeHttpAddress(); // in the form of 1.2.3.4:8042
      mMasterContainerNetAddress = containerUri.split(":")[0];
      LOG.info("Master address: {}", mMasterContainerNetAddress);
    } catch (Exception e) {
      LOG.error("Error launching container {}", container.getId(), e);
    }
  }

  /**
   * Launches an Alluxio worker in the given container, configured to connect to the master at
   * {@link #mMasterContainerNetAddress}. A failure to start the container is logged and not
   * propagated.
   *
   * @param container the container allocated for the worker
   */
  private void launchWorkerContainer(Container container) {
    String command = YarnUtils.buildCommand(YarnContainerType.ALLUXIO_WORKER);

    ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
    ctx.setCommands(Lists.newArrayList(command));
    ctx.setLocalResources(setupLocalResources(mResourcePath));
    ctx.setEnvironment(setupWorkerEnvironment(mMasterContainerNetAddress, mRamdiskMemInMB));
    if (UserGroupInformation.isSecurityEnabled()) {
      // Each container gets its own view of the shared token buffer.
      ctx.setTokens(mAllTokens.duplicate());
    }

    try {
      LOG.info("Launching container {} for Alluxio worker on {} with worker command: {}",
          container.getId(), container.getNodeHttpAddress(), command);
      mNMClient.startContainer(container, ctx);
    } catch (Exception e) {
      LOG.error("Error launching container {}", container.getId(), e);
    }
  }

  /**
   * Checks if an Alluxio master node is already running
   * or not on the master address given, by requesting the master version from its REST API.
   *
   * @return true if master exists, false otherwise
   */
  private boolean masterExists() {
    String webPort = Configuration.get(PropertyKey.MASTER_WEB_PORT);
    HttpURLConnection connection = null;
    try {
      URL myURL = new URL("http://" + mMasterAddress + ":" + webPort
          + Constants.REST_API_PREFIX + "/master/version");
      LOG.debug("Checking for master at: {}", myURL);
      connection = (HttpURLConnection) myURL.openConnection();
      connection.setRequestMethod(HttpMethod.GET);
      int resCode = connection.getResponseCode();
      LOG.debug("Response code from master was: {}", resCode);
      return resCode == HttpURLConnection.HTTP_OK;
    } catch (MalformedURLException e) {
      LOG.error("Malformed URL in attempt to check if master is running already", e);
    } catch (IOException e) {
      LOG.debug("No existing master found", e);
    } finally {
      // Release the connection on every path, including when the request itself fails.
      if (connection != null) {
        connection.disconnect();
      }
    }
    return false;
  }

  /**
   * Builds the local resources which Yarn copies to a container before running its command.
   *
   * @param resourcePath the hdfs directory holding the files named in
   *        {@link #LOCAL_RESOURCE_NAMES}
   * @return a map from resource name to the local resource describing it
   * @throws RuntimeException if a resource cannot be set up
   */
  private static Map<String, LocalResource> setupLocalResources(String resourcePath) {
    try {
      Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
      for (String resourceName : LOCAL_RESOURCE_NAMES) {
        localResources.put(resourceName, YarnUtils.createLocalResourceOfFile(
            new YarnConfiguration(), PathUtils.concatPath(resourcePath, resourceName)));
      }
      return localResources;
    } catch (IOException e) {
      throw new RuntimeException("Cannot find resource", e);
    }
  }

  /**
   * @return the environment for the master container
   */
  private static Map<String, String> setupMasterEnvironment() {
    return setupCommonEnvironment();
  }

  /**
   * Builds the environment for a worker container: the common environment plus the master
   * hostname and the worker memory size.
   *
   * @param masterContainerNetAddress the address of the master the worker should connect to
   * @param ramdiskMemInMB the size of the worker ramdisk, in megabytes
   * @return the environment for the worker container
   */
  private static Map<String, String> setupWorkerEnvironment(String masterContainerNetAddress,
      int ramdiskMemInMB) {
    // ALLUXIO_USER is already part of the common environment when security is enabled.
    Map<String, String> env = setupCommonEnvironment();
    env.put("ALLUXIO_MASTER_HOSTNAME", masterContainerNetAddress);
    env.put("ALLUXIO_WORKER_MEMORY_SIZE",
        FormatUtils.getSizeFromBytes((long) ramdiskMemInMB * Constants.MB));
    return env;
  }

  /**
   * Builds the environment shared by master and worker containers. When security is enabled,
   * the short name of the current user is exported as {@code ALLUXIO_USER}; a failure to look
   * the user up is logged and the variable is left unset.
   *
   * @return a new mutable map holding the common environment
   */
  private static Map<String, String> setupCommonEnvironment() {
    // Setup the environment needed for the launch context.
    Map<String, String> env = new HashMap<String, String>();
    env.put("ALLUXIO_HOME", ApplicationConstants.Environment.PWD.$());
    env.put("ALLUXIO_RAM_FOLDER", ApplicationConstants.Environment.LOCAL_DIRS.$());
    if (UserGroupInformation.isSecurityEnabled()) {
      try {
        env.put("ALLUXIO_USER", UserGroupInformation.getCurrentUser().getShortUserName());
      } catch (IOException e) {
        LOG.error("Get user name failed", e);
      }
    }
    return env;
  }
}