/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package oz.hadoop.yarn.test.cluster;
import java.io.File;
import java.io.IOException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.api.ResourceTracker;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService;
import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
/**
* COPIED FROM APACHE with minor modification mainly to allow bootstrapping from
* provided configuration file.
*
* Embedded Yarn minicluster for testcases that need to interact with a cluster.
* <p/>
* In a real cluster, resource request matching is done using the hostname, and
* by default Yarn minicluster works in the exact same way as a real cluster.
* <p/>
* If a testcase needs to use multiple nodes and exercise resource request
* matching to a specific node, then the property
* {@YarnConfiguration.RM_SCHEDULER_INCLUDE_PORT_IN_NODE_NAME
*
* } should be set
* <code>true</code> in the configuration used to initialize the minicluster.
* <p/>
* With this property set to <code>true</code>, the matching will be done using
* the <code>hostname:port</code> of the namenodes. In such case, the AM must do
* resource request using <code>hostname:port</code> as the location.
*/
public class MiniYarnCluster extends CompositeService {
private static final Log logger = LogFactory.getLog(MiniYarnCluster.class);
// temp fix until metrics system can auto-detect itself running in unit
// test:
static {
DefaultMetricsSystem.setMiniClusterMode(true);
}
private final ExecutorService serviceStartExecutor;
private final ScheduledExecutorService serviceStartMonitoringExecutor;
private final ResourceManager resourceManager;
private final NodeManager[] nodeManagers;
private final ConcurrentMap<ApplicationAttemptId, Long> appMasters;
// Number of nm-local-dirs per nodemanager
private final int numLocalDirs;
// Number of nm-log-dirs per nodemanager
private final int numLogDirs;
private File testWorkDir;
private String[] rmIds;
/**
*
* @param clusterName
* @param numNodeManagers
* @param numLocalDirs
* @param numLogDirs
*/
public MiniYarnCluster(String clusterName, int numNodeManagers) {
super(clusterName.replace("$", ""));
this.resourceManager = new UnsecureResourceManager();
this.serviceStartExecutor = Executors.newCachedThreadPool();
this.serviceStartMonitoringExecutor = Executors.newScheduledThreadPool(Runtime.getRuntime().availableProcessors()/2);
this.appMasters = new ConcurrentHashMap<ApplicationAttemptId, Long>(16, 0.75f, 2);
this.numLocalDirs = 1;
this.numLogDirs = 1;
this.prepareScriptExecutionEnv(clusterName);
nodeManagers = new NodeManager[numNodeManagers];
}
/**
*
*/
@Override
public void serviceInit(Configuration conf) throws Exception {
conf.setBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, true);
this.addService(new ResourceManagerWrapper(0));
for (int index = 0; index < this.nodeManagers.length; index++) {
this.nodeManagers[index] = new ShortCircuitedNodeManager();
this.addService(new NodeManagerWrapper(index));
}
super.serviceInit(conf instanceof YarnConfiguration ? conf : new YarnConfiguration(conf));
}
/**
*
* @return
*/
public File getTestWorkDir() {
return this.testWorkDir;
}
/**
*
*/
@Override
public void stop() {
// TODO add await termination logic
this.serviceStartExecutor.shutdown();
this.serviceStartMonitoringExecutor.shutdown();
}
/**
*
* @return
*/
protected ResourceManager getResourceManager() {
return resourceManager;
}
protected NodeManager getNodeManager(int index) {
return this.nodeManagers[index];
}
/**
*
* @param service
* @throws Exception
*/
private void startService(final CompositeService service) {
this.serviceStartExecutor.execute(new Runnable() {
@Override
public void run() {
service.start();
}
});
CountDownLatch completionLatch = new CountDownLatch(1);
ServiceStartMonitor serviceStartMonitor = new ServiceStartMonitor(service, 1000, 60, completionLatch);
this.serviceStartMonitoringExecutor.execute(serviceStartMonitor);
try {
completionLatch.await();
if (service.getServiceState() != STATE.STARTED) {
throw new IllegalStateException("Service " + service + " failed to start");
}
super.serviceStart();
}
catch (InterruptedException e) {
logger.warn("Thread monitoring service startup for service " + service + " was interrupted", e);
Thread.currentThread().interrupt();
throw new IllegalStateException(e);
}
catch (Exception e) {
throw new IllegalStateException("Service " + service + " failed to start with exception", e);
}
}
/**
*
* @param clusterName
*/
private void prepareScriptExecutionEnv(String clusterName) {
String testSubDir = clusterName.replace("$", "");
File targetWorkDir = new File("target", testSubDir);
try {
FileContext.getLocalFSFileContext().delete(new Path(targetWorkDir.getAbsolutePath()), true);
}
catch (Exception e) {
logger.warn("COULD NOT CLEANUP", e);
throw new YarnRuntimeException("could not cleanup test dir: " + e, e);
}
if (Shell.WINDOWS) {
// The test working directory can exceed the maximum path length
// supported
// by some Windows APIs and cmd.exe (260 characters). To work around
// this,
// create a symlink in temporary storage with a much shorter path,
// targeting the full path to the test working directory. Then, use
// the
// symlink as the test working directory.
String targetPath = targetWorkDir.getAbsolutePath();
File link = new File(System.getProperty("java.io.tmpdir"), String.valueOf(System.currentTimeMillis()));
String linkPath = link.getAbsolutePath();
try {
FileContext.getLocalFSFileContext().delete(new Path(linkPath), true);
}
catch (IOException e) {
throw new YarnRuntimeException("could not cleanup symlink: " + linkPath, e);
}
// Guarantee target exists before creating symlink.
targetWorkDir.mkdirs();
ShellCommandExecutor shexec = new ShellCommandExecutor(
Shell.getSymlinkCommand(targetPath, linkPath));
try {
shexec.execute();
}
catch (IOException e) {
throw new YarnRuntimeException(String.format("failed to create symlink from %s to %s, shell output: %s",
linkPath, targetPath, shexec.getOutput()), e);
}
this.testWorkDir = link;
}
else {
this.testWorkDir = targetWorkDir;
}
}
/**
*
*/
private class UnsecureResourceManager extends ResourceManager {
@Override
protected void doSecureLogin() throws IOException {
// Don't try to login using keytab in the testcases.
}
}
/**
*
*/
private class ResourceManagerWrapper extends AbstractService {
private final int index;
public ResourceManagerWrapper(int i) {
super(ResourceManagerWrapper.class.getName() + "_" + i);
this.index = i;
}
@Override
protected synchronized void serviceInit(Configuration conf) throws Exception {
initResourceManager(this.index, conf);
super.serviceInit(conf);
}
@Override
protected synchronized void serviceStart() throws Exception {
MiniYarnCluster.this.startService(MiniYarnCluster.this.resourceManager);
logger.info("MiniYARN ResourceManager address: " + getConfig().get(YarnConfiguration.RM_ADDRESS));
logger.info("MiniYARN ResourceManager web address: " + WebAppUtils.getRMWebAppURLWithoutScheme(getConfig()));
}
private void waitForAppMastersToFinish(long timeoutMillis) throws InterruptedException {
long started = System.currentTimeMillis();
synchronized (MiniYarnCluster.this.appMasters) {
while (!MiniYarnCluster.this.appMasters.isEmpty() && System.currentTimeMillis() - started < timeoutMillis) {
MiniYarnCluster.this.appMasters.wait(1000);
}
}
if (!MiniYarnCluster.this.appMasters.isEmpty()) {
logger.warn("Stopping RM while some app masters are still alive");
}
}
@Override
protected synchronized void serviceStop() throws Exception {
if (MiniYarnCluster.this.resourceManager != null) {
this.waitForAppMastersToFinish(5000);
MiniYarnCluster.this.resourceManager.stop();
}
if (Shell.WINDOWS) {
// On Windows, clean up the short temporary symlink that was
// created to
// work around path length limitation.
String testWorkDirPath = testWorkDir.getAbsolutePath();
try {
FileContext.getLocalFSFileContext().delete(
new Path(testWorkDirPath), true);
} catch (IOException e) {
logger.warn("could not cleanup symlink: "
+ testWorkDir.getAbsolutePath());
}
}
super.serviceStop();
}
/**
*
* @param index
* @param conf
*/
private synchronized void initResourceManager(int index, Configuration conf) {
if (HAUtil.isHAEnabled(conf)) {
conf.set(YarnConfiguration.RM_HA_ID, MiniYarnCluster.this.rmIds[index]);
}
MiniYarnCluster.this.resourceManager.init(conf);
MiniYarnCluster.this.resourceManager.getRMContext().getDispatcher().register(RMAppAttemptEventType.class,
new EventHandler<RMAppAttemptEvent>() {
@Override
public void handle(RMAppAttemptEvent event) {
if (event instanceof RMAppAttemptRegistrationEvent) {
MiniYarnCluster.this.appMasters.put(event.getApplicationAttemptId(), event.getTimestamp());
}
else if (event instanceof RMAppAttemptUnregistrationEvent) {
MiniYarnCluster.this.appMasters.remove(event.getApplicationAttemptId());
}
}
});
}
}
/**
*
*/
private class NodeManagerWrapper extends AbstractService {
int index = 0;
public NodeManagerWrapper(int i) {
super(NodeManagerWrapper.class.getName() + "_" + i);
index = i;
}
@Override
protected synchronized void serviceInit(Configuration conf) throws Exception {
Configuration config = new YarnConfiguration(conf);
// create nm-local-dirs and configure them for the nodemanager
String localDirsString = prepareDirs("local", numLocalDirs);
config.set(YarnConfiguration.NM_LOCAL_DIRS, localDirsString);
// create nm-log-dirs and configure them for the nodemanager
String logDirsString = prepareDirs("log", numLogDirs);
config.set(YarnConfiguration.NM_LOG_DIRS, logDirsString);
File remoteLogDir = new File(MiniYarnCluster.this.testWorkDir,
MiniYarnCluster.this.getName() + "-remoteLogDir-nm-" + index);
remoteLogDir.mkdir();
if (!remoteLogDir.exists()){
throw new IllegalStateException("Failed to make " + remoteLogDir.getAbsolutePath() + " directory");
}
config.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogDir.getAbsolutePath());
config.setInt(YarnConfiguration.NM_PMEM_MB, 4 * 1024);
config.set(YarnConfiguration.NM_ADDRESS, this.getConfig().get("yarn.resourcemanager.hostname") + ":0");
config.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, this.getConfig().get("yarn.resourcemanager.hostname") + ":0");
WebAppUtils.setNMWebAppHostNameAndPort(config, this.getConfig().get("yarn.resourcemanager.hostname"), 0);
// Disable resource checks by default
if (!config.getBoolean(YarnConfiguration.YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING,
YarnConfiguration.DEFAULT_YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) {
config.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
config.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
}
logger.info("Starting NM: " + index);
nodeManagers[index].init(config);
super.serviceInit(config);
}
/**
* Create local/log directories
*
* @param dirType
* type of directories i.e. local dirs or log dirs
* @param numDirs
* number of directories
* @return the created directories as a comma delimited String
*/
private String prepareDirs(String dirType, int numDirs) {
File[] dirs = new File[numDirs];
String dirsString = "";
for (int i = 0; i < numDirs; i++) {
dirs[i] = new File(MiniYarnCluster.this.testWorkDir, MiniYarnCluster.this.getName()
+ "-" + dirType + "Dir-nm-" + this.index + "_" + i);
dirs[i].mkdirs();
logger.info("Created " + dirType + "Dir in " + dirs[i].getAbsolutePath());
String delimiter = (i > 0) ? "," : "";
dirsString = dirsString.concat(delimiter + dirs[i].getAbsolutePath());
}
return dirsString;
}
@Override
protected synchronized void serviceStart() throws Exception {
MiniYarnCluster.this.startService(MiniYarnCluster.this.nodeManagers[this.index]);
}
@Override
protected synchronized void serviceStop() throws Exception {
if (MiniYarnCluster.this.nodeManagers[this.index] != null) {
MiniYarnCluster.this.nodeManagers[this.index].stop();
}
super.serviceStop();
}
}
/**
*
*/
private class ServiceStartMonitor implements Runnable {
private final int waitIntervalMilis;
private final int attempts;
private final CompositeService service;
private final CountDownLatch completionLatch;
private int attemptCounter;
public ServiceStartMonitor(CompositeService service, int waitIntervalMilis, int attempts, CountDownLatch completionLatch){
this.service = service;
this.attempts = attempts;
this.waitIntervalMilis = waitIntervalMilis;
this.completionLatch = completionLatch;
}
@Override
public void run() {
STATE state = this.service.getServiceState();
if (state == STATE.INITED ){
logger.info("Waiting for Service " + this.service + " to start...");
if (this.attemptCounter < this.attempts){
MiniYarnCluster.this.serviceStartMonitoringExecutor.schedule(this, this.waitIntervalMilis, TimeUnit.MILLISECONDS);
}
this.attemptCounter++;
}
else {
this.completionLatch.countDown();
}
}
}
/**
*
*/
private class ShortCircuitedNodeManager extends NodeManager {
@Override
protected NodeStatusUpdater createNodeStatusUpdater(Context context,
Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
return new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics) {
@Override
protected ResourceTracker getRMClient() {
final ResourceTrackerService rt = MiniYarnCluster.this.resourceManager.getResourceTrackerService();
// For in-process communication without RPC
return new ResourceTracker() {
@Override
public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) throws YarnException, IOException {
NodeHeartbeatResponse response;
try {
response = rt.nodeHeartbeat(request);
}
catch (YarnException e) {
logger.info("Exception in heartbeat from node "
+ request.getNodeStatus().getNodeId(), e);
throw e;
}
return response;
}
@Override
public RegisterNodeManagerResponse registerNodeManager(RegisterNodeManagerRequest request) throws YarnException, IOException {
RegisterNodeManagerResponse response;
try {
response = rt.registerNodeManager(request);
}
catch (YarnException e) {
logger.info("Exception in node registration from "
+ request.getNodeId().toString(), e);
throw e;
}
return response;
}
};
}
@Override
protected void stopRMProxy() {
}
};
}
}
}