/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import junit.framework.Assert;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.Token;
import org.apache.hadoop.yarn.api.records.URL;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.LinuxResourceCalculatorPlugin;
import org.apache.hadoop.yarn.util.ProcfsBasedProcessTree;
import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
import org.apache.hadoop.yarn.util.TestProcfsBasedProcessTree;
import org.junit.Before;
import org.junit.Test;
public class TestContainersMonitor extends BaseContainerManagerTest {
public TestContainersMonitor() throws UnsupportedFileSystemException {
super();
}
static {
LOG = LogFactory.getLog(TestContainersMonitor.class);
}
@Before
public void setup() throws IOException {
conf.setClass(
YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR,
LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class);
super.setup();
}
/**
* Test to verify the check for whether a process tree is over limit or not.
*
* @throws IOException
* if there was a problem setting up the fake procfs directories or
* files.
*/
@Test
public void testProcessTreeLimits() throws IOException {
// set up a dummy proc file system
File procfsRootDir = new File(localDir, "proc");
String[] pids = { "100", "200", "300", "400", "500", "600", "700" };
try {
TestProcfsBasedProcessTree.setupProcfsRootDir(procfsRootDir);
// create pid dirs.
TestProcfsBasedProcessTree.setupPidDirs(procfsRootDir, pids);
// create process infos.
TestProcfsBasedProcessTree.ProcessStatInfo[] procs =
new TestProcfsBasedProcessTree.ProcessStatInfo[7];
// assume pids 100, 500 are in 1 tree
// 200,300,400 are in another
// 600,700 are in a third
procs[0] = new TestProcfsBasedProcessTree.ProcessStatInfo(
new String[] { "100", "proc1", "1", "100", "100", "100000" });
procs[1] = new TestProcfsBasedProcessTree.ProcessStatInfo(
new String[] { "200", "proc2", "1", "200", "200", "200000" });
procs[2] = new TestProcfsBasedProcessTree.ProcessStatInfo(
new String[] { "300", "proc3", "200", "200", "200", "300000" });
procs[3] = new TestProcfsBasedProcessTree.ProcessStatInfo(
new String[] { "400", "proc4", "200", "200", "200", "400000" });
procs[4] = new TestProcfsBasedProcessTree.ProcessStatInfo(
new String[] { "500", "proc5", "100", "100", "100", "1500000" });
procs[5] = new TestProcfsBasedProcessTree.ProcessStatInfo(
new String[] { "600", "proc6", "1", "600", "600", "100000" });
procs[6] = new TestProcfsBasedProcessTree.ProcessStatInfo(
new String[] { "700", "proc7", "600", "600", "600", "100000" });
// write stat files.
TestProcfsBasedProcessTree.writeStatFiles(procfsRootDir, pids, procs);
// vmem limit
long limit = 700000;
ContainersMonitorImpl test = new ContainersMonitorImpl(null, null, null);
// create process trees
// tree rooted at 100 is over limit immediately, as it is
// twice over the mem limit.
ProcfsBasedProcessTree pTree = new ProcfsBasedProcessTree(
"100",
procfsRootDir.getAbsolutePath());
pTree.updateProcessTree();
assertTrue("tree rooted at 100 should be over limit " +
"after first iteration.",
test.isProcessTreeOverLimit(pTree, "dummyId", limit));
// the tree rooted at 200 is initially below limit.
pTree = new ProcfsBasedProcessTree("200",
procfsRootDir.getAbsolutePath());
pTree.updateProcessTree();
assertFalse("tree rooted at 200 shouldn't be over limit " +
"after one iteration.",
test.isProcessTreeOverLimit(pTree, "dummyId", limit));
// second iteration - now the tree has been over limit twice,
// hence it should be declared over limit.
pTree.updateProcessTree();
assertTrue(
"tree rooted at 200 should be over limit after 2 iterations",
test.isProcessTreeOverLimit(pTree, "dummyId", limit));
// the tree rooted at 600 is never over limit.
pTree = new ProcfsBasedProcessTree("600",
procfsRootDir.getAbsolutePath());
pTree.updateProcessTree();
assertFalse("tree rooted at 600 should never be over limit.",
test.isProcessTreeOverLimit(pTree, "dummyId", limit));
// another iteration does not make any difference.
pTree.updateProcessTree();
assertFalse("tree rooted at 600 should never be over limit.",
test.isProcessTreeOverLimit(pTree, "dummyId", limit));
} finally {
FileUtil.fullyDelete(procfsRootDir);
}
}
@Test
public void testContainerKillOnMemoryOverflow() throws IOException,
InterruptedException, YarnException {
if (!ProcfsBasedProcessTree.isAvailable()) {
return;
}
containerManager.start();
File scriptFile = new File(tmpDir, "scriptFile.sh");
PrintWriter fileWriter = new PrintWriter(scriptFile);
File processStartFile =
new File(tmpDir, "start_file.txt").getAbsoluteFile();
fileWriter.write("\numask 0"); // So that start file is readable by the
// test.
fileWriter.write("\necho Hello World! > " + processStartFile);
fileWriter.write("\necho $$ >> " + processStartFile);
fileWriter.write("\nsleep 15");
fileWriter.close();
ContainerLaunchContext containerLaunchContext =
recordFactory.newRecordInstance(ContainerLaunchContext.class);
// ////// Construct the Container-id
ApplicationId appId = ApplicationId.newInstance(0, 0);
ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
ContainerId cId = ContainerId.newInstance(appAttemptId, 0);
int port = 12345;
URL resource_alpha =
ConverterUtils.getYarnUrlFromPath(localFS
.makeQualified(new Path(scriptFile.getAbsolutePath())));
LocalResource rsrc_alpha =
recordFactory.newRecordInstance(LocalResource.class);
rsrc_alpha.setResource(resource_alpha);
rsrc_alpha.setSize(-1);
rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
rsrc_alpha.setType(LocalResourceType.FILE);
rsrc_alpha.setTimestamp(scriptFile.lastModified());
String destinationFile = "dest_file";
Map<String, LocalResource> localResources =
new HashMap<String, LocalResource>();
localResources.put(destinationFile, rsrc_alpha);
containerLaunchContext.setLocalResources(localResources);
List<String> commands = new ArrayList<String>();
commands.add("/bin/bash");
commands.add(scriptFile.getAbsolutePath());
containerLaunchContext.setCommands(commands);
Resource r = BuilderUtils.newResource(8 * 1024 * 1024, 1);
ContainerTokenIdentifier containerIdentifier =
new ContainerTokenIdentifier(cId, context.getNodeId().toString(), user,
r, System.currentTimeMillis() + 120000, 123, DUMMY_RM_IDENTIFIER);
Token containerToken =
BuilderUtils.newContainerToken(context.getNodeId(),
containerManager.getContext().getContainerTokenSecretManager()
.createPassword(containerIdentifier), containerIdentifier);
StartContainerRequest scRequest =
StartContainerRequest.newInstance(containerLaunchContext,
containerToken);
List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
list.add(scRequest);
StartContainersRequest allRequests =
StartContainersRequest.newInstance(list);
containerManager.startContainers(allRequests);
int timeoutSecs = 0;
while (!processStartFile.exists() && timeoutSecs++ < 20) {
Thread.sleep(1000);
LOG.info("Waiting for process start-file to be created");
}
Assert.assertTrue("ProcessStartFile doesn't exist!",
processStartFile.exists());
// Now verify the contents of the file
BufferedReader reader =
new BufferedReader(new FileReader(processStartFile));
Assert.assertEquals("Hello World!", reader.readLine());
// Get the pid of the process
String pid = reader.readLine().trim();
// No more lines
Assert.assertEquals(null, reader.readLine());
BaseContainerManagerTest.waitForContainerState(containerManager, cId,
ContainerState.COMPLETE, 60);
List<ContainerId> containerIds = new ArrayList<ContainerId>();
containerIds.add(cId);
GetContainerStatusesRequest gcsRequest =
GetContainerStatusesRequest.newInstance(containerIds);
ContainerStatus containerStatus =
containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
Assert.assertEquals(ExitCode.TERMINATED.getExitCode(),
containerStatus.getExitStatus());
String expectedMsgPattern =
"Container \\[pid=" + pid + ",containerID=" + cId
+ "\\] is running beyond virtual memory limits. Current usage: "
+ "[0-9.]+ ?[KMGTPE]?B of [0-9.]+ ?[KMGTPE]?B physical memory used; "
+ "[0-9.]+ ?[KMGTPE]?B of [0-9.]+ ?[KMGTPE]?B virtual memory used. "
+ "Killing container.\nDump of the process-tree for "
+ cId + " :\n";
Pattern pat = Pattern.compile(expectedMsgPattern);
Assert.assertEquals("Expected message pattern is: " + expectedMsgPattern
+ "\n\nObserved message is: " + containerStatus.getDiagnostics(),
true, pat.matcher(containerStatus.getDiagnostics()).find());
// Assert that the process is not alive anymore
Assert.assertFalse("Process is still alive!",
exec.signalContainer(user,
pid, Signal.NULL));
}
@Test(timeout = 20000)
public void testContainerMonitorMemFlags() {
ContainersMonitor cm = null;
long expPmem = 8192 * 1024 * 1024l;
long expVmem = (long) (expPmem * 2.1f);
cm = new ContainersMonitorImpl(mock(ContainerExecutor.class),
mock(AsyncDispatcher.class), mock(Context.class));
cm.init(getConfForCM(false, false, 8192, 2.1f));
assertEquals(expPmem, cm.getPmemAllocatedForContainers());
assertEquals(expVmem, cm.getVmemAllocatedForContainers());
assertEquals(false, cm.isPmemCheckEnabled());
assertEquals(false, cm.isVmemCheckEnabled());
cm = new ContainersMonitorImpl(mock(ContainerExecutor.class),
mock(AsyncDispatcher.class), mock(Context.class));
cm.init(getConfForCM(true, false, 8192, 2.1f));
assertEquals(expPmem, cm.getPmemAllocatedForContainers());
assertEquals(expVmem, cm.getVmemAllocatedForContainers());
assertEquals(true, cm.isPmemCheckEnabled());
assertEquals(false, cm.isVmemCheckEnabled());
cm = new ContainersMonitorImpl(mock(ContainerExecutor.class),
mock(AsyncDispatcher.class), mock(Context.class));
cm.init(getConfForCM(true, true, 8192, 2.1f));
assertEquals(expPmem, cm.getPmemAllocatedForContainers());
assertEquals(expVmem, cm.getVmemAllocatedForContainers());
assertEquals(true, cm.isPmemCheckEnabled());
assertEquals(true, cm.isVmemCheckEnabled());
cm = new ContainersMonitorImpl(mock(ContainerExecutor.class),
mock(AsyncDispatcher.class), mock(Context.class));
cm.init(getConfForCM(false, true, 8192, 2.1f));
assertEquals(expPmem, cm.getPmemAllocatedForContainers());
assertEquals(expVmem, cm.getVmemAllocatedForContainers());
assertEquals(false, cm.isPmemCheckEnabled());
assertEquals(true, cm.isVmemCheckEnabled());
}
private YarnConfiguration getConfForCM(boolean pMemEnabled,
boolean vMemEnabled, int nmPmem, float vMemToPMemRatio) {
YarnConfiguration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.NM_PMEM_MB, nmPmem);
conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, pMemEnabled);
conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, vMemEnabled);
conf.setFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO, vMemToPMemRatio);
return conf;
}
}