/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.yarn.server.nodemanager; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.util.TimerTask; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; import org.apache.hadoop.yarn.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; public class TestNodeHealthService { private static volatile Log LOG = LogFactory .getLog(TestNodeHealthService.class); protected static File testRootDir = new File("target", TestNodeHealthService.class.getName() + "-localDir").getAbsoluteFile(); final static File nodeHealthConfigFile = new File(testRootDir, "modified-mapred-site.xml"); private File nodeHealthscriptFile = new File(testRootDir, "failingscript.sh"); @Before public void setup() { testRootDir.mkdirs(); } @After public void tearDown() throws Exception { if (testRootDir.exists()) { FileContext.getLocalFSFileContext().delete( new Path(testRootDir.getAbsolutePath()), true); } } private Configuration getConfForNodeHealthScript() { Configuration conf = new Configuration(); conf.set(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH, nodeHealthscriptFile.getAbsolutePath()); conf.setLong(YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS, 500); conf.setLong( YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS, 1000); return conf; } private void writeNodeHealthScriptFile(String scriptStr, boolean setExecutable) throws IOException { PrintWriter pw = new PrintWriter(new FileOutputStream(nodeHealthscriptFile)); pw.println(scriptStr); pw.flush(); pw.close(); nodeHealthscriptFile.setExecutable(setExecutable); } @Test public void testNodeHealthScriptShouldRun() throws IOException { // Node health script should not start if there is no property called // node health script path. Assert.assertFalse("By default Health script should not have started", NodeHealthScriptRunner.shouldRun(new Configuration())); Configuration conf = getConfForNodeHealthScript(); // Node health script should not start if the node health script does not // exists Assert.assertFalse("Node health script should start", NodeHealthScriptRunner.shouldRun(conf)); // Create script path. conf.writeXml(new FileOutputStream(nodeHealthConfigFile)); conf.addResource(nodeHealthConfigFile.getName()); writeNodeHealthScriptFile("", false); // Node health script should not start if the node health script is not // executable. Assert.assertFalse("Node health script should start", NodeHealthScriptRunner.shouldRun(conf)); writeNodeHealthScriptFile("", true); Assert.assertTrue("Node health script should start", NodeHealthScriptRunner.shouldRun(conf)); } private void setHealthStatus(NodeHealthStatus healthStatus, boolean isHealthy, String healthReport, long lastHealthReportTime) { healthStatus.setHealthReport(healthReport); healthStatus.setIsNodeHealthy(isHealthy); healthStatus.setLastHealthReportTime(lastHealthReportTime); } @Test public void testNodeHealthScript() throws Exception { RecordFactory factory = RecordFactoryProvider.getRecordFactory(null); NodeHealthStatus healthStatus = factory.newRecordInstance(NodeHealthStatus.class); String errorScript = "echo ERROR\n echo \"Tracker not healthy\""; String normalScript = "echo \"I am all fine\""; String timeOutScript = "sleep 4\n echo\"I am fine\""; Configuration conf = getConfForNodeHealthScript(); conf.writeXml(new FileOutputStream(nodeHealthConfigFile)); conf.addResource(nodeHealthConfigFile.getName()); writeNodeHealthScriptFile(normalScript, true); NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService(); nodeHealthChecker.init(conf); NodeHealthScriptRunner nodeHealthScriptRunner = nodeHealthChecker.getNodeHealthScriptRunner(); TimerTask timerTask = nodeHealthScriptRunner.getTimerTask(); timerTask.run(); setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking initial healthy condition"); // Check proper report conditions. Assert.assertTrue("Node health status reported unhealthy", healthStatus .getIsNodeHealthy()); Assert.assertTrue("Node health status reported unhealthy", healthStatus .getHealthReport().equals(nodeHealthChecker.getHealthReport())); // write out error file. // Healthy to unhealthy transition writeNodeHealthScriptFile(errorScript, true); // Run timer timerTask.run(); // update health status setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking Healthy--->Unhealthy"); Assert.assertFalse("Node health status reported healthy", healthStatus .getIsNodeHealthy()); Assert.assertTrue("Node health status reported healthy", healthStatus .getHealthReport().equals(nodeHealthChecker.getHealthReport())); // Check unhealthy to healthy transitions. writeNodeHealthScriptFile(normalScript, true); timerTask.run(); setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking UnHealthy--->healthy"); // Check proper report conditions. Assert.assertTrue("Node health status reported unhealthy", healthStatus .getIsNodeHealthy()); Assert.assertTrue("Node health status reported unhealthy", healthStatus .getHealthReport().equals(nodeHealthChecker.getHealthReport())); // Healthy to timeout transition. writeNodeHealthScriptFile(timeOutScript, true); timerTask.run(); setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(), nodeHealthChecker.getHealthReport(), nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking Healthy--->timeout"); Assert.assertFalse("Node health status reported healthy even after timeout", healthStatus.getIsNodeHealthy()); Assert.assertTrue("Node script time out message not propogated", healthStatus.getHealthReport().equals( NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG + NodeHealthCheckerService.SEPARATOR + nodeHealthChecker.getDiskHandler().getDisksHealthReport())); } }