/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ambari.server.configuration; import static org.apache.ambari.server.agent.DummyHeartbeatConstants.DATANODE; import static org.apache.ambari.server.agent.DummyHeartbeatConstants.DummyHostname1; import static org.apache.ambari.server.agent.DummyHeartbeatConstants.HDFS; import static org.apache.ambari.server.agent.DummyHeartbeatConstants.NAMENODE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import org.apache.ambari.server.H2DatabaseCleaner; import org.apache.ambari.server.agent.HeartbeatTestHelper; import org.apache.ambari.server.agent.RecoveryConfig; import org.apache.ambari.server.agent.RecoveryConfigHelper; import org.apache.ambari.server.events.publishers.AmbariEventPublisher; import org.apache.ambari.server.orm.GuiceJpaInitializer; import org.apache.ambari.server.orm.InMemoryDefaultTestModule; import org.apache.ambari.server.state.Cluster; import org.apache.ambari.server.state.Config; import org.apache.ambari.server.state.MaintenanceState; import 
org.apache.ambari.server.state.Service;
import org.apache.ambari.server.state.State;
import org.apache.ambari.server.utils.EventBusSynchronizer;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import com.google.common.eventbus.EventBus;
import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;

/**
 * Tests {@link RecoveryConfigHelper}: default values, cluster-derived values, and
 * staleness detection of the recovery configuration when components are
 * installed/uninstalled, recovery is toggled, maintenance mode changes, or
 * cluster-env is updated.
 */
public class RecoveryConfigHelperTest {
  private Injector injector;

  private InMemoryDefaultTestModule module;

  @Inject
  private HeartbeatTestHelper heartbeatTestHelper;

  @Inject
  private RecoveryConfigHelper recoveryConfigHelper;

  @Inject
  private AmbariEventPublisher eventPublisher;

  /**
   * Creates the Guice injector, injects test members, and registers the
   * {@link RecoveryConfigHelper} on a synchronous event bus.
   *
   * @throws Exception on initialization failure
   */
  @Before
  public void setup() throws Exception {
    module = HeartbeatTestHelper.getTestModule();
    injector = Guice.createInjector(module);
    injector.getInstance(GuiceJpaInitializer.class);
    injector.injectMembers(this);

    // Synchronize the publisher (AmbariEventPublisher) and subscriber (RecoveryConfigHelper),
    // so that the events get handled as soon as they are published, allowing the tests to
    // verify the methods under test.
    EventBus synchronizedBus = EventBusSynchronizer.synchronizeAmbariEventPublisher(injector);
    synchronizedBus.register(recoveryConfigHelper);
  }

  /**
   * Tears down the in-memory database and persistence service.
   *
   * @throws Exception on cleanup failure
   */
  @After
  public void teardown() throws Exception {
    H2DatabaseCleaner.clearDatabaseAndStopPersistenceService(injector);
  }

  /**
   * Test default cluster-env properties for recovery.
   *
   * @throws Exception
   */
  @Test
  public void testRecoveryConfigDefaultValues() throws Exception {
    RecoveryConfig recoveryConfig = recoveryConfigHelper.getDefaultRecoveryConfig();
    // JUnit convention: expected value first, actual second.
    assertEquals(RecoveryConfigHelper.RECOVERY_LIFETIME_MAX_COUNT_DEFAULT, recoveryConfig.getMaxLifetimeCount());
    assertEquals(RecoveryConfigHelper.RECOVERY_MAX_COUNT_DEFAULT, recoveryConfig.getMaxCount());
    assertEquals(RecoveryConfigHelper.RECOVERY_RETRY_GAP_DEFAULT, recoveryConfig.getRetryGap());
    assertEquals(RecoveryConfigHelper.RECOVERY_WINDOW_IN_MIN_DEFAULT, recoveryConfig.getWindowInMinutes());
    assertEquals(RecoveryConfigHelper.RECOVERY_TYPE_DEFAULT, recoveryConfig.getType());
    assertNull(recoveryConfig.getEnabledComponents());
  }

  /**
   * Test cluster-env properties from a dummy cluster
   *
   * @throws Exception
   */
  @Test
  public void testRecoveryConfigValues() throws Exception {
    String hostname = "hostname1";
    Cluster cluster = getDummyCluster(hostname);
    RecoveryConfig recoveryConfig = recoveryConfigHelper.getRecoveryConfig(cluster.getClusterName(), hostname);
    // Expected values mirror the properties set in getDummyCluster(Set).
    assertEquals("10", recoveryConfig.getMaxLifetimeCount());
    assertEquals("4", recoveryConfig.getMaxCount());
    assertEquals("2", recoveryConfig.getRetryGap());
    assertEquals("23", recoveryConfig.getWindowInMinutes());
    assertEquals("AUTO_START", recoveryConfig.getType());
    assertNotNull(recoveryConfig.getEnabledComponents());
  }

  /**
   * Install a component with auto start enabled. Verify that the old config was
   * invalidated.
   *
   * @throws Exception
   */
  @Test
  public void testServiceComponentInstalled() throws Exception {
    Cluster cluster = heartbeatTestHelper.getDummyCluster();
    Service hdfs = cluster.addService(HDFS);

    hdfs.addServiceComponent(DATANODE).setRecoveryEnabled(true);
    hdfs.getServiceComponent(DATANODE).addServiceComponentHost(DummyHostname1);

    // Get the recovery configuration
    RecoveryConfig recoveryConfig = recoveryConfigHelper.getRecoveryConfig(cluster.getClusterName(), DummyHostname1);
    assertEquals("DATANODE", recoveryConfig.getEnabledComponents());

    // Install HDFS::NAMENODE to trigger a component installed event
    hdfs.addServiceComponent(NAMENODE).setRecoveryEnabled(true);
    hdfs.getServiceComponent(NAMENODE).addServiceComponentHost(DummyHostname1);

    // Verify that the config is stale now
    boolean isConfigStale = recoveryConfigHelper.isConfigStale(cluster.getClusterName(), DummyHostname1,
        recoveryConfig.getRecoveryTimestamp());
    assertTrue(isConfigStale);

    // Verify the new config
    recoveryConfig = recoveryConfigHelper.getRecoveryConfig(cluster.getClusterName(), DummyHostname1);
    assertEquals("DATANODE,NAMENODE", recoveryConfig.getEnabledComponents());
  }

  /**
   * Uninstall a component and verify that the config is stale.
   *
   * @throws Exception
   */
  @Test
  public void testServiceComponentUninstalled() throws Exception {
    Cluster cluster = heartbeatTestHelper.getDummyCluster();
    Service hdfs = cluster.addService(HDFS);

    hdfs.addServiceComponent(DATANODE).setRecoveryEnabled(true);
    hdfs.getServiceComponent(DATANODE).addServiceComponentHost(DummyHostname1);

    hdfs.addServiceComponent(NAMENODE).setRecoveryEnabled(true);
    hdfs.getServiceComponent(NAMENODE).addServiceComponentHost(DummyHostname1);

    // Get the recovery configuration
    RecoveryConfig recoveryConfig = recoveryConfigHelper.getRecoveryConfig(cluster.getClusterName(), DummyHostname1);
    assertEquals("DATANODE,NAMENODE", recoveryConfig.getEnabledComponents());

    // Uninstall HDFS::DATANODE from host1
    hdfs.getServiceComponent(DATANODE).getServiceComponentHost(DummyHostname1).delete();

    // Verify that the config is stale
    boolean isConfigStale = recoveryConfigHelper.isConfigStale(cluster.getClusterName(), DummyHostname1,
        recoveryConfig.getRecoveryTimestamp());
    assertTrue(isConfigStale);

    // Verify the new config
    recoveryConfig = recoveryConfigHelper.getRecoveryConfig(cluster.getClusterName(), DummyHostname1);
    assertEquals("NAMENODE", recoveryConfig.getEnabledComponents());
  }

  /**
   * Disable cluster level auto start and verify that the config is stale.
   *
   * @throws Exception
   */
  @Test
  public void testClusterEnvConfigChanged() throws Exception {
    Cluster cluster = heartbeatTestHelper.getDummyCluster();
    Service hdfs = cluster.addService(HDFS);

    hdfs.addServiceComponent(DATANODE).setRecoveryEnabled(true);
    hdfs.getServiceComponent(DATANODE).addServiceComponentHost(DummyHostname1);
    hdfs.getServiceComponent(DATANODE).getServiceComponentHost(DummyHostname1).setDesiredState(State.INSTALLED);

    // Get the recovery configuration
    RecoveryConfig recoveryConfig = recoveryConfigHelper.getRecoveryConfig(cluster.getClusterName(), DummyHostname1);
    assertEquals("DATANODE", recoveryConfig.getEnabledComponents());

    // Get cluster-env config and turn off recovery for the cluster
    Config config = cluster.getDesiredConfigByType("cluster-env");
    Map<String, String> updatedProperties = new HashMap<>();
    updatedProperties.put(RecoveryConfigHelper.RECOVERY_ENABLED_KEY, "false");
    config.updateProperties(updatedProperties);
    config.save();

    // Recovery config should be stale because of the above change.
    boolean isConfigStale = recoveryConfigHelper.isConfigStale(cluster.getClusterName(), DummyHostname1,
        recoveryConfig.getRecoveryTimestamp());
    assertTrue(isConfigStale);

    // Get the recovery configuration again and verify that there are no components to be auto started
    recoveryConfig = recoveryConfigHelper.getRecoveryConfig(cluster.getClusterName(), DummyHostname1);
    assertNull(recoveryConfig.getEnabledComponents());
  }

  /**
   * Change the maintenance mode of a service component host and verify that
   * config is stale.
   *
   * @throws Exception
   */
  @Test
  public void testMaintenanceModeChanged() throws Exception {
    Cluster cluster = heartbeatTestHelper.getDummyCluster();
    Service hdfs = cluster.addService(HDFS);

    hdfs.addServiceComponent(DATANODE).setRecoveryEnabled(true);
    hdfs.getServiceComponent(DATANODE).addServiceComponentHost(DummyHostname1);

    hdfs.addServiceComponent(NAMENODE).setRecoveryEnabled(true);
    hdfs.getServiceComponent(NAMENODE).addServiceComponentHost(DummyHostname1);

    // Get the recovery configuration
    RecoveryConfig recoveryConfig = recoveryConfigHelper.getRecoveryConfig(cluster.getClusterName(), DummyHostname1);
    assertEquals("DATANODE,NAMENODE", recoveryConfig.getEnabledComponents());

    hdfs.getServiceComponent(DATANODE).getServiceComponentHost(DummyHostname1).setMaintenanceState(MaintenanceState.ON);

    // We need a new config
    boolean isConfigStale = recoveryConfigHelper.isConfigStale(cluster.getClusterName(), DummyHostname1,
        recoveryConfig.getRecoveryTimestamp());
    assertTrue(isConfigStale);

    // Only NAMENODE is left
    recoveryConfig = recoveryConfigHelper.getRecoveryConfig(cluster.getClusterName(), DummyHostname1);
    assertEquals("NAMENODE", recoveryConfig.getEnabledComponents());
  }

  /**
   * Disable recovery on a component and verify that the config is stale.
   *
   * @throws Exception
   */
  @Test
  public void testServiceComponentRecoveryChanged() throws Exception {
    Cluster cluster = heartbeatTestHelper.getDummyCluster();
    Service hdfs = cluster.addService(HDFS);

    hdfs.addServiceComponent(DATANODE).setRecoveryEnabled(true);
    hdfs.getServiceComponent(DATANODE).addServiceComponentHost(DummyHostname1);

    // Get the recovery configuration
    RecoveryConfig recoveryConfig = recoveryConfigHelper.getRecoveryConfig(cluster.getClusterName(), DummyHostname1);
    assertEquals("DATANODE", recoveryConfig.getEnabledComponents());

    // Turn off auto start for HDFS::DATANODE
    hdfs.getServiceComponent(DATANODE).setRecoveryEnabled(false);

    // Config should be stale now
    boolean isConfigStale = recoveryConfigHelper.isConfigStale(cluster.getClusterName(), DummyHostname1,
        recoveryConfig.getRecoveryTimestamp());
    assertTrue(isConfigStale);

    // Get the latest config. DATANODE should not be present.
    recoveryConfig = recoveryConfigHelper.getRecoveryConfig(cluster.getClusterName(), DummyHostname1);
    assertEquals("", recoveryConfig.getEnabledComponents());
  }

  /**
   * Test a cluster with two hosts. The first host gets the configuration during
   * registration. The second host gets it during it's first heartbeat.
   *
   * @throws Exception
   */
  @Test
  public void testMultiNodeCluster() throws Exception {
    Set<String> hostNames = new HashSet<>();
    hostNames.add("Host1");
    hostNames.add("Host2");

    // Create a cluster with 2 hosts
    Cluster cluster = getDummyCluster(hostNames);

    // Add HDFS service with DATANODE component to the cluster
    Service hdfs = cluster.addService(HDFS);
    hdfs.addServiceComponent(DATANODE).setRecoveryEnabled(true);

    // Add SCH to Host1 and Host2
    hdfs.getServiceComponent(DATANODE).addServiceComponentHost("Host1");
    hdfs.getServiceComponent(DATANODE).addServiceComponentHost("Host2");

    // Simulate registration for Host1: Get the recovery configuration right away for Host1.
    // It makes an entry for cluster name and Host1 in the timestamp dictionary.
    RecoveryConfig recoveryConfig = recoveryConfigHelper.getRecoveryConfig(cluster.getClusterName(), "Host1");
    assertEquals("DATANODE", recoveryConfig.getEnabledComponents());

    // Simulate heartbeat for Host2: When second host heartbeats, it first checks if config stale.
    // This should return true since it did not get the configuration during registration.
    // There is an entry for the cluster name, made by Host1, but no entry for Host2 in the timestamp
    // dictionary since we skipped registration. Lookup for cluster name will succeed but lookup for Host2
    // will return null.
    boolean isConfigStale = recoveryConfigHelper.isConfigStale(cluster.getClusterName(), "Host2", -1);
    assertTrue(isConfigStale);
  }

  /**
   * Builds a dummy cluster with recovery enabled (AUTO_START) for the given
   * hosts. The property values here are the ones asserted in
   * {@link #testRecoveryConfigValues()}.
   *
   * @param hostNames hosts to register with the cluster
   * @return the created dummy cluster
   * @throws Exception on cluster creation failure
   */
  private Cluster getDummyCluster(Set<String> hostNames) throws Exception {
    Map<String, String> configProperties = new HashMap<>();
    configProperties.put(RecoveryConfigHelper.RECOVERY_ENABLED_KEY, "true");
    configProperties.put(RecoveryConfigHelper.RECOVERY_TYPE_KEY, "AUTO_START");
    configProperties.put(RecoveryConfigHelper.RECOVERY_MAX_COUNT_KEY, "4");
    configProperties.put(RecoveryConfigHelper.RECOVERY_LIFETIME_MAX_COUNT_KEY, "10");
    configProperties.put(RecoveryConfigHelper.RECOVERY_WINDOW_IN_MIN_KEY, "23");
    configProperties.put(RecoveryConfigHelper.RECOVERY_RETRY_GAP_KEY, "2");

    return heartbeatTestHelper.getDummyCluster("cluster1", "HDP-0.1", configProperties, hostNames);
  }

  /**
   * Convenience overload: builds a single-host dummy cluster.
   *
   * @param hostname the only host of the cluster
   * @return the created dummy cluster
   * @throws Exception on cluster creation failure
   */
  private Cluster getDummyCluster(final String hostname) throws Exception {
    Set<String> hostNames = new HashSet<>();
    hostNames.add(hostname);
    return getDummyCluster(hostNames);
  }
}