/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ambari.server.state.cluster; import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; import org.apache.ambari.server.AmbariException; import org.apache.ambari.server.H2DatabaseCleaner; import org.apache.ambari.server.ServiceComponentNotFoundException; import org.apache.ambari.server.ServiceNotFoundException; import org.apache.ambari.server.events.listeners.upgrade.HostVersionOutOfSyncListener; import org.apache.ambari.server.orm.GuiceJpaInitializer; import org.apache.ambari.server.orm.InMemoryDefaultTestModule; import org.apache.ambari.server.orm.OrmTestHelper; import org.apache.ambari.server.state.Cluster; import org.apache.ambari.server.state.Clusters; import org.apache.ambari.server.state.Host; import org.apache.ambari.server.state.RepositoryVersionState; import org.apache.ambari.server.state.Service; import org.apache.ambari.server.state.ServiceComponent; import org.apache.ambari.server.state.ServiceComponentFactory; import org.apache.ambari.server.state.ServiceComponentHost; import org.apache.ambari.server.state.ServiceComponentHostFactory; import org.apache.ambari.server.state.ServiceFactory; import org.apache.ambari.server.state.StackId; import org.apache.ambari.server.state.State; import org.easymock.EasyMock; import org.junit.After; import org.junit.Before; import org.junit.Test; import com.google.inject.Binder; import com.google.inject.Guice; import com.google.inject.Inject; import com.google.inject.Injector; import com.google.inject.Module; import com.google.inject.Provider; import com.google.inject.util.Modules; import junit.framework.Assert; /** * Tests AMBARI-9738 which produced a deadlock during read and writes between * {@link ClustersImpl} and {@link ClusterImpl}. */ public class ClustersDeadlockTest { private static final String CLUSTER_NAME = "c1"; private static final int NUMBER_OF_HOSTS = 100; private static final int NUMBER_OF_THREADS = 3; private final AtomicInteger hostNameCounter = new AtomicInteger(0); private CountDownLatch writerStoppedSignal; private CountDownLatch readerStoppedSignal; private final StackId stackId = new StackId("HDP-0.1"); @Inject private Injector injector; @Inject private Clusters clusters; @Inject private ServiceFactory serviceFactory; @Inject private ServiceComponentFactory serviceComponentFactory; @Inject private ServiceComponentHostFactory serviceComponentHostFactory; @Inject private OrmTestHelper helper; private Cluster cluster; @Before public void setup() throws Exception { injector = Guice.createInjector(Modules.override( new InMemoryDefaultTestModule()).with(new MockModule())); injector.getInstance(GuiceJpaInitializer.class); injector.injectMembers(this); StackId stackId = new StackId("HDP-0.1"); clusters.addCluster(CLUSTER_NAME, stackId); cluster = clusters.getCluster(CLUSTER_NAME); helper.getOrCreateRepositoryVersion(stackId, stackId.getStackVersion()); cluster.createClusterVersion(stackId, stackId.getStackVersion(), "admin", RepositoryVersionState.INSTALLING); // install HDFS installService("HDFS"); writerStoppedSignal = new CountDownLatch(NUMBER_OF_THREADS); readerStoppedSignal = new CountDownLatch(NUMBER_OF_THREADS); } @After public void teardown() throws AmbariException, SQLException { H2DatabaseCleaner.clearDatabaseAndStopPersistenceService(injector); } /** * Launches reader and writer threads simultaneously to check for a deadlock. * The numbers of launched reader and writer threads are equal to * the {@code}numberOfThreads{@code}. This method expects that reader * and writer threads are using {@code}readerStoppedSignal{@code} * and {@code}writerStoppedSignal{@code} correctly. * * Reader threads should be stopped after writer threads are finished. */ private void doLoadTest(Provider<? extends Thread> readerProvider, Provider<? extends Thread> writerProvider, final int numberOfThreads, CountDownLatch writerStoppedSignal, CountDownLatch readerStoppedSignal) throws Exception { List<Thread> writerThreads = new ArrayList<>(); for (int i = 0; i < numberOfThreads; i++) { Thread readerThread = readerProvider.get(); Thread writerThread = writerProvider.get(); writerThreads.add(writerThread); readerThread.start(); writerThread.start(); } for (Thread writerThread : writerThreads) { writerThread.join(); // Notify that one writer thread is stopped writerStoppedSignal.countDown(); } // All writer threads are stopped. Reader threads should finish now. // Await for all reader threads to stop readerStoppedSignal.await(); } /** * Tests that no deadlock exists when adding hosts while reading from the * cluster. * * @throws Exception */ @Test(timeout = 40000) public void testDeadlockWhileMappingHosts() throws Exception { Provider<ClustersHostMapperThread> clustersHostMapperThreadFactory = new Provider<ClustersHostMapperThread>() { @Override public ClustersHostMapperThread get() { return new ClustersHostMapperThread(); } }; doLoadTest(new ClusterReaderThreadFactory(), clustersHostMapperThreadFactory, NUMBER_OF_THREADS, writerStoppedSignal, readerStoppedSignal); Assert.assertEquals(NUMBER_OF_THREADS * NUMBER_OF_HOSTS, clusters.getHostsForCluster(CLUSTER_NAME).size()); } /** * Tests that no deadlock exists when adding hosts while reading from the * cluster. This test ensures that there are service components installed on * the hosts so that the cluster health report does some more work. * * @throws Exception */ @Test(timeout = 40000) public void testDeadlockWhileMappingHostsWithExistingServices() throws Exception { Provider<ClustersHostAndComponentMapperThread> clustersHostAndComponentMapperThreadFactory = new Provider<ClustersHostAndComponentMapperThread>() { @Override public ClustersHostAndComponentMapperThread get() { return new ClustersHostAndComponentMapperThread(); } }; doLoadTest(new ClusterReaderThreadFactory(), clustersHostAndComponentMapperThreadFactory, NUMBER_OF_THREADS, writerStoppedSignal, readerStoppedSignal); } /** * Tests that no deadlock exists when adding hosts while reading from the * cluster. * * @throws Exception */ @Test(timeout = 40000) public void testDeadlockWhileUnmappingHosts() throws Exception { Provider<ClustersHostUnMapperThread> clustersHostUnMapperThreadFactory = new Provider<ClustersHostUnMapperThread>() { @Override public ClustersHostUnMapperThread get() { return new ClustersHostUnMapperThread(); } }; doLoadTest(new ClusterReaderThreadFactory(), clustersHostUnMapperThreadFactory, NUMBER_OF_THREADS, writerStoppedSignal, readerStoppedSignal); Assert.assertEquals(0, clusters.getHostsForCluster(CLUSTER_NAME).size()); } private final class ClusterReaderThreadFactory implements Provider<ClusterReaderThread> { @Override public ClusterReaderThread get() { return new ClusterReaderThread(); } } /** * The {@link ClusterReaderThread} reads from a cluster over and over again * with a slight pause. */ private final class ClusterReaderThread extends Thread { /** * {@inheritDoc} */ @Override public void run() { try { // Repeat until writer threads exist while (true) { if (writerStoppedSignal.getCount() == 0) { break; } cluster.convertToResponse(); Thread.sleep(10); } } catch (Exception exception) { throw new RuntimeException(exception); } finally { // Notify that one reader was stopped readerStoppedSignal.countDown(); } } } /** * The {@link ClustersHostMapperThread} is used to map hosts to a cluster over * and over. */ private final class ClustersHostMapperThread extends Thread { /** * {@inheritDoc} */ @Override public void run() { try { for (int i = 0; i < NUMBER_OF_HOSTS; i++) { String hostName = "c64-" + hostNameCounter.getAndIncrement(); clusters.addHost(hostName); setOsFamily(clusters.getHost(hostName), "redhat", "6.4"); clusters.mapHostToCluster(hostName, CLUSTER_NAME); Thread.sleep(10); } } catch (Exception exception) { throw new RuntimeException(exception); } } } /** * The {@link ClustersHostAndComponentMapperThread} is used to map hosts to a * cluster over and over. This will also add components to the hosts that are * being mapped to further exercise the cluster health report concurrency. */ private final class ClustersHostAndComponentMapperThread extends Thread { /** * {@inheritDoc} */ @Override public void run() { try { for (int i = 0; i < NUMBER_OF_HOSTS; i++) { String hostName = "c64-" + hostNameCounter.getAndIncrement(); clusters.addHost(hostName); setOsFamily(clusters.getHost(hostName), "redhat", "6.4"); clusters.mapHostToCluster(hostName, CLUSTER_NAME); // create DATANODE on this host so that we end up exercising the // cluster health report since we need a service component host createNewServiceComponentHost("HDFS", "DATANODE", hostName); Thread.sleep(10); } } catch (Exception exception) { throw new RuntimeException(exception); } } } /** * The {@link ClustersHostUnMapperThread} is used to unmap hosts to a cluster * over and over. */ private final class ClustersHostUnMapperThread extends Thread { /** * {@inheritDoc} */ @Override public void run() { List<String> hostNames = new ArrayList<>(100); try { // pre-map the hosts for (int i = 0; i < NUMBER_OF_HOSTS; i++) { String hostName = "c64-" + hostNameCounter.getAndIncrement(); hostNames.add(hostName); clusters.addHost(hostName); setOsFamily(clusters.getHost(hostName), "redhat", "6.4"); clusters.mapHostToCluster(hostName, CLUSTER_NAME); } // unmap them all now for (String hostName : hostNames) { clusters.unmapHostFromCluster(hostName, CLUSTER_NAME); Thread.sleep(10); } } catch (Exception exception) { throw new RuntimeException(exception); } } } private void setOsFamily(Host host, String osFamily, String osVersion) { Map<String, String> hostAttributes = new HashMap<>(2); hostAttributes.put("os_family", osFamily); hostAttributes.put("os_release_version", osVersion); host.setHostAttributes(hostAttributes); } private Service installService(String serviceName) throws AmbariException { Service service = null; try { service = cluster.getService(serviceName); } catch (ServiceNotFoundException e) { service = serviceFactory.createNew(cluster, serviceName); cluster.addService(service); } return service; } private ServiceComponent addServiceComponent(Service service, String componentName) throws AmbariException { ServiceComponent serviceComponent = null; try { serviceComponent = service.getServiceComponent(componentName); } catch (ServiceComponentNotFoundException e) { serviceComponent = serviceComponentFactory.createNew(service, componentName); service.addServiceComponent(serviceComponent); serviceComponent.setDesiredState(State.INSTALLED); } return serviceComponent; } private ServiceComponentHost createNewServiceComponentHost(String svc, String svcComponent, String hostName) throws AmbariException { Assert.assertNotNull(cluster.getConfigGroups()); Service s = installService(svc); ServiceComponent sc = addServiceComponent(s, svcComponent); ServiceComponentHost sch = serviceComponentHostFactory.createNew(sc, hostName); sc.addServiceComponentHost(sch); sch.setDesiredState(State.INSTALLED); sch.setState(State.INSTALLED); sch.setDesiredStackVersion(stackId); sch.setStackVersion(stackId); return sch; } /** * */ private class MockModule implements Module { /** * */ @Override public void configure(Binder binder) { // this listener gets in the way of actually testing the concurrency // between the threads; it slows them down too much, so mock it out binder.bind(HostVersionOutOfSyncListener.class).toInstance( EasyMock.createNiceMock(HostVersionOutOfSyncListener.class)); } } }