/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ambari.server.state.cluster;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.ambari.server.AmbariException;
import org.apache.ambari.server.H2DatabaseCleaner;
import org.apache.ambari.server.ServiceComponentNotFoundException;
import org.apache.ambari.server.ServiceNotFoundException;
import org.apache.ambari.server.events.listeners.upgrade.HostVersionOutOfSyncListener;
import org.apache.ambari.server.orm.GuiceJpaInitializer;
import org.apache.ambari.server.orm.InMemoryDefaultTestModule;
import org.apache.ambari.server.orm.OrmTestHelper;
import org.apache.ambari.server.state.Cluster;
import org.apache.ambari.server.state.Clusters;
import org.apache.ambari.server.state.Config;
import org.apache.ambari.server.state.ConfigFactory;
import org.apache.ambari.server.state.Host;
import org.apache.ambari.server.state.MaintenanceState;
import org.apache.ambari.server.state.RepositoryVersionState;
import org.apache.ambari.server.state.Service;
import org.apache.ambari.server.state.ServiceComponent;
import org.apache.ambari.server.state.ServiceComponentFactory;
import org.apache.ambari.server.state.ServiceComponentHost;
import org.apache.ambari.server.state.ServiceComponentHostFactory;
import org.apache.ambari.server.state.ServiceFactory;
import org.apache.ambari.server.state.StackId;
import org.apache.ambari.server.state.State;
import org.apache.ambari.server.testing.DeadlockWarningThread;
import org.easymock.EasyMock;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import com.google.inject.Binder;
import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import com.google.inject.Module;
import com.google.inject.util.Modules;
/**
* Tests AMBARI-9368 and AMBARI-9761 which produced a deadlock during read and
* writes of some of the impl classes.
*/
public class ClusterDeadlockTest {
  private static final int NUMBER_OF_HOSTS = 40;
  private static final int NUMBER_OF_THREADS = 5;

  /**
   * Generates unique host ordinals so reader/writer threads never collide on
   * the same host name.
   */
  private final AtomicInteger hostNameCounter = new AtomicInteger(0);

  @Inject
  private Injector injector;

  @Inject
  private Clusters clusters;

  @Inject
  private ServiceFactory serviceFactory;

  @Inject
  private ServiceComponentFactory serviceComponentFactory;

  @Inject
  private ServiceComponentHostFactory serviceComponentHostFactory;

  @Inject
  private ConfigFactory configFactory;

  @Inject
  private OrmTestHelper helper;

  private StackId stackId = new StackId("HDP-0.1");

  /**
   * The cluster.
   */
  private Cluster cluster;

  /**
   * The names of every host mapped into the cluster by {@link #setup()}.
   */
  private List<String> hostNames = new ArrayList<>(NUMBER_OF_HOSTS);

  /**
   * Creates {@value #NUMBER_OF_HOSTS} hosts, maps them to the cluster, and
   * installs the HDFS service with NAMENODE and DATANODE components.
   *
   * @throws Exception
   */
  @Before
  public void setup() throws Exception {
    injector = Guice.createInjector(Modules.override(
        new InMemoryDefaultTestModule()).with(new MockModule()));
    injector.getInstance(GuiceJpaInitializer.class);
    injector.injectMembers(this);

    clusters.addCluster("c1", stackId);
    cluster = clusters.getCluster("c1");
    helper.getOrCreateRepositoryVersion(stackId, stackId.getStackVersion());
    cluster.createClusterVersion(stackId,
        stackId.getStackVersion(), "admin", RepositoryVersionState.INSTALLING);

    // two desired config types so reader/updater threads have real work
    Config config1 = configFactory.createNew(cluster, "test-type1", "version1",
        new HashMap<String, String>(), new HashMap<String, Map<String, String>>());
    Config config2 = configFactory.createNew(cluster, "test-type2", "version1",
        new HashMap<String, String>(), new HashMap<String, Map<String, String>>());

    cluster.addDesiredConfig("test user", new HashSet<>(Arrays.asList(config1, config2)));

    // create the hosts and map them into the cluster
    for (int i = 0; i < NUMBER_OF_HOSTS; i++) {
      String hostName = "c64-" + i;
      hostNames.add(hostName);
      clusters.addHost(hostName);
      setOsFamily(clusters.getHost(hostName), "redhat", "6.4");
      clusters.mapHostToCluster(hostName, "c1");
    }

    Service service = installService("HDFS");
    addServiceComponent(service, "NAMENODE");
    addServiceComponent(service, "DATANODE");
  }

  @After
  public void teardown() throws AmbariException, SQLException {
    H2DatabaseCleaner.clearDatabaseAndStopPersistenceService(injector);
  }

  /**
   * Waits for the deadlock watchdog to finish and fails the test with the
   * watchdog's collected error messages if a deadlock was detected. This
   * replaces the previous busy-wait on {@link Thread#isAlive()}, which
   * needlessly consumed a full CPU core while the exerciser threads ran.
   *
   * @param wt
   *          the watchdog monitoring the exerciser threads
   * @throws InterruptedException
   *           if the current thread is interrupted while waiting
   */
  private void assertNoDeadlocks(DeadlockWarningThread wt)
      throws InterruptedException {
    wt.join();
    Assert.assertFalse(wt.getErrorMessages().toString(), wt.isDeadlocked());
  }

  /**
   * Tests that concurrent impl serialization and impl writing doesn't cause a
   * deadlock.
   *
   * @throws Exception
   */
  @Test
  public void testDeadlockBetweenImplementations() throws Exception {
    Service service = cluster.getService("HDFS");
    ServiceComponent nameNodeComponent = service.getServiceComponent("NAMENODE");
    ServiceComponent dataNodeComponent = service.getServiceComponent("DATANODE");

    ServiceComponentHost nameNodeSCH = createNewServiceComponentHost("HDFS",
        "NAMENODE", "c64-0");
    ServiceComponentHost dataNodeSCH = createNewServiceComponentHost("HDFS",
        "DATANODE", "c64-0");

    List<Thread> threads = new ArrayList<>();
    for (int i = 0; i < NUMBER_OF_THREADS; i++) {
      DeadlockExerciserThread thread = new DeadlockExerciserThread();
      thread.setCluster(cluster);
      thread.setService(service);
      thread.setDataNodeComponent(dataNodeComponent);
      thread.setNameNodeComponent(nameNodeComponent);
      thread.setNameNodeSCH(nameNodeSCH);
      thread.setDataNodeSCH(dataNodeSCH);
      thread.start();
      threads.add(thread);
    }

    assertNoDeadlocks(new DeadlockWarningThread(threads));
  }

  /**
   * Tests that while serializing a service component, writes to that service
   * component do not cause a deadlock with the global cluster lock.
   *
   * @throws Exception
   */
  @Test
  public void testAddingHostComponentsWhileReading() throws Exception {
    Service service = cluster.getService("HDFS");
    ServiceComponent nameNodeComponent = service.getServiceComponent("NAMENODE");
    ServiceComponent dataNodeComponent = service.getServiceComponent("DATANODE");

    List<Thread> threads = new ArrayList<>();
    for (int i = 0; i < NUMBER_OF_THREADS; i++) {
      ServiceComponentReaderWriterThread thread = new ServiceComponentReaderWriterThread();
      thread.setDataNodeComponent(dataNodeComponent);
      thread.setNameNodeComponent(nameNodeComponent);
      thread.start();
      threads.add(thread);
    }

    assertNoDeadlocks(new DeadlockWarningThread(threads));
  }

  /**
   * Tests that no deadlock exists while restarting components and reading from
   * the cluster.
   *
   * @throws Exception
   */
  @Test
  public void testDeadlockWhileRestartingComponents() throws Exception {
    // for each host, install both components
    List<ServiceComponentHost> serviceComponentHosts = new ArrayList<>();
    for (String hostName : hostNames) {
      serviceComponentHosts.add(createNewServiceComponentHost("HDFS",
          "NAMENODE", hostName));

      serviceComponentHosts.add(createNewServiceComponentHost("HDFS",
          "DATANODE", hostName));
    }

    List<Thread> threads = new ArrayList<>();
    for (int i = 0; i < NUMBER_OF_THREADS; i++) {
      ClusterReaderThread clusterReaderThread = new ClusterReaderThread();
      ClusterWriterThread clusterWriterThread = new ClusterWriterThread();
      ServiceComponentRestartThread schWriterThread = new ServiceComponentRestartThread(
          serviceComponentHosts);

      threads.add(clusterReaderThread);
      threads.add(clusterWriterThread);
      threads.add(schWriterThread);

      clusterReaderThread.start();
      clusterWriterThread.start();
      schWriterThread.start();
    }

    // this scenario runs longer, so use the wider watchdog window
    assertNoDeadlocks(new DeadlockWarningThread(threads, 20, 1000));
  }

  /**
   * Tests that concurrent desired-config reads and config saves do not
   * deadlock.
   *
   * @throws Exception
   */
  @Test
  public void testDeadlockWithConfigsUpdate() throws Exception {
    List<Thread> threads = new ArrayList<>();
    for (int i = 0; i < NUMBER_OF_THREADS; i++) {
      for (int j = 0; j < NUMBER_OF_THREADS; j++) {
        threads.add(new ClusterDesiredConfigsReaderThread());
      }

      for (Config config : cluster.getAllConfigs()) {
        threads.add(new ConfigUpdaterThread(config));
      }
    }

    for (Thread thread : threads) {
      thread.start();
    }

    assertNoDeadlocks(new DeadlockWarningThread(threads));
  }

  /**
   * Repeatedly reads the cluster's desired configurations.
   */
  private final class ClusterDesiredConfigsReaderThread extends Thread {
    /**
     * {@inheritDoc}
     */
    @Override
    public void run() {
      for (int i = 0; i < 1000; i++) {
        cluster.getDesiredConfigs();
      }
    }
  }

  /**
   * Repeatedly persists a single configuration.
   */
  private final class ConfigUpdaterThread extends Thread {
    private final Config config;

    /**
     * Constructor.
     *
     * @param config
     *          the configuration to repeatedly save
     */
    public ConfigUpdaterThread(Config config) {
      this.config = config;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void run() {
      for (int i = 0; i < 300; i++) {
        config.save();
      }
    }
  }

  /**
   * The {@link ClusterReaderThread} reads from a cluster over and over again
   * with a slight pause.
   */
  private final class ClusterReaderThread extends Thread {
    /**
     * {@inheritDoc}
     */
    @Override
    public void run() {
      try {
        for (int i = 0; i < 1000; i++) {
          cluster.convertToResponse();
          Thread.sleep(10);
        }
      } catch (Exception exception) {
        throw new RuntimeException(exception);
      }
    }
  }

  /**
   * The {@link ClusterWriterThread} writes some information to the cluster
   * instance over and over again with a slight pause.
   */
  private final class ClusterWriterThread extends Thread {
    /**
     * {@inheritDoc}
     */
    @Override
    public void run() {
      try {
        for (int i = 0; i < 1500; i++) {
          cluster.setDesiredStackVersion(stackId);
          Thread.sleep(10);
        }
      } catch (Exception exception) {
        throw new RuntimeException(exception);
      }
    }
  }

  /**
   * The {@link ServiceComponentRestartThread} is used to constantly set SCH
   * restart values.
   */
  private final class ServiceComponentRestartThread extends Thread {
    private final List<ServiceComponentHost> serviceComponentHosts;

    /**
     * Constructor.
     *
     * @param serviceComponentHosts
     *          the SCHs whose restart flag is repeatedly set
     */
    private ServiceComponentRestartThread(
        List<ServiceComponentHost> serviceComponentHosts) {
      this.serviceComponentHosts = serviceComponentHosts;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void run() {
      try {
        for (int i = 0; i < 1000; i++) {
          // about 30ms to go through all SCHs, no sleep needed
          for (ServiceComponentHost serviceComponentHost : serviceComponentHosts) {
            serviceComponentHost.setRestartRequired(true);
          }
        }
      } catch (Exception exception) {
        throw new RuntimeException(exception);
      }
    }
  }

  /**
   * The {@link ServiceComponentReaderWriterThread} is used to continuously add
   * components to hosts while reading from those components.
   */
  private final class ServiceComponentReaderWriterThread extends Thread {
    private ServiceComponent nameNodeComponent;
    private ServiceComponent dataNodeComponent;

    /**
     * @param nameNodeComponent
     *          the nameNodeComponent to set
     */
    public void setNameNodeComponent(ServiceComponent nameNodeComponent) {
      this.nameNodeComponent = nameNodeComponent;
    }

    /**
     * @param dataNodeComponent
     *          the dataNodeComponent to set
     */
    public void setDataNodeComponent(ServiceComponent dataNodeComponent) {
      this.dataNodeComponent = dataNodeComponent;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void run() {
      try {
        for (int i = 0; i < 15; i++) {
          // a fresh host per iteration so concurrent writers never collide
          int hostNumeric = hostNameCounter.getAndIncrement();

          nameNodeComponent.convertToResponse();
          createNewServiceComponentHost("HDFS", "NAMENODE", "c64-"
              + hostNumeric);

          dataNodeComponent.convertToResponse();
          createNewServiceComponentHost("HDFS", "DATANODE", "c64-"
              + hostNumeric);

          Thread.sleep(10);
        }
      } catch (Exception exception) {
        throw new RuntimeException(exception);
      }
    }
  }

  /**
   * Interleaves reads (convertToResponse) and writes across the cluster,
   * service, component, and SCH impl classes to exercise the AMBARI-9368
   * deadlock scenario.
   */
  private static final class DeadlockExerciserThread extends Thread {
    private Cluster cluster;
    private Service service;
    private ServiceComponent nameNodeComponent;
    private ServiceComponent dataNodeComponent;
    private ServiceComponentHost nameNodeSCH;
    private ServiceComponentHost dataNodeSCH;

    /**
     * @param cluster
     *          the cluster to set
     */
    public void setCluster(Cluster cluster) {
      this.cluster = cluster;
    }

    /**
     * @param service
     *          the service to set
     */
    public void setService(Service service) {
      this.service = service;
    }

    /**
     * @param nameNodeComponent
     *          the nameNodeComponent to set
     */
    public void setNameNodeComponent(ServiceComponent nameNodeComponent) {
      this.nameNodeComponent = nameNodeComponent;
    }

    /**
     * @param dataNodeComponent
     *          the dataNodeComponent to set
     */
    public void setDataNodeComponent(ServiceComponent dataNodeComponent) {
      this.dataNodeComponent = dataNodeComponent;
    }

    /**
     * @param nameNodeSCH
     *          the nameNodeSCH to set
     */
    public void setNameNodeSCH(ServiceComponentHost nameNodeSCH) {
      this.nameNodeSCH = nameNodeSCH;
    }

    /**
     * @param dataNodeSCH
     *          the dataNodeSCH to set
     */
    public void setDataNodeSCH(ServiceComponentHost dataNodeSCH) {
      this.dataNodeSCH = dataNodeSCH;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void run() {
      try {
        for (int i = 0; i < 10; i++) {
          // reads first, then writes, with a pause to let other threads in
          cluster.convertToResponse();
          service.convertToResponse();
          nameNodeComponent.convertToResponse();
          dataNodeComponent.convertToResponse();
          nameNodeSCH.convertToResponse(null);
          dataNodeSCH.convertToResponse(null);

          cluster.setProvisioningState(State.INIT);
          service.setMaintenanceState(MaintenanceState.OFF);
          nameNodeComponent.setDesiredState(State.STARTED);
          dataNodeComponent.setDesiredState(State.INSTALLED);
          nameNodeSCH.setState(State.STARTED);
          dataNodeSCH.setState(State.INSTALLED);

          Thread.sleep(100);
        }
      } catch (Exception exception) {
        throw new RuntimeException(exception);
      }
    }
  }

  /**
   * Sets the minimal host attributes required to persist a host.
   *
   * @param host
   *          the host to update
   * @param osFamily
   *          the OS family attribute (e.g. "redhat")
   * @param osVersion
   *          the OS release version attribute (e.g. "6.4")
   */
  private void setOsFamily(Host host, String osFamily, String osVersion) {
    Map<String, String> hostAttributes = new HashMap<>(2);
    hostAttributes.put("os_family", osFamily);
    hostAttributes.put("os_release_version", osVersion);
    host.setHostAttributes(hostAttributes);
  }

  /**
   * Creates a new INSTALLED service component host on the given host,
   * installing the service and component first if necessary.
   *
   * @param svc
   *          the service name
   * @param svcComponent
   *          the component name
   * @param hostName
   *          the host to place the component on
   * @return the newly created SCH
   * @throws AmbariException
   */
  private ServiceComponentHost createNewServiceComponentHost(String svc,
      String svcComponent, String hostName) throws AmbariException {
    Assert.assertNotNull(cluster.getConfigGroups());
    Service s = installService(svc);
    ServiceComponent sc = addServiceComponent(s, svcComponent);

    ServiceComponentHost sch = serviceComponentHostFactory.createNew(sc,
        hostName);

    sc.addServiceComponentHost(sch);
    sch.setDesiredState(State.INSTALLED);
    sch.setState(State.INSTALLED);
    sch.setDesiredStackVersion(stackId);
    sch.setStackVersion(stackId);
    return sch;
  }

  /**
   * Returns the named service, creating and adding it to the cluster if it
   * does not already exist.
   *
   * @param serviceName
   *          the service to look up or create
   * @return the existing or newly created service
   * @throws AmbariException
   */
  private Service installService(String serviceName) throws AmbariException {
    Service service = null;

    try {
      service = cluster.getService(serviceName);
    } catch (ServiceNotFoundException e) {
      service = serviceFactory.createNew(cluster, serviceName);
      cluster.addService(service);
    }

    return service;
  }

  /**
   * Returns the named component of a service, creating it in the INSTALLED
   * desired state if it does not already exist.
   *
   * @param service
   *          the service to look the component up on
   * @param componentName
   *          the component to look up or create
   * @return the existing or newly created component
   * @throws AmbariException
   */
  private ServiceComponent addServiceComponent(Service service,
      String componentName) throws AmbariException {
    ServiceComponent serviceComponent = null;
    try {
      serviceComponent = service.getServiceComponent(componentName);
    } catch (ServiceComponentNotFoundException e) {
      serviceComponent = serviceComponentFactory.createNew(service,
          componentName);
      service.addServiceComponent(serviceComponent);
      serviceComponent.setDesiredState(State.INSTALLED);
    }

    return serviceComponent;
  }

  /**
   * Guice overrides applied on top of {@link InMemoryDefaultTestModule}.
   */
  private class MockModule implements Module {
    /**
     * {@inheritDoc}
     */
    @Override
    public void configure(Binder binder) {
      // this listener gets in the way of actually testing the concurrency
      // between the threads; it slows them down too much, so mock it out
      binder.bind(HostVersionOutOfSyncListener.class).toInstance(
          EasyMock.createNiceMock(HostVersionOutOfSyncListener.class));
    }
  }
}