/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ambari.server.state.services;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.ambari.server.AmbariException;
import org.apache.ambari.server.H2DatabaseCleaner;
import org.apache.ambari.server.Role;
import org.apache.ambari.server.RoleCommand;
import org.apache.ambari.server.actionmanager.HostRoleStatus;
import org.apache.ambari.server.orm.GuiceJpaInitializer;
import org.apache.ambari.server.orm.InMemoryDefaultTestModule;
import org.apache.ambari.server.orm.OrmTestHelper;
import org.apache.ambari.server.orm.dao.HostRoleCommandDAO;
import org.apache.ambari.server.orm.dao.RepositoryVersionDAO;
import org.apache.ambari.server.orm.dao.RequestDAO;
import org.apache.ambari.server.orm.dao.StackDAO;
import org.apache.ambari.server.orm.dao.StageDAO;
import org.apache.ambari.server.orm.dao.UpgradeDAO;
import org.apache.ambari.server.orm.entities.HostRoleCommandEntity;
import org.apache.ambari.server.orm.entities.RepositoryVersionEntity;
import org.apache.ambari.server.orm.entities.RequestEntity;
import org.apache.ambari.server.orm.entities.StackEntity;
import org.apache.ambari.server.orm.entities.StageEntity;
import org.apache.ambari.server.orm.entities.StageEntityPK;
import org.apache.ambari.server.orm.entities.UpgradeEntity;
import org.apache.ambari.server.state.Cluster;
import org.apache.ambari.server.state.Clusters;
import org.apache.ambari.server.state.RepositoryVersionState;
import org.apache.ambari.server.state.StackId;
import org.apache.ambari.server.state.stack.upgrade.Direction;
import org.apache.ambari.server.state.stack.upgrade.UpgradeType;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import com.google.inject.Guice;
import com.google.inject.Injector;
/**
* Tests {@link org.apache.ambari.server.state.services.RetryUpgradeActionService}.
*/
public class RetryUpgradeActionServiceTest {

  private Injector injector;

  private StackDAO stackDAO;
  private Clusters clusters;
  private RepositoryVersionDAO repoVersionDAO;
  private UpgradeDAO upgradeDAO;
  private RequestDAO requestDAO;
  private StageDAO stageDAO;
  private HostRoleCommandDAO hostRoleCommandDAO;
  private OrmTestHelper helper;

  // Instance variables shared by all tests
  String clusterName = "c1";
  Cluster cluster;
  StackEntity stackEntity220;
  StackId stack220;
  Long upgradeRequestId = 1L;
  Long stageId = 1L;

  /**
   * Builds a fresh injector from {@link InMemoryDefaultTestModule} and looks up
   * the DAOs and helpers used by the test.
   */
  @Before
  public void before() throws NoSuchFieldException, IllegalAccessException {
    injector = Guice.createInjector(new InMemoryDefaultTestModule());
    // Instantiated for its side effects only; the returned instance is not needed.
    // NOTE(review): presumably this bootstraps the JPA persistence service - confirm.
    injector.getInstance(GuiceJpaInitializer.class);

    stackDAO = injector.getInstance(StackDAO.class);
    clusters = injector.getInstance(Clusters.class);
    repoVersionDAO = injector.getInstance(RepositoryVersionDAO.class);
    upgradeDAO = injector.getInstance(UpgradeDAO.class);
    requestDAO = injector.getInstance(RequestDAO.class);
    stageDAO = injector.getInstance(StageDAO.class);
    hostRoleCommandDAO = injector.getInstance(HostRoleCommandDAO.class);
    helper = injector.getInstance(OrmTestHelper.class);
  }

  /**
   * Hands the injector to {@link H2DatabaseCleaner} so that the database is
   * cleared and the persistence service is stopped after each test.
   */
  @After
  public void teardown() throws AmbariException, SQLException {
    H2DatabaseCleaner.clearDatabaseAndStopPersistenceService(injector);
  }

  /**
   * Test the Guava service allows retrying certain failed actions during a stack upgrade.
   * Case 1: No cluster => no-op
   * Case 2: Cluster and valid timeout, but no active upgrade => no-op
   * Case 3: Cluster with an active upgrade, but no HOLDING_FAILED|HOLDING_TIMEDOUT commands => no-op
   * Case 4: Cluster with an active upgrade that contains a failed task in HOLDING_FAILED that
   * does NOT meet conditions to be retried => no-op
   * Case 5: Cluster with an active upgrade that contains a failed task in HOLDING_FAILED that
   * DOES meet conditions to be retried and has values for start time and original start time => retries the task
   * Case 6: Cluster with an active upgrade that contains a failed task in HOLDING_TIMEDOUT that
   * DOES meet conditions to be retried and does not have values for start time or original start time => retries the task
   * Case 7: Cluster with an active upgrade that contains a failed task in HOLDING_FAILED that
   * was already retried and has now expired => no-op
   * Case 8: Cluster with an active upgrade that contains a failed task in HOLDING_FAILED, but it is a critical task
   * during Finalize Cluster, which should not be retried => no-op
   *
   * The cases run sequentially against the same database, so each case builds
   * on the state left behind by the previous one.
   *
   * @throws Exception
   */
  @Test
  public void test() throws Exception {
    int timeoutMins = 1;
    RetryUpgradeActionService service = injector.getInstance(RetryUpgradeActionService.class);
    service.startUp();

    // Case 1: No cluster
    service.runOneIteration();

    // Case 2: Cluster and valid timeout, but no active upgrade
    createCluster();
    service.setMaxTimeout(timeoutMins);
    service.runOneIteration();

    // Case 3: Cluster with an active upgrade, but no HOLDING_FAILED|HOLDING_TIMEDOUT commands.
    prepareUpgrade();

    // Run the service
    service.runOneIteration();

    // Assert that commands exist and that none of them was moved to PENDING
    List<HostRoleCommandEntity> commands = hostRoleCommandDAO.findAll();
    Assert.assertFalse(commands.isEmpty());
    assertNoneArePending(commands);

    // Case 4: Cluster with an active upgrade that contains a failed task in HOLDING_FAILED that does NOT meet conditions to be retried.
    StageEntityPK primaryKey = new StageEntityPK();
    primaryKey.setRequestId(upgradeRequestId);
    primaryKey.setStageId(stageId);

    StageEntity stageEntity = stageDAO.findByPK(primaryKey);

    HostRoleCommandEntity hrc2 = new HostRoleCommandEntity();
    hrc2.setStage(stageEntity);
    hrc2.setStatus(HostRoleStatus.HOLDING_FAILED);
    hrc2.setRole(Role.ZOOKEEPER_SERVER);
    hrc2.setRoleCommand(RoleCommand.RESTART);
    hrc2.setRetryAllowed(false);
    hrc2.setAutoSkipOnFailure(false);
    stageEntity.getHostRoleCommands().add(hrc2);

    hostRoleCommandDAO.create(hrc2);
    stageDAO.merge(stageEntity);

    // Run the service
    service.runOneIteration();

    // The COMPLETED task from prepareUpgrade() plus the task created above
    commands = hostRoleCommandDAO.findAll();
    Assert.assertEquals(2, commands.size());
    assertNoneArePending(commands);

    // Case 5: Cluster with an active upgrade that contains a failed task in HOLDING_FAILED that DOES meet conditions to be retried.
    long now = System.currentTimeMillis();
    hrc2.setRetryAllowed(true);
    hrc2.setOriginalStartTime(now);
    hostRoleCommandDAO.merge(hrc2);

    // Run the service
    service.runOneIteration();

    // Ensure that task 2 transitioned from HOLDING_FAILED to PENDING
    Assert.assertEquals(HostRoleStatus.PENDING, persistedStatusOf(hrc2));

    // Case 6: Cluster with an active upgrade that contains a failed task in HOLDING_TIMEDOUT that DOES meet conditions to be retried
    // and has no values (-1) for any of its timestamps.
    hrc2.setStatus(HostRoleStatus.HOLDING_TIMEDOUT);
    hrc2.setRetryAllowed(true);
    hrc2.setOriginalStartTime(-1L);
    hrc2.setStartTime(-1L);
    hrc2.setLastAttemptTime(-1L);
    hrc2.setEndTime(-1L);
    hrc2.setAttemptCount((short) 0);
    hostRoleCommandDAO.merge(hrc2);

    // Run the service
    service.runOneIteration();

    // Ensure that task 2 transitioned from HOLDING_TIMEDOUT to PENDING
    Assert.assertEquals(HostRoleStatus.PENDING, persistedStatusOf(hrc2));

    // Case 7: Cluster with an active upgrade that contains a failed task in HOLDING_FAILED that was already retried and has now expired.
    now = System.currentTimeMillis();
    // Original start time lies 1 ms beyond the configured timeout window
    hrc2.setOriginalStartTime(now - (timeoutMins * 60000L) - 1);
    hrc2.setStatus(HostRoleStatus.HOLDING_FAILED);
    hostRoleCommandDAO.merge(hrc2);

    // Run the service
    service.runOneIteration();

    Assert.assertEquals(HostRoleStatus.HOLDING_FAILED, persistedStatusOf(hrc2));

    // Case 8: Cluster with an active upgrade that contains a failed task in HOLDING_FAILED, but it is a critical task
    // during Finalize Cluster, which should not be retried.
    now = System.currentTimeMillis();
    hrc2.setOriginalStartTime(now);
    hrc2.setStatus(HostRoleStatus.HOLDING_FAILED);
    hrc2.setCustomCommandName("org.apache.ambari.server.serveraction.upgrades.FinalizeUpgradeAction");
    hostRoleCommandDAO.merge(hrc2);

    // Run the service
    service.runOneIteration();

    Assert.assertEquals(HostRoleStatus.HOLDING_FAILED, persistedStatusOf(hrc2));
  }

  /**
   * Fails the test if any of the given commands is in the PENDING state.
   *
   * @param commands the commands to inspect
   */
  private void assertNoneArePending(List<HostRoleCommandEntity> commands) {
    for (HostRoleCommandEntity hrc : commands) {
      if (hrc.getStatus() == HostRoleStatus.PENDING) {
        Assert.fail("Did not expect any HostRoleCommands to be PENDING");
      }
    }
  }

  /**
   * Looks the given command up again through the DAO by its task id and
   * returns the status of the entity that was found, rather than the status
   * held by the instance the test is mutating.
   *
   * @param hrc the command whose task id is used for the lookup
   * @return the status of the command returned by the DAO
   */
  private HostRoleStatus persistedStatusOf(HostRoleCommandEntity hrc) {
    return hostRoleCommandDAO.findByPK(hrc.getTaskId()).getStatus();
  }

  /**
   * Create a cluster for stack HDP 2.2.0, register repository version
   * 2.2.0.0 for it and transition the cluster version to CURRENT.
   * @throws AmbariException
   */
  private void createCluster() throws AmbariException {
    stackEntity220 = stackDAO.find("HDP", "2.2.0");
    stack220 = new StackId("HDP-2.2.0");

    clusters.addCluster(clusterName, stack220);
    // Look the cluster up under the same name it was registered with
    cluster = clusters.getCluster(clusterName);

    RepositoryVersionEntity repoVersionEntity = new RepositoryVersionEntity();
    repoVersionEntity.setDisplayName("Initial Version");
    repoVersionEntity.setOperatingSystems("");
    repoVersionEntity.setStack(stackEntity220);
    repoVersionEntity.setVersion("2.2.0.0");
    repoVersionDAO.create(repoVersionEntity);

    helper.getOrCreateRepositoryVersion(stack220, stack220.getStackVersion());

    cluster.createClusterVersion(stack220, stack220.getStackVersion(), "admin", RepositoryVersionState.INSTALLING);
    cluster.transitionClusterVersion(stack220, stack220.getStackVersion(), RepositoryVersionState.CURRENT);
  }

  /**
   * Create a new repo version, plus the request and stage objects needed for a ROLLING stack upgrade.
   * Also attaches the upgrade to the cluster and adds one COMPLETED task to the stage.
   * Relies on {@link #createCluster()} having populated the shared fields first.
   * @throws AmbariException
   */
  private void prepareUpgrade() throws AmbariException {
    RepositoryVersionEntity repoVersionEntity = new RepositoryVersionEntity();
    repoVersionEntity.setDisplayName("Version to Upgrade To");
    repoVersionEntity.setOperatingSystems("");
    repoVersionEntity.setStack(stackEntity220);
    repoVersionEntity.setVersion("2.2.0.1");
    repoVersionDAO.create(repoVersionEntity);

    helper.getOrCreateRepositoryVersion(stack220, stack220.getStackVersion());

    RequestEntity requestEntity = new RequestEntity();
    requestEntity.setRequestId(upgradeRequestId);
    requestEntity.setClusterId(cluster.getClusterId());
    requestDAO.create(requestEntity);

    // Create the stage and add it to the request
    StageEntity stageEntity = new StageEntity();
    stageEntity.setRequest(requestEntity);
    stageEntity.setClusterId(cluster.getClusterId());
    stageEntity.setRequestId(upgradeRequestId);
    stageEntity.setStageId(stageId);
    requestEntity.setStages(Collections.singletonList(stageEntity));
    stageDAO.create(stageEntity);
    requestDAO.merge(requestEntity);

    UpgradeEntity upgrade = new UpgradeEntity();
    upgrade.setId(1L);
    upgrade.setRequestEntity(requestEntity);
    upgrade.setClusterId(cluster.getClusterId());
    upgrade.setUpgradePackage("some-name");
    upgrade.setUpgradeType(UpgradeType.ROLLING);
    upgrade.setDirection(Direction.UPGRADE);
    upgrade.setFromVersion("2.2.0.0");
    upgrade.setToVersion("2.2.0.1");
    upgradeDAO.create(upgrade);

    cluster.setUpgradeEntity(upgrade);

    // Create the task and add it to the stage
    HostRoleCommandEntity hrc1 = new HostRoleCommandEntity();
    hrc1.setStage(stageEntity);
    hrc1.setStatus(HostRoleStatus.COMPLETED);
    hrc1.setRole(Role.ZOOKEEPER_SERVER);
    hrc1.setRoleCommand(RoleCommand.RESTART);

    stageEntity.setHostRoleCommands(new ArrayList<HostRoleCommandEntity>());
    stageEntity.getHostRoleCommands().add(hrc1);

    hostRoleCommandDAO.create(hrc1);
    stageDAO.merge(stageEntity);
  }
}