/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.brooklyn.core.mgmt.ha; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotEquals; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertTrue; import java.util.List; import java.util.concurrent.atomic.AtomicLong; import org.apache.brooklyn.api.mgmt.ha.HighAvailabilityMode; import org.apache.brooklyn.api.mgmt.ha.ManagementNodeState; import org.apache.brooklyn.api.mgmt.ha.ManagementNodeSyncRecord; import org.apache.brooklyn.api.mgmt.ha.ManagementPlaneSyncRecord; import org.apache.brooklyn.api.mgmt.ha.ManagementPlaneSyncRecordPersister; import org.apache.brooklyn.core.BrooklynVersion; import org.apache.brooklyn.core.entity.Entities; import org.apache.brooklyn.core.mgmt.ha.HighAvailabilityManagerImpl; import org.apache.brooklyn.core.mgmt.ha.ManagementPlaneSyncRecordDeltaImpl; import org.apache.brooklyn.core.mgmt.ha.ManagementPlaneSyncRecordPersisterToObjectStore; import org.apache.brooklyn.core.mgmt.ha.HighAvailabilityManagerImpl.PromotionListener; import org.apache.brooklyn.core.mgmt.ha.dto.BasicManagementNodeSyncRecord; import org.apache.brooklyn.core.mgmt.ha.dto.BasicManagementNodeSyncRecord.Builder; import org.apache.brooklyn.core.mgmt.internal.ManagementContextInternal; import org.apache.brooklyn.core.mgmt.persist.BrooklynMementoPersisterToObjectStore; import org.apache.brooklyn.core.mgmt.persist.PersistMode; import org.apache.brooklyn.core.mgmt.persist.PersistenceObjectStore; import org.apache.brooklyn.core.mgmt.rebind.PersistenceExceptionHandlerImpl; import org.apache.brooklyn.core.test.entity.LocalManagementContextForTests; import org.apache.brooklyn.test.Asserts; import org.apache.brooklyn.util.time.Duration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; import com.google.common.base.Ticker; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; @Test public abstract class HighAvailabilityManagerTestFixture { @SuppressWarnings("unused") private static final Logger log = LoggerFactory.getLogger(HighAvailabilityManagerTestFixture.class); private ManagementPlaneSyncRecordPersister persister; protected ManagementContextInternal managementContext; private String ownNodeId; private HighAvailabilityManagerImpl manager; private Ticker ticker; private AtomicLong currentTime; // used to set the ticker's return value private RecordingPromotionListener promotionListener; private ClassLoader classLoader = getClass().getClassLoader(); private PersistenceObjectStore objectStore; @BeforeMethod(alwaysRun=true) public void setUp() throws Exception { currentTime = new AtomicLong(1000000000L); ticker = new Ticker() { // strictly not a ticker because returns millis UTC, but it works fine even so @Override public long read() { return currentTime.get(); } }; promotionListener = new RecordingPromotionListener(); managementContext = newLocalManagementContext(); ownNodeId = managementContext.getManagementNodeId(); objectStore = newPersistenceObjectStore(); objectStore.injectManagementContext(managementContext); objectStore.prepareForSharedUse(PersistMode.CLEAN, HighAvailabilityMode.DISABLED); persister = new ManagementPlaneSyncRecordPersisterToObjectStore(managementContext, objectStore, classLoader); ((ManagementPlaneSyncRecordPersisterToObjectStore)persister).preferRemoteTimestampInMemento(); BrooklynMementoPersisterToObjectStore persisterObj = new BrooklynMementoPersisterToObjectStore( objectStore, managementContext.getBrooklynProperties(), classLoader); managementContext.getRebindManager().setPersister(persisterObj, PersistenceExceptionHandlerImpl.builder().build()); manager = ((HighAvailabilityManagerImpl)managementContext.getHighAvailabilityManager()) .setPollPeriod(getPollPeriod()) .setHeartbeatTimeout(Duration.THIRTY_SECONDS) .setPromotionListener(promotionListener) .setLocalTicker(ticker) .setRemoteTicker(getRemoteTicker()) .setPersister(persister); persister.delta(ManagementPlaneSyncRecordDeltaImpl.builder() .node(newManagerMemento(ownNodeId, ManagementNodeState.HOT_STANDBY)) .build()); } protected ManagementContextInternal newLocalManagementContext() { return LocalManagementContextForTests.newInstance(); } protected abstract PersistenceObjectStore newPersistenceObjectStore(); @AfterMethod(alwaysRun=true) public void tearDown() throws Exception { if (manager != null) manager.stop(); if (managementContext != null) Entities.destroyAll(managementContext); if (objectStore != null) objectStore.deleteCompletely(); } // The web-console could still be polling (e.g. if have just restarted brooklyn), before the persister is set. // Must not throw NPE, but instead return something sensible (e.g. an empty state record). @Test public void testGetManagementPlaneSyncStateDoesNotThrowNpeBeforePersisterSet() throws Exception { HighAvailabilityManagerImpl manager2 = new HighAvailabilityManagerImpl(managementContext) .setPollPeriod(Duration.millis(10)) .setHeartbeatTimeout(Duration.THIRTY_SECONDS) .setPromotionListener(promotionListener) .setLocalTicker(ticker) .setRemoteTicker(ticker); try { ManagementPlaneSyncRecord state = manager2.loadManagementPlaneSyncRecord(true); assertNotNull(state); } finally { manager2.stop(); } } // Can get a log.error about our management node's heartbeat being out of date. Caused by // poller first writing a heartbeat record, and then the clock being incremented. But the // next poll fixes it. public void testPromotes() throws Exception { persister.delta(ManagementPlaneSyncRecordDeltaImpl.builder() .node(newManagerMemento(ownNodeId, ManagementNodeState.HOT_STANDBY)) .node(newManagerMemento("node1", ManagementNodeState.MASTER)) .setMaster("node1") .build()); manager.start(HighAvailabilityMode.AUTO); // Simulate passage of time; ticker used by this HA-manager so it will "correctly" publish // its own heartbeat with the new time; but node1's record is now out-of-date. tickerAdvance(Duration.seconds(31)); // Expect to be notified of our promotion, as the only other node promotionListener.assertCalledEventually(); } @Test(groups="Integration") // because one second wait in succeedsContinually public void testDoesNotPromoteIfMasterTimeoutNotExpired() throws Exception { persister.delta(ManagementPlaneSyncRecordDeltaImpl.builder() .node(newManagerMemento(ownNodeId, ManagementNodeState.HOT_STANDBY)) .node(newManagerMemento("node1", ManagementNodeState.MASTER)) .setMaster("node1") .build()); manager.start(HighAvailabilityMode.AUTO); tickerAdvance(Duration.seconds(25)); // Expect not to be notified, as 25s < 30s timeout // (it's normally a fake clock so won't hit 30, even waiting 1s below - but in "IntegrationTest" subclasses it is real!) Asserts.succeedsContinually(new Runnable() { @Override public void run() { assertTrue(promotionListener.callTimestamps.isEmpty(), "calls="+promotionListener.callTimestamps); }}); } public void testGetManagementPlaneStatus() throws Exception { // with the name zzzzz the mgr created here should never be promoted by the alphabetical strategy! tickerAdvance(Duration.FIVE_SECONDS); persister.delta(ManagementPlaneSyncRecordDeltaImpl.builder() .node(newManagerMemento(ownNodeId, ManagementNodeState.STANDBY)) .node(newManagerMemento("zzzzzzz_node1", ManagementNodeState.STANDBY)) .build()); persister.loadSyncRecord(); long zzzTime = tickerCurrentMillis(); tickerAdvance(Duration.FIVE_SECONDS); manager.start(HighAvailabilityMode.AUTO); ManagementPlaneSyncRecord memento = manager.loadManagementPlaneSyncRecord(true); // Note can assert timestamp because not "real" time; it's using our own Ticker assertEquals(memento.getMasterNodeId(), ownNodeId); assertEquals(memento.getManagementNodes().keySet(), ImmutableSet.of(ownNodeId, "zzzzzzz_node1")); assertEquals(memento.getManagementNodes().get(ownNodeId).getNodeId(), ownNodeId); assertEquals(memento.getManagementNodes().get(ownNodeId).getStatus(), ManagementNodeState.MASTER); assertEquals(memento.getManagementNodes().get(ownNodeId).getLocalTimestamp(), tickerCurrentMillis()); assertEquals(memento.getManagementNodes().get("zzzzzzz_node1").getNodeId(), "zzzzzzz_node1"); assertEquals(memento.getManagementNodes().get("zzzzzzz_node1").getStatus(), ManagementNodeState.STANDBY); assertEquals(memento.getManagementNodes().get("zzzzzzz_node1").getLocalTimestamp(), zzzTime); } @Test(groups="Integration", invocationCount=50) //because we have had non-deterministic failures public void testGetManagementPlaneStatusManyTimes() throws Exception { testGetManagementPlaneStatus(); } @Test public void testGetManagementPlaneSyncStateInfersTimedOutNodeAsFailed() throws Exception { persister.delta(ManagementPlaneSyncRecordDeltaImpl.builder() .node(newManagerMemento(ownNodeId, ManagementNodeState.HOT_STANDBY)) .node(newManagerMemento("node1", ManagementNodeState.MASTER)) .setMaster("node1") .build()); manager.start(HighAvailabilityMode.HOT_STANDBY); ManagementPlaneSyncRecord state = manager.loadManagementPlaneSyncRecord(true); assertEquals(state.getManagementNodes().get("node1").getStatus(), ManagementNodeState.MASTER); assertEquals(state.getManagementNodes().get(ownNodeId).getStatus(), ManagementNodeState.HOT_STANDBY); // Simulate passage of time; ticker used by this HA-manager so it will "correctly" publish // its own heartbeat with the new time; but node1's record is now out-of-date. tickerAdvance(Duration.seconds(31)); ManagementPlaneSyncRecord state2 = manager.loadManagementPlaneSyncRecord(true); assertEquals(state2.getManagementNodes().get("node1").getStatus(), ManagementNodeState.FAILED); assertNotEquals(state.getManagementNodes().get(ownNodeId).getStatus(), ManagementNodeState.FAILED); } protected Duration getPollPeriod() { return Duration.millis(10); } protected long tickerCurrentMillis() { return ticker.read(); } protected long tickerAdvance(Duration duration) { currentTime.addAndGet(duration.toMilliseconds()); return tickerCurrentMillis(); } protected Ticker getRemoteTicker() { return ticker; } protected ManagementNodeSyncRecord newManagerMemento(String nodeId, ManagementNodeState status) { Builder rb = BasicManagementNodeSyncRecord.builder(); rb.brooklynVersion(BrooklynVersion.get()).nodeId(nodeId).status(status); rb.localTimestamp(tickerCurrentMillis()); if (getRemoteTicker()!=null) rb.remoteTimestamp(getRemoteTicker().read()); return rb.build(); } public static class RecordingPromotionListener implements PromotionListener { public final List<Long> callTimestamps = Lists.newCopyOnWriteArrayList(); @Override public void promotingToMaster() { callTimestamps.add(System.currentTimeMillis()); } public void assertNotCalled() { assertTrue(callTimestamps.isEmpty(), "calls="+callTimestamps); } public void assertCalled() { assertFalse(callTimestamps.isEmpty(), "calls="+callTimestamps); } public void assertCalledEventually() { Asserts.succeedsEventually(new Runnable() { @Override public void run() { assertCalled(); }}); } } }