/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.brooklyn.core.mgmt.ha;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import java.io.IOException;
import java.net.URI;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import javax.annotation.Nullable;
import org.apache.brooklyn.api.entity.Application;
import org.apache.brooklyn.api.entity.Entity;
import org.apache.brooklyn.api.location.Location;
import org.apache.brooklyn.api.mgmt.Task;
import org.apache.brooklyn.api.mgmt.ha.HighAvailabilityManager;
import org.apache.brooklyn.api.mgmt.ha.HighAvailabilityMode;
import org.apache.brooklyn.api.mgmt.ha.ManagementNodeState;
import org.apache.brooklyn.api.mgmt.ha.ManagementNodeSyncRecord;
import org.apache.brooklyn.api.mgmt.ha.ManagementPlaneSyncRecord;
import org.apache.brooklyn.api.mgmt.ha.ManagementPlaneSyncRecordPersister;
import org.apache.brooklyn.api.mgmt.ha.MementoCopyMode;
import org.apache.brooklyn.api.mgmt.ha.ManagementPlaneSyncRecordPersister.Delta;
import org.apache.brooklyn.api.mgmt.rebind.RebindManager;
import org.apache.brooklyn.config.ConfigKey;
import org.apache.brooklyn.core.BrooklynFeatureEnablement;
import org.apache.brooklyn.core.BrooklynVersion;
import org.apache.brooklyn.core.catalog.internal.BasicBrooklynCatalog;
import org.apache.brooklyn.core.catalog.internal.CatalogDto;
import org.apache.brooklyn.core.config.ConfigKeys;
import org.apache.brooklyn.core.entity.EntityInternal;
import org.apache.brooklyn.core.mgmt.BrooklynTaskTags;
import org.apache.brooklyn.core.mgmt.ha.BasicMasterChooser.AlphabeticMasterChooser;
import org.apache.brooklyn.core.mgmt.ha.dto.BasicManagementNodeSyncRecord;
import org.apache.brooklyn.core.mgmt.ha.dto.ManagementPlaneSyncRecordImpl;
import org.apache.brooklyn.core.mgmt.ha.dto.ManagementPlaneSyncRecordImpl.Builder;
import org.apache.brooklyn.core.mgmt.internal.BrooklynObjectManagementMode;
import org.apache.brooklyn.core.mgmt.internal.LocalEntityManager;
import org.apache.brooklyn.core.mgmt.internal.LocationManagerInternal;
import org.apache.brooklyn.core.mgmt.internal.ManagementContextInternal;
import org.apache.brooklyn.core.mgmt.internal.ManagementTransitionMode;
import org.apache.brooklyn.core.mgmt.persist.BrooklynPersistenceUtils;
import org.apache.brooklyn.core.mgmt.persist.PersistenceActivityMetrics;
import org.apache.brooklyn.core.mgmt.persist.BrooklynPersistenceUtils.CreateBackupMode;
import org.apache.brooklyn.core.mgmt.rebind.RebindManagerImpl;
import org.apache.brooklyn.core.server.BrooklynServerConfig;
import org.apache.brooklyn.util.collections.MutableList;
import org.apache.brooklyn.util.collections.MutableMap;
import org.apache.brooklyn.util.core.task.ScheduledTask;
import org.apache.brooklyn.util.core.task.Tasks;
import org.apache.brooklyn.util.exceptions.Exceptions;
import org.apache.brooklyn.util.exceptions.ReferenceWithError;
import org.apache.brooklyn.util.text.Strings;
import org.apache.brooklyn.util.time.Duration;
import org.apache.brooklyn.util.time.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.annotations.Beta;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.base.Ticker;
import com.google.common.collect.Iterables;
/**
* This is the guts of the high-availability solution in Brooklyn.
* <p>
* Multiple brooklyn nodes can be started to form a single management plane, where one node is
* designated master and the others are "warm standbys". On termination or failure of the master,
* the standbys deterministically decide which standby should become master (see {@link MasterChooser}).
* That standby promotes itself.
* <p>
* The management nodes communicate their health/status via the {@link ManagementPlaneSyncRecordPersister}.
* For example, if using {@link ManagementPlaneSyncRecordPersisterToObjectStore} with a shared blobstore or
* filesystem/NFS mount, then each management-node periodically writes its state.
* This acts as a heartbeat, being read by the other management-nodes.
* <p>
* Promotion to master involves:
* <ol>
* <li>notifying the other management-nodes that it is now master
* <li>calling {@link RebindManager#rebind(ClassLoader, org.apache.brooklyn.api.mgmt.rebind.RebindExceptionHandler, ManagementNodeState)} to read all persisted entity state, and thus reconstitute the entities.
* </ol>
* <p>
* Future improvements in this area will include brooklyn-managing-brooklyn to decide + promote
* the standby.
*
* @since 0.7.0
*
* @author aled
*/
@Beta
public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
/**
* Config key ("brooklyn.ha.pollPeriod") for how often this node polls the management plane;
* defaults to one second. Read by {@code getPollPeriod()} unless a local override is set.
* NOTE(review): declared as an instance field (no {@code static}) despite the constant-style name - confirm this is intentional.
*/
public final ConfigKey<Duration> POLL_PERIOD = ConfigKeys.newConfigKey(Duration.class, "brooklyn.ha.pollPeriod",
"How often nodes should poll to detect whether master is healthy", Duration.seconds(1));
/**
* Config key ("brooklyn.ha.heartbeatTimeout") for the longest tolerated gap in a peer's heartbeat;
* defaults to thirty seconds. Read by {@code getHeartbeatTimeout()} unless a local override is set.
* NOTE(review): also an instance field rather than a static constant - confirm this is intentional.
*/
public final ConfigKey<Duration> HEARTBEAT_TIMEOUT = ConfigKeys.newConfigKey(Duration.class, "brooklyn.ha.heartbeatTimeout",
"Maximum allowable time for detection of a peer's heartbeat; if no sign of master after this time, "
+ "another node may promote itself", Duration.THIRTY_SECONDS);
@VisibleForTesting /* only used in tests currently */
public static interface PromotionListener {
public void promotingToMaster();
}
// Logger shared by all instances of this class.
private static final Logger LOG = LoggerFactory.getLogger(HighAvailabilityManagerImpl.class);
// Management context this HA manager is attached to (supplied to the constructor).
private final ManagementContextInternal managementContext;
// Id of this management node; refreshed from the management context in disabled() and changeMode(...).
private volatile String ownNodeId;
// Persister used to publish/read management-plane records; stays null until setPersister(...) is called.
private volatile ManagementPlaneSyncRecordPersister persister;
// Optional listener installed via setPromotionListener(...); null by default.
private volatile PromotionListener promotionListener;
// Strategy used by checkMaster(...) to pick a new master; replaceable via setMasterChooser(...).
private volatile MasterChooser masterChooser = new AlphabeticMasterChooser();
// Source of local timestamps (millis); replaceable via setLocalTicker(...).
private volatile Ticker localTickerUtc = new Ticker() {
// strictly not a ticker because returns millis UTC, but it works fine even so
@Override
public long read() {
return System.currentTimeMillis();
}
};
// Optional override for remote timestamps (see setRemoteTicker); null means no override.
private volatile Ticker optionalRemoteTickerUtc = null;
// Handle of the most recently submitted poller task, if any (see registerPollTask()).
private volatile Task<?> pollingTask;
// Set to true by disabled() and reset to false by start(...).
private volatile boolean disabled;
// Set to true by start(...) and to false by stop(...).
private volatile boolean running;
// State this node holds or is heading towards; getNodeState() may instead report INITIALIZING mid-transition.
private volatile ManagementNodeState nodeState = ManagementNodeState.INITIALIZING;
// False while changeMode(...) is part-way through moving to a hot proxy state; true otherwise.
private volatile boolean nodeStateTransitionComplete = false;
// Priority of this node, assigned through setPriority(...).
// NOTE(review): presumably carried in the published node record for the master chooser - confirm.
private volatile long priority = 0;
// Upper bound on the number of entries retained in nodeStateHistory.
private final static int MAX_NODE_STATE_HISTORY = 200;
// State-change log, newest first; each entry maps "state" and "timestamp".
// setInternalNodeState(...) mutates it while synchronized on the list itself.
private final List<Map<String,Object>> nodeStateHistory = MutableList.of();
// Local override of POLL_PERIOD (see setPollPeriod); null means use the configured value.
private volatile transient Duration pollPeriodLocalOverride;
// Local override of HEARTBEAT_TIMEOUT (see setHeartbeatTimeout); null means use the configured value.
private volatile transient Duration heartbeatTimeoutOverride;
// Value returned by getLastManagementPlaneSyncRecord(); it is assigned outside this section
// (presumably whenever the plane record is loaded - confirm).
private volatile ManagementPlaneSyncRecord lastSyncRecord;
// Success/failure metrics for writes of this node's state (updated in publishHealth()).
private volatile PersistenceActivityMetrics managementStateWritePersistenceMetrics = new PersistenceActivityMetrics();
// Metrics for reads of management-plane state; updated outside this section.
private volatile PersistenceActivityMetrics managementStateReadPersistenceMetrics = new PersistenceActivityMetrics();
// Reading of the local ticker taken when this manager was constructed.
private final long startTimeUtc;
/**
 * Creates an HA manager for the given management context and records the construction
 * time as read from the local ticker.
 *
 * @param managementContext the management context this manager belongs to
 */
public HighAvailabilityManagerImpl(ManagementContextInternal managementContext) {
    this.startTimeUtc = this.localTickerUtc.read();
    this.managementContext = managementContext;
}
/**
 * Installs the persister used to publish and read management-plane records.
 *
 * @param persister the persister to use; must not be null
 * @return this manager, for chaining
 * @throws NullPointerException if {@code persister} is null
 */
@Override
public HighAvailabilityManagerImpl setPersister(ManagementPlaneSyncRecordPersister persister) {
    checkNotNull(persister, "persister");
    this.persister = persister;
    return this;
}
/** @return the persister installed via {@code setPersister}, or null if none has been set */
@Override
public ManagementPlaneSyncRecordPersister getPersister() {
    return this.persister;
}
/**
 * Resolves the effective poll period: the local override when one is set,
 * otherwise the {@link #POLL_PERIOD} value from the brooklyn properties.
 */
protected synchronized Duration getPollPeriod() {
    return (pollPeriodLocalOverride == null)
            ? managementContext.getBrooklynProperties().getConfig(POLL_PERIOD)
            : pollPeriodLocalOverride;
}
/**
 * Sets a local override for {@link #POLL_PERIOD}, taking precedence over brooklyn config.
 * Pass e.g. {@link Duration#PRACTICALLY_FOREVER} to disable polling, or <code>null</code>
 * to drop the override again. When the manager is running the poll task is re-registered
 * so that the new period is picked up.
 *
 * @param val the override, or null to clear it
 * @return this manager, for chaining
 */
public HighAvailabilityManagerImpl setPollPeriod(Duration val) {
    pollPeriodLocalOverride = val;
    if (!running) {
        return this;
    }
    registerPollTask();
    return this;
}
/**
 * Replaces the strategy used to choose a new master.
 *
 * @param val the chooser to use; must not be null
 * @return this manager, for chaining
 * @throws NullPointerException if {@code val} is null
 */
public HighAvailabilityManagerImpl setMasterChooser(MasterChooser val) {
    checkNotNull(val, "masterChooser");
    masterChooser = val;
    return this;
}
/**
 * Resolves the effective heartbeat timeout: the local override when one is set,
 * otherwise the {@link #HEARTBEAT_TIMEOUT} value from the brooklyn properties.
 */
public synchronized Duration getHeartbeatTimeout() {
    return (heartbeatTimeoutOverride == null)
            ? managementContext.getBrooklynProperties().getConfig(HEARTBEAT_TIMEOUT)
            : heartbeatTimeoutOverride;
}
/**
 * Sets a local override for {@link #HEARTBEAT_TIMEOUT}, taking precedence over brooklyn config.
 * Pass e.g. {@link Duration#PRACTICALLY_FOREVER} to prevent failover due to heartbeat absence,
 * or <code>null</code> to drop the override again.
 *
 * @param val the override, or null to clear it
 * @return this manager, for chaining
 */
public HighAvailabilityManagerImpl setHeartbeatTimeout(Duration val) {
    heartbeatTimeoutOverride = val;
    return this;
}
/**
 * Supplies the ticker (reading in milliseconds) used to populate local timestamps.
 * The default reads System.currentTimeMillis(); tests may substitute their own.
 *
 * @param val the ticker to use; must not be null
 * @return this manager, for chaining
 * @throws NullPointerException if {@code val} is null
 */
public HighAvailabilityManagerImpl setLocalTicker(Ticker val) {
    checkNotNull(val);
    localTickerUtc = val;
    return this;
}
/**
 * Supplies a ticker (reading in milliseconds) that overrides remote timestamps.
 * The default is null, meaning the remote timestamp is used as-is.
 * Intended for tests only, because it records the remote timestamp in the object.
 * <p>
 * When a ticker is supplied here, {@link ManagementPlaneSyncRecordPersisterToObjectStore#useRemoteTimestampInMemento()}
 * must be set as well.
 *
 * @param val the overriding ticker, or null for no override
 * @return this manager, for chaining
 */
@VisibleForTesting
public HighAvailabilityManagerImpl setRemoteTicker(Ticker val) {
    optionalRemoteTickerUtc = val;
    return this;
}
/**
 * Registers the promotion listener.
 *
 * @param val the listener; must not be null
 * @return this manager, for chaining
 * @throws NullPointerException if {@code val} is null
 */
public HighAvailabilityManagerImpl setPromotionListener(PromotionListener val) {
    checkNotNull(val, "promotionListener");
    promotionListener = val;
    return this;
}
/** @return true once {@code start} has been called and until the manager is stopped */
@Override
public boolean isRunning() {
    return this.running;
}
/**
 * Marks HA as disabled for this node: flags it as disabled, refreshes the node id from
 * the management context and stops the manager with state MASTER.
 */
@Override
public void disabled() {
    this.disabled = true;
    this.ownNodeId = managementContext.getManagementNodeId();
    // this is notionally the master, just not running; see javadoc for more info
    final ManagementNodeState notionalState = ManagementNodeState.MASTER;
    stop(notionalState);
}
/**
 * Starts the HA manager in the given mode. Flags are reset (transition complete, not
 * disabled, running) and the mode change is then delegated to
 * {@code changeMode(startMode, true, true)}, i.e. with election suppressed for explicit
 * standby modes and with failure on unusual explicit modes.
 *
 * @param startMode the mode to start in
 */
@Override
public void start(HighAvailabilityMode startMode) {
    this.nodeStateTransitionComplete = true;
    this.disabled = false;
    this.running = true;
    final boolean preventElectionOnExplicitStandbyMode = true;
    final boolean failOnExplicitModesIfUnusual = true;
    changeMode(startMode, preventElectionOnExplicitStandbyMode, failOnExplicitModesIfUnusual);
}
/**
 * Changes the HA mode of this node, allowing an election for explicit standby modes and
 * not failing on unusual explicit modes (both flags of the three-argument overload are false).
 *
 * @param startMode the mode to switch to
 */
@Override
public void changeMode(HighAvailabilityMode startMode) {
    final boolean preventElectionOnExplicitStandbyMode = false;
    final boolean failOnExplicitModesIfUnusual = false;
    changeMode(startMode, preventElectionOnExplicitStandbyMode, failOnExplicitModesIfUnusual);
}
/**
 * Moves this node into the requested HA mode.
 * <p>
 * Steps, in order:
 * <ol>
 * <li>if the manager is not running, it is first started as DISABLED;
 * <li>a FAILED or INITIALIZING node is reset to INITIALIZING (unless DISABLED is requested);
 * <li>if this node is locally MASTER and a standby-type mode is requested, it is demoted first;
 * <li>the mode-specific handling runs (election for AUTO, promotion for MASTER, standby handling, or disabling);
 * <li>for HOT_STANDBY / HOT_BACKUP the hot proxy is activated, or the read-only rebind is restarted
 *     when the node was already in a hot proxy state;
 * <li>the poll task is (re-)registered unless DISABLED was requested.
 * </ol>
 *
 * @param startMode the mode to switch to
 * @param preventElectionOnExplicitStandbyMode if true, an explicit STANDBY / HOT_STANDBY / HOT_BACKUP
 *        request does not call {@code publishAndCheck}, so no election is triggered from here
 * @param failOnExplicitModesIfUnusual if true, an explicit MASTER request fails when a different healthy
 *        master already exists, and an explicit standby-type request fails when no healthy master was found
 * @throws IllegalStateException if the management context uses a different HA manager, if the requested
 *         mode is unexpected or unusual (see {@code failOnExplicitModesIfUnusual}), or if AUTO is requested
 *         in the middle of a transition to HOT_BACKUP
 */
@VisibleForTesting
@Beta
public void changeMode(HighAvailabilityMode startMode, boolean preventElectionOnExplicitStandbyMode, boolean failOnExplicitModesIfUnusual) {
    if (!running) {
        // if was not running then start as disabled mode, then proceed as normal
        LOG.info("HA changing mode to "+startMode+" from "+getInternalNodeState()+" when not running, forcing an intermediate start as DISABLED then will convert to "+startMode);
        start(HighAvailabilityMode.DISABLED);
    }
    if (getNodeState()==ManagementNodeState.FAILED || getNodeState()==ManagementNodeState.INITIALIZING) {
        if (startMode!=HighAvailabilityMode.DISABLED) {
            // if coming from FAILED (or INITIALIZING because we skipped start call) then treat as initializing
            setInternalNodeState(ManagementNodeState.INITIALIZING);
        }
    }
    ownNodeId = managementContext.getManagementNodeId();
    // TODO Small race in that we first check, and then we'll do checkMaster() on first poll,
    // so another node could have already become master or terminated in that window.
    ManagementNodeSyncRecord existingMaster = hasHealthyMaster();
    boolean weAreRecognisedAsMaster = existingMaster!=null && ownNodeId.equals(existingMaster.getNodeId());
    boolean weAreMasterLocally = getInternalNodeState()==ManagementNodeState.MASTER;
    // catch error in some tests where mgmt context has a different HA manager
    if (managementContext.getHighAvailabilityManager()!=this)
        throw new IllegalStateException("Cannot start an HA manager on a management context with a different HA manager!");
    if (weAreMasterLocally) {
        // demotion may be required; do this before triggering an election
        switch (startMode) {
        case MASTER:
        case AUTO:
        case DISABLED:
            // no action needed, will do anything necessary below (or above)
            break;
        case HOT_STANDBY:
        case HOT_BACKUP:
        case STANDBY:
            demoteTo(ManagementNodeState.of(startMode).get()); break;
        default:
            throw new IllegalStateException("Unexpected high availability mode "+startMode+" requested for "+this);
        }
    }
    ManagementNodeState oldState = getInternalNodeState();
    // now do election
    switch (startMode) {
    case AUTO:
        // don't care; let's start and see if we promote ourselves
        if (getInternalNodeState()==ManagementNodeState.INITIALIZING) {
            setInternalNodeState(ManagementNodeState.STANDBY);
        }
        publishAndCheck(true);
        switch (getInternalNodeState()) {
        case HOT_BACKUP:
            if (!nodeStateTransitionComplete) throw new IllegalStateException("Cannot switch to AUTO when in the middle of a transition to "+getInternalNodeState());
            // else change us to standby, desiring to go back to hot backup, and continue to below
            setInternalNodeState(ManagementNodeState.STANDBY);
            startMode = HighAvailabilityMode.HOT_BACKUP;
            // deliberate fall-through
        case HOT_STANDBY:
        case STANDBY:
            if (getInternalNodeState()==ManagementNodeState.STANDBY && oldState==ManagementNodeState.INITIALIZING && startMode!=HighAvailabilityMode.HOT_BACKUP
                    && BrooklynFeatureEnablement.isEnabled(BrooklynFeatureEnablement.FEATURE_DEFAULT_STANDBY_IS_HOT_PROPERTY)) {
                // auto requested; not promoted; so it should become hot standby
                startMode = HighAvailabilityMode.HOT_STANDBY;
            }
            ManagementPlaneSyncRecord newState = loadManagementPlaneSyncRecord(true);
            String masterNodeId = newState.getMasterNodeId();
            ManagementNodeSyncRecord masterNodeDetails = newState.getManagementNodes().get(masterNodeId);
            LOG.info("Management node "+ownNodeId+" running as HA " + getInternalNodeState() + " autodetected"
                + (startMode == HighAvailabilityMode.HOT_STANDBY || startMode == HighAvailabilityMode.HOT_BACKUP ?
                    " (will change to "+startMode+")" : "")
                + ", " +
                (Strings.isBlank(masterNodeId) ? "no master currently (other node should promote itself soon)" : "master "
                    + (existingMaster==null ? "(new) " : "")
                    + "is "+masterNodeId +
                    (masterNodeDetails==null || masterNodeDetails.getUri()==null ? " (no url)" : " at "+masterNodeDetails.getUri())));
            break;
        case MASTER:
            LOG.info("Management node "+ownNodeId+" running as HA MASTER autodetected");
            break;
        default:
            throw new IllegalStateException("Management node "+ownNodeId+" set to HA AUTO, encountered unexpected mode "+getInternalNodeState());
        }
        break;
    case MASTER:
        if (!failOnExplicitModesIfUnusual || existingMaster==null) {
            promoteToMaster();
            // only report "stealing" when mastership is actually taken from a different, healthy node
            if (existingMaster==null || weAreRecognisedAsMaster) {
                LOG.info("Management node "+ownNodeId+" running as HA MASTER explicitly");
            } else {
                LOG.info("Management node "+ownNodeId+" running as HA MASTER explicitly, stealing from "+existingMaster);
            }
        } else if (!weAreRecognisedAsMaster) {
            throw new IllegalStateException("Master already exists; cannot run as master (master "+existingMaster.toVerboseString()+"); "
                + "to trigger a promotion, set a priority and demote the current master");
        } else {
            LOG.info("Management node "+ownNodeId+" already running as HA MASTER, when set explicitly");
        }
        break;
    case HOT_BACKUP:
        setInternalNodeState(ManagementNodeState.HOT_BACKUP);
        // then continue into next block
    case STANDBY:
    case HOT_STANDBY:
        if (startMode!=HighAvailabilityMode.HOT_BACKUP) {
            if (ManagementNodeState.isHotProxy(getInternalNodeState()) && startMode==HighAvailabilityMode.HOT_STANDBY) {
                // if was hot_backup, we can immediately go hot_standby
                setInternalNodeState(ManagementNodeState.HOT_STANDBY);
            } else {
                // from any other state, set standby, then perhaps switch to hot_standby later on (or might become master in the next block)
                setInternalNodeState(ManagementNodeState.STANDBY);
            }
        }
        if (ManagementNodeState.isStandby(getInternalNodeState())) {
            if (!preventElectionOnExplicitStandbyMode) {
                publishAndCheck(true);
            }
            if (failOnExplicitModesIfUnusual && existingMaster==null) {
                LOG.error("Management node "+ownNodeId+" detected no master when "+startMode+" requested and existing master required; failing.");
                throw new IllegalStateException("No existing master; cannot start as "+startMode);
            }
        }
        String message = "Management node "+ownNodeId+" running as HA "+getNodeState()+" (";
        if (getNodeState().toString().equals(startMode.toString()))
            message += "explicitly requested";
        else if (startMode==HighAvailabilityMode.HOT_STANDBY && getNodeState()==ManagementNodeState.STANDBY)
            message += "caller requested "+startMode+", will attempt rebind for HOT_STANDBY next";
        else
            message += "caller requested "+startMode;
        if (getNodeState()==ManagementNodeState.MASTER) {
            message += " but election re-promoted this node)";
        } else {
            ManagementPlaneSyncRecord newState = loadManagementPlaneSyncRecord(true);
            if (Strings.isBlank(newState.getMasterNodeId())) {
                message += "); no master currently";
                if (startMode != HighAvailabilityMode.HOT_BACKUP) message += " (subsequent election may repair)";
            } else {
                message += "); master "+newState.getMasterNodeId();
            }
        }
        LOG.info(message);
        break;
    case DISABLED:
        // safe just to run even if we weren't master
        LOG.info("Management node "+ownNodeId+" HA DISABLED (was "+getInternalNodeState()+")");
        demoteTo(ManagementNodeState.FAILED);
        if (pollingTask!=null) pollingTask.cancel(true);
        break;
    default:
        throw new IllegalStateException("Unexpected high availability mode "+startMode+" requested for "+this);
    }
    if ((startMode==HighAvailabilityMode.HOT_STANDBY || startMode==HighAvailabilityMode.HOT_BACKUP)) {
        if (!ManagementNodeState.isHotProxy(oldState)) {
            // now transition to hot proxy
            nodeStateTransitionComplete = false;
            if (startMode==HighAvailabilityMode.HOT_STANDBY) {
                // if it should be hot standby, then we may need to promote
                // inform the world that we are transitioning (but not eligible for promotion while going in to hot standby)
                // (no harm in doing this twice)
                publishHealth();
            }
            try {
                activateHotProxy(ManagementNodeState.of(startMode).get()).get();
                // error above now throws
                nodeStateTransitionComplete = true;
                publishHealth();
                if (getNodeState()==ManagementNodeState.HOT_STANDBY || getNodeState()==ManagementNodeState.HOT_BACKUP) {
                    LOG.info("Management node "+ownNodeId+" now running as HA "+getNodeState()+"; "
                        + managementContext.getApplications().size()+" application"+Strings.s(managementContext.getApplications().size())+" loaded");
                } else {
                    // shouldn't come here, we should have gotten an error above
                    LOG.warn("Management node "+ownNodeId+" unable to promote to "+startMode+" (currently "+getNodeState()+"); "
                        + "(see log for further details)");
                }
            } catch (Exception e) {
                LOG.warn("Management node "+ownNodeId+" unable to promote to "+startMode+" (currently "+getNodeState()+"); rethrowing: "+Exceptions.collapseText(e));
                // mark the transition as finished even on failure, so getNodeState() stops reporting INITIALIZING
                nodeStateTransitionComplete = true;
                throw Exceptions.propagate(e);
            }
        } else {
            // transitioning among hot proxy states - tell the rebind manager
            managementContext.getRebindManager().stopReadOnly();
            managementContext.getRebindManager().startReadOnly(ManagementNodeState.of(startMode).get());
            nodeStateTransitionComplete = true;
        }
    } else {
        nodeStateTransitionComplete = true;
    }
    if (startMode!=HighAvailabilityMode.DISABLED)
        registerPollTask();
}
/**
 * Stores the priority of this node and, when a persister is available, publishes the
 * node's health straight away.
 *
 * @param priority the new priority
 */
@Override
public void setPriority(long priority) {
    this.priority = priority;
    if (persister == null) {
        return;
    }
    publishHealth();
}
/** @return the priority last assigned through {@code setPriority} (0 by default) */
@Override
public long getPriority() {
    return this.priority;
}
/** Stops this manager, leaving the node in state TERMINATED. */
@Override
public void stop() {
    LOG.debug("Stopping "+this);
    final ManagementNodeState finalState = ManagementNodeState.TERMINATED;
    stop(finalState);
}
/**
 * Stops the manager and moves the node to the given state: clears the running flag,
 * records the new state and cancels any polling task. If the manager had been running,
 * a final health record is published on a best-effort basis (non-fatal errors are logged
 * and swallowed).
 *
 * @param newState the state to leave this node in
 */
private void stop(ManagementNodeState newState) {
    final boolean previouslyRunning = running;
    running = false;
    setInternalNodeState(newState);
    if (pollingTask != null) {
        pollingTask.cancel(true);
    }
    if (!previouslyRunning) {
        return;
    }
    try {
        publishHealth();
    } catch (Exception e) {
        Exceptions.propagateIfFatal(e);
        LOG.error("Problem publishing manager-node health on termination (continuing)", e);
    }
}
/**
 * Returns the node state this node is trying to be in, i.e. the internal state,
 * regardless of whether the transition into it has completed.
 */
public ManagementNodeState getTransitionTargetNodeState() {
    final ManagementNodeState target = getInternalNodeState();
    return target;
}
/** @return the raw internal node state, without the adjustments applied by {@code getNodeState()} */
protected ManagementNodeState getInternalNodeState() {
    return this.nodeState;
}
/**
 * Records a new internal node state.
 * <p>
 * While holding the lock on the history list: a changed state is pushed to the front of the
 * history (trimmed to {@code MAX_NODE_STATE_HISTORY} entries), the rebind manager is told
 * whether an initial rebind is awaited (running and heading to a hot proxy state or MASTER),
 * and the state field is updated. Afterwards, when leaving a hot proxy state for a
 * non-hot-proxy one, read-only mode is stopped and the managed items are cleared.
 *
 * @param newState the state to record
 */
protected void setInternalNodeState(ManagementNodeState newState) {
    final ManagementNodeState previousState = getInternalNodeState();
    synchronized (nodeStateHistory) {
        final boolean stateChanged = (this.nodeState != newState);
        if (stateChanged) {
            nodeStateHistory.add(0, MutableMap.<String,Object>of("state", newState, "timestamp", currentTimeMillis()));
            while (nodeStateHistory.size() > MAX_NODE_STATE_HISTORY) {
                final int oldestIndex = nodeStateHistory.size() - 1;
                nodeStateHistory.remove(oldestIndex);
            }
        }
        final RebindManagerImpl rebindManager = (RebindManagerImpl) managementContext.getRebindManager();
        final boolean awaitingInitialRebind = running
                && (ManagementNodeState.isHotProxy(newState) || newState == ManagementNodeState.MASTER);
        rebindManager.setAwaitingInitialRebind(awaitingInitialRebind);
        this.nodeState = newState;
    }
    final boolean leavingHotProxy = ManagementNodeState.isHotProxy(previousState) && !ManagementNodeState.isHotProxy(newState);
    if (leavingHotProxy) {
        // could perhaps promote standby items on some transitions; but for now we stop the old read-only and re-load them
        // TODO ideally there'd be an incremental rebind as well as an incremental persist
        managementContext.getRebindManager().stopReadOnly();
        clearManagedItems(ManagementTransitionMode.transitioning(BrooklynObjectManagementMode.LOADED_READ_ONLY, BrooklynObjectManagementMode.UNMANAGED_PERSISTED));
    }
}
/**
 * Returns the externally visible node state: FAILED and MASTER are reported as-is,
 * any other state is reported as INITIALIZING until the transition into it has completed.
 */
@Override
public ManagementNodeState getNodeState() {
    final ManagementNodeState current = getInternalNodeState();
    if (current == ManagementNodeState.FAILED) {
        return getInternalNodeState();
    }
    // if target is master then we claim already being master, to prevent other nodes from taking it
    // (we may fail subsequently of course)
    if (current == ManagementNodeState.MASTER || nodeStateTransitionComplete) {
        return current;
    }
    return ManagementNodeState.INITIALIZING;
}
/** @return the most recently stored management-plane record, or null if none has been stored yet */
public ManagementPlaneSyncRecord getLastManagementPlaneSyncRecord() {
    return this.lastSyncRecord;
}
/**
 * Schedules the periodic HA poller, which calls {@code publishAndCheck(false)} every poll period.
 * Any previously registered poller is cancelled before the new one is submitted. When the poll
 * period is {@link Duration#PRACTICALLY_FOREVER} nothing is scheduled (and nothing is cancelled).
 * <p>
 * The poller logs the first failure in a run of failures at error level and subsequent
 * ones at debug level; failures after the manager has stopped are logged at debug level only.
 */
@SuppressWarnings("unchecked")
protected void registerPollTask() {
    final Runnable pollBody = new Runnable() {
        private boolean previousRunFailed;
        @Override
        public void run() {
            try {
                publishAndCheck(false);
                previousRunFailed = false;
            } catch (Exception e) {
                if (!running) {
                    if (LOG.isDebugEnabled()) LOG.debug("Problem in HA-poller, but no longer running: "+e, e);
                } else if (previousRunFailed) {
                    if (LOG.isDebugEnabled()) LOG.debug("Recurring problem in HA-poller: "+e, e);
                } else {
                    LOG.error("Problem in HA-poller: "+e, e);
                    previousRunFailed = true;
                }
            } catch (Throwable t) {
                LOG.error("Problem in HA-poller: "+t, t);
                throw Exceptions.propagate(t);
            }
        }
    };
    final Callable<Task<?>> pollTaskFactory = new Callable<Task<?>>() {
        @Override
        public Task<?> call() {
            return Tasks.builder()
                    .dynamic(false)
                    .body(pollBody)
                    .displayName("HA poller task")
                    .tag(BrooklynTaskTags.TRANSIENT_TASK_TAG)
                    .description("polls HA status to see whether this node should promote")
                    .build();
        }
    };
    final Duration period = getPollPeriod();
    LOG.debug("Registering poll task for "+this+", period "+period);
    if (period.equals(Duration.PRACTICALLY_FOREVER)) {
        // don't schedule - used for tests
        // (scheduling fires off one initial task in the background before the delay,
        // which affects tests that want to know exactly when publishing happens;
        // TODO would be nice if scheduled task had a "no initial submission" flag )
        return;
    }
    if (pollingTask != null) {
        pollingTask.cancel(true);
    }
    ScheduledTask scheduled = new ScheduledTask(MutableMap.of("period", period, "displayName", "scheduled:[HA poller task]"), pollTaskFactory);
    pollingTask = managementContext.getExecutionManager().submit(scheduled);
}
/**
 * Publishes this node's health and then checks the master, in that order.
 * Invoked manually when initializing, and periodically thereafter by the poller.
 *
 * @param initializing passed through to {@code checkMaster}; true for the manual, initial invocation
 */
@VisibleForTesting
public synchronized void publishAndCheck(boolean initializing) {
    this.publishHealth();
    this.checkMaster(initializing);
}
/**
 * Writes this node's current sync record through the persister, noting success or failure
 * (with duration) in the write metrics. Without a persister this only logs and returns.
 * Any error raised while publishing is recorded and then rethrown.
 */
protected synchronized void publishHealth() {
    if (persister == null) {
        LOG.info("Cannot publish management-node health as no persister");
        return;
    }
    final Stopwatch elapsed = Stopwatch.createStarted();
    try {
        final ManagementNodeSyncRecord ownRecord = createManagementNodeSyncRecord(false);
        persister.delta(ManagementPlaneSyncRecordDeltaImpl.builder().node(ownRecord).build());
        managementStateWritePersistenceMetrics.noteSuccess(Duration.of(elapsed));
        if (LOG.isTraceEnabled()) {
            LOG.trace("Published management-node health: {}", ownRecord);
        }
    } catch (Throwable failure) {
        managementStateWritePersistenceMetrics.noteFailure(Duration.of(elapsed));
        managementStateWritePersistenceMetrics.noteError(failure.toString());
        LOG.debug("Error publishing management-node health (rethrowing): "+failure);
        throw Exceptions.propagate(failure);
    }
}
/**
 * Removes from the persisted management plane every node record except the one that both
 * claims MASTER status and is recognised by the plane as master, then reloads the plane
 * record so that the in-memory model is up to date. Only warns and returns when there is
 * no persister or no previously loaded plane record.
 */
public void publishClearNonMaster() {
    final ManagementPlaneSyncRecord plane = getLastManagementPlaneSyncRecord();
    if (plane==null || persister==null) {
        LOG.warn("Cannot clear HA node records; HA not active (or not yet loaded)");
        return;
    }
    final ManagementPlaneSyncRecordDeltaImpl.Builder removals = ManagementPlaneSyncRecordDeltaImpl.builder();
    for (Map.Entry<String,ManagementNodeSyncRecord> entry : plane.getManagementNodes().entrySet()) {
        // only keep a node if it both claims master and is recognised as master;
        // else ex-masters who died are kept around!
        final boolean confirmedMaster = ManagementNodeState.MASTER.equals(entry.getValue().getStatus())
                && Objects.equal(plane.getMasterNodeId(), entry.getValue().getNodeId());
        if (confirmedMaster) {
            continue;
        }
        removals.removedNodeId(entry.getKey());
    }
    persister.delta(removals.build());
    // then get, so model is updated
    loadManagementPlaneSyncRecord(true);
}
/**
 * Publishes this node's record after a demotion; when demoting from master the plane's
 * master entry for this node is cleared in the same delta. Without a persister this only
 * logs and returns.
 *
 * @param demotingFromMaster whether the master marker held by this node should be cleared as well
 * @throws IllegalStateException if this node still reports itself as MASTER
 */
protected synchronized void publishDemotion(boolean demotingFromMaster) {
    checkState(getNodeState() != ManagementNodeState.MASTER,
            "node status must not be master when demoting", getNodeState());
    if (persister == null) {
        LOG.info("Cannot publish management-node health as no persister");
        return;
    }
    final ManagementNodeSyncRecord ownRecord = createManagementNodeSyncRecord(false);
    final ManagementPlaneSyncRecordDeltaImpl.Builder changes =
            ManagementPlaneSyncRecordDeltaImpl.builder().node(ownRecord);
    if (demotingFromMaster) {
        changes.clearMaster(ownNodeId);
    }
    persister.delta(changes.build());
    if (LOG.isTraceEnabled()) {
        LOG.trace("Published management-node health: {}", ownRecord);
    }
}
/**
 * Publishes (via {@link #persister}) this management node's record together with a marker
 * naming this node as master. Without a persister this only logs and returns.
 *
 * @throws IllegalStateException if this node does not report itself as MASTER
 */
protected synchronized void publishPromotionToMaster() {
    checkState(getNodeState() == ManagementNodeState.MASTER,
            "node status must be master on publish, but is %s", getNodeState());
    if (persister == null) {
        LOG.info("Cannot publish management-node health as no persister");
        return;
    }
    final ManagementNodeSyncRecord ownRecord = createManagementNodeSyncRecord(false);
    persister.delta(ManagementPlaneSyncRecordDeltaImpl.builder()
            .node(ownRecord)
            .setMaster(ownNodeId)
            .build());
    if (LOG.isTraceEnabled()) {
        LOG.trace("Published management-node health: {}", ownRecord);
    }
}
/**
 * Decides whether the master's heartbeat is recent enough, by comparing the master's remote
 * timestamp against this node's own remote timestamp.
 *
 * @param masterNode record of the presumed master; null yields false
 * @param meNode record of this node; null yields true, as the master cannot be shown unhealthy
 * @return true if the master's timestamp lags this node's by no more than the heartbeat timeout;
 *         false if either timestamp is missing
 */
protected boolean isHeartbeatOk(ManagementNodeSyncRecord masterNode, ManagementNodeSyncRecord meNode) {
    if (masterNode == null) {
        return false;
    }
    if (meNode == null) {
        // we can't confirm it's healthy, but it appears so as far as we can tell
        return true;
    }
    final Long masterStamp = masterNode.getRemoteTimestamp();
    final Long ownStamp = meNode.getRemoteTimestamp();
    if (masterStamp == null || ownStamp == null) {
        return false;
    }
    final long lag = ownStamp - masterStamp;
    return lag <= getHeartbeatTimeout().toMilliseconds();
}
/**
 * Loads the management-plane record and looks for a healthy master in it.
 *
 * @return the master's record if the plane names a master whose status is MASTER and whose
 *         heartbeat is ok relative to this node's record; otherwise null
 */
protected ManagementNodeSyncRecord hasHealthyMaster() {
    final ManagementPlaneSyncRecord plane = loadManagementPlaneSyncRecord(false);
    final String masterId = plane.getMasterNodeId();
    final ManagementNodeSyncRecord masterRecord;
    if (masterId == null) {
        masterRecord = null;
    } else {
        masterRecord = plane.getManagementNodes().get(masterId);
    }
    final ManagementNodeSyncRecord ownRecord = plane.getManagementNodes().get(ownNodeId);
    final boolean healthy = masterRecord != null
            && masterRecord.getStatus() == ManagementNodeState.MASTER
            && isHeartbeatOk(masterRecord, ownRecord);
    if (LOG.isDebugEnabled()) {
        final Object masterSummary = (masterRecord == null ? "<none>" : masterRecord.toVerboseString());
        final Object ownSummary = (ownRecord == null ? "<none>" : ownRecord.toVerboseString());
        LOG.debug("Healthy-master check result={}; masterId={}; masterMemento={}; ourMemento={}",
                new Object[] {healthy, masterId, masterSummary, ownSummary});
    }
    if (!healthy) {
        return null;
    }
    return masterRecord;
}
/**
* Looks up the state of all nodes in the management plane, and checks if the master is still ok.
* If it's not then determines which node should be promoted to master. If it is ourself, then promotes.
*/
protected void checkMaster(boolean initializing) {
ManagementPlaneSyncRecord memento = loadManagementPlaneSyncRecord(false);
if (getNodeState()==ManagementNodeState.FAILED || getNodeState()==ManagementNodeState.HOT_BACKUP) {
// if failed or hot backup then we can't promote ourselves, so no point in checking who is master
return;
}
String currMasterNodeId = memento.getMasterNodeId();
ManagementNodeSyncRecord currMasterNodeRecord = memento.getManagementNodes().get(currMasterNodeId);
ManagementNodeSyncRecord ownNodeRecord = memento.getManagementNodes().get(ownNodeId);
ManagementNodeSyncRecord newMasterNodeRecord = null;
boolean demotingSelfInFavourOfOtherMaster = false;
if (currMasterNodeRecord != null && currMasterNodeRecord.getStatus() == ManagementNodeState.MASTER && isHeartbeatOk(currMasterNodeRecord, ownNodeRecord)) {
// master seems healthy
if (ownNodeId.equals(currMasterNodeId)) {
if (LOG.isTraceEnabled()) LOG.trace("Existing master healthy (us): master={}", currMasterNodeRecord.toVerboseString());
return;
} else {
if (ownNodeRecord!=null && ownNodeRecord.getStatus() == ManagementNodeState.MASTER) {
LOG.error("Management node "+ownNodeId+" detected master change, stolen from us, deferring to "+currMasterNodeId);
newMasterNodeRecord = currMasterNodeRecord;
demotingSelfInFavourOfOtherMaster = true;
} else {
if (LOG.isTraceEnabled()) LOG.trace("Existing master healthy (remote): master={}", currMasterNodeRecord.toVerboseString());
return;
}
}
} else if (ownNodeRecord == null || !isHeartbeatOk(ownNodeRecord, ownNodeRecord)) {
// our heartbeats are also out-of-date! perhaps something wrong with persistence? just log, and don't over-react!
if (ownNodeRecord == null) {
LOG.error("No management node memento for self ("+ownNodeId+"); perhaps persister unwritable? "
+ "Master ("+currMasterNodeId+") reported failed but no-op as cannot tell conclusively");
} else {
LOG.error("This management node ("+ownNodeId+") memento heartbeats out-of-date; perhaps perister unwritable? "
+ "Master ("+currMasterNodeId+") reported failed but no-op as cannot tell conclusively"
+ ": self="+ownNodeRecord.toVerboseString());
}
return;
} else if (ownNodeId.equals(currMasterNodeId)) {
// we are supposed to be the master, but seem to be unhealthy!
LOG.warn("This management node ("+ownNodeId+") supposed to be master but reportedly unhealthy? "
+ "no-op as expect other node to fix: self="+ownNodeRecord.toVerboseString());
return;
}
if (demotingSelfInFavourOfOtherMaster) {
LOG.debug("Master-change for this node only, demoting "+ownNodeRecord.toVerboseString()+" in favour of official master "+newMasterNodeRecord.toVerboseString());
demoteTo(
BrooklynFeatureEnablement.isEnabled(BrooklynFeatureEnablement.FEATURE_DEFAULT_STANDBY_IS_HOT_PROPERTY) ?
ManagementNodeState.HOT_STANDBY : ManagementNodeState.STANDBY);
return;
} else {
LOG.debug("Detected master heartbeat timeout. Initiating a new master election. Master was " + currMasterNodeRecord);
}
// Need to choose a new master
newMasterNodeRecord = masterChooser.choose(memento, getHeartbeatTimeout(), ownNodeId);
String newMasterNodeId = (newMasterNodeRecord == null) ? null : newMasterNodeRecord.getNodeId();
URI newMasterNodeUri = (newMasterNodeRecord == null) ? null : newMasterNodeRecord.getUri();
boolean weAreNewMaster = ownNodeId.equals(newMasterNodeId);
if (LOG.isDebugEnabled()) {
LOG.debug("Management node master-change required: newMaster={}; oldMaster={}; plane={}, self={}; heartbeatTimeout={}",
new Object[] {
(newMasterNodeRecord == null ? "<none>" : newMasterNodeRecord.toVerboseString()),
(currMasterNodeRecord == null ? currMasterNodeId+" (no memento)": currMasterNodeRecord.toVerboseString()),
memento,
ownNodeRecord.toVerboseString(),
getHeartbeatTimeout()
});
}
String message = "Management node "+ownNodeId+" detected ";
String currMasterSummary = currMasterNodeId + "(" + (currMasterNodeRecord==null ? "<none>" : timestampString(currMasterNodeRecord.getRemoteTimestamp())) + ")";
if (weAreNewMaster && (ownNodeRecord.getStatus() == ManagementNodeState.MASTER)) {
LOG.warn(message + "we must reassert master status, as was stolen and then failed at "+
(currMasterNodeRecord==null ? "a node which has gone away" : currMasterSummary));
publishPromotionToMaster();
publishHealth();
return;
}
if (!initializing) {
if (weAreNewMaster) {
message += "we should be master, changing from ";
}
else if (currMasterNodeRecord==null && newMasterNodeId==null) message += "master change attempted but no candidates ";
else message += "master change, from ";
message += currMasterSummary + " to "
+ (newMasterNodeId == null ? "<none>" :
(weAreNewMaster ? "us " : "")
+ newMasterNodeId + " (" + timestampString(newMasterNodeRecord.getRemoteTimestamp()) + ")"
+ (newMasterNodeUri!=null ? " "+newMasterNodeUri : "") );
// always log, if you're looking at a standby node it's useful to see the new master's URL
LOG.info(message);
}
// New master is ourself: promote
if (weAreNewMaster) {
promoteToMaster();
}
}
/**
 * Renders a remote timestamp for use in log messages.
 *
 * @param remoteTimestamp the timestamp to render; may be {@code null}
 * @return {@code null} if {@code remoteTimestamp} is {@code null}; otherwise the raw value,
 *         followed by the rounded duration obtained from {@code Duration.sinceUtc} and the word "ago"
 */
private static String timestampString(Long remoteTimestamp) {
    if (remoteTimestamp != null) {
        String elapsed = Time.makeTimeStringRounded(Duration.sinceUtc(remoteTimestamp));
        return remoteTimestamp + " / " + elapsed + " ago";
    }
    return null;
}
/**
 * Promotes this node to {@link ManagementNodeState#MASTER}.
 * <p>
 * Does nothing (other than logging a warning) if this manager is not running. Otherwise the steps are,
 * in order: notify the promotion listener (if one is set), set the internal node state to MASTER,
 * publish the promotion, rebind, and finally start the rebind manager.
 * <p>
 * If the rebind throws, the problem is logged, this node is demoted to {@link ManagementNodeState#FAILED}
 * and the exception is rethrown via {@code Exceptions.propagate}; the rebind manager is not started in that case.
 */
protected void promoteToMaster() {
if (!running) {
LOG.warn("Ignoring promote-to-master request, as HighAvailabilityManager is not running");
return;
}
if (promotionListener != null) {
try {
promotionListener.promotingToMaster();
} catch (Exception e) {
// a failing listener must not block promotion: once propagateIfFatal has had its say, just log and carry on
Exceptions.propagateIfFatal(e);
LOG.warn("Problem in promption-listener (continuing)", e);
}
}
// record and publish the new state before rebinding; the rebind is passed the state that was just set
setInternalNodeState(ManagementNodeState.MASTER);
publishPromotionToMaster();
try {
managementContext.getRebindManager().rebind(managementContext.getCatalogClassLoader(), null, getInternalNodeState());
} catch (Exception e) {
// only the exception's string form is logged here; the exception itself is rethrown below
LOG.error("Management node "+managementContext.getManagementNodeId()+" enountered problem during rebind when promoting self to master; demoting to FAILED and rethrowing: "+e);
demoteTo(ManagementNodeState.FAILED);
throw Exceptions.propagate(e);
}
// only reached when the rebind succeeded
managementContext.getRebindManager().start();
}
/**
 * Creates a {@code CreateBackupMode.DEMOTION} backup (copy mode {@link MementoCopyMode#LOCAL}) when the
 * {@code BrooklynServerConfig.PERSISTENCE_BACKUPS_REQUIRED_ON_DEMOTION} property is set; otherwise does nothing.
 */
protected void backupOnDemotionIfNeeded() {
    boolean backupRequired = managementContext.getBrooklynProperties()
            .getConfig(BrooklynServerConfig.PERSISTENCE_BACKUPS_REQUIRED_ON_DEMOTION);
    if (!backupRequired) {
        return;
    }
    BrooklynPersistenceUtils.createBackup(managementContext, CreateBackupMode.DEMOTION, MementoCopyMode.LOCAL);
}
/**
 * Demotes this node to {@link ManagementNodeState#FAILED}.
 *
 * @deprecated since 0.7.0, use {@link #demoteTo(ManagementNodeState)}
 */
@Deprecated
protected void demoteToFailed() {
    final ManagementNodeState target = ManagementNodeState.FAILED;
    demoteTo(target);
}
/**
 * Demotes this node to {@link ManagementNodeState#HOT_STANDBY} when {@code hot} is true,
 * or to {@link ManagementNodeState#STANDBY} otherwise.
 *
 * @deprecated since 0.7.0, use {@link #demoteTo(ManagementNodeState)}
 */
@Deprecated
protected void demoteToStandby(boolean hot) {
    if (hot) {
        demoteTo(ManagementNodeState.HOT_STANDBY);
    } else {
        demoteTo(ManagementNodeState.STANDBY);
    }
}
/**
 * Moves this node to the given non-master state, stopping and unmanaging what it currently manages.
 * <p>
 * Accepted targets are {@code FAILED}, {@code STANDBY}, {@code HOT_STANDBY} and {@code HOT_BACKUP}.
 * If this manager is not running the request is ignored (with a warning), unless the target is {@code FAILED}.
 * <p>
 * For the two "hot" targets, the read-only proxy is activated (via {@link #activateHotProxy(ManagementNodeState)})
 * after the demotion has been published, and health is published again afterwards.
 *
 * @param toState the state to demote to
 * @throws IllegalStateException if {@code toState} is not one of the accepted targets
 */
protected void demoteTo(ManagementNodeState toState) {
// demotion to FAILED is always honoured, even when no longer running
if (toState!=ManagementNodeState.FAILED && !running) {
LOG.warn("Ignoring demote-from-master request, as HighAvailabilityManager is no longer running");
return;
}
boolean wasMaster = (getInternalNodeState() == ManagementNodeState.MASTER);
// the backup is taken before anything is stopped or unmanaged
if (wasMaster) backupOnDemotionIfNeeded();
// TODO target may be RO ?
// items go from primary (if we were master) or read-only (otherwise) to unmanaged-but-persisted
ManagementTransitionMode mode = ManagementTransitionMode.transitioning(
wasMaster ? BrooklynObjectManagementMode.MANAGED_PRIMARY : BrooklynObjectManagementMode.LOADED_READ_ONLY,
BrooklynObjectManagementMode.UNMANAGED_PERSISTED);
// flag that a transition is in progress; note it stays false if the switch below rejects the target
nodeStateTransitionComplete = false;
switch (toState) {
case FAILED:
case HOT_BACKUP:
case STANDBY:
setInternalNodeState(toState); break;
case HOT_STANDBY:
// go via plain STANDBY first; the HOT_STANDBY state itself is set by activateHotProxy further down
setInternalNodeState(ManagementNodeState.STANDBY); break;
default:
throw new IllegalStateException("Illegal target state: "+toState);
}
onDemotionStopItems(mode);
nodeStateTransitionComplete = true;
publishDemotion(wasMaster);
if (toState==ManagementNodeState.HOT_BACKUP || toState==ManagementNodeState.HOT_STANDBY) {
// activateHotProxy requires the transition flag to be false, so re-enter the transitioning state
nodeStateTransitionComplete = false;
try {
// NOTE(review): get() presumably rethrows the error held by the returned reference on failure - confirm
activateHotProxy(toState).get();
} finally {
// always leave the transitioning state, whether or not activation succeeded
nodeStateTransitionComplete = true;
}
publishHealth();
}
}
/**
 * Stops persistence and read-only mode on the rebind manager, then clears all managed items.
 *
 * @param mode the transition mode handed to {@link #clearManagedItems(ManagementTransitionMode)}
 */
protected void onDemotionStopItems(ManagementTransitionMode mode) {
    // first halt persistence and read-only activity ...
    RebindManager rebindManager = managementContext.getRebindManager();
    rebindManager.stopPersistence();
    rebindManager.stopReadOnly();

    // ... then remove all apps etc; tasks are cleared as part of unmanaging the entities
    clearManagedItems(mode);
}
/**
 * Clears all managed items from the management context; the same items are destroyed
 * as in the course of a rebind cycle.
 * <p>
 * Deployed root applications are unmanaged first, then every entity still known to the entity manager,
 * then locations (parent-less ones first, followed by all of them), and finally the catalog is reset
 * to an empty instance.
 *
 * @param mode the transition mode passed to each {@code unmanage} call
 */
protected void clearManagedItems(ManagementTransitionMode mode) {
    // root applications go first
    for (Application app : managementContext.getApplications()) {
        EntityInternal appInternal = (EntityInternal) app;
        if (!appInternal.getManagementSupport().isDeployed()) {
            continue;
        }
        LocalEntityManager appEntityManager =
                (LocalEntityManager) appInternal.getManagementContext().getEntityManager();
        appEntityManager.unmanage(app, mode);
    }

    // for active management the pass above removes recursively at present,
    // but for read-only (and if we stop recursing) every remaining entity is visited here
    for (Entity entity : managementContext.getEntityManager().getEntities()) {
        LocalEntityManager entityManager = (LocalEntityManager) managementContext.getEntityManager();
        entityManager.unmanage(entity, mode);
    }

    // likewise for locations: parents (those with no parent of their own) first ...
    for (Location loc : managementContext.getLocationManager().getLocations()) {
        if (loc.getParent() != null) {
            continue;
        }
        LocationManagerInternal locationManager = (LocationManagerInternal) managementContext.getLocationManager();
        locationManager.unmanage(loc, mode);
    }
    // ... then whatever locations are still listed
    for (Location loc : managementContext.getLocationManager().getLocations()) {
        LocationManagerInternal locationManager = (LocationManagerInternal) managementContext.getLocationManager();
        locationManager.unmanage(loc, mode);
    }

    BasicBrooklynCatalog catalog = (BasicBrooklynCatalog) managementContext.getCatalog();
    catalog.reset(CatalogDto.newEmptyInstance("<reset-by-ha-status-change>"));
}
/**
 * Tries to activate {@link ManagementNodeState#HOT_STANDBY} and returns the outcome
 * as reported by {@link #activateHotProxy(ManagementNodeState)}.
 *
 * @return the boolean carried by the result of {@code activateHotProxy}, read with {@code getWithoutError()}
 * @deprecated since 0.7.0, use {@link #activateHotProxy(ManagementNodeState)}
 */
@Deprecated
protected boolean attemptHotStandby() {
    ReferenceWithError<Boolean> outcome = activateHotProxy(ManagementNodeState.HOT_STANDBY);
    return outcome.getWithoutError();
}
/**
 * Starts hot standby or hot backup, in the foreground.
 * <p>
 * Must be called while a node-state transition is in progress (that is, while
 * {@code nodeStateTransitionComplete} is false). On success the internal node state is set to
 * {@code toState} and the rebind manager is started in read-only mode; for hot standby the caller
 * is responsible for publishing health afterwards.
 * <p>
 * On any non-fatal failure this method logs a warning and calls
 * {@link #demoteTo(ManagementNodeState)} with {@link ManagementNodeState#FAILED}.
 *
 * @param toState the hot state to activate
 * @return a reference holding {@code true} when the requested state was activated, or holding
 *         {@code false} together with the causing exception when it was not; callers may want to rethrow
 */
protected ReferenceWithError<Boolean> activateHotProxy(ManagementNodeState toState) {
    try {
        Preconditions.checkState(!nodeStateTransitionComplete, "Must be in transitioning state to go into "+toState);
        setInternalNodeState(toState);
        managementContext.getRebindManager().startReadOnly(toState);
    } catch (Exception failure) {
        Exceptions.propagateIfFatal(failure);
        LOG.warn("Unable to change "+ownNodeId+" to "+toState+", switching to FAILED: "+failure, failure);
        demoteTo(ManagementNodeState.FAILED);
        return ReferenceWithError.newInstanceThrowingError(false, failure);
    }
    return ReferenceWithError.newInstanceWithoutError(true);
}
/**
 * Loads the management-plane sync record and stores it in {@code lastSyncRecord} before returning it.
 *
 * @param useLocalKnowledgeForThisNode passed through unchanged to the internal loader
 */
@Override
public ManagementPlaneSyncRecord loadManagementPlaneSyncRecord(boolean useLocalKnowledgeForThisNode) {
    final ManagementPlaneSyncRecord loaded = loadManagementPlaneSyncRecordInternal(useLocalKnowledgeForThisNode);
    // remember the record that was just loaded
    lastSyncRecord = loaded;
    return loaded;
}
/**
 * Loads the management-plane record, retrying on {@link IOException}.
 * <ul>
 * <li>When HA is disabled, a record describing only this node is built locally (no persister access).
 * <li>When there is no persister yet, an empty record is returned.
 * <li>Otherwise the persister is asked for the record, up to 5 times. Outcomes are noted on
 *     {@code managementStateReadPersistenceMetrics}.
 * </ul>
 *
 * @param useLocalKnowledgeForThisNode if true, the persisted entry for this node is overlaid with a freshly
 *        created local record, other nodes are passed through {@link MarkAwolNodes} (when this node's
 *        remote timestamp is known), and this node is reported as master if it is transitioning to master
 * @throws IllegalStateException if every load attempt failed with an {@link IOException}
 */
private ManagementPlaneSyncRecord loadManagementPlaneSyncRecordInternal(boolean useLocalKnowledgeForThisNode) {
    if (disabled) {
        // if HA is disabled, then we are the only node - no persistence; just load a memento to describe this node
        Builder soloPlane = ManagementPlaneSyncRecordImpl.builder()
                .node(createManagementNodeSyncRecord(true));
        if (getTransitionTargetNodeState() == ManagementNodeState.MASTER) {
            soloPlane.masterNodeId(ownNodeId);
        }
        return soloPlane.build();
    }

    if (persister == null) {
        // e.g. web-console may be polling before we've started up
        LOG.debug("High availablity manager has no persister; returning empty record");
        return ManagementPlaneSyncRecordImpl.builder().build();
    }

    final int maxLoadAttempts = 5;
    Exception lastException = null;
    Stopwatch timer = Stopwatch.createStarted();

    for (int attempt = 1; attempt <= maxLoadAttempts; attempt++) {
        try {
            ManagementPlaneSyncRecord result = persister.loadSyncRecord();

            if (useLocalKnowledgeForThisNode) {
                // Report this node's most recent state, and detect AWOL nodes
                ManagementNodeSyncRecord self = BasicManagementNodeSyncRecord.builder()
                        .from(result.getManagementNodes().get(ownNodeId), true)
                        .from(createManagementNodeSyncRecord(false), true)
                        .build();

                Iterable<ManagementNodeSyncRecord> planeNodes = result.getManagementNodes().values();
                if (self.getRemoteTimestamp() != null) {
                    planeNodes = Iterables.transform(planeNodes, new MarkAwolNodes(self));
                }

                Builder localView = ManagementPlaneSyncRecordImpl.builder()
                        .masterNodeId(result.getMasterNodeId())
                        .nodes(planeNodes)
                        .node(self);
                if (getTransitionTargetNodeState() == ManagementNodeState.MASTER) {
                    localView.masterNodeId(ownNodeId);
                }
                result = localView.build();
            }

            if (attempt > 1) {
                managementStateReadPersistenceMetrics.noteError("Succeeded only on attempt "+attempt+": "+lastException);
            }
            managementStateReadPersistenceMetrics.noteSuccess(Duration.of(timer));
            return result;

        } catch (IOException loadFailure) {
            // the final failed attempt is not logged here; it is reported via the exception thrown below
            if (attempt < maxLoadAttempts && LOG.isDebugEnabled()) {
                LOG.debug("Problem loading mangement-plane memento attempt "+attempt+"/"+maxLoadAttempts+"; retrying", loadFailure);
            }
            lastException = loadFailure;
        }
    }

    String message = "Failed to load mangement-plane memento "+maxLoadAttempts+" consecutive times";
    managementStateReadPersistenceMetrics.noteError(message+": "+lastException);
    managementStateReadPersistenceMetrics.noteFailure(Duration.of(timer));
    throw new IllegalStateException(message, lastException);
}
/**
 * Builds a sync record describing this node as of now: Brooklyn version, node id, node state,
 * priority, local timestamp and management URI (if any).
 *
 * @param useLocalTimestampAsRemoteTimestamp if true, the remote timestamp is set to the same value as the
 *        local timestamp; if false, it is read from {@code optionalRemoteTickerUtc} when that ticker is set,
 *        and otherwise left unset
 * @return the newly built record
 */
protected ManagementNodeSyncRecord createManagementNodeSyncRecord(boolean useLocalTimestampAsRemoteTimestamp) {
    final long now = currentTimeMillis();
    org.apache.brooklyn.core.mgmt.ha.dto.BasicManagementNodeSyncRecord.Builder record =
            BasicManagementNodeSyncRecord.builder()
                    .brooklynVersion(BrooklynVersion.get())
                    .nodeId(ownNodeId)
                    .status(getNodeState())
                    .priority(getPriority())
                    .localTimestamp(now)
                    .uri(managementContext.getManagementNodeUri().orNull());

    if (useLocalTimestampAsRemoteTimestamp) {
        record.remoteTimestamp(now);
    } else if (optionalRemoteTickerUtc != null) {
        record.remoteTimestamp(optionalRemoteTickerUtc.read());
    }
    return record.build();
}
/**
 * Returns the current time as read from the {@link #localTickerUtc}.
 * <p>
 * Normally this is equivalent to {@link System#currentTimeMillis()}, but in test environments a custom
 * {@link Ticker} can be injected via {@link #setLocalTicker(Ticker)} to exercise specific timing scenarios.
 *
 * @return the value read from the local ticker
 */
protected long currentTimeMillis() {
    final long now = localTickerUtc.read();
    return now;
}
/**
 * Infers the health of a node relative to a reference node.
 * <p>
 * If the node last reported itself as {@code STANDBY}, {@code HOT_STANDBY}, {@code MASTER} or
 * {@code HOT_BACKUP}, but {@code isHeartbeatOk} says its heartbeat is not OK with respect to the
 * reference node, a copy of its record with status {@code FAILED} is returned. In every other case
 * the record is returned as-is ({@code null} stays {@code null}).
 */
private class MarkAwolNodes implements Function<ManagementNodeSyncRecord, ManagementNodeSyncRecord> {
    /** The node whose record the heartbeat of each input is checked against. */
    private final ManagementNodeSyncRecord referenceNode;

    private MarkAwolNodes(ManagementNodeSyncRecord referenceNode) {
        this.referenceNode = referenceNode;
    }

    @Nullable
    @Override
    public ManagementNodeSyncRecord apply(@Nullable ManagementNodeSyncRecord input) {
        if (input == null) {
            return null;
        }

        ManagementNodeState reported = input.getStatus();
        boolean claimsToBeUp = reported == ManagementNodeState.STANDBY
                || reported == ManagementNodeState.HOT_STANDBY
                || reported == ManagementNodeState.MASTER
                || reported == ManagementNodeState.HOT_BACKUP;

        // nodes that do not claim to be up, or whose heartbeat is fine, are passed through untouched
        if (!claimsToBeUp || isHeartbeatOk(input, referenceNode)) {
            return input;
        }

        return BasicManagementNodeSyncRecord.builder()
                .from(input)
                .status(ManagementNodeState.FAILED)
                .build();
    }
}
/** Appends this node's id and running flag to the inherited string form. */
@Override
public String toString() {
    StringBuilder text = new StringBuilder(super.toString());
    text.append("[node:").append(ownNodeId);
    text.append(";running=").append(running);
    text.append("]");
    return text.toString();
}
/**
 * Collects metrics for this node into a single map.
 * <p>
 * Entries, in the order they are added: {@code state}, {@code uptime}, {@code currentTimeUtc},
 * {@code startTimeUtc}, {@code highAvailability} (priority, poll period and heartbeat timeout in
 * milliseconds, and the node-state history), everything reported by the rebind manager, and
 * {@code managementStatePersistence} (the read and write persistence metrics).
 */
@Override
public Map<String,Object> getMetrics() {
    Map<String,Object> metrics = MutableMap.of();

    metrics.put("state", getNodeState());
    String uptime = Time.makeTimeStringRounded(Duration.millis(currentTimeMillis()-startTimeUtc));
    metrics.put("uptime", uptime);
    metrics.put("currentTimeUtc", currentTimeMillis());
    metrics.put("startTimeUtc", startTimeUtc);

    Map<String,Object> haSettings = MutableMap.<String,Object>of(
            "priority", getPriority(),
            "pollPeriod", getPollPeriod().toMilliseconds(),
            "heartbeatTimeout", getHeartbeatTimeout().toMilliseconds(),
            "history", nodeStateHistory);
    metrics.put("highAvailability", haSettings);

    metrics.putAll(managementContext.getRebindManager().getMetrics());

    metrics.put("managementStatePersistence",
            MutableMap.of("read", managementStateReadPersistenceMetrics, "write", managementStateWritePersistenceMetrics));
    return metrics;
}
/**
 * Returns the {@code "timestamp"} value of the first entry in {@code nodeStateHistory},
 * or 0 when the history is empty.
 */
@Override
public long getLastStateChange() {
    if (nodeStateHistory.isEmpty()) {
        return 0;
    }
    // NOTE(review): index 0 is presumably the most recent state change - confirm where the history is written
    return (Long) nodeStateHistory.get(0).get("timestamp");
}
}