/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.brooklyn.core.mgmt.ha;
import static com.google.common.base.Preconditions.checkNotNull;
import java.io.IOException;
import java.util.Date;
import java.util.List;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.brooklyn.api.mgmt.ManagementContext;
import org.apache.brooklyn.api.mgmt.ha.HighAvailabilityMode;
import org.apache.brooklyn.api.mgmt.ha.ManagementNodeState;
import org.apache.brooklyn.api.mgmt.ha.ManagementNodeSyncRecord;
import org.apache.brooklyn.api.mgmt.ha.ManagementPlaneSyncRecord;
import org.apache.brooklyn.api.mgmt.ha.ManagementPlaneSyncRecordPersister;
import org.apache.brooklyn.core.mgmt.ha.dto.BasicManagementNodeSyncRecord;
import org.apache.brooklyn.core.mgmt.ha.dto.ManagementPlaneSyncRecordImpl;
import org.apache.brooklyn.core.mgmt.persist.MementoSerializer;
import org.apache.brooklyn.core.mgmt.persist.PersistenceObjectStore;
import org.apache.brooklyn.core.mgmt.persist.RetryingMementoSerializer;
import org.apache.brooklyn.core.mgmt.persist.StoreObjectAccessorLocking;
import org.apache.brooklyn.core.mgmt.persist.XmlMementoSerializer;
import org.apache.brooklyn.core.mgmt.persist.PersistenceObjectStore.StoreObjectAccessorWithLock;
import org.apache.brooklyn.util.exceptions.Exceptions;
import org.apache.brooklyn.util.text.Strings;
import org.apache.brooklyn.util.time.Duration;
import org.apache.brooklyn.util.time.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.annotations.Beta;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Maps;
/**
* Structure of files is:
* <ul>
* <li>{@code plane/} - top-level directory
* <ul>
* <li>{@code master} - contains the id of the management-node that is currently master
* <li>{@code change.log} - log of changes made
* <li>{@code nodes/} - sub-directory, containing one file per management-node
* <ul>
* <li>{@code a9WiuVKp} - file named after the management-node's id, containing the management node's current state
* <li>{@code E1eDXQF3}
* </ul>
* </ul>
* </ul>
*
* All writes are done synchronously.
*
* @since 0.7.0
*
* @author aled
*/
@Beta
public class ManagementPlaneSyncRecordPersisterToObjectStore implements ManagementPlaneSyncRecordPersister {
// TODO Multiple node appending to change.log could cause strange interleaving, or perhaps even data loss?
// But this file is not critical to functionality.
// TODO Should ManagementPlaneSyncRecordPersister.Delta be different so can tell what is a significant event,
// and thus log it in change.log - currently only subset of significant things being logged.
private static final Logger LOG = LoggerFactory.getLogger(ManagementPlaneSyncRecordPersisterToObjectStore.class);
private static final Duration SHUTDOWN_TIMEOUT = Duration.TEN_SECONDS;
private static final Duration SYNC_WRITE_TIMEOUT = Duration.TEN_SECONDS;
public static final String NODES_SUB_PATH = "nodes";
// TODO Leak if we go through lots of managers; but tiny!
private final ConcurrentMap<String, StoreObjectAccessorWithLock> nodeWriters = Maps.newConcurrentMap();
private StoreObjectAccessorWithLock masterWriter;
private StoreObjectAccessorWithLock changeLogWriter;
private ManagementContext mgmt;
private final PersistenceObjectStore objectStore;
private final MementoSerializer<Object> serializer;
private static final int MAX_SERIALIZATION_ATTEMPTS = 5;
private boolean started = false;
private volatile boolean running = true;
protected final AtomicLong checkpointLogCount = new AtomicLong();
private static final int INITIAL_LOG_WRITES = 5;
@VisibleForTesting
/** allows, when testing, to be able to override file times / blobstore times with time from the ticker */
private boolean preferRemoteTimestampInMemento = false;
/**
* @param mgmt not used much at present but handy to ensure we know it so that obj store is prepared
* @param objectStore the objectStore use to read/write management-plane data;
* this must have been {@link PersistenceObjectStore#prepareForSharedUse(org.apache.brooklyn.core.mgmt.persist.PersistMode, HighAvailabilityMode)}
* @param classLoader ClassLoader to use when deserializing data
*/
public ManagementPlaneSyncRecordPersisterToObjectStore(ManagementContext mgmt, PersistenceObjectStore objectStore, ClassLoader classLoader) {
this.mgmt = mgmt;
this.objectStore = checkNotNull(objectStore, "objectStore");
MementoSerializer<Object> rawSerializer = new XmlMementoSerializer<Object>(checkNotNull(classLoader, "classLoader"));
this.serializer = new RetryingMementoSerializer<Object>(rawSerializer, MAX_SERIALIZATION_ATTEMPTS);
objectStore.createSubPath(NODES_SUB_PATH);
LOG.debug("ManagementPlaneMemento-persister will use store "+objectStore);
}
protected synchronized void init() {
if (!started) {
started = true;
//Leading slash causes problems in SL, it's not a correct file name so remove it.
//But once removed we can't load the master file from existing persistence stores.
//Try to detect if the old file exists, if so use old-style names, otherwise use the correct names.
masterWriter = new StoreObjectAccessorLocking(objectStore.newAccessor("/master"));
if (masterWriter.get() != null) {
changeLogWriter = new StoreObjectAccessorLocking(objectStore.newAccessor("/change.log"));
} else {
masterWriter = new StoreObjectAccessorLocking(objectStore.newAccessor("master"));
changeLogWriter = new StoreObjectAccessorLocking(objectStore.newAccessor("change.log"));
}
}
}
@VisibleForTesting
public void preferRemoteTimestampInMemento() {
preferRemoteTimestampInMemento = true;
}
@Override
public void stop() {
running = false;
try {
for (StoreObjectAccessorWithLock writer : nodeWriters.values()) {
try {
writer.waitForCurrentWrites(SHUTDOWN_TIMEOUT);
} catch (TimeoutException e) {
LOG.warn("Timeout during shutdown, waiting for write of "+writer+"; continuing");
}
}
try {
masterWriter.waitForCurrentWrites(SHUTDOWN_TIMEOUT);
} catch (TimeoutException e) {
LOG.warn("Timeout during shutdown, waiting for write of "+masterWriter+"; continuing");
}
} catch (InterruptedException e) {
throw Exceptions.propagate(e);
}
}
@Override
public ManagementPlaneSyncRecord loadSyncRecord() throws IOException {
if (!running) {
throw new IllegalStateException("Persister not running; cannot load memento from "+ objectStore.getSummaryName());
}
init();
// Note this is called a lot - every time we check the heartbeats
if (LOG.isTraceEnabled()) LOG.trace("Loading management-plane memento from {}", objectStore.getSummaryName());
Stopwatch stopwatch = Stopwatch.createStarted();
ManagementPlaneSyncRecordImpl.Builder builder = ManagementPlaneSyncRecordImpl.builder();
// Be careful about order: if the master-file says nodeX then nodeX's file must have an up-to-date timestamp.
// Therefore read master file first, followed by the other node-files.
String masterNodeId = masterWriter.get();
if (masterNodeId == null) {
LOG.debug("No master-memento deserialized from file "+masterWriter+"; ignoring and continuing (normal on startup, should cause an error later in live operation)");
} else {
builder.masterNodeId(masterNodeId);
}
// Load node-files
List<String> nodeFiles = objectStore.listContentsWithSubPath(NODES_SUB_PATH);
LOG.trace("Loading nodes from {}; {} nodes.",
new Object[]{objectStore.getSummaryName(), nodeFiles.size()});
for (String nodeFile : nodeFiles) {
PersistenceObjectStore.StoreObjectAccessor objectAccessor = objectStore.newAccessor(nodeFile);
String nodeContents = null;
Exception problem = null;
try {
nodeContents = objectAccessor.get();
} catch (Exception e) {
Exceptions.propagateIfFatal(e);
problem = e;
}
if (problem!=null || Strings.isBlank(nodeContents)) {
// happens if node has gone away, or if FileBasedObjectStore.moveFile is not atomic,
// i.e. it has deleted but not updated it yet
if (objectAccessor.exists()) {
throw Exceptions.propagate(new IllegalStateException("Node record "+nodeFile+" could not be read when "+mgmt.getManagementNodeId()+" was scanning", problem));
} else {
LOG.warn("Node record "+nodeFile+" went away while "+mgmt.getManagementNodeId()+" was scanning, ignoring (it has probably been terminated)");
// if file was deleted, silently ignore
continue;
}
}
ManagementNodeSyncRecord memento = (ManagementNodeSyncRecord) serializer.fromString(nodeContents);
if (memento == null) {
// shouldn't happen
throw Exceptions.propagate(new IllegalStateException("Node record "+nodeFile+" could not be deserialized when "+mgmt.getManagementNodeId()+" was scanning: "+nodeContents, problem));
} else {
if (memento.getRemoteTimestamp()!=null && preferRemoteTimestampInMemento) {
// in test mode, the remote timestamp is stored in the file
} else {
if (memento.getRemoteTimestamp()!=null) {
LOG.debug("Ignoring remote timestamp in memento file ("+memento+"); looks like this data has been manually copied in");
}
Date lastModifiedDate = objectAccessor.getLastModifiedDate();
((BasicManagementNodeSyncRecord)memento).setRemoteTimestamp(lastModifiedDate!=null ? lastModifiedDate.getTime() : null);
}
builder.node(memento);
}
}
if (LOG.isDebugEnabled()) LOG.trace("Loaded management-plane memento; {} nodes, took {}",
nodeFiles.size(),
Time.makeTimeStringRounded(stopwatch.elapsed(TimeUnit.MILLISECONDS)));
return builder.build();
}
@Override
public void delta(Delta delta) {
if (!running) {
if (LOG.isDebugEnabled()) LOG.debug("Persister not running; ignoring checkpointed delta of manager-memento");
return;
}
init();
Stopwatch stopwatch = Stopwatch.createStarted();
if (LOG.isTraceEnabled()) LOG.trace("Checkpointing delta of manager-memento; updating {}", delta);
for (ManagementNodeSyncRecord m : delta.getNodes()) {
persist(m);
}
for (String id : delta.getRemovedNodeIds()) {
deleteNode(id);
}
switch (delta.getMasterChange()) {
case NO_CHANGE:
break; // no-op
case SET_MASTER:
persistMaster(checkNotNull(delta.getNewMasterOrNull()), null);
break;
case CLEAR_MASTER:
persistMaster("", delta.getExpectedMasterToClear());
break; // no-op
default:
throw new IllegalStateException("Unknown state for master-change: "+delta.getMasterChange());
}
if (LOG.isDebugEnabled() && shouldLogCheckpoint()) LOG.debug("Checkpointed delta of manager-memento in "+Time.makeTimeStringRounded(stopwatch)+": "+delta);
}
private void persistMaster(String nodeId, String optionalExpectedId) {
if (optionalExpectedId!=null) {
String currentRemoteMaster = masterWriter.get();
if (currentRemoteMaster==null) {
// okay to have nothing at remote
} else if (!currentRemoteMaster.trim().equals(optionalExpectedId.trim())) {
LOG.warn("Master at server is "+(Strings.isBlank(currentRemoteMaster) ? "<none>" : currentRemoteMaster)+"; expected "+optionalExpectedId+" "
+ (Strings.isNonBlank(nodeId) ? "and would set as "+nodeId : "and would clear")
+ ", so not applying (yet)");
return;
}
}
masterWriter.put(nodeId);
try {
masterWriter.waitForCurrentWrites(SYNC_WRITE_TIMEOUT);
} catch (Exception e) {
throw Exceptions.propagate(e);
}
changeLogWriter.append(Time.makeDateString() + ": set master to " + nodeId + "\n");
try {
changeLogWriter.waitForCurrentWrites(SYNC_WRITE_TIMEOUT);
} catch (Exception e) {
throw Exceptions.propagate(e);
}
}
@Override
@VisibleForTesting
public void waitForWritesCompleted(Duration timeout) throws InterruptedException, TimeoutException {
for (StoreObjectAccessorWithLock writer : nodeWriters.values()) {
writer.waitForCurrentWrites(timeout);
}
masterWriter.waitForCurrentWrites(timeout);
}
public void checkpoint(ManagementPlaneSyncRecord record) {
init();
for (ManagementNodeSyncRecord node : record.getManagementNodes().values()) {
// Check included in case the node in the memento is the one being initialised by
// BrooklynLauncher in the copy state command.
if (!ManagementNodeState.INITIALIZING.equals(node.getStatus()) && node.getNodeId() != null) {
persist(node);
}
}
}
private void persist(ManagementNodeSyncRecord node) {
StoreObjectAccessorWithLock writer = getOrCreateNodeWriter(node.getNodeId());
boolean fileExists = writer.exists();
writer.put(serializer.toString(node));
try {
writer.waitForCurrentWrites(SYNC_WRITE_TIMEOUT);
} catch (Exception e) {
throw Exceptions.propagate(e);
}
if (!fileExists) {
changeLogWriter.append(Time.makeDateString()+": created node "+node.getNodeId()+"\n");
}
if (node.getStatus() == ManagementNodeState.TERMINATED || node.getStatus() == ManagementNodeState.FAILED) {
changeLogWriter.append(Time.makeDateString()+": set node "+node.getNodeId()+" status to "+node.getStatus()+"\n");
}
}
private void deleteNode(String nodeId) {
getOrCreateNodeWriter(nodeId).delete();
changeLogWriter.append(Time.makeDateString()+": deleted node "+nodeId+"\n");
}
private StoreObjectAccessorWithLock getOrCreateNodeWriter(String nodeId) {
PersistenceObjectStore.StoreObjectAccessorWithLock writer = nodeWriters.get(nodeId);
if (writer == null) {
nodeWriters.putIfAbsent(nodeId,
new StoreObjectAccessorLocking(objectStore.newAccessor(NODES_SUB_PATH+"/"+nodeId)));
writer = nodeWriters.get(nodeId);
}
return writer;
}
protected boolean shouldLogCheckpoint() {
long logCount = checkpointLogCount.incrementAndGet();
return (logCount < INITIAL_LOG_WRITES) || (logCount % 1000 == 0);
}
}