/* * ToroDB * Copyright © 2014 8Kdata Technology (www.8kdata.com) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.torodb.mongodb.repl; import com.torodb.concurrent.ExecutorServiceShutdownHelper; import com.torodb.core.annotations.TorodbIdleService; import com.torodb.core.concurrent.ConcurrentToolsFactory; import com.torodb.core.services.IdleTorodbService; import com.torodb.core.supervision.Supervisor; import com.torodb.mongodb.commands.pojos.MemberState; import com.torodb.mongodb.repl.guice.MongoDbRepl; import com.torodb.mongodb.repl.oplogreplier.OplogApplierService; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jooq.lambda.fi.util.function.CheckedFunction; import org.jooq.lambda.fi.util.function.CheckedSupplier; import java.util.Locale; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.ThreadFactory; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; import javax.inject.Inject; @ThreadSafe public class ReplCoordinatorStateMachine extends IdleTorodbService { private static final Logger LOGGER = LogManager.getLogger(ReplCoordinatorStateMachine.class); private static final String THREAD_PREFIX = "repl-coord-"; private final ExecutorServiceShutdownHelper shutdownHelper; private final ConcurrentToolsFactory concurrentToolsFactory; private final ReplMetrics metrics; private final RecoveryService.RecoveryServiceFactory recoveryServiceFactory; private final OplogApplierService.OplogApplierServiceFactory oplogReplierFactory; private final Supervisor supervisor; private ExecutorService executorService; @Nonnull private ReplCoordinatorState state; private RecoveryService recoveryService; private OplogApplierService oplogReplierService; @Inject public ReplCoordinatorStateMachine( @TorodbIdleService ThreadFactory threadFactory, @MongoDbRepl Supervisor supervisor, ConcurrentToolsFactory concurrentToolsFactory, ExecutorServiceShutdownHelper shutdownHelper, RecoveryService.RecoveryServiceFactory recoveryServiceFactory, OplogApplierService.OplogApplierServiceFactory oplogReplierFactory, ReplMetrics metrics) { super(threadFactory); this.shutdownHelper = shutdownHelper; this.state = ReplCoordinatorState.STARTUP; this.recoveryServiceFactory = recoveryServiceFactory; this.oplogReplierFactory = oplogReplierFactory; this.metrics = metrics; this.supervisor = supervisor; this.concurrentToolsFactory = concurrentToolsFactory; } @Override protected void startUp() throws Exception { this.executorService = concurrentToolsFactory .createExecutorServiceWithMaxThreads(THREAD_PREFIX + "idle", 1); CompletableFuture.runAsync(() -> setState(ReplCoordinatorState.IDLE), executorService).join(); } @Override protected void shutDown() throws Exception { CompletableFuture.runAsync(this::shutDownPrivate, executorService).join(); shutdownHelper.shutdown(executorService); } private void shutDownPrivate() { switch (state) { case RECOVERY: stopRecoveryModePrivate(); break; case SECONDARY: stopSecondaryModePrivate(); break; default: break; } setState(ReplCoordinatorState.TERMINATED); } CompletableFuture<StateChange> fromRecoveryToSecondary(OplogApplierService.Callback callback) { return CompletableFuture.supplyAsync( () -> fromRecoveryToSecondaryPrivate(callback), executorService ); } CompletableFuture<StateChange> fromSecondaryToRecovery(RecoveryService.Callback callback) { return CompletableFuture.supplyAsync( () -> fromSecondaryToRecoveryPrivate(callback), executorService ); } CompletableFuture<StateChange> startRecoveryMode( RecoveryService.Callback serviceCallback) { return CompletableFuture.supplyAsync( () -> startModeWrapper( ReplCoordinatorState.RECOVERY, serviceCallback, this::startRecoveryModePrivate ), executorService ); } CompletableFuture<StateChange> stopRecoveryMode() { return CompletableFuture.supplyAsync( () -> stopModeWrapper( ReplCoordinatorState.RECOVERY, this::stopRecoveryModePrivate ), executorService ); } CompletableFuture<StateChange> startSecondaryMode( OplogApplierService.Callback serviceCallback) { return CompletableFuture.supplyAsync( () -> startModeWrapper( ReplCoordinatorState.SECONDARY, serviceCallback, this::startSecondaryModePrivate ), executorService ); } CompletableFuture<StateChange> stopSecondaryMode() { return CompletableFuture.supplyAsync( () -> stopModeWrapper( ReplCoordinatorState.SECONDARY, this::stopSecondaryModePrivate ), executorService ); } /** * Changes the name of the current thread and returns the older one. * * @param postfix * @return */ private String changeThreadName(String postfix) { Thread currentThread = Thread.currentThread(); String oldThreadName = currentThread.getName(); currentThread.setName(THREAD_PREFIX + postfix); return oldThreadName; } private void restoreThreadName(String oldName) { Thread currentThread = Thread.currentThread(); currentThread.setName(oldName); } StateChange fromRecoveryToSecondaryPrivate(OplogApplierService.Callback callback) { StateChange result; result = stopModeWrapper( ReplCoordinatorState.RECOVERY, this::stopRecoveryModePrivate ); if (result.success()) { result = startModeWrapper( ReplCoordinatorState.SECONDARY, callback, this::startSecondaryModePrivate ); } return result; } StateChange fromSecondaryToRecoveryPrivate(RecoveryService.Callback callback) { StateChange result; result = stopModeWrapper( ReplCoordinatorState.SECONDARY, this::stopSecondaryModePrivate ); if (result.success()) { result = startModeWrapper( ReplCoordinatorState.RECOVERY, callback, this::startRecoveryModePrivate ); } return result; } /** * A wrapper method that, given the required information to transist to a new state, checks * preconditions and set several generic things. * * @param <C> * @param triedState the state is trying to be started * @param callback the argument (usually a callback) the new state requires * @param startStateFunction the function that starts the new state * @return */ private <C> StateChange startModeWrapper(ReplCoordinatorState triedState, C callback, CheckedFunction<C, StateChange> startStateFunction) { String oldThreadName = changeThreadName("starting-" + triedState.name() .toLowerCase(Locale.ENGLISH)); ReplCoordinatorState oldState = state; try { if (oldState == ReplCoordinatorState.IDLE) { LOGGER.info("Starting {} mode", triedState.name() .toUpperCase(Locale.ENGLISH)); return startStateFunction.apply(callback); } switch (oldState) { default: throw new AssertionError("Unexpected " + ReplCoordinatorState.class.getSimpleName() + ": " + oldState); case STARTUP: LOGGER.debug("Trying to start the mode {} when the " + "current mode is {}. {} service must be started " + "before any change are accepted.", triedState, oldState, serviceName()); break; case RECOVERY: case SECONDARY: LOGGER.debug("Trying to start the mode {} when the " + "current mode is {}. Stop that state before " + "trying to change it", triedState, oldState); break; case TERMINATED: LOGGER.debug("Trying to start the mode {} when the " + "current mode is {}. No more state changes are " + "acepted ", triedState, oldState); break; } return new StateChange(oldState, triedState, new RejectionCause(RejectionType.ILLEGAL_CHANGE)); } catch (Throwable ex) { LOGGER.warn("Unexpected error while being on " + state + " state " + "and trying to start " + triedState, ex); setState(ReplCoordinatorState.ERROR); supervisor.onError(this, ex); return new StateChange(oldState, triedState, state, new RejectionCause(RejectionType.UNEXPECTED_ERROR, ex)); } finally { restoreThreadName(oldThreadName); } } private StateChange stopModeWrapper(ReplCoordinatorState toStopState, CheckedSupplier<StateChange> stopStateFunction) { String oldThreadName = changeThreadName("stopping-" + toStopState.name() .toLowerCase(Locale.ENGLISH)); ReplCoordinatorState oldState = state; try { if (state == toStopState) { LOGGER.info("Stopping {} mode", toStopState.name() .toUpperCase(Locale.ENGLISH)); return stopStateFunction.get(); } else { LOGGER.debug("Trying to stop the state {} while being on " + "state {}", toStopState, state); return new StateChange(oldState, toStopState, new RejectionCause(RejectionType.ILLEGAL_CHANGE)); } } catch (Throwable ex) { LOGGER.debug("Unexpected error while being on " + state + " state" + " and trying to stop it", ex); setState(ReplCoordinatorState.ERROR); supervisor.onError(this, ex); return new StateChange(oldState, toStopState, state, new RejectionCause(RejectionType.UNEXPECTED_ERROR, ex)); } finally { restoreThreadName(oldThreadName); } } private StateChange startRecoveryModePrivate( RecoveryService.Callback serviceCallback) { assert state == ReplCoordinatorState.RECOVERY; assert oplogReplierService == null || !oplogReplierService.isRunning(); final ReplCoordinatorState triedState = ReplCoordinatorState.RECOVERY; recoveryService = recoveryServiceFactory .createRecoveryService(serviceCallback); recoveryService.startAsync(); recoveryService.awaitRunning(); setState(triedState); return new StateChange(ReplCoordinatorState.RECOVERY, triedState); } private StateChange startSecondaryModePrivate( OplogApplierService.Callback serviceCallback) { assert state == ReplCoordinatorState.SECONDARY; assert recoveryService == null || !recoveryService.isRunning(); final ReplCoordinatorState triedState = ReplCoordinatorState.SECONDARY; oplogReplierService = oplogReplierFactory .createOplogApplier(serviceCallback); oplogReplierService.startAsync(); oplogReplierService.awaitRunning(); setState(triedState); return new StateChange(ReplCoordinatorState.SECONDARY, triedState); } private StateChange stopRecoveryModePrivate() { assert state == ReplCoordinatorState.RECOVERY; LOGGER.debug("Shutting down recovery service"); recoveryService.stopAsync(); recoveryService.awaitTerminated(); LOGGER.debug("Recovery service has been shutted down"); recoveryService = null; setState(ReplCoordinatorState.IDLE); return new StateChange(ReplCoordinatorState.RECOVERY, ReplCoordinatorState.IDLE); } private StateChange stopSecondaryModePrivate() { assert state == ReplCoordinatorState.SECONDARY; LOGGER.debug("Shutting down secondary service"); oplogReplierService.stopAsync(); oplogReplierService.awaitTerminated(); LOGGER.debug("Secondary service has been shutted down"); oplogReplierService = null; setState(ReplCoordinatorState.IDLE); return new StateChange(ReplCoordinatorState.SECONDARY, ReplCoordinatorState.IDLE); } private void setState(@Nonnull ReplCoordinatorState state) { this.state = state; MemberState rsMemberState; switch (state) { case RECOVERY: assert recoveryService != null; assert oplogReplierService == null; rsMemberState = MemberState.RS_RECOVERING; break; case SECONDARY: assert recoveryService == null; assert oplogReplierService != null; rsMemberState = MemberState.RS_SECONDARY; break; default: assert recoveryService == null; assert oplogReplierService == null; rsMemberState = MemberState.RS_UNKNOWN; } metrics.getMemberState().setValue(rsMemberState.name()); metrics.getMemberStateCounters().get(rsMemberState).inc(); } public static class StateChange { private final ReplCoordinatorState oldState; private final ReplCoordinatorState triedState; private final ReplCoordinatorState newState; private final Optional<RejectionCause> rejectionCause; public StateChange(ReplCoordinatorState oldState, ReplCoordinatorState triedState) { assert oldState != triedState : "There was not change"; this.oldState = oldState; this.triedState = triedState; this.newState = triedState; this.rejectionCause = Optional.empty(); } public StateChange(ReplCoordinatorState oldState, ReplCoordinatorState triedState, RejectionCause rejectionCause) { this.oldState = oldState; this.newState = oldState; this.triedState = triedState; this.rejectionCause = Optional.of(rejectionCause); } public StateChange(ReplCoordinatorState oldState, ReplCoordinatorState triedState, ReplCoordinatorState newState, RejectionCause rejectionCause) { this.oldState = oldState; this.triedState = triedState; this.newState = newState; this.rejectionCause = Optional.of(rejectionCause); } ReplCoordinatorState getOldState() { return oldState; } ReplCoordinatorState getNewState() { return newState; } ReplCoordinatorState getTriedState() { return triedState; } Optional<RejectionCause> getRejectionCause() { return rejectionCause; } boolean hasChanged() { return getOldState() != getNewState(); } boolean onTriedState() { return getNewState() == getTriedState(); } boolean success() { return !getRejectionCause().isPresent(); } } public static enum RejectionType { /** * The change that was tried to be applied is not valid on the current state. */ ILLEGAL_CHANGE, /** * The state that was tried to be applied is the same than the older one. */ NO_CHANGE, /** * It was impossible to start the new state because an unexpected error happened. */ UNEXPECTED_ERROR, /** * An error happened when trying to start the new state. */ CANNOT_START_NEW_STATE, /** * An error happened when trying to stop the old state. */ CANNOT_STOP_OLD_STATE; } public static class RejectionCause { private final RejectionType rejectionType; private final Optional<Throwable> cause; public RejectionCause(RejectionType rejectionType) { this.rejectionType = rejectionType; this.cause = Optional.empty(); } public RejectionCause(RejectionType rejectionType, @Nullable Throwable cause) { this.rejectionType = rejectionType; this.cause = Optional.ofNullable(cause); } public RejectionType getRejectionType() { return rejectionType; } public Optional<Throwable> getCause() { return cause; } } }