/*
* ToroDB
* Copyright © 2014 8Kdata Technology (www.8kdata.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.torodb.mongodb.repl;
import com.eightkdata.mongowp.OpTime;
import com.eightkdata.mongowp.Status;
import com.eightkdata.mongowp.client.core.MongoClient;
import com.eightkdata.mongowp.client.core.MongoClientFactory;
import com.eightkdata.mongowp.client.core.MongoConnection;
import com.eightkdata.mongowp.client.core.MongoConnection.RemoteCommandResponse;
import com.eightkdata.mongowp.client.core.UnreachableMongoServerException;
import com.eightkdata.mongowp.exceptions.MongoException;
import com.eightkdata.mongowp.exceptions.OplogOperationUnsupported;
import com.eightkdata.mongowp.exceptions.OplogStartMissingException;
import com.eightkdata.mongowp.server.api.oplog.OplogOperation;
import com.eightkdata.mongowp.server.api.pojos.MongoCursor;
import com.eightkdata.mongowp.server.api.tools.Empty;
import com.google.common.base.Supplier;
import com.google.common.net.HostAndPort;
import com.google.inject.assistedinject.Assisted;
import com.torodb.core.annotations.TorodbRunnableService;
import com.torodb.core.exceptions.user.UserException;
import com.torodb.core.services.RunnableTorodbService;
import com.torodb.core.supervision.Supervisor;
import com.torodb.core.supervision.SupervisorDecision;
import com.torodb.core.transaction.RollbackException;
import com.torodb.mongodb.commands.signatures.diagnostic.ListDatabasesCommand;
import com.torodb.mongodb.commands.signatures.diagnostic.ListDatabasesCommand.ListDatabasesReply;
import com.torodb.mongodb.core.MongodServer;
import com.torodb.mongodb.repl.OplogManager.OplogManagerPersistException;
import com.torodb.mongodb.repl.OplogManager.ReadOplogTransaction;
import com.torodb.mongodb.repl.OplogManager.WriteOplogTransaction;
import com.torodb.mongodb.repl.exceptions.NoSyncSourceFoundException;
import com.torodb.mongodb.repl.guice.MongoDbRepl;
import com.torodb.mongodb.repl.oplogreplier.ApplierContext;
import com.torodb.mongodb.repl.oplogreplier.OplogApplier;
import com.torodb.mongodb.repl.oplogreplier.OplogApplier.UnexpectedOplogApplierException;
import com.torodb.mongodb.repl.oplogreplier.RollbackReplicationException;
import com.torodb.mongodb.repl.oplogreplier.StopReplicationException;
import com.torodb.mongodb.repl.oplogreplier.fetcher.LimitedOplogFetcher;
import com.torodb.mongodb.repl.oplogreplier.fetcher.OplogFetcher;
import com.torodb.mongodb.utils.DbCloner;
import com.torodb.mongodb.utils.DbCloner.CloneOptions;
import com.torodb.mongodb.utils.DbCloner.CloningException;
import com.torodb.torod.SharedWriteTorodTransaction;
import com.torodb.torod.TorodConnection;
import com.torodb.torod.TorodServer;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ThreadFactory;
import java.util.stream.Stream;
import javax.annotation.Nonnull;
import javax.inject.Inject;
/**
*
*/
public class RecoveryService extends RunnableTorodbService {
private static final int MAX_ATTEMPTS = 10;
private static final Logger LOGGER = LogManager.getLogger(RecoveryService.class);
private final Callback callback;
private final OplogManager oplogManager;
private final SyncSourceProvider syncSourceProvider;
private final OplogReaderProvider oplogReaderProvider;
private final DbCloner cloner;
private final MongoClientFactory remoteClientFactory;
private final MongodServer server;
private final OplogApplier oplogApplier;
private final ReplicationFilters replFilters;
@Inject
public RecoveryService(
@TorodbRunnableService ThreadFactory threadFactory,
@Assisted Callback callback,
OplogManager oplogManager,
SyncSourceProvider syncSourceProvider,
OplogReaderProvider oplogReaderProvider,
@MongoDbRepl DbCloner cloner,
MongoClientFactory remoteClientFactory,
MongodServer server,
OplogApplier oplogApplier,
ReplicationFilters replFilters) {
super(callback, threadFactory);
this.callback = callback;
this.oplogManager = oplogManager;
this.syncSourceProvider = syncSourceProvider;
this.oplogReaderProvider = oplogReaderProvider;
this.cloner = cloner;
this.remoteClientFactory = remoteClientFactory;
this.server = server;
this.oplogApplier = oplogApplier;
this.replFilters = replFilters;
}
@Override
protected void startUp() {
LOGGER.info("Starting RECOVERY service");
}
@Override
protected Logger getLogger() {
return LOGGER;
}
@Override
protected void runProtected() throws Exception {
callback.waitUntilStartPermision();
try {
int attempt = 0;
boolean finished = false;
while (!finished && attempt < MAX_ATTEMPTS && isRunning()) {
attempt++;
if (attempt > 1) {
long millisToSleep = getMillisToSleep(attempt);
LOGGER.debug("Waiting {} millis after the {}th attempt", millisToSleep, attempt - 1);
Thread.sleep(millisToSleep);
}
try {
finished = initialSync();
} catch (TryAgainException ex) {
LOGGER.warn("Error while trying to recover (attempt: "
+ attempt + ")", ex);
} catch (FatalErrorException ex) {
LOGGER.error("Fatal error while trying to recover", ex);
}
}
if (!finished) {
callback.recoveryFailed(this);
} else {
callback.recoveryFinished(this);
}
} catch (Throwable ex) {
callback.recoveryFailed(this, ex);
}
}
private boolean initialSync() throws TryAgainException, FatalErrorException {
/*
* 1. store that data is inconsistent 2. decide a sync source 3. lastRemoteOptime1 = get the
* last optime of the sync source 4. clone all databases except local 5. lastRemoteOptime2 = get
* the last optime of the sync source 6. apply remote oplog from lastRemoteOptime1 to
* lastRemoteOptime2 7. lastRemoteOptime3 = get the last optime of the sync source 8. apply
* remote oplog from lastRemoteOptime2 to lastRemoteOptime3 9. rebuild indexes 10. store
* lastRemoteOptime3 as the last applied operation optime 11. store that data is consistent 12.
* change replication state to SECONDARY
*/
//TODO: Support fastsync (used to restore a node by copying the data from other up-to-date node)
LOGGER.info("Starting initial sync");
callback.setConsistentState(false);
HostAndPort syncSource;
try {
syncSource = syncSourceProvider.newSyncSource();
LOGGER.info("Using node " + syncSource + " to replicate from");
} catch (NoSyncSourceFoundException ex) {
throw new TryAgainException("No sync source");
}
MongoClient remoteClient;
try {
remoteClient = remoteClientFactory.createClient(syncSource);
} catch (UnreachableMongoServerException ex) {
throw new TryAgainException(ex);
}
try {
LOGGER.debug("Remote client obtained");
MongoConnection remoteConnection = remoteClient.openConnection();
try (OplogReader reader = oplogReaderProvider.newReader(remoteConnection)) {
OplogOperation lastClonedOp = reader.getLastOp();
OpTime lastRemoteOptime1 = lastClonedOp.getOpTime();
try (WriteOplogTransaction oplogTransaction = oplogManager.createWriteTransaction()) {
LOGGER.info("Remote database cloning started");
oplogTransaction.truncate();
LOGGER.info("Local databases dropping started");
Status<?> status = dropDatabases();
if (!status.isOk()) {
throw new TryAgainException("Error while trying to drop collections: "
+ status);
}
LOGGER.info("Local databases dropping finished");
if (!isRunning()) {
LOGGER.warn("Recovery stopped before it can finish");
return false;
}
LOGGER.info("Remote database cloning started");
cloneDatabases(remoteClient);
LOGGER.info("Remote database cloning finished");
oplogTransaction.forceNewValue(lastClonedOp.getHash(), lastClonedOp.getOpTime());
}
if (!isRunning()) {
LOGGER.warn("Recovery stopped before it can finish");
return false;
}
TorodServer torodServer = server.getTorodServer();
try (TorodConnection connection = torodServer.openConnection();
SharedWriteTorodTransaction trans = connection.openWriteTransaction(false)) {
OpTime lastRemoteOptime2 = reader.getLastOp().getOpTime();
LOGGER.info("First oplog application started");
applyOplog(reader, lastRemoteOptime1, lastRemoteOptime2);
trans.commit();
LOGGER.info("First oplog application finished");
if (!isRunning()) {
LOGGER.warn("Recovery stopped before it can finish");
return false;
}
OplogOperation lastOperation = reader.getLastOp();
OpTime lastRemoteOptime3 = lastOperation.getOpTime();
LOGGER.info("Second oplog application started");
applyOplog(reader, lastRemoteOptime2, lastRemoteOptime3);
trans.commit();
LOGGER.info("Second oplog application finished");
if (!isRunning()) {
LOGGER.warn("Recovery stopped before it can finish");
return false;
}
LOGGER.info("Index rebuild started");
rebuildIndexes();
trans.commit();
LOGGER.info("Index rebuild finished");
if (!isRunning()) {
LOGGER.warn("Recovery stopped before it can finish");
return false;
}
trans.commit();
}
} catch (OplogStartMissingException ex) {
throw new TryAgainException(ex);
} catch (OplogOperationUnsupported ex) {
throw new TryAgainException(ex);
} catch (MongoException | RollbackException ex) {
throw new TryAgainException(ex);
} catch (OplogManagerPersistException ex) {
throw new FatalErrorException();
} catch (UserException ex) {
throw new FatalErrorException(ex);
}
callback.setConsistentState(true);
LOGGER.info("Initial sync finished");
} finally {
remoteClient.close();
}
return true;
}
private void enableDataImportMode() throws UserException {
LOGGER.debug("Starting data import mode");
server.getTorodServer().enableDataImportMode();
LOGGER.trace("Data import mode started");
}
private void disableDataImportMode() throws UserException {
LOGGER.debug("Ending data import mode");
server.getTorodServer().disableDataImportMode();
LOGGER.trace("Data import mode ended");
}
@Override
protected void shutDown() {
LOGGER.info("Recived a request to stop the recovering service");
}
private Status<?> dropDatabases() throws RollbackException, UserException, RollbackException {
try (TorodConnection conn = server.getTorodServer().openConnection();
SharedWriteTorodTransaction trans = conn.openWriteTransaction(false)) {
List<String> dbs = trans.getDatabases();
for (String dbName : dbs) {
if (!dbName.equals("local")) {
trans.dropDatabase(dbName);
}
}
trans.commit();
}
return Status.ok();
}
private void cloneDatabases(@Nonnull MongoClient remoteClient) throws CloningException,
MongoException, UserException {
enableDataImportMode();
try {
Stream<String> dbNames;
try (MongoConnection remoteConnection = remoteClient.openConnection()) {
RemoteCommandResponse<ListDatabasesReply> remoteResponse = remoteConnection.execute(
ListDatabasesCommand.INSTANCE,
"admin",
true,
Empty.getInstance()
);
if (!remoteResponse.isOk()) {
throw remoteResponse.asMongoException();
}
dbNames = remoteResponse.getCommandReply().get().getDatabases().stream().map(db -> db
.getName());
}
dbNames.filter(this::isReplicable)
.forEach(databaseName -> {
MyWritePermissionSupplier writePermissionSupplier =
new MyWritePermissionSupplier(databaseName);
CloneOptions options = new CloneOptions(
true,
true,
true,
false,
databaseName,
Collections.<String>emptySet(),
writePermissionSupplier,
(colName) -> replFilters.getCollectionPredicate().test(databaseName, colName),
(collection, indexName, unique, keys) -> replFilters.getIndexPredicate().test(
databaseName, collection, indexName, unique, keys)
);
try {
cloner.cloneDatabase(databaseName, remoteClient, server, options);
} catch (MongoException ex) {
throw new CloningException(ex);
}
});
} finally {
disableDataImportMode();
}
}
/**
* Applies all the oplog operations stored on the remote server whose optime is higher than
* <em>from</em> but lower or equal than <em>to</em>.
*
* @param myOplog
* @param remoteOplog
* @param to
* @param from
*/
private void applyOplog(
OplogReader remoteOplog,
OpTime from,
OpTime to) throws TryAgainException, MongoException, FatalErrorException {
MongoCursor<OplogOperation> oplogCursor = remoteOplog.between(from, true, to, true);
if (!oplogCursor.hasNext()) {
throw new OplogStartMissingException(remoteOplog.getSyncSource());
}
OplogOperation firstOp = oplogCursor.next();
if (!firstOp.getOpTime().equals(from)) {
throw new TryAgainException("Remote oplog does not cointain our last operation");
}
OplogFetcher fetcher = new LimitedOplogFetcher(oplogCursor);
ApplierContext context = new ApplierContext.Builder()
.setReapplying(true)
.setUpdatesAsUpserts(true)
.build();
try {
oplogApplier.apply(fetcher, context)
.waitUntilFinished();
} catch (StopReplicationException | RollbackReplicationException | CancellationException
| UnexpectedOplogApplierException ex) {
throw new FatalErrorException(ex);
}
OpTime lastAppliedOptime;
try (ReadOplogTransaction oplogTrans = oplogManager.createReadTransaction()) {
lastAppliedOptime = oplogTrans.getLastAppliedOptime();
}
if (!lastAppliedOptime.equals(to)) {
LOGGER.warn("Unexpected optime for last operation to apply. "
+ "Expected " + to + ", but " + lastAppliedOptime
+ " found");
}
}
private void rebuildIndexes() {
//TODO: Check if this is necessary
LOGGER.debug("Rebuild index is not implemented yet, so indexes have not been rebuild");
}
private boolean isReplicable(String databaseName) {
return !databaseName.equals("local");
}
private long getMillisToSleep(int attempt) {
return attempt * 1000L;
}
private class MyWritePermissionSupplier implements Supplier<Boolean> {
private final String database;
public MyWritePermissionSupplier(String database) {
this.database = database;
}
@Override
public Boolean get() {
return callback.canAcceptWrites(database);
}
}
private static class TryAgainException extends Exception {
private static final long serialVersionUID = 1L;
public TryAgainException() {
}
public TryAgainException(String message) {
super(message);
}
public TryAgainException(String message, Throwable cause) {
super(message, cause);
}
public TryAgainException(Throwable cause) {
super(cause);
}
}
private static class FatalErrorException extends Exception {
private static final long serialVersionUID = 1L;
public FatalErrorException() {
}
public FatalErrorException(Throwable cause) {
super(cause);
}
}
static interface Callback extends Supervisor {
void waitUntilStartPermision();
void recoveryFinished(RecoveryService service);
void recoveryFailed(RecoveryService service);
void recoveryFailed(RecoveryService service, Throwable ex);
public void setConsistentState(boolean consistent);
public boolean canAcceptWrites(String database);
@Override
public default SupervisorDecision onError(Object supervised, Throwable error) {
recoveryFailed((RecoveryService) supervised, error);
return SupervisorDecision.IGNORE;
}
}
public static interface RecoveryServiceFactory {
RecoveryService createRecoveryService(Callback callback);
}
}