/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.activemq.artemis.core.replication; import java.io.FileInputStream; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.LinkedHashSet; import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicBoolean; import io.netty.buffer.ByteBuf; import io.netty.buffer.PooledByteBufAllocator; import org.apache.activemq.artemis.api.core.ActiveMQBuffer; import org.apache.activemq.artemis.api.core.ActiveMQException; import org.apache.activemq.artemis.api.core.ActiveMQExceptionType; import org.apache.activemq.artemis.api.core.Pair; import org.apache.activemq.artemis.api.core.SimpleString; import org.apache.activemq.artemis.api.core.client.SessionFailureListener; import org.apache.activemq.artemis.core.io.SequentialFile; import org.apache.activemq.artemis.core.journal.EncodingSupport; import org.apache.activemq.artemis.core.journal.impl.JournalFile; import org.apache.activemq.artemis.core.paging.PagedMessage; import org.apache.activemq.artemis.core.persistence.OperationContext; import org.apache.activemq.artemis.core.persistence.Persister; import org.apache.activemq.artemis.core.persistence.impl.journal.AbstractJournalStorageManager; import org.apache.activemq.artemis.core.persistence.impl.journal.OperationContextImpl; import org.apache.activemq.artemis.core.protocol.core.Channel; import org.apache.activemq.artemis.core.protocol.core.ChannelHandler; import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection; import org.apache.activemq.artemis.core.protocol.core.Packet; import org.apache.activemq.artemis.core.protocol.core.impl.ChannelImpl.CHANNEL_ID; import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationAddMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationAddTXMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationCommitMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationDeleteMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationDeleteTXMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLargeMessageBeginMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLargeMessageEndMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLargeMessageWriteMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage.LiveStopping; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationPageEventMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationPageWriteMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationPrepareMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationResponseMessageV2; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationStartSyncMessage; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationSyncFileMessage; import org.apache.activemq.artemis.core.server.ActiveMQComponent; import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle; import org.apache.activemq.artemis.core.server.ActiveMQServerLogger; import org.apache.activemq.artemis.spi.core.protocol.RemotingConnection; import org.apache.activemq.artemis.utils.ExecutorFactory; import org.apache.activemq.artemis.utils.ReusableLatch; import org.jboss.logging.Logger; /** * Manages replication tasks on the live server (that is the live server side of a "remote backup" * use case). * <p> * Its equivalent in the backup server is {@link ReplicationEndpoint}. * * @see ReplicationEndpoint */ public final class ReplicationManager implements ActiveMQComponent { private static final Logger logger = Logger.getLogger(ReplicationManager.class); public enum ADD_OPERATION_TYPE { UPDATE { @Override public boolean toBoolean() { return true; } }, ADD { @Override public boolean toBoolean() { return false; } }; public abstract boolean toBoolean(); public static ADD_OPERATION_TYPE toOperation(boolean isUpdate) { return isUpdate ? UPDATE : ADD; } } private final ResponseHandler responseHandler = new ResponseHandler(); private final Channel replicatingChannel; private boolean started; private volatile boolean enabled; private final AtomicBoolean writable = new AtomicBoolean(true); private final Queue<OperationContext> pendingTokens = new ConcurrentLinkedQueue<>(); private final ExecutorFactory executorFactory; private final Executor replicationStream; private SessionFailureListener failureListener; private CoreRemotingConnection remotingConnection; private final long timeout; private volatile boolean inSync = true; private final ReusableLatch synchronizationIsFinishedAcknowledgement = new ReusableLatch(0); /** * @param remotingConnection */ public ReplicationManager(CoreRemotingConnection remotingConnection, final long timeout, final ExecutorFactory executorFactory) { this.executorFactory = executorFactory; this.replicatingChannel = remotingConnection.getChannel(CHANNEL_ID.REPLICATION.id, -1); this.remotingConnection = remotingConnection; this.replicationStream = executorFactory.getExecutor(); this.timeout = timeout; } public void appendUpdateRecord(final byte journalID, final ADD_OPERATION_TYPE operation, final long id, final byte recordType, final Persister persister, final Object record) throws Exception { if (enabled) { sendReplicatePacket(new ReplicationAddMessage(journalID, operation, id, recordType, persister, record)); } } public void appendDeleteRecord(final byte journalID, final long id) throws Exception { if (enabled) { sendReplicatePacket(new ReplicationDeleteMessage(journalID, id)); } } public void appendAddRecordTransactional(final byte journalID, final ADD_OPERATION_TYPE operation, final long txID, final long id, final byte recordType, final Persister persister, final Object record) throws Exception { if (enabled) { sendReplicatePacket(new ReplicationAddTXMessage(journalID, operation, txID, id, recordType, persister, record)); } } public void appendCommitRecord(final byte journalID, final long txID, boolean sync, final boolean lineUp) throws Exception { if (enabled) { sendReplicatePacket(new ReplicationCommitMessage(journalID, false, txID), lineUp, true); } } public void appendDeleteRecordTransactional(final byte journalID, final long txID, final long id, final EncodingSupport record) throws Exception { if (enabled) { sendReplicatePacket(new ReplicationDeleteTXMessage(journalID, txID, id, record)); } } public void appendDeleteRecordTransactional(final byte journalID, final long txID, final long id) throws Exception { if (enabled) { sendReplicatePacket(new ReplicationDeleteTXMessage(journalID, txID, id, NullEncoding.instance)); } } public void appendPrepareRecord(final byte journalID, final long txID, final EncodingSupport transactionData) throws Exception { if (enabled) { sendReplicatePacket(new ReplicationPrepareMessage(journalID, txID, transactionData)); } } public void appendRollbackRecord(final byte journalID, final long txID) throws Exception { if (enabled) { sendReplicatePacket(new ReplicationCommitMessage(journalID, true, txID)); } } /** * @param storeName * @param pageNumber */ public void pageClosed(final SimpleString storeName, final int pageNumber) { if (enabled) { sendReplicatePacket(new ReplicationPageEventMessage(storeName, pageNumber, false)); } } public void pageDeleted(final SimpleString storeName, final int pageNumber) { if (enabled) { sendReplicatePacket(new ReplicationPageEventMessage(storeName, pageNumber, true)); } } public void pageWrite(final PagedMessage message, final int pageNumber) { if (enabled) { sendReplicatePacket(new ReplicationPageWriteMessage(message, pageNumber)); } } public void largeMessageBegin(final long messageId) { if (enabled) { sendReplicatePacket(new ReplicationLargeMessageBeginMessage(messageId)); } } public void largeMessageDelete(final Long messageId) { if (enabled) { sendReplicatePacket(new ReplicationLargeMessageEndMessage(messageId)); } } public void largeMessageWrite(final long messageId, final byte[] body) { if (enabled) { sendReplicatePacket(new ReplicationLargeMessageWriteMessage(messageId, body)); } } @Override public synchronized boolean isStarted() { return started; } @Override public synchronized void start() throws ActiveMQException { if (started) { throw new IllegalStateException("ReplicationManager is already started"); } replicatingChannel.setHandler(responseHandler); failureListener = new ReplicatedSessionFailureListener(); remotingConnection.addFailureListener(failureListener); started = true; enabled = true; } @Override public void stop() throws Exception { synchronized (this) { if (!started) { logger.trace("Stopping being ignored as it hasn't been started"); return; } } // This is to avoid the write holding a lock while we are trying to close it if (replicatingChannel != null) { replicatingChannel.close(); replicatingChannel.getConnection().getTransportConnection().fireReady(true); } enabled = false; writable.set(true); clearReplicationTokens(); RemotingConnection toStop = remotingConnection; if (toStop != null) { toStop.removeFailureListener(failureListener); } remotingConnection = null; started = false; } /** * Completes any pending operations. * <p> * This can be necessary in case the live loses connection to the backup (network failure, or * backup crashing). */ public void clearReplicationTokens() { logger.trace("clearReplicationTokens initiating"); while (!pendingTokens.isEmpty()) { OperationContext ctx = pendingTokens.poll(); logger.trace("Calling ctx.replicationDone()"); try { ctx.replicationDone(); } catch (Throwable e) { ActiveMQServerLogger.LOGGER.errorCompletingCallbackOnReplicationManager(e); } } logger.trace("clearReplicationTokens finished"); } /** * A list of tokens that are still waiting for replications to be completed */ public Set<OperationContext> getActiveTokens() { LinkedHashSet<OperationContext> activeContexts = new LinkedHashSet<>(); // The same context will be replicated on the pending tokens... // as the multiple operations will be replicated on the same context for (OperationContext ctx : pendingTokens) { activeContexts.add(ctx); } return activeContexts; } private OperationContext sendReplicatePacket(final Packet packet) { return sendReplicatePacket(packet, true, true); } private OperationContext sendReplicatePacket(final Packet packet, boolean lineUp, boolean useExecutor) { if (!enabled) return null; boolean runItNow = false; final OperationContext repliToken = OperationContextImpl.getContext(executorFactory); if (lineUp) { repliToken.replicationLineUp(); } if (enabled) { pendingTokens.add(repliToken); if (useExecutor) { replicationStream.execute(() -> { if (enabled) { flowControl(packet.expectedEncodeSize()); replicatingChannel.send(packet); } }); } else { flowControl(packet.expectedEncodeSize()); replicatingChannel.send(packet); } } else { // Already replicating channel failed, so just play the action now runItNow = true; } // Execute outside lock if (runItNow) { repliToken.replicationDone(); } return repliToken; } /** * This was written as a refactoring of sendReplicatePacket. * In case you refactor this in any way, this method must hold a lock on replication lock. . */ private boolean flowControl(int size) { boolean flowWorked = replicatingChannel.getConnection().blockUntilWritable(size, timeout); if (!flowWorked) { try { ActiveMQServerLogger.LOGGER.slowReplicationResponse(); stop(); } catch (Exception e) { logger.warn(e.getMessage(), e); } } return flowWorked; } /** * @throws IllegalStateException By default, all replicated packets generate a replicated * response. If your packets are triggering this exception, it may be because the * packets were not sent with {@link #sendReplicatePacket(Packet)}. */ private void replicated() { OperationContext ctx = pendingTokens.poll(); if (ctx == null) { throw new IllegalStateException("Missing replication token on the queue."); } ctx.replicationDone(); } // Inner classes ------------------------------------------------- private final class ReplicatedSessionFailureListener implements SessionFailureListener { @Override public void connectionFailed(final ActiveMQException me, boolean failedOver) { if (me.getType() == ActiveMQExceptionType.DISCONNECTED) { // Backup has shut down - no need to log a stack trace ActiveMQServerLogger.LOGGER.replicationStopOnBackupShutdown(); } else { ActiveMQServerLogger.LOGGER.replicationStopOnBackupFail(me); } try { stop(); } catch (Exception e) { ActiveMQServerLogger.LOGGER.errorStoppingReplication(e); } } @Override public void connectionFailed(final ActiveMQException me, boolean failedOver, String scaleDownTargetNodeID) { connectionFailed(me, failedOver); } @Override public void beforeReconnect(final ActiveMQException me) { } } private final class ResponseHandler implements ChannelHandler { @Override public void handlePacket(final Packet packet) { if (packet.getType() == PacketImpl.REPLICATION_RESPONSE || packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) { replicated(); if (packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) { ReplicationResponseMessageV2 replicationResponseMessage = (ReplicationResponseMessageV2) packet; if (replicationResponseMessage.isSynchronizationIsFinishedAcknowledgement()) { synchronizationIsFinishedAcknowledgement.countDown(); } } } } } private static final class NullEncoding implements EncodingSupport { static final NullEncoding instance = new NullEncoding(); @Override public void decode(final ActiveMQBuffer buffer) { } @Override public void encode(final ActiveMQBuffer buffer) { } @Override public int getEncodeSize() { return 0; } } /** * Sends the whole content of the file to be duplicated. * * @throws ActiveMQException * @throws Exception */ public void syncJournalFile(JournalFile jf, AbstractJournalStorageManager.JournalContent content) throws Exception { if (!enabled) { return; } SequentialFile file = jf.getFile().cloneFile(); try { ActiveMQServerLogger.LOGGER.replicaSyncFile(file, file.size()); sendLargeFile(content, null, jf.getFileID(), file, Long.MAX_VALUE); } finally { if (file.isOpen()) file.close(); } } public void syncLargeMessageFile(SequentialFile file, long size, long id) throws Exception { if (enabled) { sendLargeFile(null, null, id, file, size); } } public void syncPages(SequentialFile file, long id, SimpleString queueName) throws Exception { if (enabled) sendLargeFile(null, queueName, id, file, Long.MAX_VALUE); } /** * Sends large files in reasonably sized chunks to the backup during replication synchronization. * * @param content journal type or {@code null} for large-messages and pages * @param pageStore page store name for pages, or {@code null} otherwise * @param id journal file id or (large) message id * @param file * @param maxBytesToSend maximum number of bytes to read and send from the file * @throws Exception */ private void sendLargeFile(AbstractJournalStorageManager.JournalContent content, SimpleString pageStore, final long id, SequentialFile file, long maxBytesToSend) throws Exception { if (!enabled) return; if (!file.isOpen()) { file.open(); } int size = 32 * 1024; final ByteBuf buffer = PooledByteBufAllocator.DEFAULT.directBuffer(size, size); try { try (final FileInputStream fis = new FileInputStream(file.getJavaFile()); final FileChannel channel = fis.getChannel()) { // We can afford having a single buffer here for this entire loop // because sendReplicatePacket will encode the packet as a NettyBuffer // through ActiveMQBuffer class leaving this buffer free to be reused on the next copy while (true) { buffer.clear(); ByteBuffer byteBuffer = buffer.writerIndex(size).readerIndex(0).nioBuffer(); final int bytesRead = channel.read(byteBuffer); int toSend = bytesRead; if (bytesRead > 0) { if (bytesRead >= maxBytesToSend) { toSend = (int) maxBytesToSend; maxBytesToSend = 0; } else { maxBytesToSend = maxBytesToSend - bytesRead; } } logger.debug("sending " + buffer.writerIndex() + " bytes on file " + file.getFileName()); // sending -1 or 0 bytes will close the file at the backup // We cannot simply send everything of a file through the executor, // otherwise we would run out of memory. // so we don't use the executor here sendReplicatePacket(new ReplicationSyncFileMessage(content, pageStore, id, toSend, buffer), true, false); if (bytesRead == -1 || bytesRead == 0 || maxBytesToSend == 0) break; } } } finally { buffer.release(); if (file.isOpen()) file.close(); } } /** * Reserve the following fileIDs in the backup server. * * @param datafiles * @param contentType * @throws ActiveMQException */ public void sendStartSyncMessage(JournalFile[] datafiles, AbstractJournalStorageManager.JournalContent contentType, String nodeID, boolean allowsAutoFailBack) throws ActiveMQException { if (enabled) sendReplicatePacket(new ReplicationStartSyncMessage(datafiles, contentType, nodeID, allowsAutoFailBack)); } /** * Informs backup that data synchronization is done. * <p> * So if 'live' fails, the (up-to-date) backup now may take over its duties. To do so, it must * know which is the live's {@code nodeID}. * * @param nodeID */ public void sendSynchronizationDone(String nodeID, long initialReplicationSyncTimeout) { if (enabled) { if (logger.isTraceEnabled()) { logger.trace("sendSynchronizationDone ::" + nodeID + ", " + initialReplicationSyncTimeout); } synchronizationIsFinishedAcknowledgement.countUp(); sendReplicatePacket(new ReplicationStartSyncMessage(nodeID)); try { if (!synchronizationIsFinishedAcknowledgement.await(initialReplicationSyncTimeout)) { logger.trace("sendSynchronizationDone wasn't finished in time"); throw ActiveMQMessageBundle.BUNDLE.replicationSynchronizationTimeout(initialReplicationSyncTimeout); } } catch (InterruptedException e) { logger.debug(e); } inSync = false; logger.trace("sendSynchronizationDone finished"); } } /** * Reserves several LargeMessage IDs in the backup. * <p> * Doing this before hand removes the need of synchronizing large-message deletes with the * largeMessageSyncList. * * @param largeMessages */ public void sendLargeMessageIdListMessage(Map<Long, Pair<String, Long>> largeMessages) { ArrayList<Long> idsToSend; idsToSend = new ArrayList<>(largeMessages.keySet()); if (enabled) sendReplicatePacket(new ReplicationStartSyncMessage(idsToSend)); } /** * Notifies the backup that the live server is stopping. * <p> * This notification allows the backup to skip quorum voting (or any other measure to avoid * 'split-brain') and do a faster fail-over. * * @return */ public OperationContext sendLiveIsStopping(final LiveStopping finalMessage) { logger.debug("LIVE IS STOPPING?!? message=" + finalMessage + " enabled=" + enabled); if (enabled) { logger.debug("LIVE IS STOPPING?!? message=" + finalMessage + " " + enabled); return sendReplicatePacket(new ReplicationLiveIsStoppingMessage(finalMessage)); } return null; } /** * Used while stopping the server to ensure that we freeze communications with the backup. * * @return remoting connection with the backup */ public CoreRemotingConnection getBackupTransportConnection() { return remotingConnection; } /** * @return */ public boolean isSynchronizing() { return inSync; } }