/*
* Copyright 2014 WANdisco
*
* WANdisco licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package c5db.replication;
import c5db.ReplicatorConstants;
import c5db.codec.ProtostuffDecoder;
import c5db.codec.ProtostuffEncoder;
import c5db.interfaces.C5Module;
import c5db.interfaces.DiscoveryModule;
import c5db.interfaces.LogModule;
import c5db.interfaces.ModuleInformationProvider;
import c5db.interfaces.ReplicationModule;
import c5db.interfaces.discovery.NodeInfoReply;
import c5db.interfaces.discovery.NodeInfoRequest;
import c5db.interfaces.replication.IndexCommitNotice;
import c5db.interfaces.replication.Replicator;
import c5db.interfaces.replication.ReplicatorInstanceEvent;
import c5db.interfaces.replication.ReplicatorLog;
import c5db.messages.generated.ModuleType;
import c5db.replication.generated.ReplicationWireMessage;
import c5db.replication.rpc.RpcRequest;
import c5db.replication.rpc.RpcWireReply;
import c5db.replication.rpc.RpcWireRequest;
import c5db.util.C5Futures;
import c5db.util.FiberOnly;
import c5db.util.FiberSupplier;
import com.google.common.util.concurrent.AbstractService;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.SettableFuture;
import io.netty.bootstrap.Bootstrap;
import io.netty.bootstrap.ServerBootstrap;
import io.netty.channel.Channel;
import io.netty.channel.ChannelFuture;
import io.netty.channel.ChannelFutureListener;
import io.netty.channel.ChannelHandler;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.ChannelOption;
import io.netty.channel.ChannelPipeline;
import io.netty.channel.EventLoopGroup;
import io.netty.channel.SimpleChannelInboundHandler;
import io.netty.channel.group.ChannelGroup;
import io.netty.channel.group.DefaultChannelGroup;
import io.netty.channel.socket.SocketChannel;
import io.netty.channel.socket.nio.NioServerSocketChannel;
import io.netty.channel.socket.nio.NioSocketChannel;
import io.netty.handler.codec.protobuf.ProtobufVarint32FrameDecoder;
import io.netty.handler.codec.protobuf.ProtobufVarint32LengthFieldPrepender;
import io.netty.util.concurrent.Future;
import io.netty.util.concurrent.GenericFutureListener;
import org.jetlang.channels.AsyncRequest;
import org.jetlang.channels.MemoryChannel;
import org.jetlang.channels.MemoryRequestChannel;
import org.jetlang.channels.Request;
import org.jetlang.channels.RequestChannel;
import org.jetlang.channels.Session;
import org.jetlang.core.Callback;
import org.jetlang.fibers.Fiber;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
/**
* An implementation of ReplicationModule using instances of ReplicatorInstance to handle each quorum.
* <p>
* TODO consider being symmetric in how we handle sent messages.
*/
public class ReplicatorService extends AbstractService implements ReplicationModule {
  private static final Logger LOG = LoggerFactory.getLogger(ReplicatorService.class);

  /**
   * ************* C5Module informational methods ***********************************
   */
  @Override
  public ModuleType getModuleType() {
    return ModuleType.Replication;
  }

  @Override
  public boolean hasPort() {
    // This module listens on a TCP port for replication traffic from peers.
    return true;
  }

  @Override
  public int port() {
    return this.port;
  }

  @Override
  public String acceptCommand(String commandString) {
    // This module accepts no textual commands.
    return null;
  }

  /**
   * Creates a replicator for the given quorum, or returns the existing one if a
   * replicator for that quorum is already running. The replicator's log is obtained
   * asynchronously from the log module, so the returned future completes later, on
   * this service's fiber.
   *
   * @param quorumId identifier of the quorum the replicator will participate in
   * @param peers    node IDs of all members of the quorum
   * @return future completing with the (possibly pre-existing) Replicator
   */
  @Override
  public ListenableFuture<Replicator> createReplicator(final String quorumId,
                                                       final Collection<Long> peers) {
    final SettableFuture<Replicator> future = SettableFuture.create();
    ListenableFuture<ReplicatorLog> logFuture = logModule.getReplicatorLog(quorumId);

    C5Futures.addCallback(logFuture,
        (ReplicatorLog log) -> {
          Replicator replicator = createReplicatorWithLog(log, quorumId, peers);
          future.set(replicator);
        },
        future::setException,
        fiber
    );

    return future;
  }

  private final int port;
  private final ModuleInformationProvider moduleInformationProvider;
  private final FiberSupplier fiberSupplier;
  private final long nodeId;

  // Netty infrastructure
  private final EventLoopGroup bossGroup;
  private final EventLoopGroup workerGroup;
  private final ChannelGroup allChannels;
  private final ServerBootstrap serverBootstrap = new ServerBootstrap();
  private final Bootstrap outgoingBootstrap = new Bootstrap();

  // ReplicatorInstances and objects shared among them, keyed by quorum ID
  private final Map<String, ReplicatorInstance> replicatorInstances = new HashMap<>();
  private final Persister persister;
  private final RequestChannel<RpcRequest, RpcWireReply> outgoingRequests = new MemoryRequestChannel<>();
  private final MemoryChannel<ReplicatorInstanceEvent> replicatorEventChannel = new MemoryChannel<>();
  private final MemoryChannel<IndexCommitNotice> indexCommitNotices = new MemoryChannel<>();

  // Connections to other servers by their node IDs
  private final Map<Long, Channel> connections = new HashMap<>();

  // Map of message ID -> Request
  // TODO we need a way to remove these after a while, because if we fail to get a reply we will be unhappy.
  private final Map<Long, Request<RpcRequest, RpcWireReply>> outstandingRPCs = new HashMap<>();

  // Map of Session -> message ID; lets a cancelled session retire its pending RPC
  private final Map<Session, Long> outstandingRPCbySession = new HashMap<>();

  // Initialized in the module start, by the time any messages or fiber executions trigger, these should be not-null
  private DiscoveryModule discoveryModule = null;
  private LogModule logModule = null;
  private Channel listenChannel;
  private Fiber fiber;

  // Sequence number for sent messages; only touched on the service fiber
  private long messageIdGen = 1;

  /**
   * ReplicatorService creates and starts fibers; it must be stopped (or failed) in
   * order to dispose them.
   *
   * @param bossGroup                 netty event loop group accepting inbound connections
   * @param workerGroup               netty event loop group servicing established channels
   * @param nodeId                    this node's unique ID
   * @param port                      TCP port to listen on for replication messages
   * @param moduleInformationProvider source of the Log and Discovery module dependencies
   * @param fiberSupplier             factory for the service fiber and per-instance fibers
   * @param quorumFileReaderWriter    backing storage for per-quorum persistent state
   */
  public ReplicatorService(EventLoopGroup bossGroup,
                           EventLoopGroup workerGroup,
                           long nodeId,
                           int port,
                           ModuleInformationProvider moduleInformationProvider,
                           FiberSupplier fiberSupplier,
                           QuorumFileReaderWriter quorumFileReaderWriter) {
    this.bossGroup = bossGroup;
    this.workerGroup = workerGroup;
    this.nodeId = nodeId;
    this.port = port;
    this.moduleInformationProvider = moduleInformationProvider;
    this.fiberSupplier = fiberSupplier;
    this.allChannels = new DefaultChannelGroup(workerGroup.next());
    this.persister = new Persister(quorumFileReaderWriter);
  }

  /**
   * *************** Handlers for netty/messages from the wire/TCP ***********************
   */
  @ChannelHandler.Sharable
  private class MessageHandler extends SimpleChannelInboundHandler<ReplicationWireMessage> {
    @Override
    public void channelActive(ChannelHandlerContext ctx) throws Exception {
      // Track every live channel so shutdown can close them all at once.
      allChannels.add(ctx.channel());

      super.channelActive(ctx);
    }

    @Override
    protected void channelRead0(final ChannelHandlerContext ctx, final ReplicationWireMessage msg) throws Exception {
      // Hop off the netty event loop; all message handling runs on the service fiber.
      fiber.execute(() -> handleWireInboundMessage(ctx.channel(), msg));
    }
  }

  /**
   * Dispatches a message received from the wire: a reply is matched against its pending
   * outbound RPC; a fresh request is routed to the appropriate replicator instance.
   */
  @FiberOnly
  private void handleWireInboundMessage(Channel channel, ReplicationWireMessage msg) {
    long messageId = msg.getMessageId();
    if (msg.getReceiverId() != nodeId) {
      LOG.debug("Got messageId {} for {} but I am {}, ignoring!", messageId, msg.getReceiverId(), nodeId);
      return;
    }

    if (msg.getInReply()) {
      // Retire the outstanding RPC this reply answers in a single map operation;
      // remove() returns null when the message ID is unknown (e.g. already timed out).
      Request<RpcRequest, RpcWireReply> request = outstandingRPCs.remove(messageId);
      if (request == null) {
        LOG.debug("Got a reply message_id {} which we don't track", messageId);
        return;
      }

      outstandingRPCbySession.remove(request.getSession());
      request.reply(new RpcWireReply(msg));
    } else {
      handleWireRequestMessage(channel, msg);
    }
  }

  /**
   * Routes an inbound request to the replicator instance owning its quorum, and, when
   * that instance replies, writes the reply back on the same channel it arrived on.
   */
  @FiberOnly
  private void handleWireRequestMessage(final Channel channel, final ReplicationWireMessage msg) {
    RpcWireRequest wireRequest = new RpcWireRequest(msg);
    String quorumId = wireRequest.quorumId;

    ReplicatorInstance replInst = replicatorInstances.get(quorumId);
    if (replInst == null) {
      LOG.trace("Instance not found {} for message id {} from {} (normal during region bootstrap)",
          quorumId,
          msg.getMessageId(),
          msg.getSenderId());
      // TODO send RPC failure to the sender?
      return;
    }

    AsyncRequest.withOneReply(fiber, replInst.getIncomingChannel(), wireRequest, reply -> {
      if (!channel.isOpen()) {
        // TODO cant signal comms failure, so just drop on the floor. Is there a better thing to do?
        return;
      }

      // Echo the original message ID back so the sender can correlate the reply.
      ReplicationWireMessage b = reply.getWireMessage(
          msg.getMessageId(),
          nodeId,
          msg.getSenderId(),
          true
      );

      channel.writeAndFlush(b).addListener(
          future -> {
            if (!future.isSuccess()) {
              LOG.warn("node {} error sending reply {} to node {} in response to request {}: {}",
                  nodeId, reply, wireRequest.from, wireRequest, future.cause());
            }
          });
    });
  }

  /**
   * ************* Handlers for Request&lt;&gt; from replicator instances ***********************************
   */

  /**
   * Cleans up tracking state for an RPC whose jetlang session was cancelled before a
   * reply arrived.
   */
  @FiberOnly
  private void handleCancelledSession(Session session) {
    // remove() hands back the mapped message ID (or null), so no separate get() is needed.
    Long messageId = outstandingRPCbySession.remove(session);
    if (messageId == null) {
      return;
    }
    LOG.trace("Removing cancelled RPC, message ID {}", messageId);
    outstandingRPCs.remove(messageId);
  }

  /**
   * Sends an RPC from a local replicator instance to its destination node. Loopback
   * messages short-circuit locally; otherwise an existing open connection is reused,
   * or the peer is located via the discovery module and a new connection is made.
   */
  @FiberOnly
  private void handleOutgoingMessage(final Request<RpcRequest, RpcWireReply> message) {
    final RpcRequest request = message.getRequest();
    final long to = request.to;

    if (to == nodeId) {
      handleLoopBackMessage(message);
      return;
    }

    // check to see if we have a connection:
    Channel channel = connections.get(to);
    if (channel != null && channel.isOpen()) {
      sendMessageAsync(message, channel);
      return;
    } else if (channel != null) {
      // stale?
      LOG.debug("Removing stale !isOpen channel from connections.get() for peer {}", to);
      connections.remove(to);
    }

    NodeInfoRequest nodeInfoRequest = new NodeInfoRequest(to, ModuleType.Replication);
    LOG.debug("node {} sending node info request {} ", nodeId, nodeInfoRequest);
    AsyncRequest.withOneReply(fiber, discoveryModule.getNodeInfo(), nodeInfoRequest, new Callback<NodeInfoReply>() {
      @SuppressWarnings("RedundantCast")
      @FiberOnly
      @Override
      public void onMessage(NodeInfoReply nodeInfoReply) {
        if (!nodeInfoReply.found) {
          LOG.debug("Can't find the info for the peer {}", to);
          // TODO signal TCP/transport layer failure in a better way
          //message.reply(null);
          return;
        }

        LOG.debug("node {} got node info for node {} reply {} ", nodeId, to, nodeInfoReply);
        // what if existing outgoing connection attempt?
        Channel channel = connections.get(to);
        if (channel != null && channel.isOpen()) {
          sendMessageAsync(message, channel);
          return;
        } else if (channel != null) {
          LOG.debug("Removing stale2 !isOpen channel from connections.get() for peer {}", to);
          connections.remove(to);
        }

        // ok so we connect now:
        ChannelFuture channelFuture = outgoingBootstrap.connect(nodeInfoReply.addresses.get(0), nodeInfoReply.port);
        LOG.trace("Connecting to peer {} at address {} port {}", to, nodeInfoReply.addresses.get(0), nodeInfoReply.port);

        // the channel might not be open, so defer the write.
        connections.put(to, channelFuture.channel());
        channelFuture.channel().closeFuture().addListener((ChannelFutureListener)
            future ->
                fiber.execute(() -> {
                  // remove only THIS channel. It might have been removed prior so.
                  connections.remove(to, future.channel());
                }));

        // funny hack, if the channel future is already open, we execute immediately!
        channelFuture.addListener((ChannelFutureListener)
            future -> {
              if (future.isSuccess()) {
                sendMessageAsync(message, future.channel());
              }
            });
      }
    },
        // If the NodeInfoRequest times out:
        ReplicatorConstants.REPLICATOR_NODE_INFO_REQUEST_TIMEOUT_MILLISECONDS, TimeUnit.MILLISECONDS,
        () -> LOG.warn("node info request timeout {} ", nodeInfoRequest));
  }

  /**
   * Assigns the request a fresh message ID, records it as outstanding (so the eventual
   * reply can be matched), and writes it on the given channel. All bookkeeping happens
   * on the service fiber.
   */
  private void sendMessageAsync(final Request<RpcRequest, RpcWireReply> message, final Channel channel) {
    fiber.execute(() -> {
      RpcRequest request = message.getRequest();
      long to = request.to;
      long messageId = messageIdGen++;

      outstandingRPCs.put(messageId, message);
      outstandingRPCbySession.put(message.getSession(), messageId);

      LOG.trace("Sending message id {} to {} / {}", messageId, to, request.quorumId);

      ReplicationWireMessage wireMessage = request.getWireMessage(
          messageId,
          nodeId,
          to,
          false
      );

      channel.writeAndFlush(wireMessage).addListener(
          future -> {
            if (!future.isSuccess()) {
              LOG.warn("Error sending from node {} request {}: {}", nodeId, request, future.cause());
            }
          });
    });
  }

  /**
   * Delivers a message addressed to this node directly to the local replicator instance,
   * bypassing the network entirely.
   */
  private void handleLoopBackMessage(final Request<RpcRequest, RpcWireReply> origMessage) {
    final long toFrom = nodeId; // I am me.
    final RpcRequest request = origMessage.getRequest();
    final String quorumId = request.quorumId;

    // Funny thing we don't have a direct handle on who sent us this message, so we have to do this. Sok though.
    final ReplicatorInstance repl = replicatorInstances.get(quorumId);
    if (repl == null) {
      // rare failure condition, whereby the replicator died AFTER it send messages.
      return; // ignore the message.
    }

    final RpcWireRequest newRequest = new RpcWireRequest(toFrom, quorumId, request.message);
    AsyncRequest.withOneReply(fiber, repl.getIncomingChannel(), newRequest, msg -> {
      assert msg.message != null;
      RpcWireReply newReply = new RpcWireReply(toFrom, toFrom, quorumId, msg.message);
      origMessage.reply(newReply);
    });
  }

  /**
   * ********** Service startup/registration and shutdown/termination **************
   */
  @Override
  protected void doStart() {
    // must start the fiber up early.
    fiber = fiberSupplier.getNewFiber(this::failModule);
    setupEventChannelSubscription();
    fiber.start();

    C5Futures.addCallback(getDependedOnModules(),
        (ignore) -> {
          // Identical pipeline for inbound and outbound channels: varint-framed
          // protostuff-encoded ReplicationWireMessages.
          ChannelInitializer<SocketChannel> initer = new ChannelInitializer<SocketChannel>() {
            @Override
            protected void initChannel(SocketChannel ch) throws Exception {
              ChannelPipeline p = ch.pipeline();
              p.addLast("frameDecode", new ProtobufVarint32FrameDecoder());
              p.addLast("pbufDecode", new ProtostuffDecoder<>(ReplicationWireMessage.getSchema()));
              p.addLast("frameEncode", new ProtobufVarint32LengthFieldPrepender());
              p.addLast("pbufEncoder", new ProtostuffEncoder<ReplicationWireMessage>());
              p.addLast(new MessageHandler());
            }
          };

          serverBootstrap.group(bossGroup, workerGroup)
              .channel(NioServerSocketChannel.class)
              .option(ChannelOption.SO_REUSEADDR, true)
              .option(ChannelOption.SO_BACKLOG, 100)
              .childOption(ChannelOption.TCP_NODELAY, true)
              .childHandler(initer);

          //noinspection RedundantCast
          serverBootstrap.bind(port).addListener((ChannelFutureListener)
              future -> {
                if (future.isSuccess()) {
                  LOG.info("successfully bound node {} port {} ", nodeId, port);
                  listenChannel = future.channel();
                } else {
                  LOG.error("Unable to bind! ", future.cause());
                  failModule(future.cause());
                }
              });

          outgoingBootstrap.group(workerGroup)
              .channel(NioSocketChannel.class)
              .option(ChannelOption.SO_REUSEADDR, true)
              .option(ChannelOption.TCP_NODELAY, true)
              .handler(initer);

          //noinspection Convert2MethodRef
          outgoingRequests.subscribe(fiber, message -> handleOutgoingMessage(message),
              // Clean up cancelled requests.
              message -> handleCancelledSession(message.getSession())
          );

          notifyStarted();
        },
        (Throwable t) -> {
          LOG.error("ReplicatorService unable to retrieve modules!", t);
          failModule(t);
        }, fiber);
  }

  /**
   * Transitions the service to FAILED: disposes all replicator instances, the service
   * fiber, and every open channel. Safe to call at any point in the lifecycle — after
   * doStop() completes, {@code fiber} is null, so it is guarded here to avoid an NPE
   * that would mask the original failure cause.
   */
  protected void failModule(Throwable t) {
    LOG.error("ReplicatorService failure, shutting down all ReplicatorInstances", t);
    try {
      replicatorInstances.values().forEach(ReplicatorInstance::dispose);
      replicatorInstances.clear();
      if (fiber != null) {
        fiber.dispose();
      }
      if (listenChannel != null) {
        listenChannel.close();
      }
      allChannels.close();
    } finally {
      notifyFailed(t);
    }
  }

  @Override
  protected void doStop() {
    fiber.execute(() -> {
      // The count starts at 1 for allChannels.close(); it is bumped to 2 when there
      // is also a listen channel to close. Whichever close finishes last performs the
      // final teardown and signals notifyStopped().
      final AtomicInteger countDown = new AtomicInteger(1);
      GenericFutureListener<? extends Future<? super Void>> listener = future -> {
        if (countDown.decrementAndGet() == 0) {
          fiber.dispose();
          fiber = null;
          notifyStopped();
        }
      };
      if (listenChannel != null) {
        countDown.incrementAndGet();
        listenChannel.close().addListener(listener);
      }

      allChannels.close().addListener(listener);

      replicatorInstances.values().forEach(ReplicatorInstance::dispose);
      replicatorInstances.clear();
    });
  }

  /**
   * Subscribes to replicator instance events; a QUORUM_FAILURE event causes the failed
   * instance to be dropped from the active map, while other events are merely logged.
   */
  private void setupEventChannelSubscription() {
    replicatorEventChannel.subscribe(fiber, message -> {
      if (message.eventType == ReplicatorInstanceEvent.EventType.QUORUM_FAILURE) {
        LOG.error("replicator {} indicates failure, removing. Error {}", message.instance,
            message.error);
        replicatorInstances.remove(message.instance.getQuorumId());
      } else {
        LOG.debug("replicator indicates state change {}", message);
      }
    });
  }

  /**
   * Asynchronously resolves the Log and Discovery modules this service depends on,
   * storing them in {@link #logModule} and {@link #discoveryModule}. The returned
   * future completes (on the service fiber) once both are available; a resolution
   * failure fails the whole module.
   */
  private ListenableFuture<Void> getDependedOnModules() {
    SettableFuture<Void> doneFuture = SettableFuture.create();

    List<ListenableFuture<C5Module>> moduleFutures = new ArrayList<>();
    moduleFutures.add(moduleInformationProvider.getModule(ModuleType.Log));
    moduleFutures.add(moduleInformationProvider.getModule(ModuleType.Discovery));
    ListenableFuture<List<C5Module>> compositeModulesFuture = Futures.allAsList(moduleFutures);

    LOG.warn("ReplicatorService now waiting for module dependency on Log & Discovery");

    C5Futures.addCallback(compositeModulesFuture,
        (List<C5Module> modules) -> {
          // Order matches the order the futures were added above.
          this.logModule = (LogModule) modules.get(0);
          this.discoveryModule = (DiscoveryModule) modules.get(1);
          doneFuture.set(null);
        },
        this::failModule, fiber);

    return doneFuture;
  }

  /**
   * Builds, registers, and starts a ReplicatorInstance for the given quorum using the
   * supplied log; idempotent per quorum ID. A brand-new quorum (empty log) is
   * bootstrapped with the given peer set. Instance failures are routed back to this
   * service's fiber via a throwable channel.
   */
  private Replicator createReplicatorWithLog(ReplicatorLog log, String quorumId, Collection<Long> peers) {
    if (replicatorInstances.containsKey(quorumId)) {
      LOG.debug("Replicator for quorum {} exists already", quorumId);
      return replicatorInstances.get(quorumId);
    }

    if (!peers.contains(nodeId)) {
      LOG.warn("Creating a replicator instance for quorum {} peers {} but it does not contain me ({})",
          quorumId, peers, nodeId);
    }

    LOG.info("Creating replicator instance for {} peers {}", quorumId, peers);
    MemoryChannel<Throwable> throwableChannel = new MemoryChannel<>();
    Fiber instanceFiber = fiberSupplier.getNewFiber(throwableChannel::publish);
    ReplicatorInstance instance =
        new ReplicatorInstance(
            instanceFiber,
            nodeId,
            quorumId,
            log,
            new DefaultSystemTimeReplicatorClock(),
            persister,
            outgoingRequests,
            replicatorEventChannel,
            indexCommitNotices,
            Replicator.State.FOLLOWER
        );

    // A log index of zero means this quorum has never run before: seed its configuration.
    if (log.getLastIndex() == 0) {
      instance.bootstrapQuorum(peers);
    }

    throwableChannel.subscribe(fiber, instance::failReplicatorInstance);
    replicatorInstances.put(quorumId, instance);
    instance.start();
    return instance;
  }
}