/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.transport.netty4;
import io.netty.bootstrap.Bootstrap;
import io.netty.bootstrap.ServerBootstrap;
import io.netty.channel.AdaptiveRecvByteBufAllocator;
import io.netty.channel.Channel;
import io.netty.channel.ChannelFuture;
import io.netty.channel.ChannelFutureListener;
import io.netty.channel.ChannelHandler;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.ChannelOption;
import io.netty.channel.FixedRecvByteBufAllocator;
import io.netty.channel.RecvByteBufAllocator;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.nio.NioServerSocketChannel;
import io.netty.channel.socket.nio.NioSocketChannel;
import io.netty.util.concurrent.Future;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.logging.log4j.util.Supplier;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.network.NetworkService;
import org.elasticsearch.common.network.NetworkService.TcpSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.concurrent.EsExecutors;
import org.elasticsearch.common.util.concurrent.FutureUtils;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.monitor.jvm.JvmInfo;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.ConnectTransportException;
import org.elasticsearch.transport.ConnectionProfile;
import org.elasticsearch.transport.TcpTransport;
import org.elasticsearch.transport.TransportRequestOptions;
import org.elasticsearch.transport.TransportServiceAdapter;
import org.elasticsearch.transport.TransportSettings;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import static org.elasticsearch.common.settings.Setting.byteSizeSetting;
import static org.elasticsearch.common.settings.Setting.intSetting;
import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap;
import static org.elasticsearch.common.util.concurrent.EsExecutors.daemonThreadFactory;
/**
* There are 4 types of connections per node, low/med/high/ping. Low if for batch oriented APIs (like recovery or
* batch) with high payload that will cause regular request. (like search or single index) to take
* longer. Med is for the typical search / single doc index. And High for things like cluster state. Ping is reserved for
* sending out ping requests to other nodes.
*/
public class Netty4Transport extends TcpTransport<Channel> {
static {
Netty4Utils.setup();
}
public static final Setting<Integer> WORKER_COUNT =
new Setting<>("transport.netty.worker_count",
(s) -> Integer.toString(EsExecutors.numberOfProcessors(s) * 2),
(s) -> Setting.parseInt(s, 1, "transport.netty.worker_count"), Property.NodeScope, Property.Shared);
public static final Setting<ByteSizeValue> NETTY_MAX_CUMULATION_BUFFER_CAPACITY =
Setting.byteSizeSetting(
"transport.netty.max_cumulation_buffer_capacity",
new ByteSizeValue(-1),
Property.NodeScope,
Property.Shared);
public static final Setting<Integer> NETTY_MAX_COMPOSITE_BUFFER_COMPONENTS =
Setting.intSetting("transport.netty.max_composite_buffer_components", -1, -1, Property.NodeScope, Property.Shared);
public static final Setting<ByteSizeValue> NETTY_RECEIVE_PREDICTOR_SIZE = Setting.byteSizeSetting(
"transport.netty.receive_predictor_size", new ByteSizeValue(64, ByteSizeUnit.KB), Property.NodeScope);
public static final Setting<ByteSizeValue> NETTY_RECEIVE_PREDICTOR_MIN =
byteSizeSetting("transport.netty.receive_predictor_min", NETTY_RECEIVE_PREDICTOR_SIZE, Property.NodeScope);
public static final Setting<ByteSizeValue> NETTY_RECEIVE_PREDICTOR_MAX =
byteSizeSetting("transport.netty.receive_predictor_max", NETTY_RECEIVE_PREDICTOR_SIZE, Property.NodeScope);
public static final Setting<Integer> NETTY_BOSS_COUNT =
intSetting("transport.netty.boss_count", 1, 1, Property.NodeScope, Property.Shared);
protected final ByteSizeValue maxCumulationBufferCapacity;
protected final int maxCompositeBufferComponents;
protected final RecvByteBufAllocator recvByteBufAllocator;
protected final int workerCount;
protected final ByteSizeValue receivePredictorMin;
protected final ByteSizeValue receivePredictorMax;
// package private for testing
volatile Netty4OpenChannelsHandler serverOpenChannels;
protected volatile Bootstrap bootstrap;
protected final Map<String, ServerBootstrap> serverBootstraps = newConcurrentMap();
public Netty4Transport(Settings settings, ThreadPool threadPool, NetworkService networkService, BigArrays bigArrays,
NamedWriteableRegistry namedWriteableRegistry, CircuitBreakerService circuitBreakerService) {
super("netty", settings, threadPool, bigArrays, circuitBreakerService, namedWriteableRegistry, networkService);
Netty4Utils.setAvailableProcessors(EsExecutors.PROCESSORS_SETTING.get(settings));
this.workerCount = WORKER_COUNT.get(settings);
this.maxCumulationBufferCapacity = NETTY_MAX_CUMULATION_BUFFER_CAPACITY.get(settings);
this.maxCompositeBufferComponents = NETTY_MAX_COMPOSITE_BUFFER_COMPONENTS.get(settings);
// See AdaptiveReceiveBufferSizePredictor#DEFAULT_XXX for default values in netty..., we can use higher ones for us, even fixed one
this.receivePredictorMin = NETTY_RECEIVE_PREDICTOR_MIN.get(settings);
this.receivePredictorMax = NETTY_RECEIVE_PREDICTOR_MAX.get(settings);
if (receivePredictorMax.getBytes() == receivePredictorMin.getBytes()) {
recvByteBufAllocator = new FixedRecvByteBufAllocator((int) receivePredictorMax.getBytes());
} else {
recvByteBufAllocator = new AdaptiveRecvByteBufAllocator((int) receivePredictorMin.getBytes(),
(int) receivePredictorMin.getBytes(), (int) receivePredictorMax.getBytes());
}
}
TransportServiceAdapter transportServiceAdapter() {
return transportServiceAdapter;
}
@Override
protected void doStart() {
boolean success = false;
try {
bootstrap = createBootstrap();
if (NetworkService.NETWORK_SERVER.get(settings)) {
final Netty4OpenChannelsHandler openChannels = new Netty4OpenChannelsHandler(logger);
this.serverOpenChannels = openChannels;
// loop through all profiles and start them up, special handling for default one
for (Map.Entry<String, Settings> entry : buildProfileSettings().entrySet()) {
// merge fallback settings with default settings with profile settings so we have complete settings with default values
final Settings settings = Settings.builder()
.put(createFallbackSettings())
.put(entry.getValue()).build();
createServerBootstrap(entry.getKey(), settings);
bindServer(entry.getKey(), settings);
}
}
super.doStart();
success = true;
} finally {
if (success == false) {
doStop();
}
}
}
private Bootstrap createBootstrap() {
final Bootstrap bootstrap = new Bootstrap();
bootstrap.group(new NioEventLoopGroup(workerCount, daemonThreadFactory(settings, TRANSPORT_CLIENT_BOSS_THREAD_NAME_PREFIX)));
bootstrap.channel(NioSocketChannel.class);
bootstrap.handler(getClientChannelInitializer());
bootstrap.option(ChannelOption.CONNECT_TIMEOUT_MILLIS, Math.toIntExact(defaultConnectionProfile.getConnectTimeout().millis()));
bootstrap.option(ChannelOption.TCP_NODELAY, TCP_NO_DELAY.get(settings));
bootstrap.option(ChannelOption.SO_KEEPALIVE, TCP_KEEP_ALIVE.get(settings));
final ByteSizeValue tcpSendBufferSize = TCP_SEND_BUFFER_SIZE.get(settings);
if (tcpSendBufferSize.getBytes() > 0) {
bootstrap.option(ChannelOption.SO_SNDBUF, Math.toIntExact(tcpSendBufferSize.getBytes()));
}
final ByteSizeValue tcpReceiveBufferSize = TCP_RECEIVE_BUFFER_SIZE.get(settings);
if (tcpReceiveBufferSize.getBytes() > 0) {
bootstrap.option(ChannelOption.SO_RCVBUF, Math.toIntExact(tcpReceiveBufferSize.getBytes()));
}
bootstrap.option(ChannelOption.RCVBUF_ALLOCATOR, recvByteBufAllocator);
final boolean reuseAddress = TCP_REUSE_ADDRESS.get(settings);
bootstrap.option(ChannelOption.SO_REUSEADDR, reuseAddress);
bootstrap.validate();
return bootstrap;
}
private Settings createFallbackSettings() {
Settings.Builder fallbackSettingsBuilder = Settings.builder();
List<String> fallbackBindHost = TransportSettings.BIND_HOST.get(settings);
if (fallbackBindHost.isEmpty() == false) {
fallbackSettingsBuilder.putArray("bind_host", fallbackBindHost);
}
List<String> fallbackPublishHost = TransportSettings.PUBLISH_HOST.get(settings);
if (fallbackPublishHost.isEmpty() == false) {
fallbackSettingsBuilder.putArray("publish_host", fallbackPublishHost);
}
boolean fallbackTcpNoDelay = settings.getAsBoolean("transport.netty.tcp_no_delay", TcpSettings.TCP_NO_DELAY.get(settings));
fallbackSettingsBuilder.put("tcp_no_delay", fallbackTcpNoDelay);
boolean fallbackTcpKeepAlive = settings.getAsBoolean("transport.netty.tcp_keep_alive", TcpSettings.TCP_KEEP_ALIVE.get(settings));
fallbackSettingsBuilder.put("tcp_keep_alive", fallbackTcpKeepAlive);
boolean fallbackReuseAddress = settings.getAsBoolean("transport.netty.reuse_address", TcpSettings.TCP_REUSE_ADDRESS.get(settings));
fallbackSettingsBuilder.put("reuse_address", fallbackReuseAddress);
ByteSizeValue fallbackTcpSendBufferSize = settings.getAsBytesSize("transport.netty.tcp_send_buffer_size",
TCP_SEND_BUFFER_SIZE.get(settings));
if (fallbackTcpSendBufferSize.getBytes() >= 0) {
fallbackSettingsBuilder.put("tcp_send_buffer_size", fallbackTcpSendBufferSize);
}
ByteSizeValue fallbackTcpBufferSize = settings.getAsBytesSize("transport.netty.tcp_receive_buffer_size",
TCP_RECEIVE_BUFFER_SIZE.get(settings));
if (fallbackTcpBufferSize.getBytes() >= 0) {
fallbackSettingsBuilder.put("tcp_receive_buffer_size", fallbackTcpBufferSize);
}
return fallbackSettingsBuilder.build();
}
private void createServerBootstrap(String name, Settings settings) {
if (logger.isDebugEnabled()) {
logger.debug("using profile[{}], worker_count[{}], port[{}], bind_host[{}], publish_host[{}], compress[{}], "
+ "connect_timeout[{}], connections_per_node[{}/{}/{}/{}/{}], receive_predictor[{}->{}]",
name, workerCount, settings.get("port"), settings.get("bind_host"), settings.get("publish_host"), compress,
defaultConnectionProfile.getConnectTimeout(),
defaultConnectionProfile.getNumConnectionsPerType(TransportRequestOptions.Type.RECOVERY),
defaultConnectionProfile.getNumConnectionsPerType(TransportRequestOptions.Type.BULK),
defaultConnectionProfile.getNumConnectionsPerType(TransportRequestOptions.Type.REG),
defaultConnectionProfile.getNumConnectionsPerType(TransportRequestOptions.Type.STATE),
defaultConnectionProfile.getNumConnectionsPerType(TransportRequestOptions.Type.PING),
receivePredictorMin, receivePredictorMax);
}
final ThreadFactory workerFactory = daemonThreadFactory(this.settings, TRANSPORT_SERVER_WORKER_THREAD_NAME_PREFIX, name);
final ServerBootstrap serverBootstrap = new ServerBootstrap();
serverBootstrap.group(new NioEventLoopGroup(workerCount, workerFactory));
serverBootstrap.channel(NioServerSocketChannel.class);
serverBootstrap.childHandler(getServerChannelInitializer(name, settings));
serverBootstrap.childOption(ChannelOption.TCP_NODELAY, TCP_NO_DELAY.get(settings));
serverBootstrap.childOption(ChannelOption.SO_KEEPALIVE, TCP_KEEP_ALIVE.get(settings));
final ByteSizeValue tcpSendBufferSize = TCP_SEND_BUFFER_SIZE.getDefault(settings);
if (tcpSendBufferSize != null && tcpSendBufferSize.getBytes() > 0) {
serverBootstrap.childOption(ChannelOption.SO_SNDBUF, Math.toIntExact(tcpSendBufferSize.getBytes()));
}
final ByteSizeValue tcpReceiveBufferSize = TCP_RECEIVE_BUFFER_SIZE.getDefault(settings);
if (tcpReceiveBufferSize != null && tcpReceiveBufferSize.getBytes() > 0) {
serverBootstrap.childOption(ChannelOption.SO_RCVBUF, Math.toIntExact(tcpReceiveBufferSize.bytesAsInt()));
}
serverBootstrap.option(ChannelOption.RCVBUF_ALLOCATOR, recvByteBufAllocator);
serverBootstrap.childOption(ChannelOption.RCVBUF_ALLOCATOR, recvByteBufAllocator);
final boolean reuseAddress = TCP_REUSE_ADDRESS.get(settings);
serverBootstrap.option(ChannelOption.SO_REUSEADDR, reuseAddress);
serverBootstrap.childOption(ChannelOption.SO_REUSEADDR, reuseAddress);
serverBootstrap.validate();
serverBootstraps.put(name, serverBootstrap);
}
protected ChannelHandler getServerChannelInitializer(String name, Settings settings) {
return new ServerChannelInitializer(name, settings);
}
protected ChannelHandler getClientChannelInitializer() {
return new ClientChannelInitializer();
}
protected final void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
final Throwable unwrapped = ExceptionsHelper.unwrap(cause, ElasticsearchException.class);
final Throwable t = unwrapped != null ? unwrapped : cause;
onException(ctx.channel(), t instanceof Exception ? (Exception) t : new ElasticsearchException(t));
}
@Override
public long serverOpen() {
Netty4OpenChannelsHandler channels = serverOpenChannels;
return channels == null ? 0 : channels.numberOfOpenChannels();
}
@Override
protected NodeChannels connectToChannels(DiscoveryNode node, ConnectionProfile profile) {
final Channel[] channels = new Channel[profile.getNumConnections()];
final NodeChannels nodeChannels = new NodeChannels(node, channels, profile, transportServiceAdapter::onConnectionClosed);
boolean success = false;
try {
final TimeValue connectTimeout;
final Bootstrap bootstrap;
final TimeValue defaultConnectTimeout = defaultConnectionProfile.getConnectTimeout();
if (profile.getConnectTimeout() != null && profile.getConnectTimeout().equals(defaultConnectTimeout) == false) {
bootstrap = this.bootstrap.clone(this.bootstrap.config().group());
bootstrap.option(ChannelOption.CONNECT_TIMEOUT_MILLIS, Math.toIntExact(profile.getConnectTimeout().millis()));
connectTimeout = profile.getConnectTimeout();
} else {
connectTimeout = defaultConnectTimeout;
bootstrap = this.bootstrap;
}
final ArrayList<ChannelFuture> connections = new ArrayList<>(channels.length);
final InetSocketAddress address = node.getAddress().address();
for (int i = 0; i < channels.length; i++) {
connections.add(bootstrap.connect(address));
}
final Iterator<ChannelFuture> iterator = connections.iterator();
try {
for (int i = 0; i < channels.length; i++) {
assert iterator.hasNext();
ChannelFuture future = iterator.next();
future.awaitUninterruptibly((long) (connectTimeout.millis() * 1.5));
if (!future.isSuccess()) {
throw new ConnectTransportException(node, "connect_timeout[" + connectTimeout + "]", future.cause());
}
channels[i] = future.channel();
channels[i].closeFuture().addListener(new ChannelCloseListener(node));
}
assert iterator.hasNext() == false : "not all created connection have been consumed";
} catch (final RuntimeException e) {
for (final ChannelFuture future : Collections.unmodifiableList(connections)) {
FutureUtils.cancel(future);
if (future.channel() != null && future.channel().isOpen()) {
try {
future.channel().close();
} catch (Exception inner) {
e.addSuppressed(inner);
}
}
}
throw e;
}
success = true;
} finally {
if (success == false) {
try {
nodeChannels.close();
} catch (IOException e) {
logger.trace("exception while closing channels", e);
}
}
}
return nodeChannels;
}
private class ChannelCloseListener implements ChannelFutureListener {
private final DiscoveryNode node;
private ChannelCloseListener(DiscoveryNode node) {
this.node = node;
}
@Override
public void operationComplete(final ChannelFuture future) throws Exception {
onChannelClosed(future.channel());
NodeChannels nodeChannels = connectedNodes.get(node);
if (nodeChannels != null && nodeChannels.hasChannel(future.channel())) {
threadPool.generic().execute(() -> disconnectFromNode(node, future.channel(), "channel closed event"));
}
}
}
@Override
protected void sendMessage(Channel channel, BytesReference reference, ActionListener<Channel> listener) {
final ChannelFuture future = channel.writeAndFlush(Netty4Utils.toByteBuf(reference));
future.addListener(f -> {
if (f.isSuccess()) {
listener.onResponse(channel);
} else {
Throwable cause = f.cause();
// If the Throwable is an Error something has gone very wrong and Netty4MessageChannelHandler is
// going to cause that to bubble up and kill the process.
if (cause instanceof Exception) {
listener.onFailure((Exception) cause);
}
}
});
}
@Override
protected void closeChannels(final List<Channel> channels) throws IOException {
Netty4Utils.closeChannels(channels);
}
@Override
protected InetSocketAddress getLocalAddress(Channel channel) {
return (InetSocketAddress) channel.localAddress();
}
@Override
protected Channel bind(String name, InetSocketAddress address) {
return serverBootstraps.get(name).bind(address).syncUninterruptibly().channel();
}
ScheduledPing getPing() {
return scheduledPing;
}
@Override
protected boolean isOpen(Channel channel) {
return channel.isOpen();
}
@Override
@SuppressForbidden(reason = "debug")
protected void stopInternal() {
Releasables.close(serverOpenChannels, () -> {
final List<Tuple<String, Future<?>>> serverBootstrapCloseFutures = new ArrayList<>(serverBootstraps.size());
for (final Map.Entry<String, ServerBootstrap> entry : serverBootstraps.entrySet()) {
serverBootstrapCloseFutures.add(
Tuple.tuple(entry.getKey(), entry.getValue().config().group().shutdownGracefully(0, 5, TimeUnit.SECONDS)));
}
for (final Tuple<String, Future<?>> future : serverBootstrapCloseFutures) {
future.v2().awaitUninterruptibly();
if (!future.v2().isSuccess()) {
logger.debug(
(Supplier<?>) () -> new ParameterizedMessage(
"Error closing server bootstrap for profile [{}]", future.v1()), future.v2().cause());
}
}
serverBootstraps.clear();
if (bootstrap != null) {
bootstrap.config().group().shutdownGracefully(0, 5, TimeUnit.SECONDS).awaitUninterruptibly();
bootstrap = null;
}
});
}
protected class ClientChannelInitializer extends ChannelInitializer<Channel> {
@Override
protected void initChannel(Channel ch) throws Exception {
ch.pipeline().addLast("size", new Netty4SizeHeaderFrameDecoder());
// using a dot as a prefix means this cannot come from any settings parsed
ch.pipeline().addLast("dispatcher", new Netty4MessageChannelHandler(Netty4Transport.this, ".client"));
}
@Override
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
Netty4Utils.maybeDie(cause);
super.exceptionCaught(ctx, cause);
}
}
protected class ServerChannelInitializer extends ChannelInitializer<Channel> {
protected final String name;
protected final Settings settings;
protected ServerChannelInitializer(String name, Settings settings) {
this.name = name;
this.settings = settings;
}
@Override
protected void initChannel(Channel ch) throws Exception {
ch.pipeline().addLast("open_channels", Netty4Transport.this.serverOpenChannels);
ch.pipeline().addLast("size", new Netty4SizeHeaderFrameDecoder());
ch.pipeline().addLast("dispatcher", new Netty4MessageChannelHandler(Netty4Transport.this, name));
}
@Override
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
Netty4Utils.maybeDie(cause);
super.exceptionCaught(ctx, cause);
}
}
}