/* * Licensed to ElasticSearch and Shay Banon under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. ElasticSearch licenses this * file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.discovery.zen.fd; import org.elasticsearch.transport.TransportService; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.discovery.zen.DiscoveryNodesProvider; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.*; import java.io.IOException; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicBoolean; import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds; import static org.elasticsearch.transport.TransportRequestOptions.options; /** * A fault detection that pings the master periodically to see if its alive. */ public class MasterFaultDetection extends AbstractComponent { public static interface Listener { void onMasterFailure(DiscoveryNode masterNode, String reason); void onDisconnectedFromMaster(); } private final ThreadPool threadPool; private final TransportService transportService; private final DiscoveryNodesProvider nodesProvider; private final CopyOnWriteArrayList<Listener> listeners = new CopyOnWriteArrayList<Listener>(); private final boolean connectOnNetworkDisconnect; private final TimeValue pingInterval; private final TimeValue pingRetryTimeout; private final int pingRetryCount; // used mainly for testing, should always be true private final boolean registerConnectionListener; private final FDConnectionListener connectionListener; private volatile MasterPinger masterPinger; private final Object masterNodeMutex = new Object(); private volatile DiscoveryNode masterNode; private volatile int retryCount; private final AtomicBoolean notifiedMasterFailure = new AtomicBoolean(); public MasterFaultDetection(Settings settings, ThreadPool threadPool, TransportService transportService, DiscoveryNodesProvider nodesProvider) { super(settings); this.threadPool = threadPool; this.transportService = transportService; this.nodesProvider = nodesProvider; this.connectOnNetworkDisconnect = componentSettings.getAsBoolean("connect_on_network_disconnect", true); this.pingInterval = componentSettings.getAsTime("ping_interval", timeValueSeconds(1)); this.pingRetryTimeout = componentSettings.getAsTime("ping_timeout", timeValueSeconds(30)); this.pingRetryCount = componentSettings.getAsInt("ping_retries", 3); this.registerConnectionListener = componentSettings.getAsBoolean("register_connection_listener", true); logger.debug("[master] uses ping_interval [{}], ping_timeout [{}], ping_retries [{}]", pingInterval, pingRetryTimeout, pingRetryCount); this.connectionListener = new FDConnectionListener(); if (registerConnectionListener) { transportService.addConnectionListener(connectionListener); } transportService.registerHandler(MasterPingRequestHandler.ACTION, new MasterPingRequestHandler()); } public DiscoveryNode masterNode() { return this.masterNode; } public void addListener(Listener listener) { listeners.add(listener); } public void removeListener(Listener listener) { listeners.remove(listener); } public void restart(DiscoveryNode masterNode, String reason) { synchronized (masterNodeMutex) { if (logger.isDebugEnabled()) { logger.debug("[master] restarting fault detection against master [{}], reason [{}]", masterNode, reason); } innerStop(); innerStart(masterNode); } } public void start(final DiscoveryNode masterNode, String reason) { synchronized (masterNodeMutex) { if (logger.isDebugEnabled()) { logger.debug("[master] starting fault detection against master [{}], reason [{}]", masterNode, reason); } innerStart(masterNode); } } private void innerStart(final DiscoveryNode masterNode) { this.masterNode = masterNode; this.retryCount = 0; this.notifiedMasterFailure.set(false); // try and connect to make sure we are connected try { transportService.connectToNode(masterNode); } catch (final Exception e) { // notify master failure (which stops also) and bail.. notifyMasterFailure(masterNode, "failed to perform initial connect [" + e.getMessage() + "]"); return; } if (masterPinger != null) { masterPinger.stop(); } this.masterPinger = new MasterPinger(); // start the ping process threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger); } public void stop(String reason) { synchronized (masterNodeMutex) { if (masterNode != null) { if (logger.isDebugEnabled()) { logger.debug("[master] stopping fault detection against master [{}], reason [{}]", masterNode, reason); } } innerStop(); } } private void innerStop() { // also will stop the next ping schedule this.retryCount = 0; if (masterPinger != null) { masterPinger.stop(); masterPinger = null; } this.masterNode = null; } public void close() { stop("closing"); this.listeners.clear(); transportService.removeConnectionListener(connectionListener); transportService.removeHandler(MasterPingRequestHandler.ACTION); } private void handleTransportDisconnect(DiscoveryNode node) { synchronized (masterNodeMutex) { if (!node.equals(this.masterNode)) { return; } if (connectOnNetworkDisconnect) { try { transportService.connectToNode(node); // if all is well, make sure we restart the pinger if (masterPinger != null) { masterPinger.stop(); } this.masterPinger = new MasterPinger(); threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger); } catch (Exception e) { logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode); notifyMasterFailure(masterNode, "transport disconnected (with verified connect)"); } } else { logger.trace("[master] [{}] transport disconnected", node); notifyMasterFailure(node, "transport disconnected"); } } } private void notifyDisconnectedFromMaster() { threadPool.generic().execute(new Runnable() { @Override public void run() { for (Listener listener : listeners) { listener.onDisconnectedFromMaster(); } } }); } private void notifyMasterFailure(final DiscoveryNode masterNode, final String reason) { if (notifiedMasterFailure.compareAndSet(false, true)) { threadPool.generic().execute(new Runnable() { @Override public void run() { for (Listener listener : listeners) { listener.onMasterFailure(masterNode, reason); } } }); stop("master failure, " + reason); } } private class FDConnectionListener implements TransportConnectionListener { @Override public void onNodeConnected(DiscoveryNode node) { } @Override public void onNodeDisconnected(DiscoveryNode node) { handleTransportDisconnect(node); } } private class MasterPinger implements Runnable { private volatile boolean running = true; public void stop() { this.running = false; } @Override public void run() { if (!running) { // return and don't spawn... return; } final DiscoveryNode masterToPing = masterNode; if (masterToPing == null) { // master is null, should not happen, but we are still running, so reschedule threadPool.schedule(pingInterval, ThreadPool.Names.SAME, MasterPinger.this); return; } transportService.sendRequest(masterToPing, MasterPingRequestHandler.ACTION, new MasterPingRequest(nodesProvider.nodes().localNode().id(), masterToPing.id()), options().withHighType().withTimeout(pingRetryTimeout), new BaseTransportResponseHandler<MasterPingResponseResponse>() { @Override public MasterPingResponseResponse newInstance() { return new MasterPingResponseResponse(); } @Override public void handleResponse(MasterPingResponseResponse response) { if (!running) { return; } // reset the counter, we got a good result MasterFaultDetection.this.retryCount = 0; // check if the master node did not get switched on us..., if it did, we simply return with no reschedule if (masterToPing.equals(MasterFaultDetection.this.masterNode())) { if (!response.connectedToMaster) { logger.trace("[master] [{}] does not have us registered with it...", masterToPing); notifyDisconnectedFromMaster(); } // we don't stop on disconnection from master, we keep pinging it threadPool.schedule(pingInterval, ThreadPool.Names.SAME, MasterPinger.this); } } @Override public void handleException(TransportException exp) { if (!running) { return; } if (exp instanceof ConnectTransportException) { // ignore this one, we already handle it by registering a connection listener return; } synchronized (masterNodeMutex) { // check if the master node did not get switched on us... if (masterToPing.equals(MasterFaultDetection.this.masterNode())) { if (exp.getCause() instanceof NoLongerMasterException) { logger.debug("[master] pinging a master {} that is no longer a master", masterNode); notifyMasterFailure(masterToPing, "no longer master"); return; } else if (exp.getCause() instanceof NotMasterException) { logger.debug("[master] pinging a master {} that is not the master", masterNode); notifyMasterFailure(masterToPing, "not master"); return; } else if (exp.getCause() instanceof NodeDoesNotExistOnMasterException) { logger.debug("[master] pinging a master {} but we do not exists on it, act as if its master failure", masterNode); notifyMasterFailure(masterToPing, "do not exists on master, act as master failure"); return; } int retryCount = ++MasterFaultDetection.this.retryCount; logger.trace("[master] failed to ping [{}], retry [{}] out of [{}]", exp, masterNode, retryCount, pingRetryCount); if (retryCount >= pingRetryCount) { logger.debug("[master] failed to ping [{}], tried [{}] times, each with maximum [{}] timeout", masterNode, pingRetryCount, pingRetryTimeout); // not good, failure notifyMasterFailure(masterToPing, "failed to ping, tried [" + pingRetryCount + "] times, each with maximum [" + pingRetryTimeout + "] timeout"); } else { // resend the request, not reschedule, rely on send timeout transportService.sendRequest(masterToPing, MasterPingRequestHandler.ACTION, new MasterPingRequest(nodesProvider.nodes().localNode().id(), masterToPing.id()), options().withHighType().withTimeout(pingRetryTimeout), this); } } } } @Override public String executor() { return ThreadPool.Names.SAME; } }); } } static class NoLongerMasterException extends ElasticSearchIllegalStateException { @Override public Throwable fillInStackTrace() { return null; } } static class NotMasterException extends ElasticSearchIllegalStateException { @Override public Throwable fillInStackTrace() { return null; } } static class NodeDoesNotExistOnMasterException extends ElasticSearchIllegalStateException { @Override public Throwable fillInStackTrace() { return null; } } private class MasterPingRequestHandler extends BaseTransportRequestHandler<MasterPingRequest> { public static final String ACTION = "discovery/zen/fd/masterPing"; @Override public MasterPingRequest newInstance() { return new MasterPingRequest(); } @Override public void messageReceived(MasterPingRequest request, TransportChannel channel) throws Exception { DiscoveryNodes nodes = nodesProvider.nodes(); // check if we are really the same master as the one we seemed to be think we are // this can happen if the master got "kill -9" and then another node started using the same port if (!request.masterNodeId.equals(nodes.localNodeId())) { throw new NotMasterException(); } // if we are no longer master, fail... if (!nodes.localNodeMaster()) { throw new NoLongerMasterException(); } if (!nodes.nodeExists(request.nodeId)) { throw new NodeDoesNotExistOnMasterException(); } // send a response, and note if we are connected to the master or not channel.sendResponse(new MasterPingResponseResponse(nodes.nodeExists(request.nodeId))); } @Override public String executor() { return ThreadPool.Names.SAME; } } private static class MasterPingRequest extends TransportRequest { private String nodeId; private String masterNodeId; private MasterPingRequest() { } private MasterPingRequest(String nodeId, String masterNodeId) { this.nodeId = nodeId; this.masterNodeId = masterNodeId; } @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); nodeId = in.readString(); masterNodeId = in.readString(); } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); out.writeString(nodeId); out.writeString(masterNodeId); } } private static class MasterPingResponseResponse extends TransportResponse { private boolean connectedToMaster; private MasterPingResponseResponse() { } private MasterPingResponseResponse(boolean connectedToMaster) { this.connectedToMaster = connectedToMaster; } @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); connectedToMaster = in.readBoolean(); } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); out.writeBoolean(connectedToMaster); } } }