/*
* Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.internal.cluster.impl.operations;
import com.hazelcast.cluster.ClusterState;
import com.hazelcast.core.Member;
import com.hazelcast.instance.Node;
import com.hazelcast.internal.cluster.ClusterService;
import com.hazelcast.internal.cluster.impl.ClusterDataSerializerHook;
import com.hazelcast.internal.cluster.impl.ClusterServiceImpl;
import com.hazelcast.internal.cluster.impl.SplitBrainJoinMessage;
import com.hazelcast.logging.ILogger;
import com.hazelcast.nio.ObjectDataInput;
import com.hazelcast.nio.ObjectDataOutput;
import com.hazelcast.spi.impl.NodeEngineImpl;
import java.io.IOException;
/**
* Validates whether clusters may merge to recover from a split brain, based on configuration and cluster version.
*/
public class SplitBrainMergeValidationOp extends AbstractJoinOperation {

    /** Join message sent by the cluster that wants to merge; serialized with the operation. */
    private SplitBrainJoinMessage request;

    /** Join message returned to the caller; stays {@code null} unless validation succeeds. */
    private SplitBrainJoinMessage response;

    /**
     * Set by {@link #masterCheck()} when the caller is still listed as a member of this cluster.
     * It is then suspected in {@link #afterRun()}. Not serialized.
     */
    private transient Member suspectedCaller;

    /** No-arg constructor; {@link #readInternal(ObjectDataInput)} populates the request afterwards. */
    public SplitBrainMergeValidationOp() {
    }

    /**
     * Creates the operation for the given join message.
     *
     * @param request the split-brain join message to be validated on the target node
     */
    public SplitBrainMergeValidationOp(SplitBrainJoinMessage request) {
        this.request = request;
    }

    /**
     * Validates the caller's join message against this node.
     * <p>
     * The response is only populated when the pre-checks pass, this node is master, the join message
     * is valid and both clusters are at the same cluster version. In every other case the response
     * stays {@code null}.
     */
    @Override
    public void run() {
        ClusterServiceImpl service = getService();
        NodeEngineImpl nodeEngine = (NodeEngineImpl) getNodeEngine();
        Node node = nodeEngine.getNode();

        if (!preCheck(node) || !masterCheck()) {
            return;
        }
        if (request == null) {
            // nothing to validate
            return;
        }

        ILogger logger = getLogger();
        try {
            if (service.getClusterJoinManager().validateJoinMessage(request)) {
                // Validate other cluster's major.minor version is same as this cluster.
                // This way we ensure that all nodes of both clusters will be able to operate normally
                // in the unified cluster which will be at the same cluster version as the current subclusters.
                // If we only validated node codebase versions of master nodes, then we might end up with a
                // unified cluster but some members might be kicked out due to incompatibility. For example
                // assuming a 3.8.0 cluster with 2 nodes at codebase versions 3.8.0 & 3.9.0 (master) and another
                // cluster with 3x3.9.0 nodes at cluster version 3.9.0: if we only validated on master nodes' codebase
                // version, we would find them to be compatible and let the smaller cluster merge towards the bigger one.
                // However we would end up with a cluster at cluster version 3.9.0 (including 4x3.9.0 nodes) and the
                // 3.8.0-codebase node would be kicked out of the cluster. To enable the kicked-out node to join the
                // cluster again, the user would be forced to upgrade the member to 3.9.0 codebase version.
                // The implicit change of cluster version ("sneaky upgrade") and the change in membership would be a
                // surprise to users and may cause unexpected issues.
                if (service.getClusterVersion().equals(request.getClusterVersion())) {
                    response = node.createSplitBrainJoinMessage();
                } else {
                    logger.info("Join check from " + getCallerAddress() + " failed validation due to incompatible version, "
                            + "remote cluster version is " + request.getClusterVersion() + ", this cluster is "
                            + service.getClusterVersion());
                }
            }
            if (logger.isFineEnabled()) {
                logger.fine("Returning " + response + " to " + getCallerAddress());
            }
        } catch (Exception e) {
            // Best effort: a failed validation leaves the response as null.
            if (logger.isFineEnabled()) {
                logger.fine("Could not validate split-brain join message! -> " + e.getMessage());
            }
        }
    }

    /**
     * Checks that this node is the master.
     * <p>
     * On the master, if the caller is still registered as a member, it is remembered in
     * {@link #suspectedCaller} so that {@link #afterRun()} can suspect it. On a non-master node
     * a master confirmation is sent and the join check is ignored.
     *
     * @return {@code true} if this node is master, {@code false} otherwise
     */
    private boolean masterCheck() {
        ILogger logger = getLogger();
        ClusterServiceImpl service = getService();

        if (!service.isMaster()) {
            // ping master to check if it's still valid
            service.getClusterHeartbeatManager().sendMasterConfirmation();
            logger.info("Ignoring join check from " + getCallerAddress()
                    + ", because this node is not master...");
            return false;
        }

        // The request may be null when the operation was constructed with a null message;
        // run() ignores such a request, so skip the member lookup instead of failing here.
        if (request != null) {
            Member existingMember = service.getMembershipManager().getMember(request.getAddress(), request.getUuid());
            if (existingMember != null) {
                // Assign before logging so the message names the actual member.
                suspectedCaller = existingMember;
                logger.info("Removing " + suspectedCaller + ", since it thinks it's already split from this cluster "
                        + "and looking to merge.");
            }
        }
        return true;
    }

    /**
     * Suspects the caller if {@link #masterCheck()} found it still registered as a member of this cluster.
     *
     * @throws Exception declared by the overridden method
     */
    @Override
    public void afterRun() throws Exception {
        if (suspectedCaller == null) {
            return;
        }
        ClusterServiceImpl service = getService();
        // I am the master. I can remove the member directly
        String reason = "Removing " + suspectedCaller + ", since it thinks it's already split from this cluster "
                + "and looking to merge.";
        service.suspectMember(suspectedCaller, reason, true);
    }

    /**
     * Checks that this node is in a condition to answer a join check: it has joined a cluster,
     * it is running and the cluster state allows joins.
     *
     * @param node the local node
     * @return {@code true} if all conditions hold, {@code false} (after logging the reason) otherwise
     */
    private boolean preCheck(Node node) {
        ILogger logger = getLogger();
        ClusterService clusterService = node.getClusterService();

        if (!clusterService.isJoined()) {
            logger.info("Ignoring join check from " + getCallerAddress()
                    + ", because this node is not joined to a cluster yet...");
            return false;
        }

        if (!node.isRunning()) {
            logger.info("Ignoring join check from " + getCallerAddress() + ", because this node is not active...");
            return false;
        }

        final ClusterState clusterState = clusterService.getClusterState();
        if (!clusterState.isJoinAllowed()) {
            logger.info("Ignoring join check from " + getCallerAddress() + ", because cluster is in "
                    + clusterState + " state ...");
            return false;
        }

        return true;
    }

    /**
     * @return the join message created by {@link #run()}, or {@code null} if validation did not succeed
     */
    @Override
    public Object getResponse() {
        return response;
    }

    /** Reads the request join message from the given input. */
    @Override
    protected void readInternal(final ObjectDataInput in) throws IOException {
        request = new SplitBrainJoinMessage();
        request.readData(in);
    }

    /** Writes the request join message to the given output. */
    @Override
    protected void writeInternal(final ObjectDataOutput out) throws IOException {
        request.writeData(out);
    }

    @Override
    public int getId() {
        return ClusterDataSerializerHook.SPLIT_BRAIN_MERGE_VALIDATION;
    }
}