/*
* Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.internal.cluster.impl;
import com.hazelcast.cluster.ClusterState;
import com.hazelcast.cluster.Joiner;
import com.hazelcast.config.Config;
import com.hazelcast.core.Member;
import com.hazelcast.core.MemberLeftException;
import com.hazelcast.instance.Node;
import com.hazelcast.instance.NodeExtension;
import com.hazelcast.internal.cluster.ClusterService;
import com.hazelcast.internal.cluster.impl.operations.MergeClustersOp;
import com.hazelcast.internal.cluster.impl.operations.SplitBrainMergeValidationOp;
import com.hazelcast.logging.ILogger;
import com.hazelcast.nio.Address;
import com.hazelcast.nio.Connection;
import com.hazelcast.spi.NodeEngine;
import com.hazelcast.spi.Operation;
import com.hazelcast.spi.OperationService;
import com.hazelcast.spi.properties.GroupProperty;
import com.hazelcast.util.Clock;
import com.hazelcast.util.EmptyStatement;
import com.hazelcast.util.FutureUtil;
import java.util.ArrayList;
import java.util.Collection;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import static com.hazelcast.cluster.memberselector.MemberSelectors.DATA_MEMBER_SELECTOR;
import static com.hazelcast.spi.impl.OperationResponseHandlerFactory.createEmptyResponseHandler;
import static com.hazelcast.util.FutureUtil.waitWithDeadline;
public abstract class AbstractJoiner implements Joiner {
// When the join try counter reaches this value in postJoin(), this node is set as master.
private static final int JOIN_TRY_COUNT = 5;
// Seconds added on top of the MAX_WAIT_SECONDS_BEFORE_JOIN property in getMaxJoinTimeToMasterNode().
private static final long MIN_WAIT_SECONDS_BEFORE_JOIN = 10;
// Budget (in the same unit as SPLIT_BRAIN_SLEEP_TIME, i.e. the argument of Thread.sleep) that
// sendSplitBrainJoinMessage() may spend waiting for a connection to the target.
private static final long SPLIT_BRAIN_CONN_TIMEOUT = 5000;
// Pause, passed to Thread.sleep, between two connection look-ups in sendSplitBrainJoinMessage().
private static final long SPLIT_BRAIN_SLEEP_TIME = 10;
// Seconds to wait for the response of the split-brain merge validation invocation.
private static final int SPLIT_BRAIN_JOIN_CHECK_TIMEOUT_SECONDS = 10;
// Seconds to wait for the merge operations invoked on the other members in startClusterMerge().
private static final int SPLIT_BRAIN_MERGE_TIMEOUT_SECONDS = 30;
protected final Config config;
protected final Node node;
protected final ClusterServiceImpl clusterService;
protected final ILogger logger;
// Blacklisted endpoints. The value is the "permanent" flag given to blacklist():
// unblacklist() only removes entries whose value is FALSE (temporary entries).
protected final ConcurrentMap<Address, Boolean> blacklistedAddresses = new ConcurrentHashMap<Address, Boolean>();
protected final ClusterJoinManager clusterJoinManager;
// Initialised at construction time and refreshed by reset(); exposed through getStartTime().
private final AtomicLong joinStartTime = new AtomicLong(Clock.currentTimeMillis());
// Incremented by postJoin() while the node is running; set back to 0 by reset().
private final AtomicInteger tryCount = new AtomicInteger(0);
// Value of the MERGE_NEXT_RUN_DELAY_SECONDS property read via getMillis();
// used as the time budget of prepareClusterState().
private final long mergeNextRunDelayMs;
// Written by setTargetAddress(), read and cleared by getTargetAddress().
private volatile Address targetAddress;
/**
 * Handler passed to {@code waitWithDeadline} in {@link #startClusterMerge(Address)}.
 * A {@link MemberLeftException} is silently dropped; any other failure is logged as a warning.
 */
private final FutureUtil.ExceptionHandler splitBrainMergeExceptionHandler = new FutureUtil.ExceptionHandler() {
    @Override
    public void handleException(Throwable throwable) {
        final boolean memberLeft = throwable instanceof MemberLeftException;
        if (!memberLeft) {
            logger.warning("Problem while waiting for merge operation result", throwable);
        }
    }
};
/**
 * Creates a joiner for the given node and caches the collaborators it needs:
 * the node's config, a logger named after the concrete joiner class, the cluster service
 * with its join manager, and the merge-next-run delay property.
 *
 * @param node the node this joiner works for
 */
public AbstractJoiner(Node node) {
    this.node = node;
    this.config = node.config;
    this.logger = node.loggingService.getLogger(getClass());
    this.clusterService = node.getClusterService();
    this.clusterJoinManager = this.clusterService.getClusterJoinManager();
    this.mergeNextRunDelayMs = node.getProperties().getMillis(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS);
}
/**
* Returns the join start time: the clock value captured when this joiner was created
* or when {@link #reset()} was last called.
*
* @return the current value of the join start timestamp
*/
@Override
public final long getStartTime() {
return joinStartTime.get();
}
/**
* Stores the given address as the target address. The stored value is handed out
* (and cleared) by {@link #getTargetAddress()}.
*
* @param targetAddress the address to remember; replaces any previously stored value
*/
@Override
public void setTargetAddress(Address targetAddress) {
this.targetAddress = targetAddress;
}
/**
 * Logs the blacklisting and records the address in the blacklist.
 * An entry that already exists for the address is left untouched.
 *
 * @param address   the address to blacklist
 * @param permanent flag stored with the entry; only non-permanent entries can be removed
 *                  again through {@link #unblacklist(Address)}
 */
@Override
public void blacklist(Address address, boolean permanent) {
    final String message = address + " is added to the blacklist.";
    logger.info(message);
    blacklistedAddresses.putIfAbsent(address, permanent);
}
/**
 * Removes the address from the blacklist, but only when its entry is a temporary one
 * (stored with the value {@code false}).
 *
 * @param address the address to remove
 * @return {@code true} if a temporary entry was removed, {@code false} otherwise
 */
@Override
public boolean unblacklist(Address address) {
    final boolean removed = blacklistedAddresses.remove(address, Boolean.FALSE);
    if (!removed) {
        return false;
    }
    logger.info(address + " is removed from the blacklist.");
    return true;
}
/**
* Tells whether the blacklist currently holds an entry for the address,
* regardless of whether that entry is temporary or permanent.
*
* @param address the address to look up
* @return {@code true} if the address is blacklisted
*/
@Override
public boolean isBlacklisted(Address address) {
return blacklistedAddresses.containsKey(address);
}
/**
* Joiner-specific join attempt, supplied by subclasses. It is invoked by {@link #join()},
* and invoked a second time when the first attempt did not join and the hot restart data had to be reset.
*/
public abstract void doJoin();
/**
 * Runs one complete join cycle: clears the blacklist, delegates to {@link #doJoin()} and,
 * when the node is still not joined because its hot restart data must be reset, resets that data
 * and this joiner and tries {@link #doJoin()} once more. Post-join work is always performed last.
 */
@Override
public final void join() {
    blacklistedAddresses.clear();
    doJoin();

    final boolean retryAfterHotRestartReset = !clusterService.isJoined() && shouldResetHotRestartData();
    if (retryAfterHotRestartReset) {
        logger.warning("Could not join to the cluster because hot restart data must be reset.");
        node.getNodeExtension().getInternalHotRestartService().resetHotRestartData();
        reset();
        doJoin();
    }

    postJoin();
}
/**
 * Tells whether another join attempt makes sense.
 *
 * @return {@code true} only while the node is running, has not joined yet
 *         and does not have to reset its hot restart data
 */
protected final boolean shouldRetry() {
    if (!node.isRunning()) {
        return false;
    }
    if (clusterService.isJoined()) {
        return false;
    }
    return !shouldResetHotRestartData();
}
/**
 * Tells whether the hot restart data of this member has to be reset.
 *
 * @return {@code true} when the node start has not completed yet and the hot restart service
 *         reports this member (address and UUID) as excluded
 */
private boolean shouldResetHotRestartData() {
    final NodeExtension extension = node.getNodeExtension();
    if (extension.isStartCompleted()) {
        return false;
    }
    return extension.getInternalHotRestartService().isMemberExcluded(node.getThisAddress(), node.getThisUuid());
}
/**
 * Work performed after every join attempt.
 * <p>
 * Clears the blacklist and, while the node is running, counts the attempt; when the counter reaches
 * {@code JOIN_TRY_COUNT} this member is set as master. If the node is joined, a non-master member
 * waits for connections to all members, and the member list is printed when the cluster size is 1.
 */
private void postJoin() {
    blacklistedAddresses.clear();

    if (logger.isFineEnabled()) {
        logger.fine("PostJoin master: " + clusterService.getMasterAddress() + ", isMaster: " + clusterService.isMaster());
    }

    if (!node.isRunning()) {
        return;
    }

    final int attempts = tryCount.incrementAndGet();
    if (attempts == JOIN_TRY_COUNT) {
        logger.warning("Join try count exceed limit, setting this node as master!");
        clusterJoinManager.setThisMemberAsMaster();
    }

    if (!clusterService.isJoined()) {
        return;
    }

    if (!clusterService.isMaster()) {
        ensureConnectionToAllMembers();
    }

    if (clusterService.getSize() == 1) {
        clusterService.printMemberList();
    }
}
/**
 * Waits until a connection to every non-local member is available, checking once per second
 * for at most {@code CONNECT_ALL_WAIT_SECONDS} rounds. Does nothing when the node is not joined.
 * <p>
 * Every round asks the connection manager for a connection to each non-local member;
 * the scan is deliberately not cut short on the first missing connection.
 */
private void ensureConnectionToAllMembers() {
    if (!clusterService.isJoined()) {
        return;
    }

    logger.fine("Waiting for all connections");
    final int maxRounds = node.getProperties().getSeconds(GroupProperty.CONNECT_ALL_WAIT_SECONDS);
    boolean everyMemberConnected = false;

    for (int round = 0; round < maxRounds && !everyMemberConnected; round++) {
        try {
            //noinspection BusyWait
            TimeUnit.SECONDS.sleep(1);
        } catch (InterruptedException ignored) {
            EmptyStatement.ignore(ignored);
        }

        // assume success for this round until a member without connection is found
        everyMemberConnected = true;
        for (Member member : clusterService.getMembers()) {
            if (member.localMember()) {
                continue;
            }
            if (node.connectionManager.getOrConnect(member.getAddress()) != null) {
                continue;
            }
            everyMemberConnected = false;
            if (logger.isFineEnabled()) {
                logger.fine("Not-connected to " + member.getAddress());
            }
        }
    }
}
/**
* Returns the value of the {@code MAX_JOIN_SECONDS} group property as obtained through
* {@code getMillis}.
*
* @return the maximum join time
*/
protected final long getMaxJoinMillis() {
return node.getProperties().getMillis(GroupProperty.MAX_JOIN_SECONDS);
}
/**
 * Returns the maximum time to spend joining an already found master node.
 * <p>
 * The value has to be significantly greater than the {@code MAX_WAIT_SECONDS_BEFORE_JOIN} property,
 * which is why {@code MIN_WAIT_SECONDS_BEFORE_JOIN} (10 seconds) is added on top of it.
 *
 * @return the sum of the extra margin and the configured property, in milliseconds
 */
protected final long getMaxJoinTimeToMasterNode() {
    final long extraMarginMillis = TimeUnit.SECONDS.toMillis(MIN_WAIT_SECONDS_BEFORE_JOIN);
    final long configuredWaitMillis = node.getProperties().getMillis(GroupProperty.MAX_WAIT_SECONDS_BEFORE_JOIN);
    return extraMarginMillis + configuredWaitMillis;
}
/**
 * Decides whether this cluster should merge into the cluster described by the given join message.
 * <p>
 * The message must be non-null and pass, in this order, the validation, cluster version,
 * "target is not already a member", cluster state and membership intersection checks.
 * After that the side with fewer data members merges into the other; on a tie the decision
 * is taken by comparing the addresses of both sides.
 *
 * @param joinMessage the split-brain join message received from the other side, may be {@code null}
 * @return {@code true} if this side should merge to the sender of the message
 */
@SuppressWarnings({"checkstyle:returncount", "checkstyle:npathcomplexity"})
protected boolean shouldMerge(SplitBrainJoinMessage joinMessage) {
    if (logger.isFineEnabled()) {
        logger.fine("Should merge to: " + joinMessage);
    }

    // checks are evaluated left to right and stop at the first failure
    final boolean passedPreChecks = joinMessage != null
            && checkValidSplitBrainJoinMessage(joinMessage)
            && checkCompatibleSplitBrainJoinMessage(joinMessage)
            && checkMergeTargetIsNotMember(joinMessage)
            && checkClusterStateAllowsJoinBeforeMerge(joinMessage)
            && checkMembershipIntersectionSetEmpty(joinMessage);
    if (!passedPreChecks) {
        return false;
    }

    final int theirDataMembers = joinMessage.getDataMemberCount();
    final int ourDataMembers = clusterService.getSize(DATA_MEMBER_SELECTOR);

    if (theirDataMembers > ourDataMembers) {
        logger.info("We are merging to " + joinMessage.getAddress()
                + ", because their data member count is bigger than ours ["
                + (theirDataMembers + " > " + ourDataMembers) + ']');
        return true;
    } else if (theirDataMembers < ourDataMembers) {
        logger.info(joinMessage.getAddress() + " should merge to us "
                + ", because our data member count is bigger than theirs ["
                + (ourDataMembers + " > " + theirDataMembers) + ']');
        return false;
    }

    // same number of data members on both sides: let the address comparison decide
    final boolean weMerge = shouldMergeTo(node.getThisAddress(), joinMessage.getAddress());
    if (weMerge) {
        logger.info("We are merging to " + joinMessage.getAddress()
                + ", both have the same data member count: " + ourDataMembers);
    } else {
        logger.info(joinMessage.getAddress() + " should merge to us "
                + ", both have the same data member count: " + ourDataMembers);
    }
    return weMerge;
}
/**
 * Validates the join message through the cluster join manager.
 *
 * @param joinMessage the message to validate
 * @return {@code true} if validation succeeded; {@code false} if it failed or threw an exception
 *         (both cases are logged at fine level)
 */
private boolean checkValidSplitBrainJoinMessage(SplitBrainJoinMessage joinMessage) {
    try {
        if (clusterJoinManager.validateJoinMessage(joinMessage)) {
            return true;
        }
        logger.fine("Cannot process split brain merge message from " + joinMessage.getAddress()
                + ", since join-message could not be validated.");
    } catch (Exception e) {
        logger.fine("failure during validating join message", e);
    }
    return false;
}
/**
 * Checks that the other side runs the same cluster version as this cluster.
 *
 * @param joinMessage the message carrying the other cluster's version
 * @return {@code true} if both cluster versions are equal
 */
private boolean checkCompatibleSplitBrainJoinMessage(SplitBrainJoinMessage joinMessage) {
    if (clusterService.getClusterVersion().equals(joinMessage.getClusterVersion())) {
        return true;
    }

    if (logger.isFineEnabled()) {
        logger.fine("Should not merge to " + joinMessage.getAddress() + " because other cluster version is "
                + joinMessage.getClusterVersion() + " while this cluster version is "
                + clusterService.getClusterVersion());
    }
    return false;
}
/**
 * Checks that the sender of the join message is not already known as a member of this cluster.
 *
 * @param joinMessage the message whose sender address is looked up
 * @return {@code true} if this cluster has no member with the sender's address
 */
private boolean checkMergeTargetIsNotMember(SplitBrainJoinMessage joinMessage) {
    final boolean unknownToThisCluster = clusterService.getMember(joinMessage.getAddress()) == null;
    if (unknownToThisCluster) {
        return true;
    }

    if (logger.isFineEnabled()) {
        logger.fine("Should not merge to " + joinMessage.getAddress()
                + ", because it is already member of this cluster.");
    }
    return false;
}
/**
 * Checks that the current state of this cluster allows joins, which is required before merging.
 *
 * @param joinMessage the message of the merge candidate (used for logging only)
 * @return {@code true} if the current cluster state allows join
 */
private boolean checkClusterStateAllowsJoinBeforeMerge(SplitBrainJoinMessage joinMessage) {
    final ClusterState currentState = clusterService.getClusterState();
    if (currentState.isJoinAllowed()) {
        return true;
    }

    if (logger.isFineEnabled()) {
        logger.fine("Should not merge to " + joinMessage.getAddress() + ", because this cluster is in "
                + currentState + " state.");
    }
    return false;
}
/**
 * Checks that the member list of the other side and the member list of this cluster do not overlap.
 * <p>
 * When the other side lists this node as one of its members, an explicit suspicion is sent to it
 * before the merge is rejected.
 *
 * @param joinMessage the message carrying the other side's member addresses
 * @return {@code true} if no address is a member of both sides
 */
private boolean checkMembershipIntersectionSetEmpty(SplitBrainJoinMessage joinMessage) {
    final Collection<Address> theirMembers = joinMessage.getMemberAddresses();
    final Address senderAddress = joinMessage.getAddress();

    if (theirMembers.contains(node.getThisAddress())) {
        // The request comes from the master of the other split, which still believes this node
        // is one of its members. That is a partial split; to turn it into a full split
        // the sender has to remove this node from its cluster.
        MembersViewMetadata senderView = new MembersViewMetadata(senderAddress, joinMessage.getUuid(),
                senderAddress, joinMessage.getMemberListVersion());
        clusterService.sendExplicitSuspicion(senderView);
        logger.info(node.getThisAddress() + " CANNOT merge to " + senderAddress
                + ", because it thinks this-node as its member.");
        return false;
    }

    for (Address ourMember : clusterService.getMemberAddresses()) {
        if (!theirMembers.contains(ourMember)) {
            continue;
        }
        logger.info(node.getThisAddress() + " CANNOT merge to " + senderAddress
                + ", because it thinks " + ourMember + " as its member. "
                + "But " + ourMember + " is member of this cluster.");
        return false;
    }
    return true;
}
/**
 * Tie-breaker used when both sides are equal on all other aspects: decides whether this address
 * should merge to the target address.
 * <p>
 * This is a pure function; the same pair of addresses always yields the same answer.
 * The outcome must never change, otherwise compatibility will be broken.
 *
 * @param thisAddress   this address
 * @param targetAddress target address
 * @return true if this address should merge to target, false otherwise
 * @throws IllegalArgumentException if both addresses have the same host and port
 */
private static boolean shouldMergeTo(Address thisAddress, Address targetAddress) {
    final String self = "[" + thisAddress.getHost() + "]:" + thisAddress.getPort();
    final String other = "[" + targetAddress.getHost() + "]:" + targetAddress.getPort();

    // String.compareTo yields zero exactly when both strings are equal
    final int order = self.compareTo(other);
    if (order == 0) {
        throw new IllegalArgumentException("Addresses should be different! This: "
                + thisAddress + ", Target: " + targetAddress);
    }
    return order > 0;
}
/**
 * Sends this node's split-brain join message to the target and waits for its answer.
 * <p>
 * First a connection to the target is obtained, polling the connection manager every
 * {@code SPLIT_BRAIN_SLEEP_TIME} until the {@code SPLIT_BRAIN_CONN_TIMEOUT} budget is used up.
 * Then a {@link SplitBrainMergeValidationOp} is invoked on the target with a single try and
 * its result is awaited for at most {@code SPLIT_BRAIN_JOIN_CHECK_TIMEOUT_SECONDS} seconds.
 * <p>
 * If the calling thread is interrupted while waiting, its interrupt flag is restored before
 * returning so that callers can observe the interruption.
 *
 * @param target address of the member to contact
 * @return the join message returned by the target, or {@code null} if no connection became available
 *         in time, the thread was interrupted, the invocation timed out or it failed
 */
protected SplitBrainJoinMessage sendSplitBrainJoinMessage(Address target) {
    if (logger.isFineEnabled()) {
        logger.fine("Sending SplitBrainJoinMessage to " + target);
    }

    Connection conn = node.connectionManager.getOrConnect(target, true);
    long timeout = SPLIT_BRAIN_CONN_TIMEOUT;
    while (conn == null) {
        timeout -= SPLIT_BRAIN_SLEEP_TIME;
        if (timeout < 0) {
            return null;
        }
        try {
            //noinspection BusyWait
            Thread.sleep(SPLIT_BRAIN_SLEEP_TIME);
        } catch (InterruptedException e) {
            // restore interrupt flag instead of swallowing the interruption
            Thread.currentThread().interrupt();
            return null;
        }
        conn = node.connectionManager.getConnection(target);
    }

    NodeEngine nodeEngine = node.nodeEngine;
    Future<?> future = nodeEngine.getOperationService().createInvocationBuilder(ClusterServiceImpl.SERVICE_NAME,
            new SplitBrainMergeValidationOp(node.createSplitBrainJoinMessage()), target)
            .setTryCount(1).invoke();
    try {
        return (SplitBrainJoinMessage) future.get(SPLIT_BRAIN_JOIN_CHECK_TIMEOUT_SECONDS, TimeUnit.SECONDS);
    } catch (TimeoutException e) {
        logger.fine("Timeout during join check!", e);
    } catch (InterruptedException e) {
        // restore interrupt flag; must not be hidden by the generic handler below
        Thread.currentThread().interrupt();
        logger.fine("Interrupted during join check!", e);
    } catch (Exception e) {
        logger.warning("Error during join check!", e);
    }
    return null;
}
/**
 * Resets the join bookkeeping: the join start time becomes the current clock value
 * and the try counter goes back to zero.
 */
@Override
public void reset() {
    final long now = Clock.currentTimeMillis();
    joinStartTime.set(now);
    tryCount.set(0);
}
/**
 * Starts merging this cluster into the cluster of the given target address.
 * <p>
 * Nothing happens unless the cluster state could be prepared for the merge. Otherwise a
 * {@link MergeClustersOp} is invoked on every non-local member, the results are awaited for at most
 * {@code SPLIT_BRAIN_MERGE_TIMEOUT_SECONDS} seconds, and finally the operation is run on this member.
 *
 * @param targetAddress address of the cluster to merge into
 */
protected void startClusterMerge(final Address targetAddress) {
    final ClusterServiceImpl service = node.clusterService;
    if (!prepareClusterState(service)) {
        return;
    }

    final OperationService operationService = node.nodeEngine.getOperationService();
    final Collection<Member> members = service.getMembers();
    final Collection<Future> pendingMerges = new ArrayList<Future>(members.size());

    for (Member member : members) {
        if (member.localMember()) {
            // the local member is handled last, after the remote invocations were awaited
            continue;
        }
        Operation remoteMerge = new MergeClustersOp(targetAddress);
        Future<Object> pending =
                operationService.invokeOnTarget(ClusterServiceImpl.SERVICE_NAME, remoteMerge, member.getAddress());
        pendingMerges.add(pending);
    }

    waitWithDeadline(pendingMerges, SPLIT_BRAIN_MERGE_TIMEOUT_SECONDS, TimeUnit.SECONDS, splitBrainMergeExceptionHandler);

    final Operation localMerge = new MergeClustersOp(targetAddress);
    localMerge.setNodeEngine(node.nodeEngine)
            .setService(service)
            .setOperationResponseHandler(createEmptyResponseHandler());
    operationService.run(localMerge);
}
/**
 * Prepares the cluster state for cluster merge by changing it to {@link ClusterState#FROZEN}. The current
 * cluster state is expected to be {@link ClusterState#ACTIVE} or {@link ClusterState#NO_MIGRATION}.
 * <p>
 * Attempts are repeated, with a one second pause in between, until
 * {@link GroupProperty#MERGE_NEXT_RUN_DELAY_SECONDS} has elapsed or the pause is interrupted.
 * A state that already forbids both migration and join (and is not in transition) counts as prepared.
 *
 * @param clusterService the cluster service used for state change
 * @return true if the cluster state was successfully prepared
 */
private boolean prepareClusterState(ClusterServiceImpl clusterService) {
    if (!preCheckClusterState(clusterService)) {
        return false;
    }

    final long deadline = Clock.currentTimeMillis() + mergeNextRunDelayMs;
    while (Clock.currentTimeMillis() < deadline) {
        final ClusterState current = clusterService.getClusterState();
        final boolean migrationOrJoinAllowed = current.isMigrationAllowed() || current.isJoinAllowed();
        final boolean inTransition = current == ClusterState.IN_TRANSITION;

        if (!migrationOrJoinAllowed && !inTransition) {
            return true;
        }

        // While the state is IN_TRANSITION no change is attempted,
        // otherwise the transaction would print noisy warning logs.
        if (!inTransition) {
            try {
                clusterService.changeClusterState(ClusterState.FROZEN);
                return true;
            } catch (Exception e) {
                String error = e.getClass().getName() + ": " + e.getMessage();
                logger.warning("While changing cluster state to FROZEN! " + error);
            }
        }

        try {
            TimeUnit.SECONDS.sleep(1);
        } catch (InterruptedException e) {
            logger.warning("Interrupted while preparing cluster for merge!");
            // restore interrupt flag
            Thread.currentThread().interrupt();
            return false;
        }
    }

    logger.warning("Could not change cluster state to FROZEN in time. "
            + "Postponing merge process until next attempt.");
    return false;
}
/**
 * Checks whether the current cluster state allows join, i.e. it is either
 * {@link ClusterState#ACTIVE} or {@link ClusterState#NO_MIGRATION}. A warning is logged otherwise.
 *
 * @param clusterService the cluster service to read the current state from
 * @return true if the current cluster state allows join
 */
private boolean preCheckClusterState(final ClusterService clusterService) {
    final ClusterState stateBeforeMerge = clusterService.getClusterState();
    if (stateBeforeMerge.isJoinAllowed()) {
        return true;
    }
    logger.warning("Could not prepare cluster state since it has been changed to " + stateBeforeMerge);
    return false;
}
/**
* Returns the stored target address and clears it, so a following call returns {@code null}
* unless {@link #setTargetAddress(Address)} is called in between.
*
* @return the address last stored via {@link #setTargetAddress(Address)}, or {@code null} if none is pending
*/
protected Address getTargetAddress() {
final Address target = targetAddress;
// NOTE(review): the read above and the clear below are two separate volatile accesses; an address
// stored by another thread in between would be overwritten with null - confirm callers cannot race.
targetAddress = null;
return target;
}
}