/*
* Copyright 2008-2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.hasor.land.election;
import io.netty.util.Timeout;
import io.netty.util.TimerTask;
import net.hasor.core.EventListener;
import net.hasor.core.Init;
import net.hasor.core.Inject;
import net.hasor.core.InjectSettings;
import net.hasor.core.future.FutureCallback;
import net.hasor.land.bootstrap.LandContext;
import net.hasor.land.domain.ServerStatus;
import net.hasor.land.node.NodeData;
import net.hasor.land.node.Operation;
import net.hasor.land.node.RunLock;
import net.hasor.land.node.Server;
import net.hasor.land.replicator.DataContext;
import net.hasor.land.utils.TermUtils;
import net.hasor.rsf.RsfContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* 选举服务,负责选出 Leader
*
* @version : 2016年09月10日
* @author 赵永春(zyc@hasor.net)
*/
public class ElectionServiceManager implements ElectionService, EventListener<ServerStatus> {
protected Logger logger = LoggerFactory.getLogger(getClass());
@Inject
private Server server;
@Inject
private DataContext dataContext;
@Inject
private LandContext landContext;
@Inject
private RsfContext rsfContext;
private AtomicBoolean landStatus;
//
@InjectSettings("hasor.land.timeout")
private int baseTimeout; // 基准心跳时间
@InjectSettings("hasor.land.leaderHeartbeat")
private int leaderHeartbeat; // Leader 心跳时间
private AtomicBoolean followerTimer; // follower 定时器
private AtomicBoolean candidateTimer; // candidate定时器
private AtomicBoolean leaderTimer; // leader 定时器
//
//
@Init
public void start() {
this.landStatus = new AtomicBoolean(true);
this.followerTimer = new AtomicBoolean(false);
this.candidateTimer = new AtomicBoolean(false);
this.leaderTimer = new AtomicBoolean(false);
//
this.landContext.addStatusListener(this);
//
this.startFollowerTimer();
this.startCandidateTimer();
this.startLeaderTimer();
//
this.server.lockRun(new RunLock() {
public void run(Operation object) {
String selfServerID = landContext.getServerID();
String selfTerm = object.getCurrentTerm();
switchToFollow(object, selfServerID, selfTerm);
}
});
}
//
// --------------------------------------------------------------------------------------------
// .状态切换事件
// 当发生角色转换,负责更新各个定时器状态
public void onEvent(String event, ServerStatus eventData) {
this.followerTimer.set(false);
this.candidateTimer.set(false);
this.leaderTimer.set(false);
//
logger.info("Land[Status] - switchTo -> {}.", eventData);
if (eventData == ServerStatus.Follower) {
this.followerTimer.set(true);
return;
}
// .一旦成为候选人那么马上进行选举
if (eventData == ServerStatus.Candidate) {
this.candidateTimer.set(true);
return;
}
// .一旦成为 Leader 马上发送一个 term 以建立权威,然后通过心跳维持权威
if (eventData == ServerStatus.Leader) {
this.leaderTimer.set(true);
return;
}
}
// --------------------------------------------------------------------------------------------
// .follower
// startFollowerTimer 启动定时器
// processFollowerTimer 定时器的循环调用
// processFollower follower 逻辑代码
private void startFollowerTimer() {
if (!this.followerTimer.compareAndSet(false, true)) {
this.logger.error("Land[Follower] - followerTimer -> already started");
return;
}
this.logger.info("Land[Follower] - start followerTimer.");
final long lastLeaderHeartbeat = this.server.getLastHeartbeat();
this.landContext.atTime(new TimerTask() {
public void run(Timeout timeout) throws Exception {
processFollowerTimer(lastLeaderHeartbeat);
}
}, genTimeout());
}
private void processFollowerTimer(long lastLeaderHeartbeat) {
// .如果系统退出,那么结束定时器循环
if (!this.landStatus.get()) {
return;
}
// .执行 Follower 任务
try {
this.processFollower(lastLeaderHeartbeat);
} catch (Exception e) {
logger.error("Land[Follower] - " + e.getMessage(), e);
}
// .重启定时器
final long curLeaderHeartbeat = this.server.getLastHeartbeat();
this.landContext.atTime(new TimerTask() {
public void run(Timeout timeout) throws Exception {
processFollowerTimer(curLeaderHeartbeat);
}
}, genTimeout());
}
private void processFollower(final long lastLeaderHeartbeat) {
/* 确保 lockRun 中的方法在并发场景中是线程安全的 */
this.server.lockRun(new RunLock() {
public void run(Operation object) {
if (!followerTimer.get()) {
return;
}
// .如果已经不在处于追随者,那么放弃后续处理
if (object.getStatus() != ServerStatus.Follower) {
logger.info("Land[Follower] -> server mast be Follower, but ->" + object.getStatus());
return;
}
//
// .判断启动定时器之后是否收到最新的 Leader 心跳 ,如果收到了心跳,那么放弃后续处理维持 Follower 状态
// (新的Leader心跳时间比启动定时器之前心跳时间要新,即为收到了心跳)
boolean leaderHeartbeat = object.getLastHeartbeat() > lastLeaderHeartbeat;
if (leaderHeartbeat) {
printLeader();
return;
}
//
// .确保状态从 Follower 切换到 Candidate
logger.info("Land[Follower] -> initiate the election.");
if (object.getStatus() == ServerStatus.Follower) {
landContext.fireStatus(ServerStatus.Candidate);
}
}
});
//
}
// --------------------------------------------------------------------------------------------
// .candidate
// startCandidateTimer 启动定时器
// processCandidateTimer 定时器的循环调用
// processCandidate candidate 逻辑代码
private void startCandidateTimer() {
if (!this.candidateTimer.compareAndSet(false, true)) {
this.logger.error("Land[Candidate] - candidateTimer -> already started");
return;
}
this.logger.info("Land[Candidate] - start candidateTimer.");
this.landContext.atTime(new TimerTask() {
public void run(Timeout timeout) throws Exception {
processCandidateTimer();
}
}, this.genTimeout());
}
private void processCandidateTimer() {
// .如果系统退出,那么结束定时器循环
if (!this.landStatus.get()) {
return;
}
// .执行 Candidate 任务
try {
this.processCandidate();
} catch (Exception e) {
logger.error("Land[Candidate] - " + e.getMessage(), e);
}
// .重启定时器
this.landContext.atTime(new TimerTask() {
public void run(Timeout timeout) throws Exception {
processCandidateTimer();
}
}, genTimeout());
}
private void processCandidate() {
/* 确保 lockRun 中的方法在并发场景中是线程安全的 */
this.server.lockRun(new RunLock() {
public void run(Operation object) {
if (!candidateTimer.get()) {
return;
}
if (object.getStatus() != ServerStatus.Candidate) {
return;
}
// .候选人会继续保持着当前状态直到以下三件事情之一发生:
// (a) 他自己赢得了这次的选举, -> 成为 Leader
// (b) 其他的服务器成为领导者, -> 成为 Follower
// (c) 一段时间之后没有任何获胜的, -> 重新开始选举
//
//
// .term自增
object.incrementAndGetTerm();
object.clearVoted();
logger.info("Land[Candidate] -> solicit votes , current Trem is {}", object.getCurrentTerm());
//
// .发起选举然后收集选票
List<NodeData> nodeList = object.getOnlineNodes();
for (NodeData nodeData : nodeList) {
// .如果目标是自己,那么直接投给自己
if (nodeData.isSelf()) {
landContext.fireVotedFor(nodeData.getServerID());
object.applyVoted(nodeData.getServerID(), true);
continue;
}
// .征集选票(并发)
nodeData.collectVote(object, dataContext, new FutureCallback<CollectVoteResult>() {
public void completed(CollectVoteResult result) {
doVote(result);
}
public void failed(Throwable ex) {
doFailed(ex);
}
public void cancelled() {
}
});
}
//
//
}
});
}
// --------------------------------------------------------------------------------------------
// .leader
// startLeaderTimer 启动定时器
// processLeaderTimer 定时器的循环调用
// processLeader leader 逻辑代码
private void startLeaderTimer() {
if (!this.leaderTimer.compareAndSet(false, true)) {
this.logger.error("Land[Leader] - leaderTimer -> already started");
return;
}
this.logger.info("Land[Leader] - start leaderTimer.");
this.landContext.atTime(new TimerTask() {
public void run(Timeout timeout) throws Exception {
processLeaderTimer();
}
}, this.leaderHeartbeat);
}
private void processLeaderTimer() {
// .如果系统退出,那么结束定时器循环
if (!this.landStatus.get()) {
return;
}
// .执行 Leader 任务
try {
this.processLeader();
} catch (Exception e) {
logger.error("Land[Leader] - " + e.getMessage(), e);
}
// .重启定时器
this.landContext.atTime(new TimerTask() {
@Override
public void run(Timeout timeout) throws Exception {
processLeaderTimer();
}
}, this.leaderHeartbeat);
}
private void processLeader() {
/* 确保 lockRun 中的方法在并发场景中是线程安全的 */
this.server.lockRun(new RunLock() {
public void run(Operation object) {
if (!leaderTimer.get()) {
return;
}
//
printLeader();
//
// .发送心跳log以维持 Leader 权威
List<NodeData> nodeList = object.getOnlineNodes();
for (NodeData nodeData : nodeList) {
//
// .如果心跳目标是自己,那么直接更新心跳时间
if (nodeData.isSelf()) {
object.newLastLeaderHeartbeat();
continue;
}
// .发送Leader心跳包(并发)
nodeData.leaderHeartbeat(object, dataContext, new FutureCallback<LeaderBeatResult>() {
public void completed(LeaderBeatResult result) {
doHeartbeat(result);
}
public void failed(Throwable ex) {
doFailed(ex);
}
public void cancelled() {
}
});
}
}
});
}
// --------------------------------------------------------------------------------------------
// .拉选票
// collectVote 处理拉票操作
// doVote 投票结果处理
@Override
public CollectVoteResult collectVote(CollectVoteData voteData) {
final String selfTerm = this.server.getCurrentTerm();
final String remoteTerm = voteData.getTerm();
final String remoteServerID = voteData.getServerID();
//
final CollectVoteResult voteResult = new CollectVoteResult();
voteResult.setServerID(this.landContext.getServerID());
voteResult.setRemoteTerm(selfTerm);
//
// .无条件接受来自,自己的邀票
if (this.landContext.getServerID().equals(remoteServerID)) {
logger.info("Land[Vote] -> accept votes from self.");
voteResult.setVoteGranted(true);
return voteResult;
}
//
// .处理拉票操作(线程安全)
this.server.lockRun(new RunLock() {
public void run(Operation object) {
//
// .如果远程的term比自己大,那么成为 Follower
if (TermUtils.gtFirst(selfTerm, remoteTerm)) {
logger.info("Land[Vote] -> accept votes from {}.", remoteServerID);
voteResult.setVoteGranted(true);
switchToFollow(object, remoteServerID, remoteTerm);
return;
}
// .拒绝投给他
voteResult.setVoteGranted(false);
logger.info("Land[Vote] -> reject to {} votes. cause: currentTerm({}) > remoteTerm({})", //
remoteServerID, selfTerm, remoteTerm);
//
}
});
return voteResult;
}
public void doVote(final CollectVoteResult voteData) {
final String remoteTerm = voteData.getRemoteTerm();
final String remoteServerID = voteData.getServerID();
final boolean granted = voteData.isVoteGranted();
//
// .没有赢得选票,如果对方比自己大那么直接转换为 Follower
this.server.lockRun(new RunLock() {
public void run(Operation object) {
String localTerm = object.getCurrentTerm();
boolean gtFirst = TermUtils.gtFirst(localTerm, remoteTerm);
if (!granted && gtFirst) {
logger.info("Land[Vote] -> this server follower to {}. L:R is {}:{}", remoteServerID, localTerm, remoteTerm);
switchToFollow(object, remoteServerID, remoteTerm);
}
//
// .记录选票结果
object.applyVoted(remoteServerID, voteData.isVoteGranted());
}
//
});
// .赢得了选票 -> 计票 -> 尝试成为 Leader
if (granted) {
this.server.lockRun(new RunLock() {
public void run(Operation object) {
// .计票,尝试成为 Leader( 返回true表示赢得了这次的选举 )
if (!isTestToLeader(object))
return;
//
landContext.fireVotedFor(landContext.getServerID());
landContext.fireStatus(ServerStatus.Leader);
logger.info("Land[Vote] -> this server is elected leader.");
}
});
return;
}
}
// --------------------------------------------------------------------------------------------
// .Leader心跳
// leaderHeartbeat Leader进行心跳
// doHeartbeat 心跳结果处理
@Override
public LeaderBeatResult leaderHeartbeat(final LeaderBeatData beatResult) {
//
final String remoteTerm = beatResult.getCurrentTerm();
final String remoteServerID = beatResult.getServerID();
final LeaderBeatResult result = new LeaderBeatResult();
result.setServerID(this.landContext.getServerID());
//
// .更新 term 和 Leadeer 一致
if (remoteServerID.equals(this.server.getVotedFor())) {
this.server.lockRun(new RunLock() {
public void run(Operation object) {
String selfTerm = server.getCurrentTerm();
if (TermUtils.gtFirst(selfTerm, remoteTerm)) {
object.updateTermTo(remoteTerm);
logger.info("Land[Beat] -> follow leader update term to {}.", remoteTerm);
}
object.newLastLeaderHeartbeat();
}
});
result.setAccept(true);
return result;
}
//
// .确定是否是已知的Leader
List<NodeData> allNodes = this.server.getOnlineNodes();
NodeData atNode = null;
for (NodeData nodeData : allNodes) {
if (nodeData.getServerID().equalsIgnoreCase(remoteServerID)) {
atNode = nodeData;
break;
}
}
//
// .未知的 Server 想要成为 Leader 直接决绝。
if (atNode == null) {
result.setAccept(false);
return result;
}
//
// .检查这个 Leader 的 Term 是否够大
this.server.lockRun(new RunLock() {
public void run(Operation object) {
String selfTerm = server.getCurrentTerm();
if (TermUtils.gtFirst(selfTerm, remoteTerm)) {
switchToFollow(object, remoteServerID, remoteTerm);
logger.info("Land[Beat] -> follow the new leader {} , new term is {}", remoteServerID, remoteTerm);
result.setAccept(true);
} else {
logger.info("Land[Beat] -> refused to field {} leader heartbeat. L:R is {}:{}",//
remoteServerID, selfTerm, remoteTerm);
result.setAccept(false);
}
}
});
//
return result;
}
public void doHeartbeat(final LeaderBeatResult leaderBeatResult) {
//
// .更新自己的支持者列表
this.server.lockRun(new RunLock() {
public void run(Operation object) {
object.applyVoted(leaderBeatResult.getServerID(), leaderBeatResult.isAccept());
}
});
// .如果出现拒绝者,那么测试自己是否还有足够的支持者支持自己成为 Leader,如果支持者足够,那么自增 term。
if (!leaderBeatResult.isAccept()) {
this.server.lockRun(new RunLock() {
public void run(Operation object) {
if (isTestToLeader(object)) {
object.incrementAndGetTerm();
logger.info("Land[Beat] -> [{},{}] leader conflict, strengthen shelf. term update to {}",//
leaderBeatResult.getServerID(), landContext.getServerID(), object.getCurrentTerm());
}
}
});
}
return;
}
// --------------------------------------------------------------------------------------------
//
/** 10秒打印一次 Leader 的心跳 */
private long lastPrintLeaderLog;
private void printLeader() {
boolean printLeaderLog = this.lastPrintLeaderLog + 5000L < System.currentTimeMillis();
if (printLeaderLog) {
this.lastPrintLeaderLog = System.currentTimeMillis();
this.logger.info("Land[Leader] -> leader is {} , term is {}", this.server.getVotedFor(), this.server.getCurrentTerm());
}
}
/** 处理异常信息的打印 */
private void doFailed(Throwable ex) {
if (ex.getCause() != null) {
ex = ex.getCause();
}
logger.error(ex.getMessage());
}
/** 生成最长:“n ~ n + (150 ~ 300)” 的一个随机数。用作超时时间 */
public int genTimeout() {
return this.baseTimeout + new Random(System.currentTimeMillis()).nextInt(150) + 300;
}
/** 测试当前服务器是否可以成为 Leader,成为 Leader 的条件是得到半数选票。 */
private boolean isTestToLeader(Operation object) {
List<NodeData> nodeList = object.getOnlineNodes();
int grantedCount = nodeList.size();
int serverCount = 0;
for (NodeData nodeData : nodeList) {
serverCount++;
if (object.testVote(nodeData.getServerID())) {
grantedCount++;
}
}
return grantedCount * 2 > serverCount;
}
/** 成为 Follower 并追随一个 Leader */
public void switchToFollow(Operation object, String targetServer, String remoteTerm) {
object.updateTermTo(remoteTerm);
landContext.fireVotedFor(targetServer);
landContext.fireStatus(ServerStatus.Follower);
object.newLastLeaderHeartbeat();
}
}