/*
* Copyright 1999-2015 dangdang.com.
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* </p>
*/
package com.dangdang.ddframe.job.lite.internal.failover;
import com.dangdang.ddframe.job.lite.internal.schedule.JobRegistry;
import com.dangdang.ddframe.job.lite.internal.schedule.JobScheduleController;
import com.dangdang.ddframe.job.lite.internal.sharding.ShardingNode;
import com.dangdang.ddframe.job.lite.internal.sharding.ShardingService;
import com.dangdang.ddframe.job.lite.internal.storage.JobNodeStorage;
import com.dangdang.ddframe.job.lite.internal.storage.LeaderExecutionCallback;
import com.dangdang.ddframe.job.reg.base.CoordinatorRegistryCenter;
import lombok.extern.slf4j.Slf4j;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
/**
 * Job failover service.
 *
 * <p>Maintains the failover-related job nodes of a single job through {@link JobNodeStorage}:
 * it marks crashed sharding items, hands the failover work to
 * {@code JobNodeStorage.executeInLeader}, and answers which sharding items are currently
 * covered by an execution-failover node.</p>
 *
 * @author zhangliang
 */
@Slf4j
public final class FailoverService {

    private final String jobName;

    private final JobNodeStorage jobNodeStorage;

    private final ShardingService shardingService;

    /**
     * Create a failover service bound to one job.
     *
     * @param regCenter registry center used to build the underlying storage and sharding service
     * @param jobName name of the job this service works on
     */
    public FailoverService(final CoordinatorRegistryCenter regCenter, final String jobName) {
        this.jobName = jobName;
        this.jobNodeStorage = new JobNodeStorage(regCenter, jobName);
        this.shardingService = new ShardingService(regCenter, jobName);
    }

    /**
     * Set the crashed flag for a sharding item.
     *
     * <p>The items node of the given item is created only when no execution-failover node
     * exists for that item yet.</p>
     *
     * @param item crashed sharding item
     */
    public void setCrashedFailoverFlag(final int item) {
        if (isFailoverAssigned(item)) {
            return;
        }
        jobNodeStorage.createJobNodeIfNeeded(FailoverNode.getItemsNode(item));
    }

    /**
     * Tell whether an execution-failover node already exists for the given item.
     */
    private boolean isFailoverAssigned(final Integer item) {
        String executionFailoverNode = FailoverNode.getExecutionFailoverNode(item);
        return jobNodeStorage.isJobNodeExisted(executionFailoverNode);
    }

    /**
     * Execute job failover if it is needed.
     *
     * <p>When {@link #needFailover()} holds, a {@link FailoverLeaderExecutionCallback} is passed
     * to {@code JobNodeStorage.executeInLeader} together with {@code FailoverNode.LATCH}.</p>
     */
    public void failoverIfNecessary() {
        if (!needFailover()) {
            return;
        }
        jobNodeStorage.executeInLeader(FailoverNode.LATCH, new FailoverLeaderExecutionCallback());
    }

    /**
     * Failover is needed when the items root exists, has at least one child,
     * and the job is not reported as running by the job registry.
     */
    private boolean needFailover() {
        if (!jobNodeStorage.isJobNodeExisted(FailoverNode.ITEMS_ROOT)) {
            return false;
        }
        if (jobNodeStorage.getJobNodeChildrenKeys(FailoverNode.ITEMS_ROOT).isEmpty()) {
            return false;
        }
        return !JobRegistry.getInstance().isJobRunning(jobName);
    }

    /**
     * Update the state of sharding items whose failover execution has completed.
     *
     * <p>The execution-failover node of every given item is removed if it exists.</p>
     *
     * @param items sharding items that finished their failover execution
     */
    public void updateFailoverComplete(final Collection<Integer> items) {
        for (int item : items) {
            String completedNode = FailoverNode.getExecutionFailoverNode(item);
            jobNodeStorage.removeJobNodeIfExisted(completedNode);
        }
    }

    /**
     * Get the failover sharding items of a job instance.
     *
     * <p>Every child key of {@code ShardingNode.ROOT} is parsed as an item number; an item is
     * selected when its execution-failover node exists and the data of that node equals the
     * given instance id. The result is sorted in ascending order.</p>
     *
     * @param jobInstanceId primary key of the running job instance
     * @return sharding items failed over to that job instance
     */
    public List<Integer> getFailoverItems(final String jobInstanceId) {
        List<String> shardingItemKeys = jobNodeStorage.getJobNodeChildrenKeys(ShardingNode.ROOT);
        List<Integer> failoverItems = new ArrayList<>(shardingItemKeys.size());
        for (String key : shardingItemKeys) {
            int item = Integer.parseInt(key);
            String failoverNode = FailoverNode.getExecutionFailoverNode(item);
            if (!jobNodeStorage.isJobNodeExisted(failoverNode)) {
                continue;
            }
            if (!jobInstanceId.equals(jobNodeStorage.getJobNodeDataDirectly(failoverNode))) {
                continue;
            }
            failoverItems.add(item);
        }
        Collections.sort(failoverItems);
        return failoverItems;
    }

    /**
     * Get the failover sharding items running on this job server.
     *
     * <p>An empty list is returned when the job registry reports this job as shut down;
     * otherwise the lookup is done with the instance id registered for this job.</p>
     *
     * @return failover sharding items running on this job server
     */
    public List<Integer> getLocalFailoverItems() {
        return JobRegistry.getInstance().isShutdown(jobName)
                ? Collections.<Integer>emptyList()
                : getFailoverItems(JobRegistry.getInstance().getJobInstance(jobName).getJobInstanceId());
    }

    /**
     * Get the sharding items of this job server that have been taken over by failover.
     *
     * <p>These are the local sharding items, as reported by the sharding service, for which
     * an execution-failover node exists.</p>
     *
     * @return local sharding items that have been failed over
     */
    public List<Integer> getLocalTakeOffItems() {
        List<Integer> localItems = shardingService.getLocalShardingItems();
        List<Integer> takenOffItems = new ArrayList<>(localItems.size());
        for (int item : localItems) {
            if (!jobNodeStorage.isJobNodeExisted(FailoverNode.getExecutionFailoverNode(item))) {
                continue;
            }
            takenOffItems.add(item);
        }
        return takenOffItems;
    }

    /**
     * Remove the job failover information.
     *
     * <p>For every child key of {@code ShardingNode.ROOT}, the matching execution-failover
     * node is removed if it exists.</p>
     */
    public void removeFailoverInfo() {
        for (String key : jobNodeStorage.getJobNodeChildrenKeys(ShardingNode.ROOT)) {
            int item = Integer.parseInt(key);
            jobNodeStorage.removeJobNodeIfExisted(FailoverNode.getExecutionFailoverNode(item));
        }
    }

    /**
     * Callback handed to {@code JobNodeStorage.executeInLeader} that performs one failover step.
     */
    class FailoverLeaderExecutionCallback implements LeaderExecutionCallback {

        /**
         * Take over the first crashed item listed under the items root.
         *
         * <p>Nothing happens when the job is shut down or failover is no longer needed.
         * Otherwise the execution-failover node of the crashed item is filled with the local
         * job instance id, the items node of that item is removed, and the job is triggered
         * if a schedule controller is registered for it.</p>
         */
        @Override
        public void execute() {
            if (JobRegistry.getInstance().isShutdown(jobName)) {
                return;
            }
            // Checked again here, independently of the check made before the callback was submitted.
            if (!needFailover()) {
                return;
            }
            List<String> crashedItemKeys = jobNodeStorage.getJobNodeChildrenKeys(FailoverNode.ITEMS_ROOT);
            int crashedItem = Integer.parseInt(crashedItemKeys.get(0));
            log.debug("Failover job '{}' begin, crashed item '{}'", jobName, crashedItem);
            String failoverNode = FailoverNode.getExecutionFailoverNode(crashedItem);
            String localInstanceId = JobRegistry.getInstance().getJobInstance(jobName).getJobInstanceId();
            jobNodeStorage.fillEphemeralJobNode(failoverNode, localInstanceId);
            jobNodeStorage.removeJobNodeIfExisted(FailoverNode.getItemsNode(crashedItem));
            // TODO triggerJob should not be used here; scheduling should go through the executor uniformly
            JobScheduleController controller = JobRegistry.getInstance().getJobScheduleController(jobName);
            if (null == controller) {
                return;
            }
            controller.triggerJob();
        }
    }
}