package com.vip.saturn.job.sharding.service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import com.vip.saturn.job.integrate.service.ReportAlarmService;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.recipes.leader.LeaderLatch;
import org.apache.zookeeper.CreateMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.vip.saturn.job.sharding.entity.Executor;
import com.vip.saturn.job.sharding.entity.Shard;
import com.vip.saturn.job.sharding.node.SaturnExecutorsNode;
/**
*
* @author hebelala
*/
public class NamespaceShardingService {
// Logger shared by this service and its nested sharding tasks.
static Logger log = LoggerFactory.getLogger(NamespaceShardingService.class);
// Load level returned by getLoadLevel when a job's load level cannot be read from its config node.
private static final int LOAD_LEVEL_DEFAULT = 1;
// ZooKeeper client used for all node reads and writes in this service.
private CuratorFramework curatorFramework;
// Incremented right before a sharding task is submitted and decremented at the end of every task's run().
private AtomicInteger shardingCount;
// True while an all-sharding task is pending; other tasks skip their work while it is set.
private AtomicBoolean needAllSharding;
// Executor that the sharding tasks are submitted to (see newSingleThreadExecutor()).
private ExecutorService executorService;
// Namespace obtained from the CuratorFramework in the constructor.
private String namespace;
// Host value handed in by the creator; passed to the alarm service when an all-sharding fails.
private String hostValue;
// Reads and persists the sharding content (executor list and per-job shard items).
private NamespaceShardingContentService namespaceShardingContentService;
// Optional alarm reporter; checked against null before use.
private ReportAlarmService reportAlarmService;
// NOTE(review): presumably guards shutdown(); its usage is outside the visible part of the file - confirm.
private Object shutdownLock = new Object();
/**
 * Creates the sharding service for the namespace of the given Curator client.
 *
 * @param curatorFramework   client used for every ZooKeeper access; its namespace becomes this service's namespace
 * @param hostValue          host identifier reported together with alarms
 * @param reportAlarmService alarm reporter, may be {@code null}
 */
public NamespaceShardingService(CuratorFramework curatorFramework, String hostValue, ReportAlarmService reportAlarmService) {
    // Collaborators handed in by the caller.
    this.curatorFramework = curatorFramework;
    this.hostValue = hostValue;
    this.reportAlarmService = reportAlarmService;

    // State derived from the client.
    namespace = curatorFramework.getNamespace();
    namespaceShardingContentService = new NamespaceShardingContentService(curatorFramework);

    // Task bookkeeping and the executor the tasks run on.
    shardingCount = new AtomicInteger(0);
    needAllSharding = new AtomicBoolean(false);
    executorService = newSingleThreadExecutor();
}
/**
 * Builds the single-thread executor that runs the sharding tasks.
 * Threads are named {@code <namespace>-<simple class name of the runnable given to the factory>}.
 * NOTE(review): the runnable passed to a ThreadFactory by the JDK pool is presumably the pool's
 * internal worker rather than the submitted task, so the suffix may not be the task name - confirm.
 */
private ExecutorService newSingleThreadExecutor() {
    ThreadFactory namedThreadFactory = new ThreadFactory() {
        @Override
        public Thread newThread(Runnable r) {
            String threadName = namespace + "-" + r.getClass().getSimpleName();
            return new Thread(r, threadName);
        }
    };
    return Executors.newSingleThreadExecutor(namedThreadFactory);
}
private abstract class AbstractAsyncShardingTask implements Runnable {
/**
* Logs that this task is starting; called as the first step of {@link #run()}.
*/
protected abstract void logStartInfo();
/**
* Enabled jobs that must always be notified, regardless of whether their shards changed.<br/>
* By default only the enabled jobs whose shards changed are notified.
*
* @return the job names to notify unconditionally, or {@code null} (the default) when there are none
*/
protected List<String> notifyEnableJobsPrior() {
return null;
}
/**
* Executes one sharding round: pick shards, put them back onto executors, persist the executor list
* if it changed, notify the affected enabled jobs and bump the persisted sharding count.
* <p>
* On any failure a non-all-sharding task schedules an {@link ExecuteAllShardingTask}; a failing
* all-sharding task reports an alarm (when an alarm service is set) and shuts this service down.
* The in-memory shardingCount is always decremented when the task ends.
*/
@Override
public void run() {
logStartInfo();
boolean isAllShardingTask = this instanceof ExecuteAllShardingTask;
try {
// If this instance is no longer the leader, return immediately
if(!isLeadership()) {
return;
}
// If an all-sharding is pending and this task is not the all-sharding task, return: there is no point in sharding here
if(needAllSharding.get() && !isAllShardingTask) {
log.info("the {} will be ignored, because there will be {}", this.getClass().getSimpleName(), ExecuteAllShardingTask.class.getSimpleName());
return;
}
List<String> allJobs = getAllJobs();
List<String> allEnableJobs = getAllEnableJobs(allJobs);
List<Executor> oldOnlineExecutorList = getLastOnlineExecutorList();
List<Executor> customLastOnlineExecutorList = customLastOnlineExecutorList();
// Work on the task's custom list when it supplies one, otherwise on a deep copy of the stored list
List<Executor> lastOnlineExecutorList = customLastOnlineExecutorList == null ? copyOnlineExecutorList(oldOnlineExecutorList) : customLastOnlineExecutorList;
List<Shard> shardList = new ArrayList<>();
// Pick
if(pick(allJobs, allEnableJobs, shardList, lastOnlineExecutorList)) {
// Put back
putBackBalancing(allEnableJobs, shardList, lastOnlineExecutorList);
// If this instance is no longer the leader, return
if (!isLeadership()) {
return;
}
// Persist the sharding result
if(shardingContentIsChanged(oldOnlineExecutorList, lastOnlineExecutorList)) {
namespaceShardingContentService.persistDirectly(lastOnlineExecutorList);
}
// notify the shards-changed jobs of all enable jobs.
Map<String, Map<String, List<Integer>>> enabledAndShardsChangedJobShardContent = getEnabledAndShardsChangedJobShardContent(isAllShardingTask, allEnableJobs, oldOnlineExecutorList, lastOnlineExecutorList);
namespaceShardingContentService.persistJobsNecessaryInTransaction(enabledAndShardsChangedJobShardContent);
// sharding count ++
increaseShardingCount();
}
} catch (Throwable t) {
log.error(t.getMessage(), t);
if(!isAllShardingTask) { // Not the all-sharding task: schedule an all-sharding to recover from the error
needAllSharding.set(true);
shardingCount.incrementAndGet();
executorService.submit(new ExecuteAllShardingTask());
} else { // The all-sharding task itself failed: raise an alarm and shut this service down so a new leader is elected to do the work
if(reportAlarmService != null) {
try {
reportAlarmService.allShardingError(namespace, hostValue);
} catch (Throwable t2) {
log.error(t2.getMessage(), t2);
}
}
try {
shutdown();
} catch (Throwable t3) {
log.error(t3.getMessage(), t3);
}
}
} finally {
if(isAllShardingTask) { // The all-sharding task has run, so an all-sharding is no longer pending
needAllSharding.set(false);
}
shardingCount.decrementAndGet();
}
}
/**
 * Tells whether the sharding content derived from the two executor lists differs.
 *
 * @param oldOnlineExecutorList  executor list as stored before this round
 * @param lastOnlineExecutorList executor list produced by this round
 * @return {@code true} when the two sharding contents are not equal
 */
private boolean shardingContentIsChanged(List<Executor> oldOnlineExecutorList, List<Executor> lastOnlineExecutorList) {
    Object previousContent = namespaceShardingContentService.toShardingContent(oldOnlineExecutorList);
    Object currentContent = namespaceShardingContentService.toShardingContent(lastOnlineExecutorList);
    boolean unchanged = previousContent.equals(currentContent);
    return !unchanged;
}
/**
 * Deep-copies the executor list so that the sharding round can modify the copy without touching
 * the stored list. Job-name lists and shard lists are only copied when the source has them.
 *
 * @param oldOnlineExecutorList executors to copy
 * @return a new list holding new {@code Executor} and {@code Shard} instances
 */
private List<Executor> copyOnlineExecutorList(List<Executor> oldOnlineExecutorList) {
    List<Executor> copies = new ArrayList<>();
    for (Executor source : oldOnlineExecutorList) {
        Executor copy = new Executor();
        copy.setTotalLoadLevel(source.getTotalLoadLevel());
        copy.setIp(source.getIp());
        copy.setExecutorName(source.getExecutorName());

        List<String> sourceJobNames = source.getJobNameList();
        if (sourceJobNames != null) {
            copy.setJobNameList(new ArrayList<String>());
            copy.getJobNameList().addAll(sourceJobNames);
        }

        List<Shard> sourceShards = source.getShardList();
        if (sourceShards != null) {
            copy.setShardList(new ArrayList<Shard>());
            for (Shard sourceShard : sourceShards) {
                Shard shardCopy = new Shard();
                shardCopy.setItem(sourceShard.getItem());
                shardCopy.setJobName(sourceShard.getJobName());
                shardCopy.setLoadLevel(sourceShard.getLoadLevel());
                copy.getShardList().add(shardCopy);
            }
        }
        copies.add(copy);
    }
    return copies;
}
/**
 * Corrects the jobNameList of every executor in lastOnlineExecutorList for one job: the job name
 * is listed exactly when the job's server status node exists for that executor.
 *
 * @param lastOnlineExecutorList executors whose job-name lists are corrected in place
 * @param jobName                job to check
 * @return {@code true} if at least one job-name list was modified
 */
protected boolean fixJobNameList(List<Executor> lastOnlineExecutorList, String jobName) throws Exception {
    boolean fixed = false;
    for (Executor executor : lastOnlineExecutorList) {
        if (executor.getJobNameList() == null) {
            executor.setJobNameList(new ArrayList<String>());
        }
        List<String> jobNames = executor.getJobNameList();
        String statusNodePath = SaturnExecutorsNode.getJobServersExecutorStatusNodePath(jobName, executor.getExecutorName());
        boolean statusNodeExists = curatorFramework.checkExists().forPath(statusNodePath) != null;
        boolean listed = jobNames.contains(jobName);
        if (statusNodeExists == listed) {
            // Already consistent, nothing to correct.
            continue;
        }
        if (statusNodeExists) {
            jobNames.add(jobName);
        } else {
            jobNames.remove(jobName);
        }
        fixed = true;
    }
    return fixed;
}
/**
 * Increments the sharding count stored at {@code SaturnExecutorsNode.SHARDING_COUNT_PATH}.
 * The node is created with value 1 when missing; a missing or unparsable value is also reset to 1.
 */
private void increaseShardingCount() throws Exception {
    String countPath = SaturnExecutorsNode.SHARDING_COUNT_PATH;
    if (curatorFramework.checkExists().forPath(countPath) == null) {
        curatorFramework.create().creatingParentsIfNeeded().forPath(countPath, Integer.toString(1).getBytes("UTF-8"));
        return;
    }
    int nextCount = 1;
    byte[] storedCount = curatorFramework.getData().forPath(countPath);
    if (storedCount != null) {
        try {
            nextCount = Integer.parseInt(new String(storedCount, "UTF-8")) + 1;
        } catch (NumberFormatException e) {
            // Keep the fallback value of 1 when the stored data is not a number.
            log.error(e.getMessage(), e);
        }
    }
    curatorFramework.setData().forPath(countPath, Integer.toString(nextCount).getBytes("UTF-8"));
}
/**
 * Get the jobs that are enabled and whose shards are changed, together with their shard content.
 * Specially, all enabled jobs are returned when the current task is the all-sharding task, and the
 * jobs returned by {@link #notifyEnableJobsPrior()} are always included when they are enabled.
 *
 * @param isAllShardingTask      whether the current task is the all-sharding task
 * @param allEnableJobs          all enabled jobs of the namespace
 * @param oldOnlineExecutorList  executor list before this sharding round
 * @param lastOnlineExecutorList executor list after this sharding round
 * @return job name mapped to its new sharding items (executor name mapped to shard items)
 */
private Map<String, Map<String, List<Integer>>> getEnabledAndShardsChangedJobShardContent(boolean isAllShardingTask, List<String> allEnableJobs, List<Executor> oldOnlineExecutorList, List<Executor> lastOnlineExecutorList) throws Exception {
    Map<String, Map<String, List<Integer>>> jobShardContent = new HashMap<>();
    if (isAllShardingTask) {
        for (String enableJob : allEnableJobs) {
            Map<String, List<Integer>> lastShardingItems = namespaceShardingContentService.getShardingItems(lastOnlineExecutorList, enableJob);
            jobShardContent.put(enableJob, lastShardingItems);
        }
        return jobShardContent;
    }
    List<String> enableJobsPrior = notifyEnableJobsPrior();
    for (String enableJob : allEnableJobs) {
        Map<String, List<Integer>> lastShardingItems = namespaceShardingContentService.getShardingItems(lastOnlineExecutorList, enableJob);
        // notify prior jobs that are in all enable jobs
        if (enableJobsPrior != null && enableJobsPrior.contains(enableJob)) {
            jobShardContent.put(enableJob, lastShardingItems);
            continue;
        }
        Map<String, List<Integer>> oldShardingItems = namespaceShardingContentService.getShardingItems(oldOnlineExecutorList, enableJob);
        // The shards changed when either side holds an executor or a shard item the other side lacks.
        boolean isChanged = hasUncoveredShardingItems(oldShardingItems, lastShardingItems)
                || hasUncoveredShardingItems(lastShardingItems, oldShardingItems);
        if (isChanged) {
            jobShardContent.put(enableJob, lastShardingItems);
        }
    }
    return jobShardContent;
}

/**
 * Checks one direction of the comparison: whether {@code source} has an executor, or a shard item
 * of an executor, that {@code target} does not have. A null shard list only matches a null shard list.
 */
private boolean hasUncoveredShardingItems(Map<String, List<Integer>> source, Map<String, List<Integer>> target) {
    for (Map.Entry<String, List<Integer>> entry : source.entrySet()) {
        String executorName = entry.getKey();
        if (!target.containsKey(executorName)) {
            return true;
        }
        List<Integer> sourceShards = entry.getValue();
        List<Integer> targetShards = target.get(executorName);
        if (sourceShards == null || targetShards == null) {
            if (sourceShards != targetShards) {
                // Exactly one of the two lists is null.
                return true;
            }
            continue;
        }
        for (Integer shard : sourceShards) {
            if (!targetShards.contains(shard)) {
                return true;
            }
        }
    }
    return false;
}
/**
 * Reads the job's localMode config node.
 *
 * @param jobName job to look up
 * @return the boolean stored in the node; {@code false} when the node is missing or holds no data
 */
protected boolean isLocalMode(String jobName) throws Exception {
    String localModePath = SaturnExecutorsNode.getJobConfigLocalModeNodePath(jobName);
    if (curatorFramework.checkExists().forPath(localModePath) == null) {
        return false;
    }
    byte[] raw = curatorFramework.getData().forPath(localModePath);
    return raw != null && Boolean.parseBoolean(new String(raw, "UTF-8"));
}
/**
 * Reads the job's shardingTotalCount config node.
 *
 * @param jobName job to look up
 * @return the configured count; 0 when the node is missing, holds no data or is not a number
 */
protected int getShardingTotalCount(String jobName) throws Exception {
    String totalCountPath = SaturnExecutorsNode.getJobConfigShardingTotalCountNodePath(jobName);
    if (curatorFramework.checkExists().forPath(totalCountPath) == null) {
        return 0;
    }
    byte[] raw = curatorFramework.getData().forPath(totalCountPath);
    if (raw == null) {
        return 0;
    }
    try {
        return Integer.parseInt(new String(raw, "UTF-8"));
    } catch (NumberFormatException e) {
        log.error(e.getMessage(), e);
        return 0;
    }
}
/**
 * Reads the job's loadLevel config node. Never throws: any failure is logged and the default is used.
 *
 * @param jobName job to look up
 * @return the configured load level, or {@code LOAD_LEVEL_DEFAULT} when it cannot be read
 */
protected int getLoadLevel(String jobName) {
    try {
        String loadLevelPath = SaturnExecutorsNode.getJobConfigLoadLevelNodePath(jobName);
        if (curatorFramework.checkExists().forPath(loadLevelPath) == null) {
            return LOAD_LEVEL_DEFAULT;
        }
        byte[] raw = curatorFramework.getData().forPath(loadLevelPath);
        if (raw == null) {
            return LOAD_LEVEL_DEFAULT;
        }
        return Integer.parseInt(new String(raw, "UTF-8"));
    } catch (Exception e) {
        log.error(e.getMessage(), e);
        return LOAD_LEVEL_DEFAULT;
    }
}
/**
 * Gets the executor list as currently stored by the sharding content service
 * (by default read from sharding/content).
 */
private List<Executor> getLastOnlineExecutorList() throws Exception {
    List<Executor> storedExecutors = namespaceShardingContentService.getExecutorList();
    return storedExecutors;
}
/**
* Hook for supplying a custom lastOnlineExecutorList for this sharding round.
* The default returns {@code null}, in which case {@link #run()} works on a copy of the stored executor list.
*
* @return the executor list to shard on, or {@code null} to use the stored one
*/
protected List<Executor> customLastOnlineExecutorList() throws Exception {
return null;
}
/**
* Pick: collect into shardList the shards that have to be (re)assigned in this round.
* @param allJobs all jobs of the namespace
* @param allEnableJobs all enabled jobs of the namespace
* @param shardList empty by default; the implementation adds the picked shards to it
* @param lastOnlineExecutorList by default a copy of the currently stored data; override {@link #customLastOnlineExecutorList()} to use something else
* @return true if picking succeeded; false if it failed and the remaining logic does not need to run
*/
protected abstract boolean pick(List<String> allJobs, List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception;
/**
 * Sorts the shards by loadLevel in descending order; shards with the same loadLevel are sorted by
 * job name in descending order.
 *
 * @param shardList list to sort in place
 */
protected void sortShardList(List<Shard> shardList) {
    Collections.sort(shardList, new Comparator<Shard>() {
        @Override
        public int compare(Shard o1, Shard o2) {
            // Integer.compare instead of subtraction: a difference of two ints can overflow and flip the sign.
            int byLoadLevelDesc = Integer.compare(o2.getLoadLevel(), o1.getLoadLevel());
            return byLoadLevelDesc != 0 ? byLoadLevelDesc : o2.getJobName().compareTo(o1.getJobName());
        }
    });
}
/**
 * Selects the executors that have no task node, i.e. the non-docker (non-container) executors.
 *
 * @param lastOnlineExecutorList executors to filter
 * @return a new list with the executors whose task node does not exist
 */
private List<Executor> getNotDockerExecutors(List<Executor> lastOnlineExecutorList) throws Exception {
    List<Executor> notDockerExecutors = new ArrayList<>();
    for (Executor candidate : lastOnlineExecutorList) {
        String taskNodePath = SaturnExecutorsNode.getExecutorTaskNodePath(candidate.getExecutorName());
        boolean hasTaskNode = curatorFramework.checkExists().forPath(taskNodePath) != null;
        if (!hasTaskNode) {
            notDockerExecutors.add(candidate);
        }
    }
    return notDockerExecutors;
}
/**
* Puts the picked shards back onto executors, always choosing the candidate with the lowest total load level.
* The shards are sorted first and then handled in three passes: local-mode jobs, jobs with a configured
* preferList, and finally every shard still left in shardList. Shards are removed from shardList as
* they are handled, so the list is empty when the method returns normally with at least one executor.
*
* @param allEnableJobs all enabled jobs of the namespace (not read by this implementation)
* @param shardList the shards to put back; sorted and emptied in place
* @param lastOnlineExecutorList the executors that may receive shards; their shard lists and load levels are updated
*/
protected void putBackBalancing(List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
if(lastOnlineExecutorList.isEmpty()) {
log.warn("Unnecessary to put shards back to executors balanced because of no executor");
return;
}
sortShardList(shardList);
// Get the non-container (non-docker) executors
List<Executor> notDockerExecutors = getNotDockerExecutors(lastOnlineExecutorList);
// For each job in shardList, the executors that are able to take over that job
Map<String, List<Executor>> notDockerExecutorsMapByJob = new HashMap<>();
Map<String, List<Executor>> lastOnlineExecutorListMapByJob = new HashMap<>();
// job name -> whether the job is a local-mode job
Map<String, Boolean> localModeMap = new HashMap<>();
// job name -> whether a preferList is configured for the job
Map<String, Boolean> preferListIsConfiguredMap = new HashMap<>();
// job name -> the configured preferList of the job
Map<String, List<String>> preferListConfiguredMap = new HashMap<>();
// job name -> whether the job may use non-preferred executors
Map<String, Boolean> useDispreferListMap = new HashMap<>();
// Resolve the per-job settings once per job instead of once per shard
Iterator<Shard> iterator0 = shardList.iterator();
while(iterator0.hasNext()) {
String jobName = iterator0.next().getJobName();
if(!notDockerExecutorsMapByJob.containsKey(jobName)) {
notDockerExecutorsMapByJob.put(jobName, filterExecutorsByJob(notDockerExecutors, jobName));
}
if(!lastOnlineExecutorListMapByJob.containsKey(jobName)) {
lastOnlineExecutorListMapByJob.put(jobName, filterExecutorsByJob(lastOnlineExecutorList, jobName));
}
if(!localModeMap.containsKey(jobName)) {
localModeMap.put(jobName, isLocalMode(jobName));
}
if(!preferListIsConfiguredMap.containsKey(jobName)) {
preferListIsConfiguredMap.put(jobName, preferListIsConfigured(jobName));
}
if(!preferListConfiguredMap.containsKey(jobName)) {
preferListConfiguredMap.put(jobName, getPreferListConfigured(jobName));
}
if(!useDispreferListMap.containsKey(jobName)) {
useDispreferListMap.put(jobName, useDispreferList(jobName));
}
}
// Overall put-back algorithm: take a Shard and put it into the executor with the lowest load
// 1. Put back the shards of localMode jobs
// If a preferList is configured, choose among the executors in the preferList. If all executors of the preferList are down, do not transfer; otherwise choose the one with the lowest loadLevel among the executors that have not taken over the job.
// If no preferList is configured, choose the one with the lowest loadLevel among the executors that have not taken over the job.
Iterator<Shard> shardIterator = shardList.iterator();
while(shardIterator.hasNext()) {
Shard shard = shardIterator.next();
String jobName = shard.getJobName();
if(localModeMap.get(jobName)) {
if(preferListIsConfiguredMap.get(jobName)) {
List<String> preferListConfigured = preferListConfiguredMap.get(jobName);
if (!preferListConfigured.isEmpty()) {
List<Executor> preferExecutorList = new ArrayList<>();
List<Executor> lastOnlineExecutorListByJob = lastOnlineExecutorListMapByJob.get(jobName);
for (int i = 0; i < lastOnlineExecutorListByJob.size(); i++) {
Executor executor = lastOnlineExecutorListByJob.get(i);
if (preferListConfigured.contains(executor.getExecutorName())) {
preferExecutorList.add(executor);
}
}
if (!preferExecutorList.isEmpty()) {
Executor executor = getExecutorWithMinLoadLevelAndNoThisJob(preferExecutorList, jobName);
putShardIntoExecutor(shard, executor);
}
}
} else {
Executor executor = getExecutorWithMinLoadLevelAndNoThisJob(notDockerExecutorsMapByJob.get(jobName), jobName);
putShardIntoExecutor(shard, executor);
}
// A local-mode shard leaves the list whether or not an executor took it
shardIterator.remove();
}
}
// 2. Put back the shards of jobs with a configured preferList
Iterator<Shard> shardIterator2 = shardList.iterator();
while(shardIterator2.hasNext()) {
Shard shard = shardIterator2.next();
String jobName = shard.getJobName();
if(preferListIsConfiguredMap.get(jobName)) { // fix: an empty preferList cannot be used to decide whether a preferList is configured, e.g. container resources are configured but all of them are offline.
List<String> preferList = preferListConfiguredMap.get(jobName);
List<Executor> preferExecutorList = new ArrayList<>();
List<Executor> lastOnlineExecutorListByJob = lastOnlineExecutorListMapByJob.get(jobName);
for(int i=0; i<lastOnlineExecutorListByJob.size(); i++) {
Executor executor = lastOnlineExecutorListByJob.get(i);
if (preferList.contains(executor.getExecutorName())) {
preferExecutorList.add(executor);
}
}
// If all executors of the preferList are offline, the shard is put back to one of all online executors. That case is handled later, to avoid imbalance
// If a prefer executor exists, put the shard back to the best one
if(!preferExecutorList.isEmpty()) {
Executor executor = getExecutorWithMinLoadLevel(preferExecutorList);
putShardIntoExecutor(shard, executor);
shardIterator2.remove();
} else{ // No prefer executor exists
// If only prefer executors may be used, drop the shard; otherwise leave it for step 3 to put back, to avoid imbalance
if(!useDispreferListMap.get(jobName)) {
shardIterator2.remove();
}
}
}
}
// 3. Put back the shards of jobs without a configured preferList (plus the shards left over from step 2)
Iterator<Shard> shardIterator3 = shardList.iterator();
while(shardIterator3.hasNext()) {
Shard shard = shardIterator3.next();
Executor executor = getExecutorWithMinLoadLevel(notDockerExecutorsMapByJob.get(shard.getJobName()));
putShardIntoExecutor(shard, executor);
shardIterator3.remove();
}
}
/**
 * Whether executors outside the preferList may be used for the job.<br>
 * 1. The node exists and its value is false: returns false;<br>
 * 2. Any other case (including a read failure, which is logged): returns true.
 */
protected boolean useDispreferList(String jobName) {
    try {
        String useDispreferListPath = SaturnExecutorsNode.getJobConfigUseDispreferListNodePath(jobName);
        if (curatorFramework.checkExists().forPath(useDispreferListPath) == null) {
            return true;
        }
        byte[] raw = curatorFramework.getData().forPath(useDispreferListPath);
        return raw == null || Boolean.parseBoolean(new String(raw, "UTF-8"));
    } catch (Exception e) {
        log.error(e.getMessage(), e);
        return true;
    }
}
/**
 * Finds the executor with the lowest total load level; on ties the first one in the list wins.
 *
 * @param executorList candidates
 * @return the least loaded executor, or {@code null} when the list is empty
 */
private Executor getExecutorWithMinLoadLevel(List<Executor> executorList) {
    Executor leastLoaded = null;
    for (Executor candidate : executorList) {
        if (leastLoaded == null || candidate.getTotalLoadLevel() < leastLoaded.getTotalLoadLevel()) {
            leastLoaded = candidate;
        }
    }
    return leastLoaded;
}
/**
 * Finds the executor with the lowest total load level among those that hold no shard of the given
 * job; on ties the first one in the list wins.
 *
 * @param executorList candidates
 * @param jobName      job that the chosen executor must not already hold a shard of
 * @return the least loaded eligible executor, or {@code null} when there is none
 */
private Executor getExecutorWithMinLoadLevelAndNoThisJob(List<Executor> executorList, String jobName) {
    Executor leastLoaded = null;
    for (Executor candidate : executorList) {
        boolean holdsThisJob = false;
        for (Shard held : candidate.getShardList()) {
            if (held.getJobName().equals(jobName)) {
                holdsThisJob = true;
                break;
            }
        }
        if (holdsThisJob) {
            continue;
        }
        if (leastLoaded == null || candidate.getTotalLoadLevel() < leastLoaded.getTotalLoadLevel()) {
            leastLoaded = candidate;
        }
    }
    return leastLoaded;
}
/**
 * Adds the shard to the executor and raises the executor's total load level by the shard's load level.
 * Nothing is changed (only a log line is written) when there is no executor or when the executor
 * already holds this shard.
 *
 * @param shard    shard to assign
 * @param executor receiving executor, may be {@code null}
 */
private void putShardIntoExecutor(Shard shard, Executor executor) {
    if (executor == null) {
        log.info("No executor to take over the shard: {}-{}", shard.getJobName(), shard.getItem());
        return;
    }
    if (isIn(shard, executor.getShardList())) {
        log.error("The shard({}-{}) is running in the executor of {}, cannot be put again", shard.getJobName(), shard.getItem(), executor.getExecutorName());
        return;
    }
    executor.getShardList().add(shard);
    executor.setTotalLoadLevel(executor.getTotalLoadLevel() + shard.getLoadLevel());
}
/**
 * Gets all jobs of the namespace, i.e. the children of the jobs node. The jobs node is created
 * first when it does not exist yet.
 *
 * @return a new mutable list of job names, empty when there are none
 */
private List<String> getAllJobs() throws Exception {
    String jobsNodePath = SaturnExecutorsNode.$JOBSNODE_PATH;
    if (curatorFramework.checkExists().forPath(jobsNodePath) == null) {
        curatorFramework.create().creatingParentsIfNeeded().forPath(jobsNodePath);
    }
    List<String> children = curatorFramework.getChildren().forPath(jobsNodePath);
    return children == null ? new ArrayList<String>() : new ArrayList<String>(children);
}
/**
 * Gets all enabled jobs of the namespace: the jobs whose enable config node exists and holds "true".
 *
 * @param allJob all jobs of the namespace
 * @return a new list with the enabled jobs, in the order of {@code allJob}
 */
protected List<String> getAllEnableJobs(List<String> allJob) throws Exception {
    List<String> enabledJobs = new ArrayList<>();
    for (String job : allJob) {
        if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getJobConfigEnableNodePath(job)) == null) {
            continue;
        }
        byte[] raw = curatorFramework.getData().forPath(SaturnExecutorsNode.getJobConfigEnableNodePath(job));
        boolean enabled = raw != null && Boolean.parseBoolean(new String(raw, "UTF-8"));
        if (enabled) {
            enabledJobs.add(job);
        }
    }
    return enabledJobs;
}
/**
 * Tells whether the list already contains a shard with the same job name and item as the given shard.
 *
 * @param shard     shard to look for
 * @param shardList list to search
 * @return {@code true} when a matching shard is found
 */
protected boolean isIn(Shard shard, List<Shard> shardList) {
    for (Shard existing : shardList) {
        boolean sameJob = existing.getJobName().equals(shard.getJobName());
        if (sameJob && existing.getItem() == shard.getItem()) {
            return true;
        }
    }
    return false;
}
/**
 * Tells whether a preferList is configured for the job, i.e. whether the preferList config node
 * exists and holds a non-blank value.
 *
 * @param jobName job to look up
 * @return {@code true} when the node holds text other than whitespace
 */
protected boolean preferListIsConfigured(String jobName) throws Exception {
    if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName)) == null) {
        return false;
    }
    byte[] raw = curatorFramework.getData().forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName));
    return raw != null && !new String(raw, "UTF-8").trim().isEmpty();
}
/**
 * Gets the configured preferList. Plain executor names are returned even when the executor does
 * not exist. Specially, a docker task entry is resolved to its executor list, so it only
 * contributes executors that exist.
 *
 * @param jobName job to look up
 * @return the preferred executor names without duplicates; empty when nothing is configured
 */
protected List<String> getPreferListConfigured(String jobName) throws Exception {
    List<String> preferList = new ArrayList<>();
    if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName)) == null) {
        return preferList;
    }
    byte[] raw = curatorFramework.getData().forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName));
    if (raw == null) {
        return preferList;
    }
    List<String> existingExecutors = getAllExistingExecutors();
    for (String entry : new String(raw, "UTF-8").split(",")) {
        String trimmed = entry.trim();
        if (!trimmed.isEmpty()) {
            fillRealPreferListIfIsDockerOrNot(preferList, trimmed, existingExecutors);
        }
    }
    return preferList;
}
/**
 * Lists the names of all executors registered under the executors node.
 *
 * @return a new mutable list of executor names; empty when the node is missing or has no children
 */
private List<String> getAllExistingExecutors() throws Exception {
    List<String> existingExecutors = new ArrayList<>();
    if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorsNodePath()) == null) {
        return existingExecutors;
    }
    List<String> children = curatorFramework.getChildren().forPath(SaturnExecutorsNode.getExecutorsNodePath());
    if (children != null) {
        existingExecutors.addAll(children);
    }
    return existingExecutors;
}
/**
 * Adds one configured prefer entry to preferList.<br>
 * If the entry is not a docker container, it is added directly unless already present;<br>
 * if the entry is a docker container (starts with @), the rest of the entry is a task name and
 * every existing executor whose task node holds that name is added unless already present.
 *
 * @param preferList         list to extend in place
 * @param prefer             configured entry, already trimmed
 * @param allExistsExecutors names of all existing executors
 */
private void fillRealPreferListIfIsDockerOrNot(List<String> preferList, String prefer, List<String> allExistsExecutors) throws Exception {
    if (!prefer.startsWith("@")) {
        // not docker server
        if (!preferList.contains(prefer)) {
            preferList.add(prefer);
        }
        return;
    }
    // docker server, get the real executorList by task
    String task = prefer.substring(1);
    for (String executor : allExistsExecutors) {
        if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorTaskNodePath(executor)) == null) {
            continue;
        }
        byte[] taskData = curatorFramework.getData().forPath(SaturnExecutorsNode.getExecutorTaskNodePath(executor));
        boolean belongsToTask = taskData != null && task.equals(new String(taskData, "UTF-8"));
        if (belongsToTask && !preferList.contains(executor)) {
            preferList.add(executor);
        }
    }
}
/**
 * Selects the executors whose jobNameList contains the given job. Executors without a jobNameList
 * are left out.
 *
 * @param executorList executors to filter
 * @param jobName      job to look for
 * @return a new list with the matching executors, in the original order
 */
protected List<Executor> filterExecutorsByJob(List<Executor> executorList, String jobName) throws Exception {
    List<Executor> matching = new ArrayList<>();
    for (Executor candidate : executorList) {
        List<String> jobNames = candidate.getJobNameList();
        if (jobNames == null) {
            continue;
        }
        if (jobNames.contains(jobName)) {
            matching.add(candidate);
        }
    }
    return matching;
}
/**
 * Selects the online executors that are in the configured preferList and whose jobNameList
 * contains the job.
 * <p>
 * An executor without a jobNameList is skipped instead of causing a NullPointerException, in line
 * with {@link #filterExecutorsByJob(List, String)}.
 *
 * @param jobName                job to look for
 * @param preferListConfigured   configured preferred executor names
 * @param lastOnlineExecutorList online executors
 * @return a new list with the matching executors, in the order of {@code lastOnlineExecutorList}
 */
private List<Executor> getPreferListOnlineByJob(String jobName, List<String> preferListConfigured, List<Executor> lastOnlineExecutorList) {
    List<Executor> preferListOnlineByJob = new ArrayList<>();
    for (Executor executor : lastOnlineExecutorList) {
        if (!preferListConfigured.contains(executor.getExecutorName())) {
            continue;
        }
        List<String> jobNameList = executor.getJobNameList();
        if (jobNameList != null && jobNameList.contains(jobName)) {
            preferListOnlineByJob.add(executor);
        }
    }
    return preferListOnlineByJob;
}
/**
 * Creates {@code number} shards for the job with items 0..number-1, all with the same load level.
 *
 * @param jobName   job the shards belong to
 * @param number    how many shards to create; no shard is created when it is not positive
 * @param loadLevel load level given to every shard
 * @return a new mutable list of the created shards
 */
private List<Shard> createShards(String jobName, int number, int loadLevel) {
    List<Shard> created = new ArrayList<>();
    int item = 0;
    while (item < number) {
        Shard shard = new Shard();
        shard.setJobName(jobName);
        shard.setItem(item);
        shard.setLoadLevel(loadLevel);
        created.add(shard);
        item++;
    }
    return created;
}
/**
 * Creates the shards of a job according to its configuration:
 * <ul>
 * <li>local mode with a configured preferList: one shard per online preferred executor of the job
 * (none when no such executor is online);</li>
 * <li>local mode without a preferList: one shard per online executor;</li>
 * <li>otherwise: shardingTotalCount shards.</li>
 * </ul>
 *
 * @param jobName                job to create shards for
 * @param lastOnlineExecutorList online executors
 * @return a new mutable list of shards
 */
protected List<Shard> createShards(String jobName, List<Executor> lastOnlineExecutorList) throws Exception {
    // Read the whole job configuration up front, in a fixed order.
    boolean preferListIsConfigured = preferListIsConfigured(jobName);
    List<String> preferListConfigured = getPreferListConfigured(jobName);
    List<Executor> preferListOnlineByJob = getPreferListOnlineByJob(jobName, preferListConfigured, lastOnlineExecutorList);
    boolean localMode = isLocalMode(jobName);
    int shardingTotalCount = getShardingTotalCount(jobName);
    int loadLevel = getLoadLevel(jobName);

    int shardCount;
    if (!localMode) {
        shardCount = shardingTotalCount;
    } else if (!preferListIsConfigured) {
        shardCount = lastOnlineExecutorList.size();
    } else {
        // Zero when no preferred executor is online, which yields no shard at all.
        shardCount = preferListOnlineByJob.size();
    }
    return createShards(jobName, shardCount, loadLevel);
}
}
/**
 * Re-shards the whole namespace: discards the stored executors, reads the executors again and
 * creates the shards of every enabled job again.
 */
private class ExecuteAllShardingTask extends AbstractAsyncShardingTask {

    @Override
    protected void logStartInfo() {
        log.info("Execute the {} ", this.getClass().getSimpleName());
    }

    @Override
    protected boolean pick(List<String> allJobs, List<String> allEnableJob, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
        // Correct every executor's jobNameList for every job.
        for (String job : allJobs) {
            fixJobNameList(lastOnlineExecutorList, job);
        }
        // Collect all shards of all enabled jobs of the namespace.
        for (String enabledJob : allEnableJob) {
            shardList.addAll(createShards(enabledJob, lastOnlineExecutorList));
        }
        return true;
    }

    /**
     * Builds the executor list from the executors node: every executor that has an ip node with
     * data, each starting with empty shard and job-name lists.
     */
    @Override
    protected List<Executor> customLastOnlineExecutorList() throws Exception {
        List<Executor> onlineExecutors = new ArrayList<>();
        if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorsNodePath()) == null) {
            return onlineExecutors;
        }
        List<String> zkExecutors = curatorFramework.getChildren().forPath(SaturnExecutorsNode.getExecutorsNodePath());
        if (zkExecutors == null) {
            return onlineExecutors;
        }
        for (String zkExecutor : zkExecutors) {
            if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorIpNodePath(zkExecutor)) == null) {
                continue;
            }
            byte[] ipData = curatorFramework.getData().forPath(SaturnExecutorsNode.getExecutorIpNodePath(zkExecutor));
            if (ipData == null) {
                continue;
            }
            Executor executor = new Executor();
            executor.setExecutorName(zkExecutor);
            executor.setIp(new String(ipData, "UTF-8"));
            executor.setShardList(new ArrayList<Shard>());
            executor.setJobNameList(new ArrayList<String>());
            onlineExecutors.add(executor);
        }
        return onlineExecutors;
    }
}
/**
 * Executor goes online: only adds an empty executor entry if it does not exist yet; if it already
 * exists, its ip is set again, in case ExecuteJobServerOnlineShardingTask ran earlier and left the
 * ip unset.<br/>
 * Specially, when there is no executor at all, i.e. this is the first executor going online, an
 * all-sharding of the namespace is requested, because jobs may already be enabled.
 */
private class ExecuteOnlineShardingTask extends AbstractAsyncShardingTask {

    private final String executorName;
    private final String ip;

    public ExecuteOnlineShardingTask(String executorName, String ip) {
        this.executorName = executorName;
        this.ip = ip;
    }

    @Override
    protected void logStartInfo() {
        log.info("Execute the {} with {} online", this.getClass().getSimpleName(), executorName);
    }

    @Override
    protected boolean pick(List<String> allJobs, List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {//NOSONAR
        // No executor is running: hand over to an all-sharding task.
        if (lastOnlineExecutorList.isEmpty()) {
            log.warn("There are no running executors, need all sharding");
            needAllSharding.set(true);
            shardingCount.incrementAndGet();
            executorService.submit(new ExecuteAllShardingTask());
            return false;
        }

        Executor known = null;
        for (Executor candidate : lastOnlineExecutorList) {
            if (candidate.getExecutorName().equals(executorName)) {
                known = candidate;
                break;
            }
        }

        if (known != null) {
            // Already present: just set the ip again.
            known.setIp(ip);
            return true;
        }

        Executor added = new Executor();
        added.setExecutorName(executorName);
        added.setIp(ip);
        added.setShardList(new ArrayList<Shard>());
        added.setJobNameList(new ArrayList<String>());
        lastOnlineExecutorList.add(added);
        return true;
    }
}
/**
 * Executor goes offline: picks all shards of non-local-mode jobs that the executor was running
 * and removes the executor from the list.
 */
private class ExecuteOfflineShardingTask extends AbstractAsyncShardingTask {

    private final String executorName;

    public ExecuteOfflineShardingTask(String executorName) {
        this.executorName = executorName;
    }

    @Override
    protected void logStartInfo() {
        log.info("Execute the {} with {} offline", this.getClass().getSimpleName(), executorName);
    }

    @Override
    protected boolean pick(List<String> allJobs, List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
        // Remove the offline executor from the list and remember it.
        Executor offlineExecutor = null;
        Iterator<Executor> executors = lastOnlineExecutorList.iterator();
        while (executors.hasNext()) {
            Executor candidate = executors.next();
            if (candidate.getExecutorName().equals(executorName)) {
                offlineExecutor = candidate;
                executors.remove();
                break;
            }
        }
        // The executor had in fact gone offline before: picking fails.
        if (offlineExecutor == null) {
            return false;
        }
        // Pick all shards of the offline executor ...
        shardList.addAll(offlineExecutor.getShardList());
        // ... and drop the shards of local-mode jobs again.
        Iterator<Shard> picked = shardList.iterator();
        while (picked.hasNext()) {
            if (isLocalMode(picked.next().getJobName())) {
                picked.remove();
            }
        }
        return true;
    }
}
/**
 * Job enabled: creates the shards of the job; executors that cannot run the job are filtered out
 * when the shards are put back.
 */
private class ExecuteJobEnableShardingTask extends AbstractAsyncShardingTask {

    private final String jobName;

    public ExecuteJobEnableShardingTask(String jobName) {
        this.jobName = jobName;
    }

    @Override
    protected void logStartInfo() {
        log.info("Execute the {} with {} enable", this.getClass().getSimpleName(), jobName);
    }

    /**
     * The enabled job itself is always notified, whether or not its shards changed.
     */
    @Override
    protected List<String> notifyEnableJobsPrior() {
        List<String> priorJobs = new ArrayList<>();
        priorJobs.add(jobName);
        return priorJobs;
    }

    @Override
    protected boolean pick(List<String> allJobs, List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
        // Remove every shard of this job that executors already hold, lowering their load accordingly.
        for (Executor executor : lastOnlineExecutorList) {
            Iterator<Shard> heldShards = executor.getShardList().iterator();
            while (heldShards.hasNext()) {
                Shard held = heldShards.next();
                if (!jobName.equals(held.getJobName())) {
                    continue;
                }
                executor.setTotalLoadLevel(executor.getTotalLoadLevel() - held.getLoadLevel());
                heldShards.remove();
            }
        }
        // Correct every executor's jobNameList for this job.
        fixJobNameList(lastOnlineExecutorList, jobName);
        // Create the shards of this job.
        shardList.addAll(createShards(jobName, lastOnlineExecutorList));
        return true;
    }
}
/**
 * Sharding task for a job being disabled: picks every shard of the job from all executors, reducing
 * each executor's load level accordingly. The picked shards are not put back.
 */
private class ExecuteJobDisableShardingTask extends AbstractAsyncShardingTask {

    private String jobName;

    public ExecuteJobDisableShardingTask(String jobName) {
        this.jobName = jobName;
    }

    @Override
    protected void logStartInfo() {
        log.info("Execute the {} with {} disable", this.getClass().getSimpleName(), jobName);
    }

    /**
     * Moves every shard of the job from the executors into {@code shardList}.
     *
     * @return {@code false} when {@code shardList} is empty afterwards (nothing to process further),
     *         {@code true} otherwise
     */
    @Override
    protected boolean pick(List<String> allJobs, List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) {
        for (Executor onlineExecutor : lastOnlineExecutorList) {
            for (Iterator<Shard> shardIt = onlineExecutor.getShardList().iterator(); shardIt.hasNext(); ) {
                Shard assigned = shardIt.next();
                if (!assigned.getJobName().equals(jobName)) {
                    continue;
                }
                int reducedLoad = onlineExecutor.getTotalLoadLevel() - assigned.getLoadLevel();
                onlineExecutor.setTotalLoadLevel(reducedLoad);
                shardIt.remove();
                shardList.add(assigned);
            }
        }
        // With an empty shardList there is no point in continuing with put-back and the like.
        return !shardList.isEmpty();
    }

    /**
     * Intentionally does nothing: shards of a disabled job are not put back.
     */
    @Override
    protected void putBackBalancing(List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) {
        // no-op
    }
}
/**
 * Sharding task for a forced re-shard of one job: removes the job's shards from all executors,
 * obtains the job's shards again, and finally deletes the forceShard node.
 */
private class ExecuteJobForceShardShardingTask extends AbstractAsyncShardingTask {

    private String jobName;

    public ExecuteJobForceShardShardingTask(String jobName) {
        this.jobName = jobName;
    }

    @Override
    protected void logStartInfo() {
        log.info("Execute the {} with {} forceShard", this.getClass().getSimpleName(), jobName);
    }

    /**
     * Runs the regular sharding flow and then deletes the forceShard node, whether or not the
     * flow completed normally.
     */
    @Override
    public void run() {
        try {
            super.run();
        } finally {
            deleteForceShardNode();
        }
    }

    /**
     * Deletes the job's forceShard config node if it exists; any failure is only logged.
     */
    private void deleteForceShardNode() {
        try {
            String forceShardPath = SaturnExecutorsNode.getJobConfigForceShardNodePath(jobName);
            boolean nodeExists = curatorFramework.checkExists().forPath(forceShardPath) != null;
            if (nodeExists) {
                curatorFramework.delete().forPath(forceShardPath);
            }
        } catch (Throwable t) {
            log.error("delete forceShard node error", t);
        }
    }

    /**
     * Discards every shard of the job currently assigned to an executor (reducing the executor's
     * total load level), fixes the executors' job-name lists and, only if the job is enabled,
     * adds the shards produced by {@code createShards} to {@code shardList}.
     *
     * @return always {@code true}
     */
    @Override
    protected boolean pick(List<String> allJobs, List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
        // Remove all shards of this job that are already assigned to executors.
        for (Executor onlineExecutor : lastOnlineExecutorList) {
            for (Iterator<Shard> shardIt = onlineExecutor.getShardList().iterator(); shardIt.hasNext(); ) {
                Shard assigned = shardIt.next();
                if (!jobName.equals(assigned.getJobName())) {
                    continue;
                }
                int reducedLoad = onlineExecutor.getTotalLoadLevel() - assigned.getLoadLevel();
                onlineExecutor.setTotalLoadLevel(reducedLoad);
                shardIt.remove();
            }
        }

        // Correct each executor's jobNameList with respect to this job.
        fixJobNameList(lastOnlineExecutorList, jobName);

        // Shards are only created when the job is enabled.
        boolean jobEnabled = allEnableJobs.contains(jobName);
        if (jobEnabled) {
            shardList.addAll(createShards(jobName, lastOnlineExecutorList));
        }
        return true;
    }
}
/**
 * Sharding task for an executor coming online for one job: shards are picked with executor-level
 * balancing, but only shards of this job may be picked; new shards are created when the job has none
 * running.
 */
private class ExecuteJobServerOnlineShardingTask extends AbstractAsyncShardingTask {

    private String jobName;

    private String executorName;

    public ExecuteJobServerOnlineShardingTask(String jobName, String executorName) {
        this.jobName = jobName;
        this.executorName = executorName;
    }

    @Override
    protected void logStartInfo() {
        log.info("Execute the {}, jobName is {}, executorName is {}", this.getClass().getSimpleName(), jobName, executorName);
    }

    /**
     * Reads the executor's ip from its ip node.
     *
     * @return the ip decoded as UTF-8, or {@code null} when the node does not exist, holds no data,
     *         or reading it failed (the failure is logged)
     */
    private String getExecutorIp() {
        String ip = null;
        try {
            String executorIpNodePath = SaturnExecutorsNode.getExecutorIpNodePath(executorName);
            if (curatorFramework.checkExists().forPath(executorIpNodePath) != null) {
                byte[] ipBytes = curatorFramework.getData().forPath(executorIpNodePath);
                if (ipBytes != null) {
                    ip = new String(ipBytes, "UTF-8");
                }
            }
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
        return ip;
    }

    /**
     * Creates a local-mode shard for this job whose item is the smallest non-negative integer not
     * already used by a shard of this job on any executor.
     *
     * @param lastOnlineExecutorList executors whose shard lists are scanned for items in use
     * @param loadLevel load level assigned to the new shard
     * @return the new shard (not yet attached to any executor)
     */
    private Shard createLocalShard(List<Executor> lastOnlineExecutorList, int loadLevel) {
        List<Integer> itemList = new ArrayList<>();
        for (int i = 0; i < lastOnlineExecutorList.size(); i++) {
            List<Shard> shardList = lastOnlineExecutorList.get(i).getShardList();
            for (int j = 0; j < shardList.size(); j++) {
                Shard shardAlreadyExists = shardList.get(j);
                if (shardAlreadyExists.getJobName().equals(jobName)) {
                    itemList.add(shardAlreadyExists.getItem());
                }
            }
        }

        // With n items in use, the smallest free item lies in [0, n], so n + 1 flags are enough.
        // Items outside that range cannot influence the result and must be skipped; indexing the
        // array with them would throw ArrayIndexOutOfBoundsException (e.g. the only remaining
        // shard has item 2).
        boolean[] used = new boolean[itemList.size() + 1];
        for (int i = 0; i < itemList.size(); i++) {
            int existingItem = itemList.get(i);
            if (existingItem >= 0 && existingItem < used.length) {
                used[existingItem] = true;
            }
        }
        int item = 0;
        while (item < used.length && used[item]) {
            item++;
        }

        Shard shard = new Shard();
        shard.setJobName(jobName);
        shard.setItem(item);
        shard.setLoadLevel(loadLevel);
        return shard;
    }

    /**
     * @return {@code true} when any executor in the list holds at least one shard of this job
     */
    private boolean hasShardRunning(List<Executor> lastOnlineExecutorList) {
        for (int i = 0; i < lastOnlineExecutorList.size(); i++) {
            List<Shard> shardList = lastOnlineExecutorList.get(i).getShardList();
            for (int j = 0; j < shardList.size(); j++) {
                if (shardList.get(j).getJobName().equals(jobName)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Removes every shard of this job from executors that are not in the configured prefer list,
     * reducing each such executor's total load level accordingly.
     *
     * @return the removed shards
     */
    private List<Shard> pickShardsRunningInDispreferList(List<String> preferListConfigured, List<Executor> lastOnlineExecutorList) {
        List<Shard> shards = new ArrayList<>();
        for (int i = 0; i < lastOnlineExecutorList.size(); i++) {
            Executor executor = lastOnlineExecutorList.get(i);
            if (!preferListConfigured.contains(executor.getExecutorName())) {
                Iterator<Shard> iterator = executor.getShardList().iterator();
                while (iterator.hasNext()) {
                    Shard shard = iterator.next();
                    if (shard.getJobName().equals(jobName)) {
                        executor.setTotalLoadLevel(executor.getTotalLoadLevel() - shard.getLoadLevel());
                        iterator.remove();
                        shards.add(shard);
                    }
                }
            }
        }
        return shards;
    }

    /**
     * @return the sum of the load levels of the given shards plus the total load levels of the
     *         given executors
     */
    private int getTotalLoadLevel(List<Shard> shardList, List<Executor> executorList) {
        int total = 0;
        for (int i = 0; i < shardList.size(); i++) {
            total += shardList.get(i).getLoadLevel();
        }
        for (int i = 0; i < executorList.size(); i++) {
            total += executorList.get(i).getTotalLoadLevel();
        }
        return total;
    }

    /**
     * Balancing pick restricted to this job: for each executor whose total load level exceeds the
     * average, repeatedly moves one of its shards of this job into {@code shardList} until the
     * executor is no longer above average or has no shard of this job left.
     * <p>
     * The shard chosen in each round is the smallest one whose load level covers the excess; if no
     * shard covers it, the largest shard of this job is chosen.
     *
     * @param shardList already picked shards; picked shards are appended to it
     * @param allExecutors executors to balance; the only visible caller adds the onlined executor
     *        first, so the list is not empty there (the size is used as a divisor)
     */
    private void pickBalance(List<Shard> shardList, List<Executor> allExecutors) {
        int totalLoadLevel = getTotalLoadLevel(shardList, allExecutors);
        int averageTotalLoad = totalLoadLevel / (allExecutors.size());
        for (int i = 0; i < allExecutors.size(); i++) {
            Executor executor = allExecutors.get(i);
            while (true) {
                int pickLoadLevel = executor.getTotalLoadLevel() - averageTotalLoad;
                if (pickLoadLevel > 0 && !executor.getShardList().isEmpty()) {
                    Shard pickShard = null;
                    for (int j = 0; j < executor.getShardList().size(); j++) {
                        Shard shard = executor.getShardList().get(j);
                        if (!shard.getJobName().equals(jobName)) { // shards of other jobs are never picked
                            continue;
                        }
                        if (pickShard == null) {
                            pickShard = shard;
                        } else {
                            if (pickShard.getLoadLevel() >= pickLoadLevel) {
                                // Current choice already covers the excess: prefer a smaller shard that still covers it.
                                if (shard.getLoadLevel() >= pickLoadLevel && shard.getLoadLevel() < pickShard.getLoadLevel()) {
                                    pickShard = shard;
                                }
                            } else {
                                // Current choice does not cover the excess: prefer one that does, otherwise the larger one.
                                if (shard.getLoadLevel() >= pickLoadLevel) {
                                    pickShard = shard;
                                } else {
                                    if (shard.getLoadLevel() > pickShard.getLoadLevel()) {
                                        pickShard = shard;
                                    }
                                }
                            }
                        }
                    }
                    if (pickShard != null) {
                        executor.setTotalLoadLevel(executor.getTotalLoadLevel() - pickShard.getLoadLevel());
                        executor.getShardList().remove(pickShard);
                        shardList.add(pickShard);
                    } else { // no shard of this job left on the executor, stop picking
                        break;
                    }
                } else { // executor is not above average (or has no shards), stop picking
                    break;
                }
            }
        }
    }

    /**
     * Creates {@code shardingTotalCount} shards of this job with items {@code 0..shardingTotalCount-1}.
     *
     * @param shardingTotalCount number of shards to create
     * @param loadLevel load level assigned to every created shard
     * @return the created shards
     */
    private List<Shard> createUnLocalShards(int shardingTotalCount, int loadLevel) {
        List<Shard> shards = new ArrayList<>();
        for (int i = 0; i < shardingTotalCount; i++) {
            Shard shard = new Shard();
            shard.setJobName(jobName);
            shard.setItem(i);
            shard.setLoadLevel(loadLevel);
            shards.add(shard);
        }
        return shards;
    }

    /**
     * @return {@code true} when no executor from the configured prefer list holds a shard of this
     *         job, i.e. all running shards of the job are on non-preferred executors
     */
    private boolean shardsAllRunningInDispreferList(List<String> preferListConfigured, List<Executor> lastOnlineExecutorList) {
        for (int i = 0; i < lastOnlineExecutorList.size(); i++) {
            Executor executor = lastOnlineExecutorList.get(i);
            // Test the executor being inspected, not the executor this task was created for.
            if (preferListConfigured.contains(executor.getExecutorName())) {
                List<Shard> shardList = executor.getShardList();
                for (int j = 0; j < shardList.size(); j++) {
                    if (shardList.get(j).getJobName().equals(jobName)) {
                        return false;
                    }
                }
            }
        }
        return true;
    }

    /**
     * Makes sure the onlined executor is present in {@code lastOnlineExecutorList} with this job in
     * its job-name list, then decides which shards of the job go into {@code shardList}:
     * <ul>
     * <li>local mode: one new local shard, if the executor is eligible (no prefer list configured, or
     * the executor is in it) and the job is enabled;</li>
     * <li>otherwise, depending on the prefer-list configuration and on whether shards of the job are
     * already running: pick shards from non-preferred executors, pick for balance, or create the
     * job's shards when none are running and the job is enabled.</li>
     * </ul>
     *
     * @return always {@code true}
     */
    @Override
    protected boolean pick(List<String> allJobs, List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
        boolean preferListIsConfigured = preferListIsConfigured(jobName); // whether a preferList is configured
        boolean useDispreferList = useDispreferList(jobName); // whether non-preferred executors may be used
        List<String> preferListConfigured = getPreferListConfigured(jobName); // the configured preferList
        boolean localMode = isLocalMode(jobName);
        int shardingTotalCount = getShardingTotalCount(jobName);
        int loadLevel = getLoadLevel(jobName);

        // Rare case: the status-created event arrives before the ip-created event.
        // So if lastOnlineExecutorList does not contain executorName, add a new Executor,
        // and register the current job in its jobNameList.
        Executor theExecutor = null;
        for (int i = 0; i < lastOnlineExecutorList.size(); i++) {
            Executor executor = lastOnlineExecutorList.get(i);
            if (executor.getExecutorName().equals(executorName)) {
                theExecutor = executor;
                break;
            }
        }
        if (theExecutor == null) {
            theExecutor = new Executor();
            theExecutor.setExecutorName(executorName);
            theExecutor.setIp(getExecutorIp());
            theExecutor.setShardList(new ArrayList<Shard>());
            theExecutor.setJobNameList(new ArrayList<String>());
            theExecutor.setTotalLoadLevel(0);
            lastOnlineExecutorList.add(theExecutor);
        }
        if (!theExecutor.getJobNameList().contains(jobName)) {
            theExecutor.getJobNameList().add(jobName);
        }

        if (localMode) {
            if (!preferListIsConfigured || preferListConfigured.contains(executorName)) {
                if (allEnableJobs.contains(jobName)) {
                    shardList.add(createLocalShard(lastOnlineExecutorList, loadLevel));
                }
            }
        } else {
            boolean hasShardRunning = hasShardRunning(lastOnlineExecutorList);
            if (preferListIsConfigured) {
                if (preferListConfigured.contains(executorName)) {
                    // Shards are running: pick all shards running on non-preferred executors, then balance.
                    if (hasShardRunning) {
                        shardList.addAll(pickShardsRunningInDispreferList(preferListConfigured, lastOnlineExecutorList));
                        pickBalance(shardList, lastOnlineExecutorList);
                    } else {
                        // No shard is running: create them, no balancing pick needed.
                        if (allEnableJobs.contains(jobName)) {
                            shardList.addAll(createUnLocalShards(shardingTotalCount, loadLevel));
                        }
                    }
                } else {
                    if (useDispreferList) {
                        // Shards running and all on non-preferred executors: balancing pick is allowed.
                        // Shards running and some on preferred executors: pick all shards running on
                        // non-preferred executors, without balancing.
                        if (hasShardRunning) {
                            boolean shardsAllRunningInDispreferList = shardsAllRunningInDispreferList(preferListConfigured, lastOnlineExecutorList);
                            if (shardsAllRunningInDispreferList) {
                                pickBalance(shardList, lastOnlineExecutorList);
                            } else {
                                shardList.addAll(pickShardsRunningInDispreferList(preferListConfigured, lastOnlineExecutorList));
                            }
                        } else {
                            // No shard is running: create them, no balancing pick needed.
                            if (allEnableJobs.contains(jobName)) {
                                shardList.addAll(createUnLocalShards(shardingTotalCount, loadLevel));
                            }
                        }
                    } else { // balancing pick is not allowed
                        // Pick all shards running on non-preferred executors.
                        shardList.addAll(pickShardsRunningInDispreferList(preferListConfigured, lastOnlineExecutorList));
                    }
                }
            } else {
                // Shards are running: balancing pick.
                if (hasShardRunning) {
                    pickBalance(shardList, lastOnlineExecutorList);
                } else {
                    // No shard is running: create them, no balancing pick needed.
                    if (allEnableJobs.contains(jobName)) {
                        shardList.addAll(createUnLocalShards(shardingTotalCount, loadLevel));
                    }
                }
            }
        }
        return true;
    }
}
/**
 * Sharding task for an executor going offline for one job: every shard of that job running on the
 * executor is taken off it; shards of a local-mode job are simply dropped instead of being picked.
 */
private class ExecuteJobServerOfflineShardingTask extends AbstractAsyncShardingTask {

    private String jobName;

    private String executorName;

    public ExecuteJobServerOfflineShardingTask(String jobName, String executorName) {
        this.jobName = jobName;
        this.executorName = executorName;
    }

    @Override
    protected void logStartInfo() {
        log.info("Execute the {}, jobName is {}, executorName is {}", this.getClass().getSimpleName(), jobName, executorName);
    }

    /**
     * Removes the job's shards from the named executor and removes the job from that executor's
     * job-name list. Removed shards are added to {@code shardList} unless the job is in local mode.
     * The executor's total load level is reduced by the load level of each removed shard, as the
     * other tasks in this class do when they take shards off an executor that stays online.
     *
     * @return always {@code true}
     */
    @Override
    protected boolean pick(List<String> allJobs, List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
        boolean localMode = isLocalMode(jobName);
        for (int i = 0; i < lastOnlineExecutorList.size(); i++) {
            Executor executor = lastOnlineExecutorList.get(i);
            if (!executor.getExecutorName().equals(executorName)) {
                continue;
            }
            Iterator<Shard> iterator = executor.getShardList().iterator();
            while (iterator.hasNext()) {
                Shard shard = iterator.next();
                if (shard.getJobName().equals(jobName)) {
                    if (!localMode) {
                        shardList.add(shard);
                    }
                    // The executor remains online, so its load must stop counting the removed shard.
                    executor.setTotalLoadLevel(executor.getTotalLoadLevel() - shard.getLoadLevel());
                    iterator.remove();
                }
            }
            executor.getJobNameList().remove(jobName);
            break; // only the first executor with a matching name is processed
        }
        return true;
    }
}
/**
 * Requests a full re-sharding. Only acts when this host holds the leadership: flags that a full
 * sharding is needed, submits an {@code ExecuteAllShardingTask}, then deletes the
 * {@code shardAllAtOnce} node if it exists (a failure of that cleanup is only logged).
 *
 * @throws Exception if the leadership check fails
 */
public void asyncShardingWhenExecutorAll() throws Exception {
    if (!isLeadership()) {
        return;
    }
    needAllSharding.set(true);
    shardingCount.incrementAndGet();
    executorService.submit(new ExecuteAllShardingTask());
    try {
        String triggerNodePath = SaturnExecutorsNode.getExecutorShardingNodePath("shardAllAtOnce");
        boolean triggerNodeExists = curatorFramework.checkExists().forPath(triggerNodePath) != null;
        if (triggerNodeExists) {
            curatorFramework.delete().deletingChildrenIfNeeded().forPath(triggerNodePath);
        }
    } catch (Exception e) {
        log.error(e.getMessage(), e);
    }
}
/**
 * Handles an executor coming online by submitting an {@code ExecuteOnlineShardingTask}; does
 * nothing when this host does not hold the leadership.
 *
 * @param executorName name of the executor that came online
 * @param ip ip passed on to the sharding task
 * @throws Exception if the leadership check fails
 */
public void asyncShardingWhenExecutorOnline(String executorName, String ip) throws Exception {
    if (!isLeadership()) {
        return;
    }
    shardingCount.incrementAndGet();
    ExecuteOnlineShardingTask onlineTask = new ExecuteOnlineShardingTask(executorName, ip);
    executorService.submit(onlineTask);
}
/**
 * Handles an executor going offline by submitting an {@code ExecuteOfflineShardingTask}; does
 * nothing when this host does not hold the leadership.
 *
 * @param executorName name of the executor that went offline
 * @throws Exception if the leadership check fails
 */
public void asyncShardingWhenExecutorOffline(String executorName) throws Exception {
    if (!isLeadership()) {
        return;
    }
    shardingCount.incrementAndGet();
    ExecuteOfflineShardingTask offlineTask = new ExecuteOfflineShardingTask(executorName);
    executorService.submit(offlineTask);
}
/**
 * Handles a job-enabled event by submitting an {@code ExecuteJobEnableShardingTask}; does nothing
 * when this host does not hold the leadership.
 *
 * @param jobName name of the job that was enabled
 * @throws Exception if the leadership check fails
 */
public void asyncShardingWhenJobEnable(String jobName) throws Exception {
    if (!isLeadership()) {
        return;
    }
    shardingCount.incrementAndGet();
    ExecuteJobEnableShardingTask enableTask = new ExecuteJobEnableShardingTask(jobName);
    executorService.submit(enableTask);
}
/**
 * Handles a job-disabled event by submitting an {@code ExecuteJobDisableShardingTask}; does nothing
 * when this host does not hold the leadership.
 *
 * @param jobName name of the job that was disabled
 * @throws Exception if the leadership check fails
 */
public void asyncShardingWhenJobDisable(String jobName) throws Exception {
    if (!isLeadership()) {
        return;
    }
    shardingCount.incrementAndGet();
    ExecuteJobDisableShardingTask disableTask = new ExecuteJobDisableShardingTask(jobName);
    executorService.submit(disableTask);
}
/**
 * Handles a forced re-shard of a job by submitting an {@code ExecuteJobForceShardShardingTask};
 * does nothing when this host does not hold the leadership.
 *
 * @param jobName name of the job to re-shard
 * @throws Exception if the leadership check fails
 */
public void asyncShardingWhenJobForceShard(String jobName) throws Exception {
    if (!isLeadership()) {
        return;
    }
    shardingCount.incrementAndGet();
    ExecuteJobForceShardShardingTask forceShardTask = new ExecuteJobForceShardShardingTask(jobName);
    executorService.submit(forceShardTask);
}
/**
 * Handles an executor coming online for a job by submitting an
 * {@code ExecuteJobServerOnlineShardingTask}; does nothing when this host does not hold the
 * leadership.
 *
 * @param jobName name of the job
 * @param executorName name of the executor that came online for the job
 * @throws Exception if the leadership check fails
 */
public void asyncShardingWhenJobServerOnline(String jobName, String executorName) throws Exception {
    if (!isLeadership()) {
        return;
    }
    shardingCount.incrementAndGet();
    ExecuteJobServerOnlineShardingTask serverOnlineTask = new ExecuteJobServerOnlineShardingTask(jobName, executorName);
    executorService.submit(serverOnlineTask);
}
/**
 * Handles an executor going offline for a job by submitting an
 * {@code ExecuteJobServerOfflineShardingTask}; does nothing when this host does not hold the
 * leadership.
 *
 * @param jobName name of the job
 * @param executorName name of the executor that went offline for the job
 * @throws Exception if the leadership check fails
 */
public void asyncShardingWhenJobServerOffline(String jobName, String executorName) throws Exception {
    if (!isLeadership()) {
        return;
    }
    shardingCount.incrementAndGet();
    ExecuteJobServerOfflineShardingTask serverOfflineTask = new ExecuteJobServerOfflineShardingTask(jobName, executorName);
    executorService.submit(serverOfflineTask);
}
/**
* Runs a leader election for this namespace.
* <p>
* While holding {@code shutdownLock} (the monitor {@link #shutdown()} also synchronizes on), a new
* {@link LeaderLatch} is started and awaited. Once {@code await()} returns, and only if the leader
* host node does not exist yet, this host records itself as leader and restarts the sharding
* pipeline: the current executor service is shut down and awaited, the sharding flags are reset, a
* new single-thread executor is created and a full sharding task is submitted.
* The latch is closed in every case before the method returns.
*
* @throws Exception any failure during the election; it is logged and rethrown
*/
public void leaderElection() throws Exception {
synchronized (shutdownLock) {
log.info("{}-{} leadership election", namespace, hostValue);
LeaderLatch leaderLatch = new LeaderLatch(curatorFramework, SaturnExecutorsNode.LEADER_LATCHNODE_PATH);
try {
leaderLatch.start();
// Blocks until await() returns; see the Curator LeaderLatch documentation for its exact contract.
leaderLatch.await();
// Another host may already have written the leader host node; in that case nothing is done here.
if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH) == null) {
// Create the $Jobs node if it does not exist yet
if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.$JOBSNODE_PATH) == null) {
curatorFramework.create().creatingParentsIfNeeded().forPath(SaturnExecutorsNode.$JOBSNODE_PATH);
}
// Record this host as leader in an ephemeral node holding hostValue (UTF-8)
curatorFramework.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL).forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH, hostValue.getBytes("UTF-8"));
// Clean up and reset state: stop the old executor before replacing it
executorService.shutdownNow();
while (!executorService.isTerminated()) { // wait until all tasks have exited
Thread.sleep(200); //NOSONARA
}
needAllSharding.set(false);
shardingCount.set(0);
executorService = newSingleThreadExecutor();
// Submit the full-sharding task
needAllSharding.set(true);
shardingCount.incrementAndGet();
executorService.submit(new ExecuteAllShardingTask());
log.info("{}-{} become leadership", namespace, hostValue);
}
} catch (Exception e) {
log.error(namespace + "-" + hostValue + " leadership election failed", e);
throw e;
} finally {
// Always close the latch; a close failure is only logged.
try {
leaderLatch.close();
} catch (IOException e) {
log.error(e.getMessage(), e);
}
}
}
}
/**
 * @return {@code true} when the leader host node currently exists
 * @throws Exception if the existence check fails
 */
private boolean hasLeadership() throws Exception {
    Object leaderNodeStat = curatorFramework.checkExists().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH);
    return leaderNodeStat != null;
}
/**
 * Runs leader elections until a leader host node exists, then checks whether that node names
 * this host.
 *
 * @return {@code true} when the leader host node's content (decoded as UTF-8) equals {@code hostValue}
 * @throws Exception if an election or reading the node fails
 */
private boolean isLeadership() throws Exception {
    while (!hasLeadership()) {
        leaderElection();
    }
    byte[] leaderData = curatorFramework.getData().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH);
    String leaderHost = new String(leaderData, "UTF-8");
    return leaderHost.equals(hostValue);
}
/**
 * Checks whether this host is the recorded leader without triggering an election.
 *
 * @return {@code false} when no leader host node exists; otherwise whether the node's content
 *         (decoded as UTF-8) equals {@code hostValue}
 * @throws Exception if checking or reading the node fails
 */
private boolean isLeadershipOnly() throws Exception {
    if (!hasLeadership()) {
        return false;
    }
    byte[] leaderData = curatorFramework.getData().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH);
    String leaderHost = new String(leaderData, "UTF-8");
    return leaderHost.equals(hostValue);
}
/**
 * Deletes the leader host node, but only when it currently names this host.
 *
 * @throws Exception if the leadership check or the deletion fails
 */
private void releaseMyLeadership() throws Exception {
    if (!isLeadershipOnly()) {
        return;
    }
    curatorFramework.delete().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH);
}
/**
 * Shuts this service down while holding {@code shutdownLock}: releases this host's leadership if
 * the ZooKeeper client is connected (a failure is only logged), then calls {@code shutdownNow()}
 * on the executor service if there is one.
 */
public void shutdown() {
    synchronized (shutdownLock) {
        try {
            boolean zkConnected = curatorFramework.getZookeeperClient().isConnected();
            if (zkConnected) {
                releaseMyLeadership();
            }
        } catch (Exception e) {
            log.error("delete leadership failed", e);
        }
        if (executorService == null) {
            return;
        }
        executorService.shutdownNow();
    }
}
/**
 * @return the sharding content service created for this namespace in the constructor
 */
public NamespaceShardingContentService getNamespaceShardingContentService() {
    return this.namespaceShardingContentService;
}
}