/*
* Copyright(C) 2010-2011 Alibaba Group Holding Limited All rights reserved. Licensed under the Apache License, Version
* 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the specific language governing permissions and limitations
* under the License.
*/
package com.alibaba.doris.dataserver.migrator;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.concurrent.locks.ReentrantLock;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.doris.common.MigrateTypeEnum;
import com.alibaba.doris.common.config.ConfigManager;
import com.alibaba.doris.common.configer.RouteTableConfiger;
import com.alibaba.doris.common.migrate.MigrateStatusReport;
import com.alibaba.doris.common.migrate.MigrateSubCommand;
import com.alibaba.doris.common.migrate.NodeMigrateStatus;
import com.alibaba.doris.common.route.VirtualRouter;
import com.alibaba.doris.dataserver.migrator.action.MigrationActionData;
import com.alibaba.doris.dataserver.migrator.connection.ConnectionManager;
import com.alibaba.doris.dataserver.migrator.connection.MigrationConnectionManager;
import com.alibaba.doris.dataserver.migrator.event.DefaultMigrationListener;
import com.alibaba.doris.dataserver.migrator.event.MigrationEvent;
import com.alibaba.doris.dataserver.migrator.event.MigrationListener;
import com.alibaba.doris.dataserver.migrator.report.MigrationReporter;
import com.alibaba.doris.dataserver.migrator.task.BaseMigrationTask;
import com.alibaba.doris.dataserver.migrator.task.MigrationTask;
import com.alibaba.doris.dataserver.migrator.task.MigrationTaskFactory;
import com.alibaba.doris.dataserver.migrator.task.MigrationThreadFactory;
import com.alibaba.doris.dataserver.store.Storage;
import com.alibaba.fastjson.JSON;
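/**
 * MigrationManager.
 * <p>
 * Migration lifecycle stages: 1. Migration starts (or cannot be started). 2. Node migration finishes (or is
 * cancelled / fails). 3. Cluster-wide migration finishes. 4. Node data cleaning. 5. Node data cleaning finished.
 *
 * @author Kun He (Raymond He), kun.hek@alibaba-inc.com
 * @since 1.0 2011-5-25
 */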
public class MigrationManager implements MigrationListener {
// Logger for this class.
private static final Logger logger = LoggerFactory.getLogger(MigrationManager.class);
// Name constant for this manager. NOTE(review): presumably used as a lookup key by callers - confirm.
public final static String _MigrationManager = "_MigrationManager";
// Eagerly created shared instance returned by getInstance().
private static MigrationManager manager = new MigrationManager();
// Factory that builds migration tasks and data-clean tasks for this manager.
private MigrationTaskFactory taskFactory = new MigrationTaskFactory();
// Original note: storage for the migrations that are in progress or not yet completed.
// NOTE(review): that description seems to fit migrationTaskScheduler rather than the executor - confirm.
// Cached thread pool (threads created by MigrationThreadFactory) that runs the submitted tasks.
protected volatile Executor executor = Executors.newCachedThreadPool(new MigrationThreadFactory());
// Configured number of migration threads; only stored and exposed through its getter/setter in this class.
protected int migrateThreads;
// Registry of migration tasks (add / lookup / cancel / remove).
protected volatile MigrationTaskScheduler migrationTaskScheduler = new MigrationTaskScheduler();
// Other fixed (injected) settings.
private int port; // Port of this data server.
protected RouteTableConfiger routeTableConfiger;
protected ConfigManager configManager;
protected MigrationReporter migrationReporter;
protected VirtualRouter virtualRouter;
protected Storage storage;
// Serializes the control commands (start / cancel / all-finish / query status).
private ReentrantLock controlLock = new ReentrantLock();
/**
 * Result messages returned by the migration command handlers of {@link MigrationManager}.
 * <p>
 * Every value starts with the token {@code OK}; several handlers append extra details (migrate type, current
 * status) after the constant.
 *
 * @author Kun He (Raymond He), kun.hek@alibaba-inc.com
 * @since 1.0 2011-5-31
 */
public static class Message {
// NOTE(review): this "start task error" constant carries exactly the same text as _MIGRATION_NEW_TASK below,
// which looks like a copy/paste slip. It is left untouched because the expected wire value cannot be
// determined from this file - confirm with the command consumers before changing it.
public static final String _MIGRATION_START_TASK_ERROR = "OK MIGRATION_NEW_TASK";
public static final String _MIGRATION_NEW_TASK = "OK MIGRATION_NEW_TASK";
public static final String _MIGRATION_REPLACE_TASK = "OK MIGRATION_REPLACE_TASK";
public static final String _MIGRATION_TASK_CANCEL = "OK MIGRATION_TASK_CANCEL";
public static final String _MIGRATION_NO_TASK_TO_CANCEL = "OK MIGRATION_NO_TASK_TO_CANCEL";
public static final String _MIGRATION_TASK_CANT_CANCEL = "OK MIGRATION_TASK_CANT_CANCEL";
public static final String _MIGRATION_SAME_TASK_EXISTS = "OK MIGRATION_SAME_TASK_EXISTS";
public static final String _MIGRATION_PRIOR_TASK_EXISTS = "OK MIGRATION_PRIOR_TASK_EXISTS";
public static final String _MIGRATION_ALL_FINISHED = "OK MIGRATION_ALL_FINISHED";
/* Data-clean related messages. */
public static final String _MIGRATION_DATA_CLEANING = "OK MIGRATION_DATA_CLEANING";
public static final String _MIGRATION_DATA_CLEAN_FINISHED = "OK MIGRATION_DATA_CLEAN_FINISHED";
public static final String _MIGRATION_INVALID_ALL_FINISHED = "OK MIGRATION_INVALID_ALL_FINISHED";
public static final String _MIGRATION_NO_TASK_RUNNING = "OK MIGRATION_NO_TASK_RUNNING";
public final static String _DATACLEAN_START = "OK DATACLEAN_START";
}
/**
 * Returns the shared, eagerly created {@code MigrationManager} instance.
 *
 * @return the instance held in the static {@code manager} field
 */
public static MigrationManager getInstance() {
return manager;
}
/**
 * @return the scheduler that tracks this manager's migration tasks
 */
public MigrationTaskScheduler getMigrationTaskScheduler() {
return migrationTaskScheduler;
}
/**
 * @return the configured number of migration threads
 */
public int getMigrateThreads() {
return migrateThreads;
}
/**
 * @param migrateThreads the number of migration threads to store
 */
public void setMigrateThreads(int migrateThreads) {
this.migrateThreads = migrateThreads;
}
/**
 * Reports whether this node currently holds a task that counts as migrating data.
 * <p>
 * The decision starts from the scheduler's last task. For a {@code TEMP_FAILOVER} last task every active task is
 * inspected, because several temp-failover tasks may exist at the same time; for any other type only the last
 * task itself is inspected.
 *
 * @return {@code true} if an inspected task is in one of the states accepted by
 *         {@link #isMigratingData(MigrationTask)}; {@code false} otherwise, including when there is no last task
 */
public boolean haveMigrationTask() {
    MigrationTask latest = migrationTaskScheduler.getLastTask();
    if (latest == null) {
        return false;
    }
    if (latest.getMigrateType() != MigrateTypeEnum.TEMP_FAILOVER) {
        return isMigratingData(latest);
    }
    // Deciding from the last task relies on two facts noted by the original author: only temporary nodes own
    // temp-failover tasks, and temp failover may run several tasks concurrently.
    Map<String, MigrationTask> activeTasks = migrationTaskScheduler.getActiveTaskMap();
    for (MigrationTask candidate : activeTasks.values()) {
        if (candidate != null && isMigratingData(candidate)) {
            return true;
        }
    }
    return false;
}
/**
 * Decides whether the given task is in a state that this manager treats as "really migrating data".
 *
 * @param task the task whose migrate status is examined
 * @return {@code true} for MIGRATING, MIGRATE_NODE_FINISHED, MIGRATE_ALL_FINISHED, DATACLEANING,
 *         DATACLEAN_FINISH, CANCELLED and CANCELLING; {@code false} for every other status
 */
private boolean isMigratingData(MigrationTask task) {
    final boolean migrating;
    switch (task.getMigrateStatus()) {
        case MIGRATING:
        case MIGRATE_NODE_FINISHED:
        case MIGRATE_ALL_FINISHED:
        case DATACLEANING:
        case DATACLEAN_FINISH:
        case CANCELLED:
        case CANCELLING:
            migrating = true;
            break;
        default:
            migrating = false;
            break;
    }
    return migrating;
}
/**
 * Starts a migration.
 * <p>
 * Holds the control lock while the actual decision logic in {@code startMigrate0} runs.
 *
 * @param migrationActionData the migration command data
 * @return the result message produced by {@code startMigrate0}
 */
public String startMigrate(MigrationActionData migrationActionData) {
    final ReentrantLock lock = controlLock;
    lock.lock();
    try {
        return startMigrate0(migrationActionData);
    } finally {
        lock.unlock();
    }
}
/**
 * Decides what to do with a newly received migration command and, where allowed, starts the new task.
 * <p>
 * Finished tasks are purged first and the new task is created. Then:
 * <ul>
 * <li>no active task: the new task is started;</li>
 * <li>the new task's priority value is lower than the last task's: the last task is cancelled and replaced;</li>
 * <li>the new task's priority value is higher: the command is rejected;</li>
 * <li>equal priority, both {@code TEMP_FAILOVER}: a task with a new key is started alongside the others; a task
 * whose key already exists is replaced only when the existing one is in {@code MIGRATE_NODE_FINISHED},
 * otherwise it is rejected as a duplicate;</li>
 * <li>equal priority, any other type: rejected as a duplicate.</li>
 * </ul>
 * NOTE(review): the "new temp-failover task" path does not call {@code setSuccess(true)}, and the last task
 * returned by the scheduler is dereferenced without a null check - both are kept exactly as in the original.
 *
 * @param migrationActionData the migration command data
 * @return one of the {@link Message} result strings
 */
private String startMigrate0(MigrationActionData migrationActionData) {
    MigrateSubCommand command = migrationActionData.getSubcommand();
    if (logger.isDebugEnabled()) {
        logger.debug("Receive new migration task, type " + command + ", route:" + migrationActionData);
    }
    migrationTaskScheduler.checkAndTerminateFinishedTask();
    BaseMigrationTask candidate = taskFactory.createTask(this, migrationActionData);

    // Nothing is running: start straight away.
    if (!migrationTaskScheduler.hasActiveTask()) {
        if (logger.isDebugEnabled()) {
            logger.debug("There is no active migration task. Preprare to start new one." + candidate);
        }
        String started = startTask0(migrationActionData, candidate);
        migrationActionData.setSuccess(true);
        if (logger.isDebugEnabled()) {
            logger.debug("Migration task started. " + candidate.getTaskName() + ". " + candidate);
        }
        return started;
    }

    // Something is running: compare priorities (a smaller value wins).
    MigrationTask running = migrationTaskScheduler.getLastTask();
    int candidatePriority = candidate.getMigrateType().getPriority();
    int runningPriority = running.getMigrateType().getPriority();

    if (candidatePriority > runningPriority) {
        if (logger.isDebugEnabled()) {
            logger.debug("An prior active migration task exists. New command ir rejected! " + running);
        }
        return Message._MIGRATION_PRIOR_TASK_EXISTS;
    }

    if (candidatePriority < runningPriority) {
        if (logger.isDebugEnabled()) {
            logger.debug("An active migration task exists, the active will cancel and new will start.Active Task: "
                         + running + ", New Task:" + candidate);
        }
        return cancelTaskAndStartNewOne(migrationActionData, running, candidate);
    }

    // Equal priority from here on.
    boolean bothTempFailover = running.getMigrateType() == MigrateTypeEnum.TEMP_FAILOVER
                               && candidate.getMigrateType() == MigrateTypeEnum.TEMP_FAILOVER;
    if (!bothTempFailover) {
        if (logger.isDebugEnabled()) {
            logger.debug("An same prior migration task exists. Ignore new one. ActiveTask:"
                         + running + ", newTask:" + candidate);
        }
        return Message._MIGRATION_SAME_TASK_EXISTS;
    }

    // Temp-failover tasks may run side by side; look for one with the same key so that a repeated start
    // command does not spawn a duplicate task.
    MigrationTask duplicate = migrationTaskScheduler.getTask(candidate.getTaskKey());
    if (duplicate == null) {
        // A genuinely new temp-failover task.
        if (logger.isDebugEnabled()) {
            logger.debug("Start a new migration task. newTask:" + candidate);
        }
        startTask0(migrationActionData, candidate);
        return Message._MIGRATION_NEW_TASK;
    }

    // The existing task is waiting for data cleaning: a fresh start command cancels it and starts over.
    if (duplicate.getMigrateStatus() == NodeMigrateStatus.MIGRATE_NODE_FINISHED) {
        return cancelTaskAndStartNewOne(migrationActionData, duplicate, candidate);
    }

    // Otherwise only log that the task already exists and reject the command.
    if (logger.isDebugEnabled()) {
        logger.debug("A Tempfailover migration task exists. And same migration route is request. It's rejected!. ActiveTask:"
                     + duplicate + ",newTask:" + candidate);
    }
    return Message._MIGRATION_SAME_TASK_EXISTS;
}
/**
 * Cancels the given active task, starts the new one in its place and marks the command as successful.
 *
 * @param migrationActionData the migration command data; its success flag is set to {@code true}
 * @param activeTask the task to cancel (cast to {@link BaseMigrationTask})
 * @param newTask the replacement task to start
 * @return {@link Message#_MIGRATION_REPLACE_TASK}
 */
private String cancelTaskAndStartNewOne(MigrationActionData migrationActionData, MigrationTask activeTask,
                                        BaseMigrationTask newTask) {
    // Cancel the current task first.
    cancelActiveTask((BaseMigrationTask) activeTask);
    if (logger.isDebugEnabled()) {
        logger.debug("Cancel active migration task." + activeTask);
    }

    startTask0(migrationActionData, newTask);
    migrationActionData.setSuccess(true);
    return Message._MIGRATION_REPLACE_TASK;
}
/**
 * Wires up the listeners of a new task, prepares it and hands it to the executor.
 * <p>
 * Order of the start-up steps:
 * <ol>
 * <li>prepare the task (its connections) before the real migration starts;</li>
 * <li>register the task with the scheduler so that the front-end proxy takes effect;</li>
 * <li>submit the task to the executor, which performs the actual migration.</li>
 * </ol>
 *
 * @param migrationActionData the migration command data (not read by this method)
 * @param newTask the task to start
 * @return {@link Message#_MIGRATION_NEW_TASK}
 * @throws RuntimeException if {@code newTask.prepareTask()} returns {@code false}
 */
private String startTask0(MigrationActionData migrationActionData, BaseMigrationTask newTask) {
    newTask.setProgress(0);

    MigrationListener reporter = new DefaultMigrationListener();
    reporter.setMigrationManager(this);
    newTask.addListener(this); // this manager listens to the task itself
    newTask.addListener(reporter); // reporting listener

    if (!newTask.prepareTask()) {
        throw new RuntimeException("Prepare task connection failed!");
    }

    migrationTaskScheduler.addMigrationTask(newTask);
    executor.execute(newTask);
    if (logger.isDebugEnabled()) {
        logger.debug("Start new migration task." + newTask);
    }
    return Message._MIGRATION_NEW_TASK;
}
/**
 * Cancels the migration task that matches the given command data.
 * <p>
 * Runs under the control lock.
 *
 * @param actionData the command data used to look the task up in the scheduler
 * @return {@link Message#_MIGRATION_TASK_CANCEL} if a task was found and {@code cancel()} was invoked on it,
 *         {@link Message#_MIGRATION_NO_TASK_TO_CANCEL} if no task matches
 */
public String cancelMigrate(MigrationActionData actionData) {
    controlLock.lock();
    try {
        MigrationTask target = migrationTaskScheduler.getTask(actionData);
        if (target == null) {
            return Message._MIGRATION_NO_TASK_TO_CANCEL;
        }
        target.cancel();
        return Message._MIGRATION_TASK_CANCEL;
    } finally {
        controlLock.unlock();
    }
}
/**
 * Cancels a single task through the scheduler.
 * <p>
 * The task's status is sampled before the cancel request. When the cancel succeeds and that earlier status was
 * {@code MIGRATE_NODE_FINISHED}, {@code dataCleanStart()} is invoked on the task.
 * NOTE(review): the original comment describes this call as telling a task that is waiting to delete data to
 * give up the deletion - confirm against {@code BaseMigrationTask.dataCleanStart()}.
 *
 * @param task the task to cancel
 * @return {@link Message#_MIGRATION_TASK_CANCEL} followed by a space and the migrate type when the scheduler
 *         reports success, {@link Message#_MIGRATION_NO_TASK_TO_CANCEL} otherwise
 */
private String cancelActiveTask(BaseMigrationTask task) {
    NodeMigrateStatus statusBeforeCancel = task.getMigrateStatus();
    if (!migrationTaskScheduler.cancelTask(task)) {
        return Message._MIGRATION_NO_TASK_TO_CANCEL;
    }

    String outcome = Message._MIGRATION_TASK_CANCEL + " " + task.getMigrateType();
    if (statusBeforeCancel == NodeMigrateStatus.MIGRATE_NODE_FINISHED) {
        task.dataCleanStart();
    }
    return outcome;
}
/**
 * Handles the "all finished" notification that the AdminServer sends once every node involved in one logical
 * migration has finished.
 * <p>
 * Holds the control lock while {@code allFinishMigrate0} does the work.
 *
 * @param migrationActionData the command data
 * @return the result message produced by {@code allFinishMigrate0}
 */
public String allFinishMigrate(MigrationActionData migrationActionData) {
    controlLock.lock();
    try {
        String outcome = allFinishMigrate0(migrationActionData);
        return outcome;
    } finally {
        controlLock.unlock();
    }
}
/**
 * Processes a migration "All Finished" command.
 * <p>
 * The task is looked up by the command data. For an {@code EXPANSION} command without a matching task the
 * scheduler's last task is used instead. A task in {@code MIGRATE_NODE_FINISHED} gets {@code allFinish()}
 * invoked (which, per the original note, starts the data cleaning internally); a task in any other status is
 * reported as an invalid all-finished request.
 * <p>
 * When no task can be resolved at all, {@link Message#_MIGRATION_NO_TASK_RUNNING} is returned instead of
 * dereferencing a {@code null} task (the previous code threw a {@code NullPointerException} in that case).
 * The success flag is set on every path, mirroring the existing handling of the invalid-status case.
 *
 * @param migrationActionData the command data; its success flag is set to {@code true}
 * @return the result message, followed by the migrate type (and the current status for an invalid request)
 */
private String allFinishMigrate0(MigrationActionData migrationActionData) {
    MigrationTask task = migrationTaskScheduler.getTask(migrationActionData);
    if (logger.isInfoEnabled()) {
        logger.info("Receive allFinishMigrate. Related Task: " + task);
        logger.info("Receive allFinishMigrate. Related Route: " + migrationActionData);
    }

    MigrateTypeEnum migrateType = migrationActionData.getSubcommand().getMigrateType();
    if (task == null && migrateType == MigrateTypeEnum.EXPANSION) {
        task = migrationTaskScheduler.getLastTask();
        // The scheduler may have no last task either, so the route is only read from a non-null task.
        logger.info("Related Task is null, get current lastTask instead:" + task + ", Route:"
                    + (task == null ? null : task.getMigrationActionData()));
    }

    String retMsg;
    if (task == null) {
        // Nothing to finish on this node.
        retMsg = Message._MIGRATION_NO_TASK_RUNNING + " " + migrateType;
    } else if (task.getMigrateStatus() == NodeMigrateStatus.MIGRATE_NODE_FINISHED) {
        // Node migration is done; finishing the task starts the data cleaning internally.
        task.allFinish();
        retMsg = Message._MIGRATION_ALL_FINISHED + " " + task.getMigrateType();
    } else {
        retMsg = Message._MIGRATION_INVALID_ALL_FINISHED + " " + task.getMigrateType() + " currentStatus:"
                 + task.getMigrateStatus();
    }
    migrationActionData.setSuccess(true);
    return retMsg;
}
/**
 * Forces a configuration refresh and checks whether a newer route table was obtained.
 * <p>
 * The route table version is read before and after {@code configManager.refreshConfig()}. Only a strictly
 * greater version counts as "latest"; an unchanged or lower version is logged as a warning. Any exception
 * raised during the refresh is logged and treated as "no newer route table".
 *
 * @return {@code true} if the route table version increased, {@code false} otherwise (including on failure)
 */
public boolean refreshRouteTable() {
    boolean hasLatestRouteTable = false;
    try {
        long oldRouteTableVersion = routeTableConfiger.getConfigVersion();
        if (logger.isInfoEnabled()) {
            logger.info("Try to fetch lastest route config. oldRouteTableVersion: " + oldRouteTableVersion);
        }

        configManager.refreshConfig();

        long newRouteTableVersion = routeTableConfiger.getConfigVersion();
        if (newRouteTableVersion > oldRouteTableVersion) {
            hasLatestRouteTable = true;
            // The former nested "old version == new version" branch could never run inside this condition,
            // so only the reachable success message remains.
            if (logger.isInfoEnabled()) {
                logger.info("Succeed to fetch lastest route config. newRouteTableVersion: "
                            + newRouteTableVersion);
            }
        } else {
            logger.warn("Migration all finish route refresh warning: old version:" + oldRouteTableVersion
                        + ", new version:" + newRouteTableVersion);
        }
    } catch (Exception e) {
        logger.error("Fail to fetch lastest route config after migration all finish! " + e, e);
    }
    return hasLatestRouteTable;
}
/**
 * Starts a data-clean task for the given command data unless a task for it is already registered.
 * <p>
 * The new task gets this manager and a {@link DefaultMigrationListener} as listeners, the route table is
 * refreshed, and the task is then registered with the scheduler <em>before</em> it is handed to the executor.
 * Registering first matches {@code startTask0} and ensures the task is known to the scheduler by the time it
 * runs: the executor may run it on another thread immediately, and a task that exits triggers
 * {@code onExitMigrationTask}, which removes it from the scheduler.
 *
 * @param actionData the command data identifying the data to clean
 * @return {@link Message#_DATACLEAN_START}, whether or not a new task was started
 */
public String dataClean(MigrationActionData actionData) {
    MigrationTask activeTask = migrationTaskScheduler.getTask(actionData);
    if (activeTask != null) {
        if (logger.isDebugEnabled()) {
            logger.debug("Active dataClean task already exists. Ignore this command. " + activeTask);
        }
        return Message._DATACLEAN_START;
    }

    BaseMigrationTask cleanTask = (BaseMigrationTask) taskFactory.createDataCleanTask(this, actionData);
    MigrationListener reportListener = new DefaultMigrationListener();
    reportListener.setMigrationManager(this);
    cleanTask.addListener(this); // this manager listens to the task itself
    cleanTask.addListener(reportListener); // reporting listener

    boolean hasLatestRouteTable = refreshRouteTable();
    if (logger.isInfoEnabled()) {
        logger.info("Before data clean start, refresh route table: . " + hasLatestRouteTable);
    }

    // Register first, then execute, so a quickly finishing task cannot be removed before it was added.
    migrationTaskScheduler.addMigrationTask(cleanTask);
    executor.execute(cleanTask);
    return Message._DATACLEAN_START;
}
/**
 * Marks the migration task matching the command data as finished, reports the completion and removes the task
 * from the scheduler.
 * <p>
 * The scheduler removal runs in a {@code finally} block, so it happens even if reporting fails. When no task
 * matches, nothing is reported; the removal call is still made with the {@code null} lookup result, exactly as
 * before. The debug log no longer dereferences a missing task.
 *
 * @param actionData the command data used to look the task up
 */
public void completeAndClearMigration(MigrationActionData actionData) {
    MigrationTask task = migrationTaskScheduler.getTask(actionData);
    if (logger.isDebugEnabled()) {
        // The task may be absent; only read its key when it exists.
        logger.debug("Complete migration task and try to clearMigrationStatus. Task key:"
                     + (task == null ? null : task.getTaskKey()));
    }
    try {
        if (task != null) {
            if (logger.isDebugEnabled()) {
                logger.debug("Set status to finish=true , taskKey=" + task.getTaskKey());
            }
            task.setFinish(true);
            reportMigrationComplete((BaseMigrationTask) task);
        }
    } finally {
        migrationTaskScheduler.removeTask(task);
    }
}
/**
 * Reports that the two-phase migration of the given task is complete.
 * <p>
 * The task is switched to {@code MIGRATE_ALL_FINISHED}, an event describing it is sent through the migration
 * reporter, and afterwards the task is reset to progress {@code -1} and status {@code NORMAL}.
 *
 * @param task the task whose completion is reported
 */
protected void reportMigrationComplete(BaseMigrationTask task) {
    task.setMigrateStatus(NodeMigrateStatus.MIGRATE_ALL_FINISHED);

    // Describe the finished task in an event.
    MigrationEvent completion = new MigrationEvent();
    completion.setMigrateType(task.getMigrateType());
    completion.setProgress(task.getProgress());
    completion.setMigrateStatus(task.getMigrateStatus());
    completion.setMigrateRoute(task.getMigrationActionData().getMigrationRoutePairs());
    completion.setServerPort(port);
    completion.setMigrationTask(task);
    String summary = "Overall Data Migration Complete. Node port: " + port;
    completion.setMessage(summary);
    migrationReporter.report(completion);

    // Back to the idle state once the report has been sent.
    task.setProgress(-1);
    task.setMigrateStatus(NodeMigrateStatus.NORMAL);
}
/**
 * Queries the migration status.
 * <p>
 * Runs under the control lock and first lets the scheduler terminate finished tasks.
 * <ul>
 * <li>Without command data (or without route pairs) the overall status of this data server is reported, based
 * on the scheduler's last task while any task is active.</li>
 * <li>With route pairs the status of the matching task is reported, and the result is also stored on
 * {@code actionData} together with a success flag.</li>
 * </ul>
 * Whenever no task is available the report carries type {@code NONE}, status {@code NORMAL} and progress
 * {@code -1}. This also covers the case where the scheduler reports active tasks but yields no last task,
 * which previously ended in a {@code NullPointerException}.
 *
 * @param actionData the command data; may be {@code null} to query the overall status
 * @return {@code "OK "} followed by the JSON form of the status report
 */
public String queryStatus(MigrationActionData actionData) {
    controlLock.lock();
    try {
        migrationTaskScheduler.checkAndTerminateFinishedTask();

        if (actionData == null || actionData.getMigrationRoutePairs() == null) {
            // No specific route requested: report the overall state of this data server. As long as one task
            // is executing, the server counts as migrating.
            MigrationTask lastTask = migrationTaskScheduler.hasActiveTask()
                ? migrationTaskScheduler.getLastTask()
                : null;
            if (logger.isDebugEnabled()) {
                if (lastTask != null) {
                    logger.debug("Query Status: Last task: " + lastTask);
                } else {
                    logger.debug("Query Status: no active task: Status:" + NodeMigrateStatus.NORMAL);
                }
            }
            return "OK " + JSON.toJSONString(buildStatusReport(lastTask));
        }

        // A specific route was requested: report the state of the matching task, if it exists.
        MigrationTask task = migrationTaskScheduler.getTask(actionData);
        String retMsg = "OK " + JSON.toJSONString(buildStatusReport(task));
        if (logger.isDebugEnabled()) {
            logger.debug("Migration.manager Query migrate status. Result:" + retMsg);
        }
        actionData.setReturnMessage(retMsg);
        actionData.setSuccess(true);
        return retMsg;
    } finally {
        controlLock.unlock();
    }
}

/**
 * Builds the status report for a task, or the idle report (NONE / NORMAL / -1) when there is no task.
 *
 * @param task the task to describe; may be {@code null}
 * @return a freshly populated report
 */
private MigrateStatusReport buildStatusReport(MigrationTask task) {
    MigrateStatusReport report = new MigrateStatusReport();
    if (task == null) {
        report.setMigrateType(MigrateTypeEnum.NONE.toString());
        report.setStatus(NodeMigrateStatus.NORMAL.toString());
        report.setProgress(-1);
        return report;
    }
    report.setMigrateType(task.getMigrateType().toString());
    report.setStatus(task.getMigrateStatus().toString());
    report.setProgress(task.getProgress());
    report.setStartTime(task.getStartTime());
    report.setElapseTime(System.currentTimeMillis() - task.getStartTime());
    return report;
}
/**
 * @param migrationReporter the reporter used to publish migration events
 */
public void setMigrationReporter(MigrationReporter migrationReporter) {
this.migrationReporter = migrationReporter;
}
/**
 * @return the reporter used to publish migration events
 */
public MigrationReporter getMigrationReporter() {
return migrationReporter;
}
/**
 * Creates a connection manager for migration.
 *
 * @return a new {@link MigrationConnectionManager} on every call (nothing is cached here)
 */
public ConnectionManager getMigrationConnectionManager() {
return new MigrationConnectionManager();
}
/**
 * @param port the port of this data server
 */
public void setPort(int port) {
this.port = port;
}
/**
 * @return the port of this data server
 */
public int getPort() {
return port;
}
/**
 * @param virtualRouter the virtual router to store
 */
public void setVirtualRouter(VirtualRouter virtualRouter) {
this.virtualRouter = virtualRouter;
}
/**
 * @return the stored virtual router
 */
public VirtualRouter getVirtualRouter() {
return virtualRouter;
}
/**
 * @param storage the storage to store
 */
public void setStorage(Storage storage) {
this.storage = storage;
}
/**
 * @return the stored storage
 */
public Storage getStorage() {
return storage;
}
/**
 * @return the simple class name of this listener's runtime class
 */
public String getMigrationListenerName() {
return this.getClass().getSimpleName();
}
// The following listener callbacks are intentionally empty in this class.
public void onMigrationStart(MigrationEvent event) {
}
public void onMigraionProcessing(MigrationEvent event) {
}
public void onMigrationAllFinished(MigrationEvent event) {
}
public void onMigrationCancelled(MigrationEvent event) {
}
public void onMigrationNodeFinished(MigrationEvent event) {
}
public void onDataCleanStart(MigrationEvent event) {
}
public void onDataCleanProcessing(MigrationEvent event) {
}
public void onDataCleanError(MigrationEvent event) {
}
/**
 * Only writes a debug log entry when data cleaning finishes.
 */
public void onDataCleanFinish(MigrationEvent event) {
if (logger.isDebugEnabled()) {
logger.debug(" onDataCleanFinish ");
}
}
/**
 * Removes the task carried by the event from the scheduler when a migration task exits.
 * Note that this callback does not take the control lock.
 */
public void onExitMigrationTask(MigrationEvent event) {
if (logger.isDebugEnabled()) {
logger.debug(" onExitMigrationTask ");
}
migrationTaskScheduler.removeTask(event.getMigrationTask());
}
// Intentionally empty.
public void onMigrationFail(MigrationEvent event) {
}
/**
 * @return this manager itself
 */
public MigrationManager getMigrationManager() {
return this;
}
/**
 * Ignores the argument: this object is itself the manager.
 */
public void setMigrationManager(MigrationManager migrationManager) {
// do nothing.
}
/**
 * @param configManager the config manager used by refreshRouteTable() to refresh the configuration
 */
public void setConfigManager(ConfigManager configManager) {
this.configManager = configManager;
}
/**
 * @param routeTableConfiger the configer whose config version is read by refreshRouteTable()
 */
public void setRouteTableConfiger(RouteTableConfiger routeTableConfiger) {
this.routeTableConfiger = routeTableConfiger;
}
}