/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alibaba.jstorm.daemon.supervisor;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.jstorm.callback.RunnableCallback;
import com.alibaba.jstorm.client.ConfigExtension;
import com.alibaba.jstorm.cluster.StormConfig;
import com.alibaba.jstorm.daemon.worker.ProcessSimulator;
import com.alibaba.jstorm.utils.JStormUtils;
import com.alibaba.jstorm.utils.PathUtils;
import com.alibaba.jstorm.utils.TimeUtils;
/**
* @author Johnfang (xiaojian.fxj@alibaba-inc.com)
*/
public class ShutdownWork extends RunnableCallback {
    private static final Logger LOG = LoggerFactory.getLogger(ShutdownWork.class);

    /**
     * Shuts down all given workers in two phases: first a soft kill (or killing the
     * simulated in-process worker in local mode), then — after each worker's cleanup
     * timeout has elapsed — a forced kill of any remaining pids plus removal of the
     * worker's local directories and cgroup.
     *
     * @param conf storm conf
     * @param supervisorId supervisor id
     * @param workerIdToTopology worker to topology to be removed
     * @param workerThreadPids worker to pid, used in local mode only
     * @param cgroupManager cgroup manager (if used)
     * @param block whether to block for the cleanup timeout before force-killing
     * @param killingWorkers workerId -> time (secs) the kill was initiated; entries
     *                       are removed once a worker is fully cleaned up
     * @param taskCleanupTimeoutMap per-topology cleanup timeout in seconds (nullable)
     */
    @SuppressWarnings("unused")
    public void shutWorker(Map conf,
                           String supervisorId,
                           Map<String, String> workerIdToTopology,
                           ConcurrentHashMap<String, String> workerThreadPids,
                           CgroupManager cgroupManager,
                           boolean block,
                           Map<String, Integer> killingWorkers,
                           Map<String, Integer> taskCleanupTimeoutMap) {
        Map<String, List<String>> workerId2Pids = new HashMap<>();
        boolean localMode = false;
        // longest cleanup timeout (in seconds) among the workers being killed
        int maxWaitTime = 0;

        if (killingWorkers == null)
            killingWorkers = new HashMap<>();

        // Phase 1: record kill start time and send the initial (soft) kill signal.
        for (Entry<String, String> entry : workerIdToTopology.entrySet()) {
            String workerId = entry.getKey();
            String topologyId = entry.getValue();

            List<String> pids;
            try {
                pids = getPid(conf, workerId);
            } catch (IOException e1) {
                pids = Lists.newArrayList();
                // keep the cause so the failure is diagnosable
                LOG.error("Failed to get pid for " + workerId + " of " + topologyId, e1);
            }
            workerId2Pids.put(workerId, pids);

            // only initiate the kill once per worker; later calls just re-check progress
            if (killingWorkers.get(workerId) == null) {
                killingWorkers.put(workerId, TimeUtils.current_time_secs());
                LOG.info("Begin to shut down " + topologyId + ":" + workerId);
                try {
                    String threadPid = workerThreadPids.get(workerId);

                    // local mode: worker runs as an in-process thread, not an OS process
                    if (threadPid != null) {
                        ProcessSimulator.killProcess(threadPid);
                        localMode = true;
                        continue;
                    }

                    for (String pid : pids) {
                        JStormUtils.process_killed(Integer.parseInt(pid));
                    }

                    maxWaitTime = Math.max(maxWaitTime,
                            getCleanupTimeoutSecs(conf, taskCleanupTimeoutMap, topologyId));
                } catch (Exception e) {
                    LOG.info("Failed to shutdown ", e);
                }
            }
        }

        if (block) {
            // maxWaitTime is in seconds; sleepMs expects milliseconds
            JStormUtils.sleepMs(maxWaitTime * 1000L);
        }

        // Phase 2: once a worker's timeout has elapsed, force-kill and clean up.
        for (Entry<String, String> entry : workerIdToTopology.entrySet()) {
            String workerId = entry.getKey();
            String topologyId = entry.getValue();
            List<String> pids = workerId2Pids.get(workerId);

            int cleanupTimeout = getCleanupTimeoutSecs(conf, taskCleanupTimeoutMap, topologyId);

            int initCleanupTime = killingWorkers.get(workerId);
            if (TimeUtils.current_time_secs() - initCleanupTime > cleanupTimeout) {
                if (!localMode) {
                    for (String pid : pids) {
                        JStormUtils.ensure_process_killed(Integer.parseInt(pid));
                    }
                    // cgroup cleanup is per-worker, not per-pid: do it once, and even
                    // when the pid list turned out to be empty
                    if (cgroupManager != null) {
                        cgroupManager.shutDownWorker(workerId, true);
                    }
                }

                tryCleanupWorkerDir(conf, workerId);
                LOG.info("Successfully shut down " + workerId);
                killingWorkers.remove(workerId);
            }
        }
    }

    /**
     * Resolves the cleanup timeout (seconds) for a topology: the per-topology
     * override if present, otherwise the cluster-wide default from conf.
     */
    private static int getCleanupTimeoutSecs(Map conf,
                                             Map<String, Integer> taskCleanupTimeoutMap,
                                             String topologyId) {
        Integer timeout = (taskCleanupTimeoutMap != null) ? taskCleanupTimeoutMap.get(topologyId) : null;
        return (timeout != null) ? timeout : ConfigExtension.getTaskCleanupTimeoutSec(conf);
    }

    /**
     * Cleans the directory and sub-directories of STORM-LOCAL-DIR/workers/workerId.
     * Best effort: failures are logged and left for a later retry.
     *
     * @param conf storm conf
     * @param workerId worker id
     */
    public static void tryCleanupWorkerDir(Map conf, String workerId) {
        try {
            // delete heartbeat dir: LOCAL_DIR/workers/workerId/heartbeats
            PathUtils.rmr(StormConfig.worker_heartbeats_root(conf, workerId));
            // delete pid dir: LOCAL_DIR/workers/workerId/pids
            PathUtils.rmr(StormConfig.worker_pids_root(conf, workerId));
            // delete the worker dir itself: LOCAL_DIR/workers/workerId
            PathUtils.rmr(StormConfig.worker_root(conf, workerId));
        } catch (Exception e) {
            // pass e as the throwable argument so the stack trace is preserved
            LOG.warn("Failed to cleanup worker " + workerId + ". Will retry later", e);
        }
    }

    /**
     * Reads the pid files under the worker's pid dir. A worker started both manually
     * and by the supervisor may leave multiple pid files, hence a list.
     *
     * @param conf storm conf
     * @param workerId worker id
     * @return the pid-file names found under LOCAL_DIR/workers/workerId/pids
     * @throws IOException if the pid directory cannot be read
     */
    public static List<String> getPid(Map conf, String workerId) throws IOException {
        String workerPidPath = StormConfig.worker_pids_root(conf, workerId);
        return PathUtils.read_dir_contents(workerPidPath);
    }
}