/* * Licensed to Crate under one or more contributor license agreements. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. Crate licenses this file * to you under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. * * However, if you have executed another commercial license agreement * with Crate these terms will supersede the license and you may use the * software solely pursuant to the terms of the relevant commercial * agreement. */ package io.crate.jobs.transport; import io.crate.exceptions.Exceptions; import io.crate.executor.transport.kill.KillJobsRequest; import io.crate.executor.transport.kill.KillResponse; import io.crate.executor.transport.kill.TransportKillJobsNodeAction; import io.crate.jobs.JobContextService; import org.apache.logging.log4j.Logger; import org.elasticsearch.action.ActionListener; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.component.AbstractLifecycleComponent; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Singleton; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportConnectionListener; import org.elasticsearch.transport.TransportService; import java.util.Arrays; import java.util.Collection; import java.util.UUID; import java.util.stream.Collectors; /** * service that listens to node-disconnected-events and kills jobContexts that were started by the nodes that got disconnected */ @Singleton public class NodeDisconnectJobMonitorService extends AbstractLifecycleComponent implements TransportConnectionListener { private final ThreadPool threadPool; private final JobContextService jobContextService; private final TransportService transportService; private final static TimeValue DELAY = TimeValue.timeValueMinutes(1); private final TransportKillJobsNodeAction killJobsNodeAction; private final static Logger LOGGER = Loggers.getLogger(NodeDisconnectJobMonitorService.class); @Inject public NodeDisconnectJobMonitorService(Settings settings, ThreadPool threadPool, JobContextService jobContextService, TransportService transportService, TransportKillJobsNodeAction killJobsNodeAction) { super(settings); this.threadPool = threadPool; this.jobContextService = jobContextService; this.transportService = transportService; this.killJobsNodeAction = killJobsNodeAction; } @Override protected void doStart() { transportService.addConnectionListener(this); } @Override protected void doStop() { transportService.removeConnectionListener(this); } @Override protected void doClose() { } @Override public void onNodeConnected(DiscoveryNode node) { } @Override public void onNodeDisconnected(final DiscoveryNode node) { final Collection<UUID> contexts = jobContextService.getJobIdsByCoordinatorNode(node.getId()).collect(Collectors.toList()); if (contexts.isEmpty()) { // Disconnected node is not a handler node --> kill jobs on all participated nodes contexts.addAll(jobContextService.getJobIdsByParticipatingNodes(node.getId()).collect(Collectors.toList())); KillJobsRequest killJobsRequest = new KillJobsRequest(contexts); if (!contexts.isEmpty()) { killJobsNodeAction.broadcast(killJobsRequest, new ActionListener<KillResponse>() { @Override public void onResponse(KillResponse killResponse) { } @Override public void onFailure(Exception e) { LOGGER.warn("failed to send kill request to nodes"); } }, Arrays.asList(node.getId())); } else { return; } } threadPool.schedule(DELAY, ThreadPool.Names.GENERIC, () -> jobContextService.killJobs(contexts)); } }