/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ignite.internal.processors.task;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.IgniteException;
import org.apache.ignite.IgniteLogger;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.cluster.ClusterTopologyException;
import org.apache.ignite.compute.ComputeJob;
import org.apache.ignite.compute.ComputeJobAfterSend;
import org.apache.ignite.compute.ComputeJobBeforeFailover;
import org.apache.ignite.compute.ComputeJobFailoverException;
import org.apache.ignite.compute.ComputeJobResult;
import org.apache.ignite.compute.ComputeJobResultPolicy;
import org.apache.ignite.compute.ComputeJobSibling;
import org.apache.ignite.compute.ComputeLoadBalancer;
import org.apache.ignite.compute.ComputeTask;
import org.apache.ignite.compute.ComputeTaskContinuousMapper;
import org.apache.ignite.compute.ComputeTaskNoResultCache;
import org.apache.ignite.compute.ComputeTaskSpis;
import org.apache.ignite.compute.ComputeUserUndeclaredException;
import org.apache.ignite.events.Event;
import org.apache.ignite.events.JobEvent;
import org.apache.ignite.events.TaskEvent;
import org.apache.ignite.igfs.IgfsOutOfSpaceException;
import org.apache.ignite.internal.ComputeTaskInternalFuture;
import org.apache.ignite.internal.GridInternalException;
import org.apache.ignite.internal.GridJobCancelRequest;
import org.apache.ignite.internal.GridJobExecuteRequest;
import org.apache.ignite.internal.GridJobExecuteResponse;
import org.apache.ignite.internal.GridJobResultImpl;
import org.apache.ignite.internal.GridJobSiblingImpl;
import org.apache.ignite.internal.GridKernalContext;
import org.apache.ignite.internal.GridTaskSessionImpl;
import org.apache.ignite.internal.IgniteClientDisconnectedCheckedException;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.cluster.ClusterGroupEmptyCheckedException;
import org.apache.ignite.internal.cluster.ClusterTopologyCheckedException;
import org.apache.ignite.internal.compute.ComputeTaskTimeoutCheckedException;
import org.apache.ignite.internal.managers.deployment.GridDeployment;
import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion;
import org.apache.ignite.internal.processors.closure.AffinityTask;
import org.apache.ignite.internal.processors.service.GridServiceNotFoundException;
import org.apache.ignite.internal.processors.timeout.GridTimeoutObject;
import org.apache.ignite.internal.util.typedef.CO;
import org.apache.ignite.internal.util.typedef.F;
import org.apache.ignite.internal.util.typedef.X;
import org.apache.ignite.internal.util.typedef.internal.A;
import org.apache.ignite.internal.util.typedef.internal.CU;
import org.apache.ignite.internal.util.typedef.internal.LT;
import org.apache.ignite.internal.util.typedef.internal.S;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.internal.util.worker.GridWorker;
import org.apache.ignite.internal.visor.util.VisorClusterGroupEmptyException;
import org.apache.ignite.lang.IgniteInClosure;
import org.apache.ignite.lang.IgniteUuid;
import org.apache.ignite.marshaller.Marshaller;
import org.apache.ignite.resources.TaskContinuousMapperResource;
import org.jetbrains.annotations.Nullable;
import org.jsr166.ConcurrentLinkedDeque8;

import static org.apache.ignite.compute.ComputeJobResultPolicy.FAILOVER;
import static org.apache.ignite.compute.ComputeJobResultPolicy.WAIT;
import static org.apache.ignite.events.EventType.EVT_JOB_FAILED_OVER;
import static org.apache.ignite.events.EventType.EVT_JOB_MAPPED;
import static org.apache.ignite.events.EventType.EVT_JOB_RESULTED;
import static org.apache.ignite.events.EventType.EVT_TASK_FAILED;
import static org.apache.ignite.events.EventType.EVT_TASK_FINISHED;
import static org.apache.ignite.events.EventType.EVT_TASK_REDUCED;
import static org.apache.ignite.events.EventType.EVT_TASK_STARTED;
import static org.apache.ignite.events.EventType.EVT_TASK_TIMEDOUT;
import static org.apache.ignite.internal.GridTopic.TOPIC_JOB;
import static org.apache.ignite.internal.GridTopic.TOPIC_JOB_CANCEL;
import static org.apache.ignite.internal.managers.communication.GridIoPolicy.MANAGEMENT_POOL;
import static org.apache.ignite.internal.managers.communication.GridIoPolicy.PUBLIC_POOL;
import static org.apache.ignite.internal.processors.task.GridTaskThreadContextKey.TC_IO_POLICY;
import static org.apache.ignite.internal.processors.task.GridTaskThreadContextKey.TC_NO_FAILOVER;

/**
 * Grid task worker. Handles full task life cycle.
 *
 * @param <T> Task argument type.
 * @param <R> Task return value type.
 */
class GridTaskWorker<T, R> extends GridWorker implements GridTimeoutObject {
    /** Split size threshold. */
    private static final int SPLIT_WARN_THRESHOLD = 1000;

    /** Retry delay factor (ms). Retry delay = retryAttempt * RETRY_DELAY_MS. */
    private static final long RETRY_DELAY_MS = 10;

    /** {@code True} for internal tasks. */
    private boolean internal;

    /** */
    private enum State {
        /** */
        WAITING,

        /** */
        REDUCING,

        /** */
        REDUCED,

        /** */
        FINISHING
    }

    /** Static logger to avoid re-creation. */
    private static final AtomicReference<IgniteLogger> logRef = new AtomicReference<>();

    /** */
    private final GridKernalContext ctx;

    /** */
    private final IgniteLogger log;

    /** */
    private final Marshaller marsh;

    /** */
    private final GridTaskSessionImpl ses;

    /** */
    private final ComputeTaskInternalFuture<R> fut;

    /** */
    private final T arg;

    /** */
    private final GridTaskEventListener evtLsnr;

    /** */
    private Map<IgniteUuid, GridJobResultImpl> jobRes;

    /** */
    private State state = State.WAITING;

    /** */
    private final GridDeployment dep;

    /** Task class. */
    private final Class<?> taskCls;
    /** Optional thread-local context from task processor. */
    private final Map<GridTaskThreadContextKey, Object> thCtx;

    /** */
    private ComputeTask<T, R> task;

    /** */
    private final Queue<GridJobExecuteResponse> delayedRess = new ConcurrentLinkedDeque8<>();

    /** */
    private boolean continuous;

    /** */
    private final Object mux = new Object();

    /** */
    private boolean lockRespProc = true;

    /** */
    private final boolean resCache;

    /** */
    private final boolean noFailover;

    /** */
    private final int affPartId;

    /** */
    private final String affCacheName;

    /** */
    private final int[] affCacheIds;

    /** */
    private AffinityTopologyVersion mapTopVer;

    /** */
    private int retryAttemptCnt;

    /** */
    private final UUID subjId;

    /** Continuous mapper. */
    private final ComputeTaskContinuousMapper mapper = new ComputeTaskContinuousMapper() {
        /** {@inheritDoc} */
        @Override public void send(ComputeJob job, ClusterNode node) {
            try {
                A.notNull(job, "job");
                A.notNull(node, "node");

                processMappedJobs(Collections.singletonMap(job, node));
            }
            catch (IgniteCheckedException e) {
                throw U.convertException(e);
            }
        }

        /** {@inheritDoc} */
        @Override public void send(Map<? extends ComputeJob, ClusterNode> mappedJobs) {
            try {
                A.notNull(mappedJobs, "mappedJobs");

                processMappedJobs(mappedJobs);
            }
            catch (IgniteCheckedException e) {
                throw U.convertException(e);
            }
        }

        /** {@inheritDoc} */
        @Override public void send(ComputeJob job) {
            A.notNull(job, "job");

            send(Collections.singleton(job));
        }

        /** {@inheritDoc} */
        @Override public void send(Collection<? extends ComputeJob> jobs) {
            try {
                A.notNull(jobs, "jobs");

                if (jobs.isEmpty())
                    throw new IgniteException("Empty jobs collection passed to send(...) method.");

                ComputeLoadBalancer balancer = ctx.loadBalancing().getLoadBalancer(ses, getTaskTopology());

                for (ComputeJob job : jobs) {
                    if (job == null)
                        throw new IgniteException("Null job passed to send(...) method.");

                    processMappedJobs(Collections.singletonMap(job, balancer.getBalancedNode(job, null)));
                }
            }
            catch (IgniteCheckedException e) {
                throw U.convertException(e);
            }
        }
    };

    /**
     * @param ctx Kernal context.
     * @param arg Task argument.
     * @param ses Grid task session.
     * @param fut Task future.
     * @param taskCls Task class.
     * @param task Task instance that might be null.
     * @param dep Deployed task.
     * @param evtLsnr Event listener.
     * @param thCtx Thread-local context from task processor.
     * @param subjId Subject ID.
     */
    GridTaskWorker(
        GridKernalContext ctx,
        @Nullable T arg,
        GridTaskSessionImpl ses,
        ComputeTaskInternalFuture<R> fut,
        @Nullable Class<?> taskCls,
        @Nullable ComputeTask<T, R> task,
        GridDeployment dep,
        GridTaskEventListener evtLsnr,
        @Nullable Map<GridTaskThreadContextKey, Object> thCtx,
        UUID subjId) {
        super(ctx.config().getIgniteInstanceName(), "grid-task-worker", ctx.log(GridTaskWorker.class));

        assert ses != null;
        assert fut != null;
        assert evtLsnr != null;
        assert dep != null;

        this.arg = arg;
        this.ctx = ctx;
        this.fut = fut;
        this.ses = ses;
        this.taskCls = taskCls;
        this.task = task;
        this.dep = dep;
        this.evtLsnr = evtLsnr;
        this.thCtx = thCtx;
        this.subjId = subjId;

        log = U.logger(ctx, logRef, this);

        marsh = ctx.config().getMarshaller();

        resCache = dep.annotation(taskCls, ComputeTaskNoResultCache.class) == null;

        Boolean noFailover = getThreadContext(TC_NO_FAILOVER);

        this.noFailover = noFailover != null ? noFailover : false;
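
        // Affinity tasks (affinityCall/affinityRun) pin execution to a cache
        // partition: the partition, cache IDs and mapping topology version are
        // remembered so retries and failovers can re-map the job to the node
        // that currently owns the partition.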
        if (task instanceof AffinityTask) {
            AffinityTask affTask = (AffinityTask)task;

            assert affTask.affinityCacheNames() != null : affTask;
            assert affTask.partition() >= 0 : affTask;

            affPartId = affTask.partition();
            affCacheName = F.first(affTask.affinityCacheNames());
            mapTopVer = affTask.topologyVersion();

            affCacheIds = new int[affTask.affinityCacheNames().size()];

            int i = 0;

            for (String cacheName : affTask.affinityCacheNames()) {
                affCacheIds[i] = CU.cacheId(cacheName);

                ++i;
            }
        }
        else {
            affPartId = -1;
            affCacheName = null;
            mapTopVer = null;
            affCacheIds = null;
        }
    }

    /**
     * Gets value from thread-local context.
     *
     * @param key Thread-local context key.
     * @return Thread-local context value, if any.
     */
    @SuppressWarnings({"unchecked"})
    @Nullable private <V> V getThreadContext(GridTaskThreadContextKey key) {
        return thCtx == null ? null : (V)thCtx.get(key);
    }

    /**
     * @return Task session ID.
     */
    IgniteUuid getTaskSessionId() {
        return ses.getId();
    }

    /**
     * @return Task session.
     */
    GridTaskSessionImpl getSession() {
        return ses;
    }

    /**
     * @return Task future.
     */
    ComputeTaskInternalFuture<R> getTaskFuture() {
        return fut;
    }

    /**
     * Gets property dep.
     *
     * @return Property dep.
     */
    GridDeployment getDeployment() {
        return dep;
    }

    /**
     * @return Grid task.
     */
    public ComputeTask<T, R> getTask() {
        return task;
    }

    /**
     * @param task Deployed task.
     */
    public void setTask(ComputeTask<T, R> task) {
        this.task = task;
    }

    /**
     * @return {@code True} if task is internal.
     */
    public boolean isInternal() {
        return internal;
    }

    /** {@inheritDoc} */
    @Override public IgniteUuid timeoutId() {
        return ses.getId();
    }

    /** {@inheritDoc} */
    @Override public void onTimeout() {
        synchronized (mux) {
            if (state != State.WAITING)
                return;
        }

        U.warn(log, "Task has timed out: " + ses);

        recordTaskEvent(EVT_TASK_TIMEDOUT, "Task has timed out.");

        Throwable e = new ComputeTaskTimeoutCheckedException("Task timed out (check logs for error messages): " + ses);

        finishTask(null, e);
    }

    /** {@inheritDoc} */
    @Override public long endTime() {
        return ses.getEndTime();
    }

    /**
     * @param taskCls Task class.
     * @return Task instance.
     * @throws IgniteCheckedException Thrown in case of any instantiation error.
     */
    private ComputeTask<T, R> newTask(Class<? extends ComputeTask<T, R>> taskCls) throws IgniteCheckedException {
        ComputeTask<T, R> task = dep.newInstance(taskCls);

        if (task == null)
            throw new IgniteCheckedException("Failed to instantiate task (is default constructor available?): " + taskCls);

        return task;
    }

    /**
     * Initializes SPIs declared on the task via {@link ComputeTaskSpis}.
     */
    private void initializeSpis() {
        ComputeTaskSpis spis = dep.annotation(taskCls, ComputeTaskSpis.class);

        if (spis != null) {
            ses.setLoadBalancingSpi(spis.loadBalancingSpi());
            ses.setFailoverSpi(spis.failoverSpi());
            ses.setCheckpointSpi(spis.checkpointSpi());
        }
    }

    /**
     * Maps this task's jobs to nodes and sends them out.
     */
    @SuppressWarnings({"unchecked"})
    @Override protected void body() {
        evtLsnr.onTaskStarted(this);

        try {
            // Use either user task or deployed one.
            if (task == null) {
                assert taskCls != null;
                assert ComputeTask.class.isAssignableFrom(taskCls);

                try {
                    task = newTask((Class<? extends ComputeTask<T, R>>)taskCls);
                }
                catch (IgniteCheckedException e) {
                    // If cannot instantiate task, then assign internal flag based
                    // on information available.
                    internal = dep.internalTask(null, taskCls);

                    recordTaskEvent(EVT_TASK_STARTED, "Task started.");

                    throw e;
                }
            }

            internal = ses.isInternal();

            recordTaskEvent(EVT_TASK_STARTED, "Task started.");

            initializeSpis();

            ses.setClassLoader(dep.classLoader());
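
            // Map phase: resolve the task topology, inject resources into the task
            // instance and delegate job-to-node assignment to ComputeTask.map().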
            // Nodes are ignored by affinity tasks.
            final List<ClusterNode> shuffledNodes = affCacheIds == null ?
                getTaskTopology() : Collections.<ClusterNode>emptyList();

            // Load balancer.
            ComputeLoadBalancer balancer = ctx.loadBalancing().getLoadBalancer(ses, shuffledNodes);

            continuous = ctx.resource().isAnnotationPresent(dep, task, TaskContinuousMapperResource.class);

            if (log.isDebugEnabled())
                log.debug("Injected task resources [continuous=" + continuous + ']');

            // Inject resources.
            ctx.resource().inject(dep, task, ses, balancer, mapper);

            Map<? extends ComputeJob, ClusterNode> mappedJobs = U.wrapThreadLoader(dep.classLoader(),
                new Callable<Map<? extends ComputeJob, ClusterNode>>() {
                    @Override public Map<? extends ComputeJob, ClusterNode> call() {
                        return task.map(shuffledNodes, arg);
                    }
                });

            if (log.isDebugEnabled())
                log.debug("Mapped task jobs to nodes [jobCnt=" + (mappedJobs != null ? mappedJobs.size() : 0) +
                    ", mappedJobs=" + mappedJobs + ", ses=" + ses + ']');

            if (F.isEmpty(mappedJobs)) {
                synchronized (mux) {
                    // Check if some jobs are sent from continuous mapper.
                    if (F.isEmpty(jobRes))
                        throw new IgniteCheckedException("Task map operation produced no mapped jobs: " + ses);
                }
            }
            else
                processMappedJobs(mappedJobs);

            synchronized (mux) {
                lockRespProc = false;
            }

            processDelayedResponses();
        }
        catch (ClusterGroupEmptyCheckedException e) {
            U.warn(log, "Failed to map task jobs to nodes (topology projection is empty): " + ses);

            finishTask(null, e);
        }
        catch (IgniteException | IgniteCheckedException e) {
            if (!fut.isCancelled()) {
                if (!(e instanceof VisorClusterGroupEmptyException))
                    U.error(log, "Failed to map task jobs to nodes: " + ses, e);

                finishTask(null, e);
            }
            else if (log.isDebugEnabled())
                log.debug("Failed to map task jobs to nodes due to task cancellation: " + ses);
        }
        // Catch throwable to protect against bad user code.
        catch (Throwable e) {
            String errMsg = "Failed to map task jobs to nodes due to undeclared user exception" +
                " [cause=" + e.getMessage() + ", ses=" + ses + "]";

            U.error(log, errMsg, e);

            finishTask(null, new ComputeUserUndeclaredException(errMsg, e));

            if (e instanceof Error)
                throw e;
        }
    }

    /**
     * @param jobs Map of jobs.
     * @throws IgniteCheckedException Thrown in case of any error.
     */
    private void processMappedJobs(Map<? extends ComputeJob, ClusterNode> jobs) throws IgniteCheckedException {
        if (F.isEmpty(jobs))
            return;

        Collection<GridJobResultImpl> jobResList = new ArrayList<>(jobs.size());

        Collection<ComputeJobSibling> sibs = new ArrayList<>(jobs.size());

        // Map jobs to nodes for computation.
        for (Map.Entry<? extends ComputeJob, ClusterNode> mappedJob : jobs.entrySet()) {
            ComputeJob job = mappedJob.getKey();
            ClusterNode node = mappedJob.getValue();

            if (job == null)
                throw new IgniteCheckedException("Job cannot be null [mappedJob=" + mappedJob + ", ses=" + ses + ']');

            if (node == null)
                throw new IgniteCheckedException("Node cannot be null [mappedJob=" + mappedJob + ", ses=" + ses + ']');

            IgniteUuid jobId = IgniteUuid.fromUuid(ctx.localNodeId());

            GridJobSiblingImpl sib = new GridJobSiblingImpl(ses.getId(), jobId, node.id(), ctx);

            jobResList.add(new GridJobResultImpl(job, jobId, node, sib));

            // Do not add siblings if result cache is disabled.
            if (resCache)
                sibs.add(sib);

            recordJobEvent(EVT_JOB_MAPPED, jobId, node, "Job got mapped.");
        }
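
        // Results are registered and marked 'occupied' under the mutex before any
        // request goes out; responses arriving while a result is occupied are
        // parked in delayedRess and replayed by processDelayedResponses().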
        synchronized (mux) {
            if (state != State.WAITING)
                throw new IgniteCheckedException("Task is not in waiting state [state=" + state + ", ses=" + ses + ']');

            // Do not add siblings if result cache is disabled.
            if (resCache)
                ses.addJobSiblings(sibs);

            if (jobRes == null)
                jobRes = new HashMap<>();

            // Populate all remote mappedJobs into map, before mappedJobs are sent.
            // This is done to avoid race condition when we start
            // getting results while still sending out references.
            for (GridJobResultImpl res : jobResList) {
                if (jobRes.put(res.getJobContext().getJobId(), res) != null)
                    throw new IgniteCheckedException("Duplicate job ID for remote job found: " +
                        res.getJobContext().getJobId());

                res.setOccupied(true);

                if (resCache && jobRes.size() > ctx.discovery().size() && jobRes.size() % SPLIT_WARN_THRESHOLD == 0)
                    LT.warn(log, "Number of jobs in task is too large for task: " + ses.getTaskName() +
                        ". Consider reducing number of jobs or disabling job result cache with " +
                        "@ComputeTaskNoResultCache annotation.");
            }
        }

        // Set mapped flag.
        ses.onMapped();

        // Send out all remote mappedJobs.
        for (GridJobResultImpl res : jobResList) {
            evtLsnr.onJobSend(this, res.getSibling());

            try {
                sendRequest(res);
            }
            finally {
                // Open job for processing results.
                synchronized (mux) {
                    res.setOccupied(false);
                }
            }
        }

        processDelayedResponses();
    }

    /**
     * @return Topology for this task.
     * @throws IgniteCheckedException Thrown in case of any error.
     */
    private List<ClusterNode> getTaskTopology() throws IgniteCheckedException {
        Collection<UUID> top = ses.getTopology();

        Collection<? extends ClusterNode> subgrid = top != null ? ctx.discovery().nodes(top) : ctx.discovery().allNodes();

        int size = subgrid.size();

        if (size == 0)
            throw new ClusterGroupEmptyCheckedException("Topology projection is empty.");

        List<ClusterNode> shuffledNodes = new ArrayList<>(size);

        for (ClusterNode node : subgrid)
            shuffledNodes.add(node);

        if (shuffledNodes.size() > 1)
            // Shuffle nodes prior to giving them to user.
            Collections.shuffle(shuffledNodes);

        return shuffledNodes;
    }

    /**
     * Processes one delayed response, if any.
     */
    private void processDelayedResponses() {
        GridJobExecuteResponse res = delayedRess.poll();

        if (res != null)
            onResponse(res);
    }

    /**
     * @param msg Job execution response.
     */
    void onResponse(GridJobExecuteResponse msg) {
        assert msg != null;

        if (fut.isDone()) {
            if (log.isDebugEnabled())
                log.debug("Ignoring job response since task has finished: " + msg);

            return;
        }

        GridJobExecuteResponse res = msg;

        while (res != null) {
            GridJobResultImpl jobRes = null;

            // Flag indicating whether occupied flag for
            // job response was changed in this method.
            boolean selfOccupied = false;

            IgniteInternalFuture<?> affFut = null;

            boolean waitForAffTop = false;

            final GridJobExecuteResponse failoverRes = res;

            try {
                synchronized (mux) {
                    // If task is not waiting for responses,
                    // then there is no point to proceed.
                    if (state != State.WAITING) {
                        if (log.isDebugEnabled())
                            log.debug("Ignoring response since task is already reducing or finishing [res=" + res +
                                ", job=" + ses + ", state=" + state + ']');

                        return;
                    }

                    jobRes = this.jobRes.get(res.getJobId());

                    if (jobRes == null) {
                        if (log.isDebugEnabled())
                            U.warn(log, "Received response for unknown child job (was job presumed failed?): " + res);

                        res = delayedRess.poll();

                        // We cannot return here because there can be more delayed messages in the queue.
                        continue;
                    }
                    // Only process the first response and ignore the following ones. This scenario
                    // is possible if a node has left topology and a fake failure response
                    // was created from the discovery listener, or when sending the request failed.
                    if (jobRes.hasResponse()) {
                        if (log.isDebugEnabled())
                            log.debug("Received redundant response for a job (will ignore): " + res);

                        res = delayedRess.poll();

                        // We cannot return here because there can be more delayed messages in the queue.
                        continue;
                    }

                    if (!jobRes.getNode().id().equals(res.getNodeId())) {
                        if (log.isDebugEnabled())
                            log.debug("Ignoring stale response as job was already resent to other node [res=" + res +
                                ", jobRes=" + jobRes + ']');

                        // Prevent processing 2 responses for the same job simultaneously.
                        jobRes.setOccupied(true);

                        selfOccupied = true;

                        // We cannot return here because there can be more delayed messages in the queue.
                        continue;
                    }

                    if (jobRes.isOccupied()) {
                        if (log.isDebugEnabled())
                            log.debug("Adding response to delayed queue (job is either being sent or processing " +
                                "another response): " + res);

                        delayedRess.offer(res);

                        return;
                    }

                    if (lockRespProc) {
                        delayedRess.offer(res);

                        return;
                    }

                    lockRespProc = true;

                    selfOccupied = true;

                    // Prevent processing 2 responses for the same job simultaneously.
                    jobRes.setOccupied(true);

                    // We don't keep reference to job if results are not cached.
                    if (!resCache)
                        this.jobRes.remove(res.getJobId());
                }

                if (res.getFakeException() != null)
                    jobRes.onResponse(null, res.getFakeException(), null, false);
                else {
                    ClassLoader clsLdr = dep.classLoader();

                    try {
                        boolean loc = ctx.localNodeId().equals(res.getNodeId()) && !ctx.config().isMarshalLocalJobs();

                        Object res0 = loc ? res.getJobResult() : U.unmarshal(marsh, res.getJobResultBytes(),
                            U.resolveClassLoader(clsLdr, ctx.config()));

                        IgniteException ex = loc ? res.getException() : U.<IgniteException>unmarshal(marsh,
                            res.getExceptionBytes(), U.resolveClassLoader(clsLdr, ctx.config()));

                        Map<Object, Object> attrs = loc ? res.getJobAttributes() : U.<Map<Object, Object>>unmarshal(marsh,
                            res.getJobAttributesBytes(), U.resolveClassLoader(clsLdr, ctx.config()));

                        jobRes.onResponse(res0, ex, attrs, res.isCancelled());

                        if (loc)
                            ctx.resource().invokeAnnotated(dep, jobRes.getJob(), ComputeJobAfterSend.class);
                    }
                    catch (IgniteCheckedException e) {
                        U.error(log, "Error deserializing job response: " + res, e);

                        finishTask(null, e);
                    }
                }

                List<ComputeJobResult> results;

                if (!resCache)
                    results = Collections.emptyList();
                else {
                    synchronized (mux) {
                        results = getRemoteResults();
                    }
                }

                ComputeJobResultPolicy plc = result(jobRes, results);

                if (plc == null) {
                    String errMsg = "Failed to obtain remote job result policy for result from ComputeTask.result(..) " +
                        "method that returned null (will fail the whole task): " + jobRes;

                    finishTask(null, new IgniteCheckedException(errMsg));

                    return;
                }

                boolean retry = false;

                synchronized (mux) {
                    // If task is not waiting for responses,
                    // then there is no point to proceed.
                    if (state != State.WAITING) {
                        if (log.isDebugEnabled())
                            log.debug("Ignoring ComputeTask.result(..) value since task is already reducing or " +
                                "finishing [res=" + res + ", job=" + ses + ", state=" + state + ']');

                        return;
                    }

                    if (res.retry()) {
                        // Retry is used only with affinity call / run.
                        assert affCacheIds != null;

                        retry = true;

                        mapTopVer = U.max(res.getRetryTopologyVersion(), ctx.discovery().topologyVersionEx());

                        affFut = ctx.cache().context().exchange().affinityReadyFuture(mapTopVer);

                        if (affFut != null && !affFut.isDone()) {
                            waitForAffTop = true;

                            jobRes.resetResponse();
                        }
                    }
                    else {
                        switch (plc) {
                            // Start reducing all results received so far.
                            case REDUCE: {
                                state = State.REDUCING;

                                break;
                            }
                            // Keep waiting if there are more responses to come,
                            // otherwise, reduce.
                            case WAIT: {
                                assert results.size() <= this.jobRes.size();

                                // If there are more results to wait for.
                                // If result cache is disabled, then we reduce
                                // when both collections are empty.
                                if (results.size() == this.jobRes.size()) {
                                    plc = ComputeJobResultPolicy.REDUCE;

                                    // All results are received, proceed to reduce method.
                                    state = State.REDUCING;
                                }

                                break;
                            }

                            case FAILOVER: {
                                if (affCacheIds != null) {
                                    mapTopVer = ctx.discovery().topologyVersionEx();

                                    affFut = ctx.cache().context().exchange().affinityReadyFuture(mapTopVer);
                                }

                                if (affFut != null && !affFut.isDone()) {
                                    waitForAffTop = true;

                                    jobRes.resetResponse();
                                }
                                else if (!failover(res, jobRes, getTaskTopology()))
                                    plc = null;

                                break;
                            }
                        }
                    }
                }

                // Outside of synchronization.
                if (retry && !waitForAffTop) {
                    // Handle retry.
                    retryAttemptCnt++;

                    final long wait = retryAttemptCnt * RETRY_DELAY_MS;

                    sendRetryRequest(wait, jobRes, res);
                }
                else if (plc != null && !waitForAffTop && !retry) {
                    // Handle failover.
                    if (plc == FAILOVER)
                        sendFailoverRequest(jobRes);
                    else {
                        evtLsnr.onJobFinished(this, jobRes.getSibling());

                        if (plc == ComputeJobResultPolicy.REDUCE)
                            reduce(results);
                    }
                }
            }
            catch (IgniteCheckedException e) {
                U.error(log, "Failed to obtain topology [ses=" + ses + ", err=" + e + ']', e);

                finishTask(null, e);

                waitForAffTop = false;
            }
            finally {
                // Open up job for processing responses.
                // Only unset occupied flag, if it was
                // set in this method.
                if (selfOccupied) {
                    assert jobRes != null;

                    synchronized (mux) {
                        jobRes.setOccupied(false);

                        lockRespProc = false;
                    }

                    // Process delayed responses if there are any.
                    res = delayedRess.poll();
                }
            }

            if (waitForAffTop && affFut != null) {
                affFut.listen(new IgniteInClosure<IgniteInternalFuture<?>>() {
                    @Override public void apply(IgniteInternalFuture<?> fut0) {
                        ctx.closure().runLocalSafe(new Runnable() {
                            @Override public void run() {
                                onResponse(failoverRes);
                            }
                        }, false);
                    }
                });
            }
        }
    }

    /**
     * @param waitms Wait time in milliseconds before the retry is sent.
     * @param jRes Job result.
     * @param resp Job response.
     */
    private void sendRetryRequest(final long waitms, final GridJobResultImpl jRes, final GridJobExecuteResponse resp) {
        ctx.timeout().schedule(new Runnable() {
            @Override public void run() {
                ctx.closure().runLocalSafe(new Runnable() {
                    @Override public void run() {
                        try {
                            ClusterNode newNode = ctx.affinity().mapPartitionToNode(affCacheName, affPartId,
                                mapTopVer);

                            if (!checkTargetNode(resp, jRes, newNode))
                                return;

                            sendRequest(jRes);
                        }
                        catch (Exception e) {
                            U.error(log, "Failed to re-map job or retry request [ses=" + ses + "]", e);

                            finishTask(null, e);
                        }
                    }
                }, false);
            }
        }, waitms, -1);
    }

    /**
     * @param jobRes Job result.
     * @param results Existing job results.
     * @return Job result policy.
     */
    @SuppressWarnings({"CatchGenericClass"})
    @Nullable private ComputeJobResultPolicy result(final ComputeJobResult jobRes, final List<ComputeJobResult> results) {
        assert !Thread.holdsLock(mux);

        return U.wrapThreadLoader(dep.classLoader(), new CO<ComputeJobResultPolicy>() {
            @Nullable @Override public ComputeJobResultPolicy apply() {
                try {
                    // Obtain job result policy.
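                    // ComputeTask.result(..) is user code: it runs under the task
                    // class loader, and any exception it throws fails the whole task.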
                    ComputeJobResultPolicy plc = null;

                    try {
                        plc = task.result(jobRes, results);

                        if (plc == FAILOVER && noFailover) {
                            IgniteException e = jobRes.getException();

                            if (e != null)
                                throw e;

                            plc = WAIT;
                        }
                    }
                    finally {
                        recordJobEvent(EVT_JOB_RESULTED, jobRes.getJobContext().getJobId(),
                            jobRes.getNode(), "Job got resulted with: " + plc);
                    }

                    if (log.isDebugEnabled())
                        log.debug("Obtained job result policy [policy=" + plc + ", ses=" + ses + ']');

                    return plc;
                }
                catch (IgniteException e) {
                    if (X.hasCause(e, GridInternalException.class) || X.hasCause(e, IgfsOutOfSpaceException.class)) {
                        // Print internal exceptions only if debug is enabled.
                        if (log.isDebugEnabled())
                            U.error(log, "Failed to obtain remote job result policy for result from " +
                                "ComputeTask.result(..) method (will fail the whole task): " + jobRes, e);
                    }
                    else if (X.hasCause(e, ComputeJobFailoverException.class)) {
                        IgniteCheckedException e0 = new IgniteCheckedException("Job was not failed over because " +
                            "ComputeJobResultPolicy.FAILOVER was not returned from " +
                            "ComputeTask.result(...) method for job result with ComputeJobFailoverException.", e);

                        finishTask(null, e0);

                        return null;
                    }
                    else if (X.hasCause(e, GridServiceNotFoundException.class) ||
                        X.hasCause(e, ClusterTopologyCheckedException.class)) {
                        // Should be throttled, because GridServiceProxy continuously retries getting the service.
                        LT.error(log, e, "Failed to obtain remote job result policy for result from " +
                            "ComputeTask.result(..) method (will fail the whole task): " + jobRes);
                    }
                    else
                        U.error(log, "Failed to obtain remote job result policy for result from " +
                            "ComputeTask.result(..) method (will fail the whole task): " + jobRes, e);

                    finishTask(null, e);

                    return null;
                }
                catch (Throwable e) {
                    String errMsg = "Failed to obtain remote job result policy for result from " +
                        "ComputeTask.result(..) method due to undeclared user exception " +
                        "(will fail the whole task): " + jobRes;

                    U.error(log, errMsg, e);

                    Throwable tmp = new ComputeUserUndeclaredException(errMsg, e);

                    // Failed to successfully obtain result policy and
                    // hence forced to fail the whole deployed task.
                    finishTask(null, tmp);

                    if (e instanceof Error)
                        throw e;

                    return null;
                }
            }
        });
    }

    /**
     * @param results Job results.
     */
    private void reduce(final List<ComputeJobResult> results) {
        R reduceRes = null;
        Throwable userE = null;

        try {
            try {
                // Reduce results.
                reduceRes = U.wrapThreadLoader(dep.classLoader(), new Callable<R>() {
                    @Nullable @Override public R call() {
                        return task.reduce(results);
                    }
                });
            }
            finally {
                synchronized (mux) {
                    assert state == State.REDUCING : "Invalid task state: " + state;

                    state = State.REDUCED;
                }
            }

            if (log.isDebugEnabled())
                log.debug("Reduced job responses [reduceRes=" + reduceRes + ", ses=" + ses + ']');

            recordTaskEvent(EVT_TASK_REDUCED, "Task reduced.");
        }
        catch (ClusterTopologyCheckedException e) {
            U.warn(log, "Failed to reduce job results for task (any nodes from task topology left grid?): " + task);

            userE = e;
        }
        catch (IgniteCheckedException e) {
            U.error(log, "Failed to reduce job results for task: " + task, e);

            userE = e;
        }
        // Catch Throwable to protect against bad user code.
        catch (Throwable e) {
            String errMsg = "Failed to reduce job results due to undeclared user exception [task=" + task +
                ", err=" + e + ']';

            U.error(log, errMsg, e);

            userE = new ComputeUserUndeclaredException(errMsg, e);

            if (e instanceof Error)
                throw e;
        }
        finally {
            finishTask(reduceRes, userE);
        }
    }

    /**
     * @param res Execution response.
     * @param jobRes Job result.
     * @param top Topology.
     * @return {@code True} if fail-over SPI returned a new node.
     */
    private boolean failover(
        GridJobExecuteResponse res,
        GridJobResultImpl jobRes,
        Collection<? extends ClusterNode> top
    ) {
        assert Thread.holdsLock(mux);

        try {
            ctx.resource().invokeAnnotated(dep, jobRes.getJob(), ComputeJobBeforeFailover.class);

            ClusterNode node = ctx.failover().failover(ses, jobRes, new ArrayList<>(top), affPartId,
                affCacheName, mapTopVer);

            return checkTargetNode(res, jobRes, node);
        }
        // Catch Throwable to protect against bad user code.
        catch (Throwable e) {
            String errMsg = "Failed to failover job due to undeclared user exception [job=" +
                jobRes.getJob() + ", err=" + e + ']';

            U.error(log, errMsg, e);

            finishTask(null, new ComputeUserUndeclaredException(errMsg, e));

            if (e instanceof Error)
                throw (Error)e;

            return false;
        }
    }

    /**
     * @param res Execution response.
     * @param jobRes Job result.
     * @param node New target node.
     * @return {@code True} if new target node is not null.
     */
    private boolean checkTargetNode(GridJobExecuteResponse res, GridJobResultImpl jobRes, ClusterNode node) {
        if (node == null) {
            String msg = "Failed to failover a job to another node (failover SPI returned null) [job=" +
                jobRes.getJob() + ", node=" + jobRes.getNode() + ']';

            if (log.isDebugEnabled())
                log.debug(msg);

            Throwable e = new ClusterTopologyCheckedException(msg, jobRes.getException());

            finishTask(null, e);

            return false;
        }

        if (log.isDebugEnabled())
            log.debug("Resolved job failover [newNode=" + node + ", oldNode=" + jobRes.getNode() +
                ", job=" + jobRes.getJob() + ", resMsg=" + res + ']');

        synchronized (mux) {
            jobRes.setNode(node);
            jobRes.resetResponse();

            if (!resCache) {
                // Store result back in map before sending.
                this.jobRes.put(res.getJobId(), jobRes);
            }
        }

        return true;
    }

    /**
     * @param jobRes Job result.
     */
    private void sendFailoverRequest(GridJobResultImpl jobRes) {
        // Internal failover notification.
        evtLsnr.onJobFailover(this, jobRes.getSibling(), jobRes.getNode().id());

        long timeout = ses.getEndTime() - U.currentTimeMillis();

        if (timeout > 0) {
            recordJobEvent(EVT_JOB_FAILED_OVER, jobRes.getJobContext().getJobId(), jobRes.getNode(),
                "Job failed over.");

            // Send new reference to remote nodes for execution.
            sendRequest(jobRes);
        }
        else
            // Don't apply 'finishTask(..)' here as it will
            // be called from 'onTimeout(..)' callback.
            U.warn(log, "Failed to fail-over job due to task timeout: " + jobRes);
    }

    /**
     * Interrupts child jobs on remote nodes.
     */
    private void cancelChildren() {
        Collection<GridJobResultImpl> doomed = new LinkedList<>();

        synchronized (mux) {
            // Only interrupt unfinished jobs.
            if (jobRes != null)
                for (GridJobResultImpl res : jobRes.values())
                    if (!res.hasResponse())
                        doomed.add(res);
        }

        // Send cancellation request to all unfinished children.
        for (GridJobResultImpl res : doomed) {
            UUID nodeId = res.getNode().id();
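
            // Local children are cancelled directly; remote ones get a best-effort
            // 'courtesy' cancel request (send failures are logged, not retried).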
            if (nodeId.equals(ctx.localNodeId()))
                // Cancel local jobs.
                ctx.job().cancelJob(ses.getId(), res.getJobContext().getJobId(), /*courtesy*/true);
            else {
                try {
                    ClusterNode node = ctx.discovery().node(nodeId);

                    if (node != null)
                        ctx.io().sendToGridTopic(node,
                            TOPIC_JOB_CANCEL,
                            new GridJobCancelRequest(ses.getId(), res.getJobContext().getJobId(), /*courtesy*/true),
                            PUBLIC_POOL);
                }
                catch (IgniteCheckedException e) {
                    try {
                        if (!isDeadNode(nodeId))
                            U.error(log, "Failed to send cancel request to node (will ignore) [nodeId=" +
                                nodeId + ", taskName=" + ses.getTaskName() + ", taskSesId=" + ses.getId() +
                                ", jobSesId=" + res.getJobContext().getJobId() + ']', e);
                    }
                    catch (IgniteClientDisconnectedCheckedException ignored) {
                        if (log.isDebugEnabled())
                            log.debug("Failed to send cancel request to node, client disconnected [nodeId=" +
                                nodeId + ", taskName=" + ses.getTaskName() + ']');
                    }
                }
            }
        }
    }

    /**
     * @param res Job result.
     */
    private void sendRequest(ComputeJobResult res) {
        assert res != null;

        GridJobExecuteRequest req = null;

        ClusterNode node = res.getNode();

        try {
            ClusterNode curNode = ctx.discovery().node(node.id());

            // Check if node exists prior to sending to avoid cases when a discovery
            // listener notified about node leaving after topology resolution. Note
            // that we make this check because we cannot count on exception being
            // thrown in case of send failure.
            if (curNode == null) {
                U.warn(log, "Failed to send job request because remote node left grid (if fail-over is enabled, " +
                    "will attempt fail-over to another node) [node=" + node + ", taskName=" + ses.getTaskName() +
                    ", taskSesId=" + ses.getId() + ", jobSesId=" + res.getJobContext().getJobId() + ']');

                ctx.resource().invokeAnnotated(dep, res.getJob(), ComputeJobAfterSend.class);

                GridJobExecuteResponse fakeRes = new GridJobExecuteResponse(node.id(), ses.getId(),
                    res.getJobContext().getJobId(), null, null, null, null, null, null, false, null);

                fakeRes.setFakeException(new ClusterTopologyException("Failed to send job due to node failure: " + node));

                onResponse(fakeRes);
            }
            else {
                long timeout = ses.getEndTime() == Long.MAX_VALUE ? Long.MAX_VALUE :
                    ses.getEndTime() - U.currentTimeMillis();

                if (timeout > 0) {
                    boolean loc = node.id().equals(ctx.discovery().localNode().id()) &&
                        !ctx.config().isMarshalLocalJobs();

                    Map<Object, Object> sesAttrs = ses.isFullSupport() ? ses.getAttributes() : null;

                    Map<? extends Serializable, ? extends Serializable> jobAttrs =
                        (Map<? extends Serializable, ? extends Serializable>)res.getJobContext().getAttributes();

                    boolean forceLocDep = internal || !ctx.deploy().enabled();

                    req = new GridJobExecuteRequest(
                        ses.getId(),
                        res.getJobContext().getJobId(),
                        ses.getTaskName(),
                        ses.getUserVersion(),
                        ses.getTaskClassName(),
                        loc ? null : U.marshal(marsh, res.getJob()),
                        loc ? res.getJob() : null,
                        ses.getStartTime(),
                        timeout,
                        ses.getTopology(),
                        loc ? null : U.marshal(marsh, ses.getJobSiblings()),
                        loc ? ses.getJobSiblings() : null,
                        loc ? null : U.marshal(marsh, sesAttrs),
                        loc ? sesAttrs : null,
                        loc ? null : U.marshal(marsh, jobAttrs),
                        loc ? jobAttrs : null,
                        ses.getCheckpointSpi(),
                        dep.classLoaderId(),
                        dep.deployMode(),
                        continuous,
                        dep.participants(),
                        forceLocDep,
                        ses.isFullSupport(),
                        internal,
                        subjId,
                        affCacheIds,
                        affPartId,
                        mapTopVer,
                        ses.executorName());

                    if (loc)
                        ctx.job().processJobExecuteRequest(ctx.discovery().localNode(), req);
                    else {
                        byte plc;

                        if (internal)
                            plc = MANAGEMENT_POOL;
                        else {
                            Byte ctxPlc = getThreadContext(TC_IO_POLICY);

                            if (ctxPlc != null)
                                plc = ctxPlc;
                            else
                                plc = PUBLIC_POOL;
                        }

                        // Send job execution request.
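                        // The request travels over the communication SPI on the pool
                        // selected above; ComputeJobAfterSend callbacks are invoked
                        // only after the send succeeds.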
                        ctx.io().sendToGridTopic(node, TOPIC_JOB, req, plc);

                        if (log.isDebugEnabled())
                            log.debug("Sent job request [req=" + req + ", node=" + node + ']');
                    }

                    if (!loc)
                        ctx.resource().invokeAnnotated(dep, res.getJob(), ComputeJobAfterSend.class);
                }
                else
                    U.warn(log, "Job timed out prior to sending job execution request: " + res.getJob());
            }
        }
        catch (IgniteCheckedException e) {
            IgniteException fakeErr = null;

            try {
                boolean deadNode = isDeadNode(res.getNode().id());

                // Avoid stack trace if node has left grid.
                if (deadNode) {
                    U.warn(log, "Failed to send job request because remote node left grid (if failover is enabled, " +
                        "will attempt fail-over to another node) [node=" + node + ", taskName=" + ses.getTaskName() +
                        ", taskSesId=" + ses.getId() + ", jobSesId=" + res.getJobContext().getJobId() + ']');

                    fakeErr = new ClusterTopologyException("Failed to send job due to node failure: " + node, e);
                }
                else
                    U.error(log, "Failed to send job request: " + req, e);
            }
            catch (IgniteClientDisconnectedCheckedException e0) {
                if (log.isDebugEnabled())
                    log.debug("Failed to send job request, client disconnected [node=" + node +
                        ", taskName=" + ses.getTaskName() + ", taskSesId=" + ses.getId() +
                        ", jobSesId=" + res.getJobContext().getJobId() + ']');

                fakeErr = U.convertException(e0);
            }

            GridJobExecuteResponse fakeRes = new GridJobExecuteResponse(node.id(), ses.getId(),
                res.getJobContext().getJobId(), null, null, null, null, null, null, false, null);

            if (fakeErr == null)
                fakeErr = U.convertException(e);

            fakeRes.setFakeException(fakeErr);

            onResponse(fakeRes);
        }
    }

    /**
     * @param nodeId Node ID.
     */
    void onNodeLeft(UUID nodeId) {
        Collection<GridJobExecuteResponse> resList = null;

        synchronized (mux) {
            // First check if job cares about future responses.
            if (state != State.WAITING)
                return;

            if (jobRes != null) {
                for (GridJobResultImpl jr : jobRes.values()) {
                    if (!jr.hasResponse() && jr.getNode().id().equals(nodeId)) {
                        if (log.isDebugEnabled())
                            log.debug("Creating fake response because node left grid [job=" + jr.getJob() +
                                ", nodeId=" + nodeId + ']');

                        // Artificial response in case a job is waiting for a response
                        // from a node that no longer exists.
                        GridJobExecuteResponse fakeRes = new GridJobExecuteResponse(nodeId, ses.getId(),
                            jr.getJobContext().getJobId(), null, null, null, null, null, null, false, null);

                        fakeRes.setFakeException(new ClusterTopologyException("Node has left grid: " + nodeId));

                        if (resList == null)
                            resList = new ArrayList<>();

                        resList.add(fakeRes);
                    }
                }
            }
        }

        if (resList == null)
            return;

        // Simulate responses without holding synchronization.
        for (GridJobExecuteResponse res : resList) {
            if (log.isDebugEnabled())
                log.debug("Simulating fake response from left node [res=" + res + ", nodeId=" + nodeId + ']');

            onResponse(res);
        }
    }

    /**
     * @param evtType Event type.
     * @param msg Event message.
     */
    private void recordTaskEvent(int evtType, String msg) {
        if (!internal && ctx.event().isRecordable(evtType)) {
            Event evt = new TaskEvent(
                ctx.discovery().localNode(),
                msg,
                evtType,
                ses.getId(),
                ses.getTaskName(),
                ses.getTaskClassName(),
                internal,
                subjId);

            ctx.event().record(evt);
        }
    }

    /**
     * @param evtType Event type.
     * @param jobId Job ID.
     * @param evtNode Event node.
     * @param msg Event message.
     */
    private void recordJobEvent(int evtType, IgniteUuid jobId, ClusterNode evtNode, String msg) {
        if (!internal && ctx.event().isRecordable(evtType)) {
            JobEvent evt = new JobEvent();

            evt.message(msg);
            evt.node(ctx.discovery().localNode());
            evt.taskName(ses.getTaskName());
            evt.taskClassName(ses.getTaskClassName());
            evt.taskSessionId(ses.getId());
            evt.taskNode(evtNode);
            evt.jobId(jobId);
            evt.type(evtType);
            evt.taskSubjectId(ses.subjectId());

            ctx.event().record(evt);
        }
    }

    /**
     * @return Collection of job results.
     */
    private List<ComputeJobResult> getRemoteResults() {
        assert Thread.holdsLock(mux);

        List<ComputeJobResult> results = new ArrayList<>(jobRes.size());

        for (GridJobResultImpl jobResult : jobRes.values())
            if (jobResult.hasResponse())
                results.add(jobResult);

        return results;
    }

    /**
     * @param res Task result.
     * @param e Exception.
     */
    void finishTask(@Nullable R res, @Nullable Throwable e) {
        finishTask(res, e, true);
    }

    /**
     * @param res Task result.
     * @param e Exception.
     * @param cancelChildren Whether to cancel children in case the task becomes cancelled.
     */
    void finishTask(@Nullable R res, @Nullable Throwable e, boolean cancelChildren) {
        // Avoid finishing a job more than once from
        // different threads.
        synchronized (mux) {
            if (state == State.REDUCING || state == State.FINISHING)
                return;

            state = State.FINISHING;
        }

        try {
            if (e == null)
                recordTaskEvent(EVT_TASK_FINISHED, "Task finished.");
            else
                recordTaskEvent(EVT_TASK_FAILED, "Task failed.");

            // Clean resources prior to finishing future.
            evtLsnr.onTaskFinished(this);

            if (cancelChildren)
                cancelChildren();
        }
        // Once we marked task as 'Finishing' we must complete it.
        finally {
            fut.onDone(res, e);

            ses.onDone();
        }
    }

    /**
     * Checks whether node is alive or dead.
     *
     * @param uid UID of node to check.
     * @return {@code true} if node is dead, {@code false} if node is alive.
     * @throws IgniteClientDisconnectedCheckedException If ping failed when client disconnected.
     */
    private boolean isDeadNode(UUID uid) throws IgniteClientDisconnectedCheckedException {
        return ctx.discovery().node(uid) == null || !ctx.discovery().pingNode(uid);
    }

    /** {@inheritDoc} */
    @SuppressWarnings("unchecked")
    @Override public boolean equals(Object obj) {
        if (this == obj)
            return true;

        if (obj == null)
            return false;

        assert obj instanceof GridTaskWorker;

        return ses.getId().equals(((GridTaskWorker<T, R>)obj).ses.getId());
    }

    /** {@inheritDoc} */
    @Override public int hashCode() {
        return ses.getId().hashCode();
    }

    /** {@inheritDoc} */
    @Override public String toString() {
        synchronized (mux) {
            return S.toString(GridTaskWorker.class, this);
        }
    }
}