/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.drill.exec.work; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.Executor; import com.codahale.metrics.Counter; import org.apache.drill.common.SelfCleaningRunnable; import org.apache.drill.common.concurrent.ExtendedLatch; import org.apache.drill.exec.coord.ClusterCoordinator; import org.apache.drill.exec.metrics.DrillMetrics; import org.apache.drill.exec.proto.BitControl.FragmentStatus; import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint; import org.apache.drill.exec.proto.ExecProtos.FragmentHandle; import org.apache.drill.exec.proto.GeneralRPCProtos.Ack; import org.apache.drill.exec.proto.UserBitShared.QueryId; import org.apache.drill.exec.proto.helper.QueryIdHelper; import org.apache.drill.exec.rpc.DrillRpcFuture; import org.apache.drill.exec.rpc.RpcException; import org.apache.drill.exec.rpc.control.Controller; import org.apache.drill.exec.rpc.control.WorkEventBus; import org.apache.drill.exec.rpc.data.DataConnectionCreator; import org.apache.drill.exec.server.BootStrapContext; import org.apache.drill.exec.server.DrillbitContext; import org.apache.drill.exec.store.sys.PersistentStoreProvider; import org.apache.drill.exec.work.batch.ControlMessageHandler; import org.apache.drill.exec.work.foreman.Foreman; import org.apache.drill.exec.work.foreman.QueryManager; import org.apache.drill.exec.work.fragment.FragmentExecutor; import org.apache.drill.exec.work.fragment.FragmentManager; import org.apache.drill.exec.work.user.UserWorker; import com.codahale.metrics.Gauge; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; /** * Manages the running fragments in a Drillbit. Periodically requests run-time stats updates from fragments * running elsewhere. */ public class WorkManager implements AutoCloseable { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(WorkManager.class); /* * We use a {@see java.util.concurrent.ConcurrentHashMap} because it promises never to throw a * {@see java.util.ConcurrentModificationException}; we need that because the statusThread may * iterate over the map while other threads add FragmentExecutors via the {@see #WorkerBee}. */ private final Map<FragmentHandle, FragmentExecutor> runningFragments = new ConcurrentHashMap<>(); private final ConcurrentMap<QueryId, Foreman> queries = Maps.newConcurrentMap(); private final BootStrapContext bContext; private DrillbitContext dContext; private final ControlMessageHandler controlMessageWorker; private final UserWorker userWorker; private final WorkerBee bee; private final WorkEventBus workBus; private final Executor executor; private final StatusThread statusThread; /** * How often the StatusThread collects statistics about running fragments. */ private final static int STATUS_PERIOD_SECONDS = 5; public WorkManager(final BootStrapContext context) { this.bContext = context; bee = new WorkerBee(); // TODO should this just be an interface? workBus = new WorkEventBus(); // TODO should this just be an interface? executor = context.getExecutor(); // TODO references to this escape here (via WorkerBee) before construction is done controlMessageWorker = new ControlMessageHandler(bee); // TODO getFragmentRunner(), getForemanForQueryId() userWorker = new UserWorker(bee); // TODO should just be an interface? addNewForeman(), getForemanForQueryId() statusThread = new StatusThread(); } public void start( final DrillbitEndpoint endpoint, final Controller controller, final DataConnectionCreator data, final ClusterCoordinator coord, final PersistentStoreProvider provider) { dContext = new DrillbitContext(endpoint, bContext, coord, controller, data, workBus, provider); statusThread.start(); DrillMetrics.register("drill.fragments.running", new Gauge<Integer>() { @Override public Integer getValue() { return runningFragments.size(); } }); } public Executor getExecutor() { return executor; } public WorkEventBus getWorkBus() { return workBus; } public ControlMessageHandler getControlMessageHandler() { return controlMessageWorker; } public UserWorker getUserWorker() { return userWorker; } public WorkerBee getBee() { return bee; } @Override public void close() throws Exception { statusThread.interrupt(); final long numRunningFragments = runningFragments.size(); if (numRunningFragments != 0) { logger.warn("Closing WorkManager but there are {} running fragments.", numRunningFragments); if (logger.isDebugEnabled()) { for (final FragmentHandle handle : runningFragments.keySet()) { logger.debug("Fragment still running: {} status: {}", QueryIdHelper.getQueryIdentifier(handle), runningFragments.get(handle).getStatus()); } } } getContext().close(); } public DrillbitContext getContext() { return dContext; } private ExtendedLatch exitLatch = null; // used to wait to exit when things are still running /** * Waits until it is safe to exit. Blocks until all currently running fragments have completed. * * <p>This is intended to be used by {@link org.apache.drill.exec.server.Drillbit#close()}.</p> */ public void waitToExit() { synchronized(this) { if (queries.isEmpty() && runningFragments.isEmpty()) { return; } exitLatch = new ExtendedLatch(); } // Wait for at most 5 seconds or until the latch is released. exitLatch.awaitUninterruptibly(5000); } /** * If it is safe to exit, and the exitLatch is in use, signals it so that waitToExit() will * unblock. */ private void indicateIfSafeToExit() { synchronized(this) { if (exitLatch != null) { if (queries.isEmpty() && runningFragments.isEmpty()) { exitLatch.countDown(); } } } } /** * Narrowed interface to WorkManager that is made available to tasks it is managing. */ public class WorkerBee { public void addNewForeman(final Foreman foreman) { queries.put(foreman.getQueryId(), foreman); // We're relying on the Foreman to clean itself up with retireForeman(). executor.execute(foreman); } /** * Add a self contained runnable work to executor service. * @param runnable */ public void addNewWork(final Runnable runnable) { executor.execute(runnable); } /** * Remove the given Foreman from the running query list. * * <p>The running query list is a bit of a misnomer, because it doesn't * necessarily mean that {@link org.apache.drill.exec.work.foreman.Foreman#run()} * is executing. That only lasts for the duration of query setup, after which * the Foreman instance survives as a state machine that reacts to events * from the local root fragment as well as RPC responses from remote Drillbits.</p> * * @param foreman the Foreman to retire */ public void retireForeman(final Foreman foreman) { Preconditions.checkNotNull(foreman); final QueryId queryId = foreman.getQueryId(); final boolean wasRemoved = queries.remove(queryId, foreman); if (!wasRemoved) { logger.warn("Couldn't find retiring Foreman for query " + queryId); // throw new IllegalStateException("Couldn't find retiring Foreman for query " + queryId); } indicateIfSafeToExit(); } public Foreman getForemanForQueryId(final QueryId queryId) { return queries.get(queryId); } public DrillbitContext getContext() { return dContext; } /** * Currently used to start a root fragment that is not blocked on data, and leaf fragments. * @param fragmentExecutor the executor to run */ public void addFragmentRunner(final FragmentExecutor fragmentExecutor) { final FragmentHandle fragmentHandle = fragmentExecutor.getContext().getHandle(); runningFragments.put(fragmentHandle, fragmentExecutor); executor.execute(new SelfCleaningRunnable(fragmentExecutor) { @Override protected void cleanup() { runningFragments.remove(fragmentHandle); indicateIfSafeToExit(); } }); } /** * Currently used to start a root fragment that is blocked on data, and intermediate fragments. This method is * called, when the first batch arrives. * * @param fragmentManager the manager for the fragment */ public void startFragmentPendingRemote(final FragmentManager fragmentManager) { final FragmentHandle fragmentHandle = fragmentManager.getHandle(); final FragmentExecutor fragmentExecutor = fragmentManager.getRunnable(); if (fragmentExecutor == null) { // the fragment was most likely cancelled return; } runningFragments.put(fragmentHandle, fragmentExecutor); executor.execute(new SelfCleaningRunnable(fragmentExecutor) { @Override protected void cleanup() { runningFragments.remove(fragmentHandle); workBus.removeFragmentManager(fragmentHandle); indicateIfSafeToExit(); } }); } public FragmentExecutor getFragmentRunner(final FragmentHandle handle) { return runningFragments.get(handle); } } /** * Periodically gather current statistics. {@link QueryManager} uses a FragmentStatusListener to * maintain changes to state, and should be current. However, we want to collect current statistics * about RUNNING queries, such as current memory consumption, number of rows processed, and so on. * The FragmentStatusListener only tracks changes to state, so the statistics kept there will be * stale; this thread probes for current values. */ private class StatusThread extends Thread { public StatusThread() { // assume this thread is created by a non-daemon thread setName("WorkManager.StatusThread"); } @Override public void run() { while(true) { final Controller controller = dContext.getController(); final List<DrillRpcFuture<Ack>> futures = Lists.newArrayList(); for(final FragmentExecutor fragmentExecutor : runningFragments.values()) { final FragmentStatus status = fragmentExecutor.getStatus(); if (status == null) { continue; } final DrillbitEndpoint ep = fragmentExecutor.getContext().getForemanEndpoint(); futures.add(controller.getTunnel(ep).sendFragmentStatus(status)); } for(final DrillRpcFuture<Ack> future : futures) { try { future.checkedGet(); } catch(final RpcException ex) { logger.info("Failure while sending intermediate fragment status to Foreman", ex); } } try { Thread.sleep(STATUS_PERIOD_SECONDS * 1000); } catch(final InterruptedException e) { // Preserve evidence that the interruption occurred so that code higher up on the call stack can learn of the // interruption and respond to it if it wants to. Thread.currentThread().interrupt(); // exit status thread on interrupt. break; } } } } }