/*
 * Copyright 2016-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may obtain
 * a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */

package com.facebook.buck.distributed;

import com.facebook.buck.distributed.thrift.CoordinatorService;
import com.facebook.buck.distributed.thrift.FinishedBuildingRequest;
import com.facebook.buck.distributed.thrift.FinishedBuildingResponse;
import com.facebook.buck.distributed.thrift.GetTargetsToBuildAction;
import com.facebook.buck.distributed.thrift.GetTargetsToBuildRequest;
import com.facebook.buck.distributed.thrift.GetTargetsToBuildResponse;
import com.facebook.buck.distributed.thrift.StampedeId;
import com.facebook.buck.log.Logger;
import com.facebook.buck.slb.ThriftException;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import java.io.Closeable;
import java.io.IOException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import javax.annotation.Nullable;
import org.apache.thrift.TException;
import org.apache.thrift.server.TThreadedSelectorServer;
import org.apache.thrift.transport.TNonblockingServerSocket;
import org.apache.thrift.transport.TTransportException;

/**
 * Thrift server run by the distributed build coordinator. Minions call it to obtain build targets
 * ({@code getTargetsToBuild}) and to report that they are done with them ({@code
 * finishedBuilding}).
 *
 * <p>The server lifecycle fields ({@code transport}, {@code server}, {@code serverThread}) and all
 * calls into the {@link MinionWorkloadAllocator} are guarded by a single internal lock.
 */
public class ThriftCoordinatorServer implements Closeable {

  private static final Logger LOG = Logger.get(ThriftCoordinatorServer.class);

  /** Maximum time {@link #stop()} waits for the server thread to exit. */
  private static final long MAX_TEAR_DOWN_MILLIS = TimeUnit.SECONDS.toMillis(2);

  /** Maximum time {@link #waitUntilBuildCompletesAndReturnExitCode()} blocks. */
  private static final long MAX_DIST_BUILD_DURATION_MILLIS = TimeUnit.HOURS.toMillis(2);

  // TODO(ruibm): Find some heuristic to compute this.
  private static final int MAX_TARGETS_ALLOCATED_PER_MINION = 2;

  private final MinionWorkloadAllocator allocator;
  private final int port;
  private final CoordinatorServiceHandler handler;
  private final CoordinatorService.Processor<CoordinatorService.Iface> processor;
  private final Object lock;
  private final CompletableFuture<Integer> exitCodeFuture;
  private final StampedeId stampedeId;

  @Nullable private TNonblockingServerSocket transport;
  @Nullable private TThreadedSelectorServer server;
  @Nullable private Thread serverThread;

  /**
   * Creates a coordinator server; nothing is bound or started until {@link #start()} is called.
   *
   * @param port port handed to the server socket on {@link #start()}
   * @param queue queue of build targets handed to the workload allocator
   * @param stampedeId id of the build; every incoming request must carry an equal id
   */
  public ThriftCoordinatorServer(int port, BuildTargetsQueue queue, StampedeId stampedeId) {
    this.stampedeId = stampedeId;
    this.lock = new Object();
    this.exitCodeFuture = new CompletableFuture<>();
    this.allocator = new MinionWorkloadAllocator(queue, MAX_TARGETS_ALLOCATED_PER_MINION);
    this.port = port;
    this.handler = new CoordinatorServiceHandler();
    this.processor = new CoordinatorService.Processor<>(handler);
  }

  /**
   * Opens the server socket and starts serving requests on a new thread.
   *
   * @return this server
   * @throws IOException if the server socket cannot be created
   */
  public ThriftCoordinatorServer start() throws IOException {
    synchronized (lock) {
      try {
        transport = new TNonblockingServerSocket(this.port);
      } catch (TTransportException e) {
        throw new ThriftException(e);
      }

      TThreadedSelectorServer.Args serverArgs = new TThreadedSelectorServer.Args(transport);
      serverArgs.processor(processor);

      // Hand the thread the instance itself rather than re-reading the mutable field: stop() may
      // null the field out before the new thread gets to run.
      TThreadedSelectorServer newServer = new TThreadedSelectorServer(serverArgs);
      server = newServer;
      serverThread = new Thread(newServer::serve);
      serverThread.start();
    }

    return this;
  }

  /**
   * Stops the server and waits up to {@code MAX_TEAR_DOWN_MILLIS} for its thread to exit. If the
   * thread is still alive after that, a warning is logged and the method returns normally.
   *
   * @return this server
   * @throws NullPointerException if the server is not currently running
   * @throws IOException if the calling thread is interrupted while waiting; the interrupt status is
   *     restored before throwing
   */
  public ThriftCoordinatorServer stop() throws IOException {
    synchronized (lock) {
      Preconditions.checkNotNull(server, "Server has already been stopped.").stop();
      server = null;
      transport = null;

      Thread thread = Preconditions.checkNotNull(serverThread);
      serverThread = null;
      try {
        thread.join(MAX_TEAR_DOWN_MILLIS);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IOException(
            "Interrupted while waiting for the coordinator thrift server to tear down.", e);
      }

      // join(millis) returns silently on timeout, so the timeout has to be detected explicitly.
      if (thread.isAlive()) {
        LOG.warn(
            String.format(
                "Coordinator thrift server took too long to tear down (more than [%d] millis).",
                MAX_TEAR_DOWN_MILLIS));
      }
    }

    return this;
  }

  /** @return the port this server was configured with */
  public int getPort() {
    return port;
  }

  /**
   * Stops the server if it is running; does nothing otherwise.
   *
   * @throws IOException if {@link #stop()} fails
   */
  @Override
  public void close() throws IOException {
    // Check and stop under the same lock so a concurrent stop() cannot slip in between.
    synchronized (lock) {
      if (server != null) {
        stop();
      }
    }
  }

  /** @return a future that completes with the exit code of the distributed build */
  public Future<Integer> getExitCode() {
    return exitCodeFuture;
  }

  /** Records the build exit code; only the first call has any effect. */
  private void setBuildExitCode(int exitCode) {
    exitCodeFuture.complete(exitCode);
  }

  /**
   * Blocks until the build exit code is available, for at most {@code
   * MAX_DIST_BUILD_DURATION_MILLIS}.
   *
   * @return the exit code of the distributed build
   * @throws RuntimeException if the wait times out, fails, or is interrupted; on interruption the
   *     interrupt status of the calling thread is restored
   */
  public int waitUntilBuildCompletesAndReturnExitCode() {
    try {
      LOG.verbose("Coordinator going into blocking wait mode...");
      return getExitCode().get(MAX_DIST_BUILD_DURATION_MILLIS, TimeUnit.MILLISECONDS);
    } catch (ExecutionException | TimeoutException | InterruptedException e) {
      if (e instanceof InterruptedException) {
        Thread.currentThread().interrupt();
      }
      LOG.error(e);
      throw new RuntimeException("The distributed build Coordinator was interrupted.", e);
    }
  }

  /** Handles the coordinator thrift calls made by minions. */
  private class CoordinatorServiceHandler implements CoordinatorService.Iface {

    /**
     * Hands the calling minion its next batch of targets, or tells it to retry later or to close.
     *
     * @throws IllegalArgumentException if the stampede id does not match or no minion id is set
     */
    @Override
    public GetTargetsToBuildResponse getTargetsToBuild(GetTargetsToBuildRequest request)
        throws TException {
      LOG.debug(
          String.format("Minion [%s] is requesting for new targets to build.", request.minionId));
      checkBuildId(request.getStampedeId());

      synchronized (lock) {
        Preconditions.checkArgument(request.isSetMinionId());
        GetTargetsToBuildResponse response = new GetTargetsToBuildResponse();

        // The exit code is also set when a minion reports a failure, in which case the allocator
        // may never report the build as finished. Stop handing out work in that case too, matching
        // what finishedBuilding() tells minions.
        if (exitCodeFuture.isDone() || allocator.isBuildFinished()) {
          LOG.debug(
              String.format(
                  "Minion [%s] is being told to exit because the build has finished.",
                  request.minionId));
          return response.setAction(GetTargetsToBuildAction.CLOSE_CLIENT);
        }

        ImmutableList<String> targets = allocator.getTargetsToBuild(request.getMinionId());
        if (targets.isEmpty()) {
          LOG.debug(
              String.format(
                  "Minion [%s] is being told to retry getting more workload later.",
                  request.minionId));
          return response.setAction(GetTargetsToBuildAction.RETRY_LATER);
        }

        LOG.debug(
            String.format(
                "Minion [%s] is being handed [%d] BuildTargets to build: [%s]",
                request.minionId, targets.size(), Joiner.on(", ").join(targets)));
        return response.setAction(GetTargetsToBuildAction.BUILD_TARGETS).setBuildTargets(targets);
      }
    }

    /**
     * Records that a minion finished its current targets and tells it whether to keep building.
     *
     * <p>A non-zero exit code from the minion becomes the build exit code. A zero exit code marks
     * the minion's targets as finished and, once the allocator reports the whole build finished,
     * sets the build exit code to zero.
     *
     * @throws IllegalArgumentException if the stampede id does not match, or the minion id or
     *     build exit code is not set
     */
    @Override
    public FinishedBuildingResponse finishedBuilding(FinishedBuildingRequest request)
        throws TException {
      LOG.info(String.format("Minion [%s] has finished building.", request.getMinionId()));
      checkBuildId(request.getStampedeId());

      synchronized (lock) {
        Preconditions.checkArgument(request.isSetMinionId());
        Preconditions.checkArgument(request.isSetBuildExitCode());
        FinishedBuildingResponse response = new FinishedBuildingResponse();

        if (request.getBuildExitCode() != 0) {
          setBuildExitCode(request.getBuildExitCode());
          response.setContinueBuilding(false);
          return response;
        }

        allocator.finishedBuildingTargets(request.getMinionId());
        if (getExitCode().isDone()) {
          response.setContinueBuilding(false);
        } else if (allocator.isBuildFinished()) {
          // Build has finished in all Minions successfully!!
          setBuildExitCode(0);
          response.setContinueBuilding(false);
        } else {
          response.setContinueBuilding(true);
        }
        return response;
      }
    }

    /**
     * Verifies that a request belongs to the build this server coordinates.
     *
     * @throws IllegalArgumentException if {@code buildId} is null or not equal to this server's id
     */
    private void checkBuildId(@Nullable StampedeId buildId) {
      // Message arguments are evaluated before checkArgument runs, so guard against a missing id
      // here; otherwise a null id surfaces as a NullPointerException instead.
      Preconditions.checkArgument(
          stampedeId.equals(buildId),
          "Request stampede build id [%s] does not match the current build id [%s].",
          buildId == null ? null : buildId.getId(),
          stampedeId.getId());
    }
  }
}