/* * Copyright 2016-present Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may obtain * a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. */ package com.facebook.buck.distributed; import com.facebook.buck.distributed.thrift.BuckVersion; import com.facebook.buck.distributed.thrift.BuildJob; import com.facebook.buck.distributed.thrift.BuildJobState; import com.facebook.buck.distributed.thrift.BuildMode; import com.facebook.buck.distributed.thrift.BuildSlaveEvent; import com.facebook.buck.distributed.thrift.BuildSlaveEventsQuery; import com.facebook.buck.distributed.thrift.BuildSlaveStatus; import com.facebook.buck.distributed.thrift.BuildStatus; import com.facebook.buck.distributed.thrift.LogLineBatchRequest; import com.facebook.buck.distributed.thrift.LogRecord; import com.facebook.buck.distributed.thrift.MultiGetBuildSlaveLogDirResponse; import com.facebook.buck.distributed.thrift.MultiGetBuildSlaveRealTimeLogsResponse; import com.facebook.buck.distributed.thrift.RunId; import com.facebook.buck.distributed.thrift.StampedeId; import com.facebook.buck.event.BuckEventBus; import com.facebook.buck.event.ConsoleEvent; import com.facebook.buck.io.ProjectFilesystem; import com.facebook.buck.log.Logger; import com.facebook.buck.model.Pair; import com.facebook.buck.util.HumanReadableException; import com.facebook.buck.util.cache.FileHashCache; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import java.io.IOException; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.concurrent.ExecutionException; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; public class DistBuildClientExecutor { private static final Logger LOG = Logger.get(DistBuildClientExecutor.class); private static final DateFormat DATE_FORMAT = new SimpleDateFormat("[yyyy-MM-dd HH:mm:ss.SSS]"); private static final int DEFAULT_STATUS_POLL_INTERVAL_MILLIS = 1000; private final DistBuildService distBuildService; private final DistBuildLogStateTracker distBuildLogStateTracker; private final BuildJobState buildJobState; private final BuckVersion buckVersion; private final DistBuildClientStatsTracker distBuildClientStats; private final ScheduledExecutorService scheduler; private final int statusPollIntervalMillis; private final Map<RunId, Integer> nextEventIdBySlaveRunId = new HashMap<>(); public static class ExecutionResult { public final StampedeId stampedeId; public final int exitCode; public ExecutionResult(StampedeId stampedeId, int exitCode) { this.stampedeId = stampedeId; this.exitCode = exitCode; } } public DistBuildClientExecutor( BuildJobState buildJobState, DistBuildService distBuildService, DistBuildLogStateTracker distBuildLogStateTracker, BuckVersion buckVersion, DistBuildClientStatsTracker distBuildClientStats, ScheduledExecutorService scheduler, int statusPollIntervalMillis) { this.buildJobState = buildJobState; this.distBuildService = distBuildService; this.distBuildLogStateTracker = distBuildLogStateTracker; this.buckVersion = buckVersion; this.distBuildClientStats = distBuildClientStats; this.scheduler = scheduler; this.statusPollIntervalMillis = statusPollIntervalMillis; } public DistBuildClientExecutor( BuildJobState buildJobState, DistBuildService distBuildService, DistBuildLogStateTracker distBuildLogStateTracker, BuckVersion buckVersion, DistBuildClientStatsTracker distBuildClientStats, ScheduledExecutorService scheduler) { this( buildJobState, distBuildService, distBuildLogStateTracker, buckVersion, distBuildClientStats, scheduler, DEFAULT_STATUS_POLL_INTERVAL_MILLIS); } private BuildJob initBuild( ListeningExecutorService networkExecutorService, ProjectFilesystem projectFilesystem, FileHashCache fileHashCache, BuckEventBus eventBus, BuildMode buildMode, int numberOfMinions) throws IOException, InterruptedException { distBuildClientStats.startCreateBuildTimer(); BuildJob job = distBuildService.createBuild(buildMode, numberOfMinions); distBuildClientStats.stopCreateBuildTimer(); final StampedeId stampedeId = job.getStampedeId(); distBuildClientStats.setStampedeId(stampedeId.getId()); LOG.info("Created job. Build id = " + stampedeId.getId()); logDebugInfo(job); postDistBuildStatusEvent(eventBus, job, ImmutableList.of(), "UPLOADING DATA"); List<ListenableFuture<?>> asyncJobs = new LinkedList<>(); LOG.info("Uploading local changes."); asyncJobs.add( distBuildService.uploadMissingFilesAsync( buildJobState.fileHashes, distBuildClientStats, networkExecutorService)); LOG.info("Uploading target graph."); asyncJobs.add( networkExecutorService.submit( () -> { try { distBuildService.uploadTargetGraph(buildJobState, stampedeId, distBuildClientStats); } catch (IOException e) { throw new RuntimeException("Failed to upload target graph with exception.", e); } })); LOG.info("Uploading buck dot-files."); asyncJobs.add( distBuildService.uploadBuckDotFilesAsync( stampedeId, projectFilesystem, fileHashCache, distBuildClientStats, networkExecutorService)); try { Futures.allAsList(asyncJobs).get(); } catch (ExecutionException e) { LOG.error("Upload failed."); throw new RuntimeException(e); } postDistBuildStatusEvent(eventBus, job, ImmutableList.of(), "STARTING REMOTE BUILD"); distBuildService.setBuckVersion(stampedeId, buckVersion, distBuildClientStats); LOG.info("Set Buck Version. Build status: " + job.getStatus().toString()); distBuildClientStats.startPerformDistributedBuildTimer(); job = distBuildService.startBuild(stampedeId); LOG.info("Started job. Build status: " + job.getStatus().toString()); logDebugInfo(job); return job; } private void checkTerminateScheduledUpdates( BuildJob job, Optional<List<BuildSlaveStatus>> slaveStatuses) { if (job.getStatus().equals(BuildStatus.FINISHED_SUCCESSFULLY) || job.getStatus().equals(BuildStatus.FAILED)) { // Terminate scheduled tasks with a custom exception to indicate success. throw new JobCompletedException(job, slaveStatuses); } } private BuildJob fetchBuildInformationFromServer( BuildJob job, BuckEventBus eventBus, ListeningExecutorService networkExecutorService) { final StampedeId stampedeId = job.getStampedeId(); try { job = distBuildService.getCurrentBuildJobState(stampedeId); } catch (IOException e) { throw new RuntimeException(e); } LOG.info("Got build status: " + job.getStatus().toString()); if (!job.isSetSlaveInfoByRunId()) { postDistBuildStatusEvent(eventBus, job, ImmutableList.of()); checkTerminateScheduledUpdates(job, Optional.empty()); return job; } ListenableFuture<?> slaveEventsFuture = fetchAndPostBuildSlaveEventsAsync(job, eventBus, networkExecutorService); ListenableFuture<List<BuildSlaveStatus>> slaveStatusesFuture = fetchBuildSlaveStatusesAsync(job, networkExecutorService); ListenableFuture<?> logStreamingFuture = fetchAndProcessRealTimeSlaveLogsAsync(job, networkExecutorService); List<BuildSlaveStatus> slaveStatuses = ImmutableList.of(); try { slaveStatuses = slaveStatusesFuture.get(); postDistBuildStatusEvent(eventBus, job, slaveStatuses); slaveEventsFuture.get(); logStreamingFuture.get(); } catch (ExecutionException e) { LOG.error(e, "Failed to get slave statuses, events or logs."); } catch (InterruptedException e) { throw new RuntimeException(e); } checkTerminateScheduledUpdates(job, Optional.of(slaveStatuses)); return job; } public ExecutionResult executeAndPrintFailuresToEventBus( ListeningExecutorService networkExecutorService, ProjectFilesystem projectFilesystem, FileHashCache fileHashCache, BuckEventBus eventBus, BuildMode buildMode, int numberOfMinions) throws IOException, InterruptedException { final BuildJob initJob = initBuild( networkExecutorService, projectFilesystem, fileHashCache, eventBus, buildMode, numberOfMinions); nextEventIdBySlaveRunId.clear(); ScheduledFuture<?> distBuildStatusUpdatingFuture = scheduler.scheduleWithFixedDelay( () -> fetchBuildInformationFromServer(initJob, eventBus, networkExecutorService), 0, statusPollIntervalMillis, TimeUnit.MILLISECONDS); final List<BuildSlaveStatus> buildSlaveStatusList; BuildJob finalJob; try { distBuildStatusUpdatingFuture.get(); throw new RuntimeException("Unreachable State."); } catch (ExecutionException e) { if (e.getCause() instanceof JobCompletedException) { // Everything is awesome. finalJob = ((JobCompletedException) e.getCause()).getDistBuildJob(); buildSlaveStatusList = ((JobCompletedException) e.getCause()) .getBuildSlaveStatuses() .orElse(ImmutableList.of()); } else { throw new HumanReadableException(e, "Failed to fetch build information from server."); } } finally { distBuildClientStats.stopPerformDistributedBuildTimer(); } postDistBuildStatusEvent(eventBus, finalJob, buildSlaveStatusList, "FETCHING LOG DIRS"); materializeSlaveLogDirs(finalJob); if (finalJob.getStatus().equals(BuildStatus.FINISHED_SUCCESSFULLY)) { LOG.info("DistBuild was successful!"); postDistBuildStatusEvent(eventBus, finalJob, buildSlaveStatusList, "FINISHED"); } else { LOG.info("DistBuild was not successful!"); postDistBuildStatusEvent(eventBus, finalJob, buildSlaveStatusList, "FAILED"); } logDebugInfo(finalJob); return new ExecutionResult( finalJob.getStampedeId(), finalJob.getStatus().equals(BuildStatus.FINISHED_SUCCESSFULLY) ? 0 : 1); } private void postDistBuildStatusEvent( BuckEventBus eventBus, BuildJob job, List<BuildSlaveStatus> slaveStatuses) { postDistBuildStatusEvent(eventBus, job, slaveStatuses, null); } private void postDistBuildStatusEvent( BuckEventBus eventBus, BuildJob job, List<BuildSlaveStatus> slaveStatuses, String statusOverride) { Optional<List<LogRecord>> logBook = Optional.empty(); Optional<String> lastLine = Optional.empty(); if (job.isSetDebug() && job.getDebug().isSetLogBook()) { logBook = Optional.of(job.getDebug().getLogBook()); if (logBook.get().size() > 0) { lastLine = Optional.of(logBook.get().get(logBook.get().size() - 1).getName()); } } String stage = statusOverride == null ? job.getStatus().toString() : statusOverride; DistBuildStatus status = DistBuildStatus.builder() .setStatus(stage) .setMessage(lastLine) .setLogBook(logBook) .setSlaveStatuses(slaveStatuses) .build(); eventBus.post(new DistBuildStatusEvent(status)); } private void logDebugInfo(BuildJob job) { if (job.isSetDebug() && job.getDebug().getLogBook().size() > 0) { LOG.debug("Debug info: "); for (LogRecord log : job.getDebug().getLogBook()) { LOG.debug(DATE_FORMAT.format(new Date(log.getTimestampMillis())) + log.getName()); } } } @VisibleForTesting ListenableFuture<List<BuildSlaveStatus>> fetchBuildSlaveStatusesAsync( BuildJob job, ListeningExecutorService networkExecutorService) { if (!job.isSetSlaveInfoByRunId()) { return Futures.immediateFuture(ImmutableList.of()); } StampedeId stampedeId = job.getStampedeId(); List<ListenableFuture<Optional<BuildSlaveStatus>>> slaveStatusFutures = new LinkedList<>(); // TODO(shivanker, alisdair): Replace this with a multiFetch request. for (String id : job.getSlaveInfoByRunId().keySet()) { RunId runId = new RunId(); runId.setId(id); slaveStatusFutures.add( networkExecutorService.submit( () -> distBuildService.fetchBuildSlaveStatus(stampedeId, runId))); } return Futures.transform( Futures.allAsList(slaveStatusFutures), slaveStatusList -> slaveStatusList .stream() .filter(Optional::isPresent) .map(x -> x.get()) .collect(Collectors.toList())); } @VisibleForTesting ListenableFuture<?> fetchAndPostBuildSlaveEventsAsync( BuildJob job, BuckEventBus eventBus, ListeningExecutorService networkExecutorService) { if (!job.isSetSlaveInfoByRunId()) { return Futures.immediateFuture(null); } StampedeId stampedeId = job.getStampedeId(); List<BuildSlaveEventsQuery> fetchEventQueries = new LinkedList<>(); for (String id : job.getSlaveInfoByRunId().keySet()) { RunId runId = new RunId(); runId.setId(id); fetchEventQueries.add( distBuildService.createBuildSlaveEventsQuery( stampedeId, runId, nextEventIdBySlaveRunId.getOrDefault(runId, 0))); } ListenableFuture<List<Pair<Integer, BuildSlaveEvent>>> fetchEventsFuture = networkExecutorService.submit( () -> distBuildService.multiGetBuildSlaveEvents(fetchEventQueries)); ListenableFuture<?> postEventsFuture = Futures.transform( fetchEventsFuture, sequenceIdAndEvents -> { // Sort such that all events from the same RunId come together, and in increasing order // of their sequence IDs. Also, we cannot directly sort sequenceIdAndEvents as it might // be an ImmutableList, hence we make it a stream. sequenceIdAndEvents = sequenceIdAndEvents .stream() .sorted( (event1, event2) -> { RunId runId1 = event1.getSecond().getRunId(); RunId runId2 = event2.getSecond().getRunId(); int result = runId1.compareTo(runId2); if (result == 0) { result = event1.getFirst().compareTo(event2.getFirst()); } return result; }) .collect(Collectors.toList()); for (Pair<Integer, BuildSlaveEvent> sequenceIdAndEvent : sequenceIdAndEvents) { BuildSlaveEvent slaveEvent = sequenceIdAndEvent.getSecond(); nextEventIdBySlaveRunId.put( slaveEvent.getRunId(), sequenceIdAndEvent.getFirst() + 1); switch (slaveEvent.getEventType()) { case CONSOLE_EVENT: ConsoleEvent consoleEvent = DistBuildUtil.createConsoleEvent(slaveEvent.getConsoleEvent()); eventBus.post(consoleEvent); break; case UNKNOWN: default: LOG.error( String.format( "Unknown type of BuildSlaveEvent received: [%d]", slaveEvent.getEventType().getValue())); break; } } return null; }); return postEventsFuture; } @VisibleForTesting ListenableFuture<?> fetchAndProcessRealTimeSlaveLogsAsync( BuildJob job, ListeningExecutorService networkExecutorService) { if (!job.isSetSlaveInfoByRunId()) { return Futures.immediateFuture(null); } List<LogLineBatchRequest> newLogLineRequests = distBuildLogStateTracker.createRealtimeLogRequests(job.getSlaveInfoByRunId().values()); if (newLogLineRequests.size() == 0) { return Futures.immediateFuture(null); } return networkExecutorService.submit( () -> { try { MultiGetBuildSlaveRealTimeLogsResponse slaveLogsResponse = distBuildService.fetchSlaveLogLines(job.getStampedeId(), newLogLineRequests); Preconditions.checkState(slaveLogsResponse.isSetMultiStreamLogs()); distBuildLogStateTracker.processStreamLogs(slaveLogsResponse.getMultiStreamLogs()); } catch (IOException e) { LOG.error(e, "Encountered error while streaming logs from BuildSlave(s)."); } }); } @VisibleForTesting void materializeSlaveLogDirs(BuildJob job) { if (!job.isSetSlaveInfoByRunId()) { return; } List<RunId> runIds = distBuildLogStateTracker.runIdsToMaterializeLogDirsFor(job.getSlaveInfoByRunId().values()); if (runIds.size() == 0) { return; } distBuildClientStats.startMaterializeSlaveLogsTimer(); try { MultiGetBuildSlaveLogDirResponse logDirsResponse = distBuildService.fetchBuildSlaveLogDir(job.stampedeId, runIds); Preconditions.checkState(logDirsResponse.isSetLogDirs()); distBuildLogStateTracker.materializeLogDirs(logDirsResponse.getLogDirs()); } catch (IOException ex) { LOG.error(ex, "Error fetching slave log directories from frontend."); } distBuildClientStats.stopMaterializeSlaveLogsTimer(); } public static final class JobCompletedException extends RuntimeException { private final BuildJob job; private final Optional<List<BuildSlaveStatus>> buildSlaveStatuses; private JobCompletedException( BuildJob job, Optional<List<BuildSlaveStatus>> buildSlaveStatuses) { super(String.format("DistBuild job completed with status: [%s]", job.getStatus().toString())); this.job = job; this.buildSlaveStatuses = buildSlaveStatuses; } public BuildJob getDistBuildJob() { return job; } public Optional<List<BuildSlaveStatus>> getBuildSlaveStatuses() { return buildSlaveStatuses; } } }