/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.event.query; import com.facebook.presto.client.FailureInfo; import com.facebook.presto.client.NodeVersion; import com.facebook.presto.connector.ConnectorId; import com.facebook.presto.eventlistener.EventListenerManager; import com.facebook.presto.execution.Column; import com.facebook.presto.execution.Input; import com.facebook.presto.execution.QueryInfo; import com.facebook.presto.execution.QueryStats; import com.facebook.presto.execution.StageInfo; import com.facebook.presto.execution.TaskId; import com.facebook.presto.execution.TaskInfo; import com.facebook.presto.execution.TaskState; import com.facebook.presto.operator.DriverStats; import com.facebook.presto.operator.OperatorStats; import com.facebook.presto.operator.TableFinishInfo; import com.facebook.presto.operator.TaskStats; import com.facebook.presto.spi.eventlistener.QueryCompletedEvent; import com.facebook.presto.spi.eventlistener.QueryContext; import com.facebook.presto.spi.eventlistener.QueryCreatedEvent; import com.facebook.presto.spi.eventlistener.QueryFailureInfo; import com.facebook.presto.spi.eventlistener.QueryIOMetadata; import com.facebook.presto.spi.eventlistener.QueryInputMetadata; import com.facebook.presto.spi.eventlistener.QueryMetadata; import com.facebook.presto.spi.eventlistener.QueryOutputMetadata; import com.facebook.presto.spi.eventlistener.QueryStatistics; import com.facebook.presto.spi.eventlistener.SplitCompletedEvent; import com.facebook.presto.spi.eventlistener.SplitFailureInfo; import com.facebook.presto.spi.eventlistener.SplitStatistics; import com.facebook.presto.transaction.TransactionId; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.airlift.json.JsonCodec; import io.airlift.log.Logger; import io.airlift.node.NodeInfo; import org.joda.time.DateTime; import javax.annotation.Nullable; import javax.inject.Inject; import java.time.Duration; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; import static java.lang.Math.max; import static java.lang.Math.toIntExact; import static java.time.Duration.ofMillis; import static java.time.Instant.ofEpochMilli; import static java.util.Objects.requireNonNull; public class QueryMonitor { private static final Logger log = Logger.get(QueryMonitor.class); private final JsonCodec<StageInfo> stageInfoCodec; private final ObjectMapper objectMapper; private final EventListenerManager eventListenerManager; private final String serverVersion; private final String serverAddress; private final String environment; private final QueryMonitorConfig config; @Inject public QueryMonitor( ObjectMapper objectMapper, JsonCodec<StageInfo> stageInfoCodec, EventListenerManager eventListenerManager, NodeInfo nodeInfo, NodeVersion nodeVersion, QueryMonitorConfig config) { this.eventListenerManager = requireNonNull(eventListenerManager, "eventListenerManager is null"); this.stageInfoCodec = requireNonNull(stageInfoCodec, "stageInfoCodec is null"); this.objectMapper = requireNonNull(objectMapper, "objectMapper is null"); this.serverVersion = requireNonNull(nodeVersion, "nodeVersion is null").toString(); this.serverAddress = requireNonNull(nodeInfo, "nodeInfo is null").getExternalAddress(); this.environment = requireNonNull(nodeInfo, "nodeInfo is null").getEnvironment(); this.config = requireNonNull(config, "config is null"); } public void queryCreatedEvent(QueryInfo queryInfo) { eventListenerManager.queryCreated( new QueryCreatedEvent( queryInfo.getQueryStats().getCreateTime().toDate().toInstant(), new QueryContext( queryInfo.getSession().getUser(), queryInfo.getSession().getPrincipal(), queryInfo.getSession().getRemoteUserAddress(), queryInfo.getSession().getUserAgent(), queryInfo.getSession().getClientInfo(), queryInfo.getSession().getSource(), queryInfo.getSession().getCatalog(), queryInfo.getSession().getSchema(), queryInfo.getResourceGroupName(), mergeSessionAndCatalogProperties(queryInfo), serverAddress, serverVersion, environment), new QueryMetadata( queryInfo.getQueryId().toString(), queryInfo.getSession().getTransactionId().map(TransactionId::toString), queryInfo.getQuery(), queryInfo.getState().toString(), queryInfo.getSelf(), Optional.empty()))); } public void queryCompletedEvent(QueryInfo queryInfo) { try { Optional<QueryFailureInfo> queryFailureInfo = Optional.empty(); if (queryInfo.getFailureInfo() != null) { FailureInfo failureInfo = queryInfo.getFailureInfo(); Optional<TaskInfo> failedTask = queryInfo.getOutputStage().flatMap(QueryMonitor::findFailedTask); queryFailureInfo = Optional.of(new QueryFailureInfo( queryInfo.getErrorCode(), Optional.ofNullable(failureInfo.getType()), Optional.ofNullable(failureInfo.getMessage()), failedTask.map(task -> task.getTaskStatus().getTaskId().toString()), failedTask.map(task -> task.getTaskStatus().getSelf().getHost()), objectMapper.writeValueAsString(queryInfo.getFailureInfo()))); } ImmutableList.Builder<QueryInputMetadata> inputs = ImmutableList.builder(); for (Input input : queryInfo.getInputs()) { inputs.add(new QueryInputMetadata( input.getConnectorId().getCatalogName(), input.getSchema(), input.getTable(), input.getColumns().stream() .map(Column::toString).collect(Collectors.toList()), input.getConnectorInfo())); } QueryStats queryStats = queryInfo.getQueryStats(); Optional<QueryOutputMetadata> output = Optional.empty(); if (queryInfo.getOutput().isPresent()) { Optional<TableFinishInfo> tableFinishInfo = queryStats.getOperatorSummaries().stream() .map(OperatorStats::getInfo) .filter(TableFinishInfo.class::isInstance) .map(TableFinishInfo.class::cast) .findFirst(); output = Optional.of( new QueryOutputMetadata( queryInfo.getOutput().get().getConnectorId().getCatalogName(), queryInfo.getOutput().get().getSchema(), queryInfo.getOutput().get().getTable(), tableFinishInfo.map(TableFinishInfo::getConnectorOutputMetadata), tableFinishInfo.map(TableFinishInfo::isJsonLengthLimitExceeded))); } eventListenerManager.queryCompleted( new QueryCompletedEvent( new QueryMetadata( queryInfo.getQueryId().toString(), queryInfo.getSession().getTransactionId().map(TransactionId::toString), queryInfo.getQuery(), queryInfo.getState().toString(), queryInfo.getSelf(), queryInfo.getOutputStage().flatMap(stage -> stageInfoCodec.toJsonWithLengthLimit(stage, toIntExact(config.getMaxOutputStageJsonSize().toBytes())))), new QueryStatistics( ofMillis(queryStats.getTotalCpuTime().toMillis()), ofMillis(queryStats.getTotalScheduledTime().toMillis()), ofMillis(queryStats.getQueuedTime().toMillis()), Optional.ofNullable(queryStats.getAnalysisTime()).map(duration -> ofMillis(duration.toMillis())), Optional.ofNullable(queryStats.getDistributedPlanningTime()).map(duration -> ofMillis(duration.toMillis())), queryStats.getPeakMemoryReservation().toBytes(), queryStats.getRawInputDataSize().toBytes(), queryStats.getRawInputPositions(), queryStats.getCompletedDrivers(), queryInfo.isCompleteInfo(), objectMapper.writeValueAsString(queryInfo.getQueryStats().getOperatorSummaries())), new QueryContext( queryInfo.getSession().getUser(), queryInfo.getSession().getPrincipal(), queryInfo.getSession().getRemoteUserAddress(), queryInfo.getSession().getUserAgent(), queryInfo.getSession().getClientInfo(), queryInfo.getSession().getSource(), queryInfo.getSession().getCatalog(), queryInfo.getSession().getSchema(), queryInfo.getResourceGroupName(), mergeSessionAndCatalogProperties(queryInfo), serverAddress, serverVersion, environment), new QueryIOMetadata(inputs.build(), output), queryFailureInfo, ofEpochMilli(queryStats.getCreateTime().getMillis()), ofEpochMilli(queryStats.getExecutionStartTime().getMillis()), ofEpochMilli(queryStats.getEndTime().getMillis()))); logQueryTimeline(queryInfo); } catch (JsonProcessingException e) { throw Throwables.propagate(e); } } private static Optional<TaskInfo> findFailedTask(StageInfo stageInfo) { for (StageInfo subStage : stageInfo.getSubStages()) { Optional<TaskInfo> task = findFailedTask(subStage); if (task.isPresent()) { return task; } } return stageInfo.getTasks().stream() .filter(taskInfo -> taskInfo.getTaskStatus().getState() == TaskState.FAILED) .findFirst(); } private static Map<String, String> mergeSessionAndCatalogProperties(QueryInfo queryInfo) { ImmutableMap.Builder<String, String> mergedProperties = ImmutableMap.builder(); mergedProperties.putAll(queryInfo.getSession().getSystemProperties()); for (Map.Entry<ConnectorId, Map<String, String>> catalogEntry : queryInfo.getSession().getCatalogProperties().entrySet()) { for (Map.Entry<String, String> entry : catalogEntry.getValue().entrySet()) { mergedProperties.put(catalogEntry.getKey().getCatalogName() + "." + entry.getKey(), entry.getValue()); } } return mergedProperties.build(); } private static void logQueryTimeline(QueryInfo queryInfo) { try { QueryStats queryStats = queryInfo.getQueryStats(); DateTime queryStartTime = queryStats.getCreateTime(); DateTime queryEndTime = queryStats.getEndTime(); // query didn't finish cleanly if (queryStartTime == null || queryEndTime == null) { return; } // planning duration -- start to end of planning long planning = queryStats.getTotalPlanningTime() == null ? 0 : queryStats.getTotalPlanningTime().toMillis(); List<StageInfo> stages = StageInfo.getAllStages(queryInfo.getOutputStage()); // long lastSchedulingCompletion = 0; long firstTaskStartTime = queryEndTime.getMillis(); long lastTaskStartTime = queryStartTime.getMillis() + planning; long lastTaskEndTime = queryStartTime.getMillis() + planning; for (StageInfo stage : stages) { // only consider leaf stages if (!stage.getSubStages().isEmpty()) { continue; } for (TaskInfo taskInfo : stage.getTasks()) { TaskStats taskStats = taskInfo.getStats(); DateTime firstStartTime = taskStats.getFirstStartTime(); if (firstStartTime != null) { firstTaskStartTime = Math.min(firstStartTime.getMillis(), firstTaskStartTime); } DateTime lastStartTime = taskStats.getLastStartTime(); if (lastStartTime != null) { lastTaskStartTime = max(lastStartTime.getMillis(), lastTaskStartTime); } DateTime endTime = taskStats.getEndTime(); if (endTime != null) { lastTaskEndTime = max(endTime.getMillis(), lastTaskEndTime); } } } long elapsed = queryEndTime.getMillis() - queryStartTime.getMillis(); long scheduling = firstTaskStartTime - queryStartTime.getMillis() - planning; long running = lastTaskEndTime - firstTaskStartTime; long finishing = queryEndTime.getMillis() - lastTaskEndTime; log.info("TIMELINE: Query %s :: Transaction:[%s] :: elapsed %sms :: planning %sms :: scheduling %sms :: running %sms :: finishing %sms :: begin %s :: end %s", queryInfo.getQueryId(), queryInfo.getSession().getTransactionId().map(TransactionId::toString).orElse(""), max(elapsed, 0), max(planning, 0), max(scheduling, 0), max(running, 0), max(finishing, 0), queryStartTime, queryEndTime); } catch (Exception e) { log.error(e, "Error logging query timeline"); } } public void splitCompletedEvent(TaskId taskId, DriverStats driverStats) { splitCompletedEvent(taskId, driverStats, null, null); } public void splitFailedEvent(TaskId taskId, DriverStats driverStats, Throwable cause) { splitCompletedEvent(taskId, driverStats, cause.getClass().getName(), cause.getMessage()); } private void splitCompletedEvent(TaskId taskId, DriverStats driverStats, @Nullable String failureType, @Nullable String failureMessage) { Optional<Duration> timeToStart = Optional.empty(); if (driverStats.getStartTime() != null) { timeToStart = Optional.of(ofMillis(driverStats.getStartTime().getMillis() - driverStats.getCreateTime().getMillis())); } Optional<Duration> timeToEnd = Optional.empty(); if (driverStats.getEndTime() != null) { timeToEnd = Optional.of(ofMillis(driverStats.getEndTime().getMillis() - driverStats.getCreateTime().getMillis())); } Optional<SplitFailureInfo> splitFailureMetadata = Optional.empty(); if (failureType != null) { splitFailureMetadata = Optional.of(new SplitFailureInfo(failureType, failureMessage != null ? failureMessage : "")); } try { eventListenerManager.splitCompleted( new SplitCompletedEvent( taskId.getQueryId().toString(), taskId.getStageId().toString(), Integer.toString(taskId.getId()), driverStats.getCreateTime().toDate().toInstant(), Optional.ofNullable(driverStats.getStartTime()).map(startTime -> startTime.toDate().toInstant()), Optional.ofNullable(driverStats.getEndTime()).map(endTime -> endTime.toDate().toInstant()), new SplitStatistics( ofMillis(driverStats.getTotalCpuTime().toMillis()), ofMillis(driverStats.getElapsedTime().toMillis()), ofMillis(driverStats.getQueuedTime().toMillis()), ofMillis(driverStats.getTotalUserTime().toMillis()), ofMillis(driverStats.getRawInputReadTime().toMillis()), driverStats.getRawInputPositions(), driverStats.getRawInputDataSize().toBytes(), driverStats.getPeakMemoryReservation().toBytes(), timeToStart, timeToEnd), splitFailureMetadata, objectMapper.writeValueAsString(driverStats))); } catch (JsonProcessingException e) { log.error(e, "Error processing split completion event for task %s", taskId); } } }