/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.runtime.webmonitor.handlers.checkpoints; import com.fasterxml.jackson.core.JsonGenerator; import org.apache.flink.runtime.checkpoint.AbstractCheckpointStats; import org.apache.flink.runtime.checkpoint.CheckpointStatsHistory; import org.apache.flink.runtime.checkpoint.CheckpointStatsSnapshot; import org.apache.flink.runtime.checkpoint.MinMaxAvgStats; import org.apache.flink.runtime.checkpoint.SubtaskStateStats; import org.apache.flink.runtime.checkpoint.TaskStateStats; import org.apache.flink.runtime.executiongraph.AccessExecutionGraph; import org.apache.flink.runtime.instance.ActorGateway; import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.webmonitor.ExecutionGraphHolder; import org.apache.flink.runtime.webmonitor.handlers.AbstractExecutionGraphRequestHandler; import org.apache.flink.runtime.webmonitor.handlers.AbstractJobVertexRequestHandler; import org.apache.flink.runtime.webmonitor.handlers.JsonFactory; import org.apache.flink.runtime.webmonitor.history.ArchivedJson; import org.apache.flink.runtime.webmonitor.history.JsonArchivist; import java.io.IOException; import java.io.StringWriter; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; import static org.apache.flink.runtime.webmonitor.handlers.checkpoints.CheckpointStatsHandler.writeMinMaxAvg; import static org.apache.flink.util.Preconditions.checkNotNull; /** * Request handler that returns checkpoint stats for a single job vertex with * the summary stats and all subtasks. */ public class CheckpointStatsDetailsSubtasksHandler extends AbstractExecutionGraphRequestHandler { private static final String CHECKPOINT_STATS_DETAILS_SUBTASKS_REST_PATH = "/jobs/:jobid/checkpoints/details/:checkpointid/subtasks/:vertexid"; private final CheckpointStatsCache cache; public CheckpointStatsDetailsSubtasksHandler(ExecutionGraphHolder executionGraphHolder, CheckpointStatsCache cache) { super(executionGraphHolder); this.cache = checkNotNull(cache); } @Override public String[] getPaths() { return new String[]{CHECKPOINT_STATS_DETAILS_SUBTASKS_REST_PATH}; } @Override public String handleJsonRequest( Map<String, String> pathParams, Map<String, String> queryParams, ActorGateway jobManager) throws Exception { return super.handleJsonRequest(pathParams, queryParams, jobManager); } @Override public String handleRequest(AccessExecutionGraph graph, Map<String, String> params) throws Exception { long checkpointId = CheckpointStatsDetailsHandler.parseCheckpointId(params); if (checkpointId == -1) { return "{}"; } JobVertexID vertexId = AbstractJobVertexRequestHandler.parseJobVertexId(params); if (vertexId == null) { return "{}"; } CheckpointStatsSnapshot snapshot = graph.getCheckpointStatsSnapshot(); if (snapshot == null) { return "{}"; } AbstractCheckpointStats checkpoint = snapshot.getHistory().getCheckpointById(checkpointId); if (checkpoint != null) { cache.tryAdd(checkpoint); } else { checkpoint = cache.tryGet(checkpointId); if (checkpoint == null) { return "{}"; } } TaskStateStats taskStats = checkpoint.getTaskStateStats(vertexId); if (taskStats == null) { return "{}"; } return createSubtaskCheckpointDetailsJson(checkpoint, taskStats); } public static class CheckpointStatsDetailsSubtasksJsonArchivist implements JsonArchivist { @Override public Collection<ArchivedJson> archiveJsonWithPath(AccessExecutionGraph graph) throws IOException { CheckpointStatsSnapshot stats = graph.getCheckpointStatsSnapshot(); if (stats == null) { return Collections.emptyList(); } CheckpointStatsHistory history = stats.getHistory(); List<ArchivedJson> archive = new ArrayList<>(); for (AbstractCheckpointStats checkpoint : history.getCheckpoints()) { for (TaskStateStats subtaskStats : checkpoint.getAllTaskStateStats()) { String json = createSubtaskCheckpointDetailsJson(checkpoint, subtaskStats); String path = CHECKPOINT_STATS_DETAILS_SUBTASKS_REST_PATH .replace(":jobid", graph.getJobID().toString()) .replace(":checkpointid", String.valueOf(checkpoint.getCheckpointId())) .replace(":vertexid", subtaskStats.getJobVertexId().toString()); archive.add(new ArchivedJson(path, json)); } } return archive; } } private static String createSubtaskCheckpointDetailsJson(AbstractCheckpointStats checkpoint, TaskStateStats taskStats) throws IOException { StringWriter writer = new StringWriter(); JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer); gen.writeStartObject(); // Overview gen.writeNumberField("id", checkpoint.getCheckpointId()); gen.writeStringField("status", checkpoint.getStatus().toString()); gen.writeNumberField("latest_ack_timestamp", taskStats.getLatestAckTimestamp()); gen.writeNumberField("state_size", taskStats.getStateSize()); gen.writeNumberField("end_to_end_duration", taskStats.getEndToEndDuration(checkpoint.getTriggerTimestamp())); gen.writeNumberField("alignment_buffered", taskStats.getAlignmentBuffered()); gen.writeNumberField("num_subtasks", taskStats.getNumberOfSubtasks()); gen.writeNumberField("num_acknowledged_subtasks", taskStats.getNumberOfAcknowledgedSubtasks()); if (taskStats.getNumberOfAcknowledgedSubtasks() > 0) { gen.writeObjectFieldStart("summary"); gen.writeObjectFieldStart("state_size"); writeMinMaxAvg(gen, taskStats.getSummaryStats().getStateSizeStats()); gen.writeEndObject(); gen.writeObjectFieldStart("end_to_end_duration"); MinMaxAvgStats ackTimestampStats = taskStats.getSummaryStats().getAckTimestampStats(); gen.writeNumberField("min", Math.max(0, ackTimestampStats.getMinimum() - checkpoint.getTriggerTimestamp())); gen.writeNumberField("max", Math.max(0, ackTimestampStats.getMaximum() - checkpoint.getTriggerTimestamp())); gen.writeNumberField("avg", Math.max(0, ackTimestampStats.getAverage() - checkpoint.getTriggerTimestamp())); gen.writeEndObject(); gen.writeObjectFieldStart("checkpoint_duration"); gen.writeObjectFieldStart("sync"); writeMinMaxAvg(gen, taskStats.getSummaryStats().getSyncCheckpointDurationStats()); gen.writeEndObject(); gen.writeObjectFieldStart("async"); writeMinMaxAvg(gen, taskStats.getSummaryStats().getAsyncCheckpointDurationStats()); gen.writeEndObject(); gen.writeEndObject(); gen.writeObjectFieldStart("alignment"); gen.writeObjectFieldStart("buffered"); writeMinMaxAvg(gen, taskStats.getSummaryStats().getAlignmentBufferedStats()); gen.writeEndObject(); gen.writeObjectFieldStart("duration"); writeMinMaxAvg(gen, taskStats.getSummaryStats().getAlignmentDurationStats()); gen.writeEndObject(); gen.writeEndObject(); gen.writeEndObject(); } SubtaskStateStats[] subtasks = taskStats.getSubtaskStats(); gen.writeArrayFieldStart("subtasks"); for (int i = 0; i < subtasks.length; i++) { SubtaskStateStats subtask = subtasks[i]; gen.writeStartObject(); gen.writeNumberField("index", i); if (subtask != null) { gen.writeStringField("status", "completed"); gen.writeNumberField("ack_timestamp", subtask.getAckTimestamp()); gen.writeNumberField("end_to_end_duration", subtask.getEndToEndDuration(checkpoint.getTriggerTimestamp())); gen.writeNumberField("state_size", subtask.getStateSize()); gen.writeObjectFieldStart("checkpoint"); gen.writeNumberField("sync", subtask.getSyncCheckpointDuration()); gen.writeNumberField("async", subtask.getAsyncCheckpointDuration()); gen.writeEndObject(); gen.writeObjectFieldStart("alignment"); gen.writeNumberField("buffered", subtask.getAlignmentBuffered()); gen.writeNumberField("duration", subtask.getAlignmentDuration()); gen.writeEndObject(); } else { gen.writeStringField("status", "pending_or_failed"); } gen.writeEndObject(); } gen.writeEndArray(); gen.writeEndObject(); gen.close(); return writer.toString(); } }