/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.gateway.handlers;
import co.cask.cdap.api.dataset.lib.cube.AggregationFunction;
import co.cask.cdap.api.metrics.MetricDataQuery;
import co.cask.cdap.api.metrics.MetricSearchQuery;
import co.cask.cdap.api.metrics.MetricStore;
import co.cask.cdap.api.metrics.MetricTimeSeries;
import co.cask.cdap.api.metrics.TagValue;
import co.cask.cdap.app.mapreduce.MRJobInfoFetcher;
import co.cask.cdap.app.store.Store;
import co.cask.cdap.common.BadRequestException;
import co.cask.cdap.common.NotFoundException;
import co.cask.cdap.common.app.RunIds;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.utils.TimeMathParser;
import co.cask.cdap.internal.app.store.WorkflowDataset;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.ProgramType;
import co.cask.cdap.proto.WorkflowStatistics;
import co.cask.cdap.proto.WorkflowStatsComparison;
import co.cask.http.AbstractHttpHandler;
import co.cask.http.HttpResponder;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import org.jboss.netty.handler.codec.http.HttpRequest;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
import javax.ws.rs.DefaultValue;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.QueryParam;
/**
 * Workflow Statistics Handler.
 *
 * <p>Exposes REST endpoints reporting statistics about workflow runs: aggregate percentile
 * statistics over a time range, details of a run together with its neighboring runs, and a
 * comparison of two runs of the same workflow.</p>
 */
@Singleton
@Path(Constants.Gateway.API_VERSION_3 + "/namespaces/{namespace-id}")
public class WorkflowStatsSLAHttpHandler extends AbstractHttpHandler {
  private static final Logger LOG = LoggerFactory.getLogger(WorkflowStatsSLAHttpHandler.class);

  private final Store store;
  private final MRJobInfoFetcher mrJobInfoFetcher;
  private final MetricStore metricStore;

  @Inject
  WorkflowStatsSLAHttpHandler(Store store, MRJobInfoFetcher mrJobInfoFetcher, MetricStore metricStore) {
    this.store = store;
    this.mrJobInfoFetcher = mrJobInfoFetcher;
    this.metricStore = metricStore;
  }

  /**
   * Returns the statistics for a given workflow.
   *
   * @param request The request
   * @param responder The responder
   * @param namespaceId The namespace the application is in
   * @param appId The application the workflow is in
   * @param workflowId The workflow that needs to have it stats shown
   * @param start The start time of the range
   * @param end The end time of the range
   * @param percentiles The list of percentile values on which visibility is needed
   * @throws BadRequestException if the time range or any percentile value is invalid
   */
  @GET
  @Path("apps/{app-id}/workflows/{workflow-id}/statistics")
  public void workflowStats(HttpRequest request, HttpResponder responder,
                            @PathParam("namespace-id") String namespaceId,
                            @PathParam("app-id") String appId,
                            @PathParam("workflow-id") String workflowId,
                            @QueryParam("start") @DefaultValue("now-1d") String start,
                            @QueryParam("end") @DefaultValue("now") String end,
                            @QueryParam("percentile") @DefaultValue("90.0") List<Double> percentiles)
    throws Exception {
    // "now"-relative expressions such as "now-1d" are resolved to absolute epoch seconds here.
    long startTime = TimeMathParser.parseTimeInSeconds(start);
    long endTime = TimeMathParser.parseTimeInSeconds(end);
    if (startTime < 0) {
      throw new BadRequestException("Invalid start time. The time you entered was : " + startTime);
    } else if (endTime < 0) {
      throw new BadRequestException("Invalid end time. The time you entered was : " + endTime);
    } else if (endTime < startTime) {
      throw new BadRequestException("Start time : " + startTime + " cannot be larger than end time : " + endTime);
    }
    for (double percentile : percentiles) {
      if (percentile < 0.0 || percentile > 100.0) {
        throw new BadRequestException("Percentile values have to be greater than or equal to 0 and" +
                                        " less than or equal to 100. Invalid input was " +
                                        Double.toString(percentile));
      }
    }
    Id.Workflow workflow = Id.Workflow.from(Id.Namespace.from(namespaceId), appId, workflowId);
    WorkflowStatistics workflowStatistics = store.getWorkflowStatistics(workflow, startTime, endTime, percentiles);
    if (workflowStatistics == null) {
      // No runs in the range is not an error; respond 200 with an explanatory message.
      responder.sendString(HttpResponseStatus.OK, "There are no statistics associated with this workflow : "
        + workflowId + " in the specified time range.");
      return;
    }
    responder.sendJson(HttpResponseStatus.OK, workflowStatistics);
  }

  /**
   * The endpoint returns a list of workflow metrics based on the workflow run and a surrounding number of runs
   * of the workflow that are spaced apart by a time interval from each other.
   *
   * @param request The request
   * @param responder The responder
   * @param namespaceId The namespace the application is in
   * @param appId The application the workflow is in
   * @param workflowId The workflow that needs to have it stats shown
   * @param runId The run id of the Workflow that the user wants to see
   * @param limit The number of the records that the user wants to compare against on either side of the run
   * @param interval The timeInterval with which the user wants to space out the runs
   * @throws BadRequestException if the limit is not positive or the interval is invalid
   */
  @GET
  @Path("apps/{app-id}/workflows/{workflow-id}/runs/{run-id}/statistics")
  public void workflowRunDetail(HttpRequest request, HttpResponder responder,
                                @PathParam("namespace-id") String namespaceId,
                                @PathParam("app-id") String appId,
                                @PathParam("workflow-id") String workflowId,
                                @PathParam("run-id") String runId,
                                @QueryParam("limit") @DefaultValue("10") int limit,
                                @QueryParam("interval") @DefaultValue("10s") String interval) throws Exception {
    if (limit <= 0) {
      throw new BadRequestException("Limit has to be greater than 0. Entered value was : " + limit);
    }
    long timeInterval;
    try {
      timeInterval = TimeMathParser.resolutionInSeconds(interval);
    } catch (IllegalArgumentException e) {
      throw new BadRequestException("Interval is specified with invalid time unit. It should be specified with one" +
                                      " of the 'ms', 's', 'm', 'h', 'd' units. Entered value was : " + interval);
    }
    if (timeInterval <= 0) {
      throw new BadRequestException("Interval should be greater than 0 and should be specified with one of the 'ms'," +
                                      " 's', 'm', 'h', 'd' units. Entered value was : " + interval);
    }
    Id.Workflow workflow = Id.Workflow.from(Id.Namespace.from(namespaceId), appId, workflowId);
    Collection<WorkflowDataset.WorkflowRunRecord> workflowRunRecords =
      store.retrieveSpacedRecords(workflow, runId, limit, timeInterval);
    List<WorkflowRunMetrics> workflowRunMetricsList = new ArrayList<>();
    Map<String, Long> startTimes = new HashMap<>();
    for (WorkflowDataset.WorkflowRunRecord workflowRunRecord : workflowRunRecords) {
      String workflowRunId = workflowRunRecord.getWorkflowRunId();
      WorkflowRunMetrics detailedRecord = getDetailedRecord(workflow, workflowRunId);
      // Skip runs whose detailed record cannot be fetched; adding a null entry here would
      // cause a NullPointerException later while formatting the response.
      if (detailedRecord == null) {
        LOG.debug("No detailed record found for run {} of workflow {}", workflowRunId, workflowId);
        continue;
      }
      workflowRunMetricsList.add(detailedRecord);
      // The run id encodes its creation time, which serves as the run's start time.
      startTimes.put(workflowRunId, RunIds.getTime(RunIds.fromString(workflowRunId), TimeUnit.SECONDS));
    }
    Collection<WorkflowStatsComparison.ProgramNodes> formattedStatisticsMap = format(workflowRunMetricsList);
    responder.sendJson(HttpResponseStatus.OK, new WorkflowStatsComparison(startTimes, formattedStatisticsMap));
  }

  /**
   * Compare the metrics of 2 runs of a workflow.
   *
   * @param request The request
   * @param responder The responder
   * @param namespaceId The namespace the application is in
   * @param appId The application the workflow is in
   * @param workflowId The workflow that needs to have it stats shown
   * @param runId The run id of the Workflow that the user wants to see
   * @param otherRunId The other run id of the same workflow that the user wants to compare against
   * @throws BadRequestException if the other run-id is not provided
   * @throws NotFoundException if either run-id cannot be found
   */
  @GET
  @Path("apps/{app-id}/workflows/{workflow-id}/runs/{run-id}/compare")
  public void compare(HttpRequest request, HttpResponder responder,
                      @PathParam("namespace-id") String namespaceId,
                      @PathParam("app-id") String appId,
                      @PathParam("workflow-id") String workflowId,
                      @PathParam("run-id") String runId,
                      @QueryParam("other-run-id") String otherRunId) throws Exception {
    // The query parameter has no default; previously a missing value flowed into the store
    // lookup as null instead of being rejected up front.
    if (otherRunId == null) {
      throw new BadRequestException("The 'other-run-id' query parameter must be provided.");
    }
    Id.Workflow workflow = Id.Workflow.from(Id.Namespace.from(namespaceId), appId, workflowId);
    WorkflowRunMetrics detailedStatistics = getDetailedRecord(workflow, runId);
    if (detailedStatistics == null) {
      throw new NotFoundException("The run-id provided was not found : " + runId);
    }
    // Only fetch the second record once the first one is known to exist.
    WorkflowRunMetrics otherDetailedStatistics = getDetailedRecord(workflow, otherRunId);
    if (otherDetailedStatistics == null) {
      throw new NotFoundException("The other run-id provided was not found : " + otherRunId);
    }
    List<WorkflowRunMetrics> workflowRunMetricsList = new ArrayList<>();
    workflowRunMetricsList.add(detailedStatistics);
    workflowRunMetricsList.add(otherDetailedStatistics);
    responder.sendJson(HttpResponseStatus.OK, format(workflowRunMetricsList));
  }

  /**
   * Regroups per-run program metrics by program name so that runs of the same program can be
   * compared side by side.
   *
   * @param workflowRunMetricsList metrics of the workflow runs to regroup
   * @return one {@link WorkflowStatsComparison.ProgramNodes} entry per distinct program name
   */
  private Collection<WorkflowStatsComparison.ProgramNodes> format(List<WorkflowRunMetrics> workflowRunMetricsList) {
    Map<String, WorkflowStatsComparison.ProgramNodes> programLevelDetails = new HashMap<>();
    for (WorkflowRunMetrics workflowRunMetrics : workflowRunMetricsList) {
      for (ProgramMetrics programMetrics : workflowRunMetrics.getProgramMetricsList()) {
        String programName = programMetrics.getProgramName();
        // Single lookup instead of get-then-get; create the node lazily on first sighting.
        WorkflowStatsComparison.ProgramNodes programNodes = programLevelDetails.get(programName);
        if (programNodes == null) {
          programNodes = new WorkflowStatsComparison.ProgramNodes(
            programName, programMetrics.getProgramType(),
            new ArrayList<WorkflowStatsComparison.ProgramNodes.WorkflowProgramDetails>());
          programLevelDetails.put(programName, programNodes);
        }
        programNodes.addWorkflowDetails(
          workflowRunMetrics.getWorkflowRunId(), programMetrics.getProgramRunId(),
          programMetrics.getProgramStartTime(), programMetrics.getMetrics());
      }
    }
    return programLevelDetails.values();
  }

  /**
   * Returns the detailed Record for the Workflow.
   *
   * @param workflowId Workflow that needs to get its detailed record
   * @param runId Run Id of the workflow
   * @return the Workflow Run Metrics, or {@code null} if the run was not found
   */
  @Nullable
  private WorkflowRunMetrics getDetailedRecord(Id.Workflow workflowId, String runId) throws Exception {
    WorkflowDataset.WorkflowRunRecord workflowRunRecord = store.getWorkflowRun(workflowId, runId);
    if (workflowRunRecord == null) {
      return null;
    }
    List<WorkflowDataset.ProgramRun> programRuns = workflowRunRecord.getProgramRuns();
    List<ProgramMetrics> programMetricsList = new ArrayList<>();
    for (WorkflowDataset.ProgramRun programRun : programRuns) {
      Map<String, Long> programMap = new HashMap<>();
      String programName = programRun.getName();
      ProgramType programType = programRun.getProgramType();
      Id.Program program = Id.Program.from(workflowId.getNamespaceId(), workflowId.getApplicationId(),
                                           programType, programName);
      String programRunId = programRun.getRunId();
      // MapReduce and Spark programs carry engine-specific metrics; every other program type
      // reports only the time taken.
      if (programType == ProgramType.MAPREDUCE) {
        programMap = getMapreduceDetails(program, programRunId);
      } else if (programType == ProgramType.SPARK) {
        programMap = getSparkDetails(program, programRunId);
      }
      programMap.put("timeTaken", programRun.getTimeTaken());
      // The run id encodes its creation time, which serves as the program's start time.
      long programStartTime = RunIds.getTime(RunIds.fromString(programRunId), TimeUnit.SECONDS);
      programMetricsList.add(new ProgramMetrics(programName, programType, programRunId, programStartTime, programMap));
    }
    return new WorkflowRunMetrics(runId, programMetricsList);
  }

  /**
   * Fetches the counters recorded for a MapReduce program run.
   */
  private Map<String, Long> getMapreduceDetails(Id.Program mapreduceProgram, String runId) throws Exception {
    Id.Run mrRun = new Id.Run(mapreduceProgram, runId);
    return mrJobInfoFetcher.getMRJobInfo(mrRun).getCounters();
  }

  /**
   * Collects the metrics emitted by a Spark program run, each aggregated with SUM over the
   * full queried time range.
   */
  private Map<String, Long> getSparkDetails(Id.Program sparkProgram, String runId) throws Exception {
    Map<String, String> context = new HashMap<>();
    context.put(Constants.Metrics.Tag.NAMESPACE, sparkProgram.getNamespaceId());
    context.put(Constants.Metrics.Tag.APP, sparkProgram.getApplicationId());
    context.put(Constants.Metrics.Tag.SPARK, sparkProgram.getId());
    context.put(Constants.Metrics.Tag.RUN_ID, runId);
    List<TagValue> tags = new ArrayList<>();
    for (Map.Entry<String, String> entry : context.entrySet()) {
      tags.add(new TagValue(entry.getKey(), entry.getValue()));
    }
    // Search the full time range for every metric name emitted under this run's tags.
    MetricSearchQuery metricSearchQuery = new MetricSearchQuery(0, 0, Integer.MAX_VALUE, tags);
    Collection<String> metricNames = metricStore.findMetricNames(metricSearchQuery);
    Map<String, Long> overallResult = new HashMap<>();
    for (String metricName : metricNames) {
      Collection<MetricTimeSeries> resultPerQuery = metricStore.query(
        new MetricDataQuery(0, 0, Integer.MAX_VALUE, metricName, AggregationFunction.SUM,
                            context, new ArrayList<String>()));
      for (MetricTimeSeries metricTimeSeries : resultPerQuery) {
        // A series with no data points used to throw IndexOutOfBoundsException on get(0).
        if (!metricTimeSeries.getTimeValues().isEmpty()) {
          overallResult.put(metricTimeSeries.getMetricName(), metricTimeSeries.getTimeValues().get(0).getValue());
        }
      }
    }
    return overallResult;
  }

  /**
   * Metrics of a single program run executed as part of a workflow run.
   */
  private static class ProgramMetrics {
    private final String programName;
    private final ProgramType programType;
    private final String programRunId;
    private final long programStartTime;
    private final Map<String, Long> metrics;

    private ProgramMetrics(String programName, ProgramType programType, String programRunId,
                           long programStartTime, Map<String, Long> metrics) {
      this.programName = programName;
      this.programType = programType;
      this.programRunId = programRunId;
      this.programStartTime = programStartTime;
      this.metrics = metrics;
    }

    public String getProgramName() {
      return programName;
    }

    public ProgramType getProgramType() {
      return programType;
    }

    public Map<String, Long> getMetrics() {
      return metrics;
    }

    public long getProgramStartTime() {
      return programStartTime;
    }

    public String getProgramRunId() {
      return programRunId;
    }
  }

  /**
   * Per-program metrics for one run of a workflow.
   */
  private static class WorkflowRunMetrics {
    private final String workflowRunId;
    private final List<ProgramMetrics> programMetricsList;

    private WorkflowRunMetrics(String workflowRunId, List<ProgramMetrics> programMetricsList) {
      this.workflowRunId = workflowRunId;
      this.programMetricsList = programMetricsList;
    }

    public String getWorkflowRunId() {
      return workflowRunId;
    }

    public List<ProgramMetrics> getProgramMetricsList() {
      return programMetricsList;
    }
  }
}