/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.app.mapreduce;

import co.cask.cdap.api.dataset.lib.cube.AggregationFunction;
import co.cask.cdap.api.dataset.lib.cube.TimeValue;
import co.cask.cdap.api.metrics.MetricDataQuery;
import co.cask.cdap.api.metrics.MetricStore;
import co.cask.cdap.api.metrics.MetricTimeSeries;
import co.cask.cdap.app.metrics.MapReduceMetrics;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.MRJobInfo;
import co.cask.cdap.proto.MRTaskInfo;
import co.cask.cdap.proto.ProgramType;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Table;
import com.google.inject.Inject;
import org.apache.hadoop.mapreduce.TaskCounter;

import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Retrieves information/reports for a MapReduce run via the Metrics system.
 */
public class LocalMRJobInfoFetcher implements MRJobInfoFetcher {
  private final MetricStore metricStore;

  @Inject
  public LocalMRJobInfoFetcher(MetricStore metricStore) {
    this.metricStore = metricStore;
  }

  /**
   * @param runId for which information will be returned.
   * @return a {@link MRJobInfo} containing information about a particular MapReduce program run.
   */
  @Override
  public MRJobInfo getMRJobInfo(Id.Run runId) {
    Preconditions.checkArgument(ProgramType.MAPREDUCE.equals(runId.getProgram().getType()));

    // baseTags has tag keys: ns.app.mr.runid
    Map<String, String> baseTags = Maps.newHashMap();
    baseTags.put(Constants.Metrics.Tag.NAMESPACE, runId.getNamespace().getId());
    baseTags.put(Constants.Metrics.Tag.APP, runId.getProgram().getApplicationId());
    baseTags.put(Constants.Metrics.Tag.MAPREDUCE, runId.getProgram().getId());
    baseTags.put(Constants.Metrics.Tag.RUN_ID, runId.getId());

    Map<String, String> mapTags = Maps.newHashMap(baseTags);
    mapTags.put(Constants.Metrics.Tag.MR_TASK_TYPE, MapReduceMetrics.TaskType.Mapper.getId());

    Map<String, String> reduceTags = Maps.newHashMap(baseTags);
    reduceTags.put(Constants.Metrics.Tag.MR_TASK_TYPE, MapReduceMetrics.TaskType.Reducer.getId());

    // map from TaskId -> (CounterName -> CounterValue)
    Table<String, String, Long> mapTaskMetrics = HashBasedTable.create();
    Table<String, String, Long> reduceTaskMetrics = HashBasedTable.create();

    // Populate mapTaskMetrics and reduceTaskMetrics via MetricStore. Used to construct MRTaskInfo below.
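    // Illustrative example (all values hypothetical): for a run of a MapReduce named "WordCount"
    // in namespace "default", mapTags would carry tags along the lines of
    //   {ns=default, app=WordCountApp, mr=WordCount, runid=<run-id>, <task-type-tag>=<mapper-id>}
    // and the grouped query below then yields one Table row per task instance, keyed by instance id.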
    Map<String, String> metricNamesToCounters = Maps.newHashMap();
    metricNamesToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_INPUT_RECORDS),
                              TaskCounter.MAP_INPUT_RECORDS.name());
    metricNamesToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_OUTPUT_RECORDS),
                              TaskCounter.MAP_OUTPUT_RECORDS.name());
    metricNamesToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_BYTES),
                              TaskCounter.MAP_OUTPUT_BYTES.name());
    metricNamesToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_COMPLETION),
                              MapReduceMetrics.METRIC_TASK_COMPLETION);

    // get metrics grouped by instance-id for the map tasks
    queryGroupedAggregates(mapTags, mapTaskMetrics, metricNamesToCounters);

    Map<String, Long> mapProgress = Maps.newHashMap();
    if (mapTaskMetrics.columnMap().containsKey(MapReduceMetrics.METRIC_TASK_COMPLETION)) {
      mapProgress = Maps.newHashMap(mapTaskMetrics.columnMap().remove(MapReduceMetrics.METRIC_TASK_COMPLETION));
    }

    Map<String, String> reduceMetricsToCounters = Maps.newHashMap();
    reduceMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_INPUT_RECORDS),
                                TaskCounter.REDUCE_INPUT_RECORDS.name());
    reduceMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_OUTPUT_RECORDS),
                                TaskCounter.REDUCE_OUTPUT_RECORDS.name());
    reduceMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_COMPLETION),
                                MapReduceMetrics.METRIC_TASK_COMPLETION);

    // get metrics grouped by instance-id for the reduce tasks
    queryGroupedAggregates(reduceTags, reduceTaskMetrics, reduceMetricsToCounters);

    Map<String, Long> reduceProgress = Maps.newHashMap();
    if (reduceTaskMetrics.columnMap().containsKey(MapReduceMetrics.METRIC_TASK_COMPLETION)) {
      reduceProgress = Maps.newHashMap(reduceTaskMetrics.columnMap().remove(MapReduceMetrics.METRIC_TASK_COMPLETION));
    }
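    // Sketch of the data shape at this point (values are made up): after the completion column is
    // split off above, mapTaskMetrics might hold
    //   row "0": {MAP_INPUT_RECORDS=100, MAP_OUTPUT_RECORDS=100, MAP_OUTPUT_BYTES=2048}
    // while mapProgress holds {"0"=100}, i.e. completion as a percentage, which is divided by 100
    // below to produce the 0.0-1.0 progress fraction expected by MRTaskInfo.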
    // Construct MRTaskInfos from the information we can get from the Metric system.
    List<MRTaskInfo> mapTaskInfos = Lists.newArrayList();
    for (Map.Entry<String, Map<String, Long>> taskEntry : mapTaskMetrics.rowMap().entrySet()) {
      String mapTaskId = taskEntry.getKey();
      mapTaskInfos.add(new MRTaskInfo(mapTaskId, null, null, null,
                                      mapProgress.get(mapTaskId) / 100.0F, taskEntry.getValue()));
    }

    List<MRTaskInfo> reduceTaskInfos = Lists.newArrayList();
    for (Map.Entry<String, Map<String, Long>> taskEntry : reduceTaskMetrics.rowMap().entrySet()) {
      String reduceTaskId = taskEntry.getKey();
      reduceTaskInfos.add(new MRTaskInfo(reduceTaskId, null, null, null,
                                         reduceProgress.get(reduceTaskId) / 100.0F, taskEntry.getValue()));
    }

    return getJobCounters(mapTags, reduceTags, mapTaskInfos, reduceTaskInfos);
  }

  private MRJobInfo getJobCounters(Map<String, String> mapTags, Map<String, String> reduceTags,
                                   List<MRTaskInfo> mapTaskInfos, List<MRTaskInfo> reduceTaskInfos) {
    HashMap<String, Long> metrics = Maps.newHashMap();

    Map<String, String> mapMetricsToCounters =
      ImmutableMap.of(prependSystem(MapReduceMetrics.METRIC_INPUT_RECORDS), TaskCounter.MAP_INPUT_RECORDS.name(),
                      prependSystem(MapReduceMetrics.METRIC_OUTPUT_RECORDS), TaskCounter.MAP_OUTPUT_RECORDS.name(),
                      prependSystem(MapReduceMetrics.METRIC_BYTES), TaskCounter.MAP_OUTPUT_BYTES.name(),
                      prependSystem(MapReduceMetrics.METRIC_COMPLETION), MapReduceMetrics.METRIC_COMPLETION);

    getAggregates(mapTags, mapMetricsToCounters, metrics);
    float mapProgress = metrics.remove(MapReduceMetrics.METRIC_COMPLETION) / 100.0F;

    Map<String, String> reduceMetricsToCounters =
      ImmutableMap.of(prependSystem(MapReduceMetrics.METRIC_INPUT_RECORDS), TaskCounter.REDUCE_INPUT_RECORDS.name(),
                      prependSystem(MapReduceMetrics.METRIC_OUTPUT_RECORDS), TaskCounter.REDUCE_OUTPUT_RECORDS.name(),
                      prependSystem(MapReduceMetrics.METRIC_COMPLETION), MapReduceMetrics.METRIC_COMPLETION);

    getAggregates(reduceTags, reduceMetricsToCounters, metrics);
    float reduceProgress = metrics.remove(MapReduceMetrics.METRIC_COMPLETION) / 100.0F;

    return new MRJobInfo(mapProgress, reduceProgress, metrics, mapTaskInfos, reduceTaskInfos, false);
  }
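  // For illustration (the metric name below is hypothetical): prependSystem("process.entries.in")
  // returns "system.process.entries.in", the "system."-scoped name under which these framework
  // metrics are stored, as opposed to user-scoped metrics.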
  private String prependSystem(String metric) {
    return "system." + metric;
  }

  private void getAggregates(Map<String, String> tags, Map<String, String> metricsToCounters,
                             Map<String, Long> result) {
    Map<String, AggregationFunction> metrics = Maps.newHashMap();
    // all map-reduce metrics are gauges
    for (String metric : metricsToCounters.keySet()) {
      metrics.put(metric, AggregationFunction.LATEST);
    }
    MetricDataQuery metricDataQuery =
      new MetricDataQuery(0, Integer.MAX_VALUE, Integer.MAX_VALUE, metrics, tags, ImmutableList.<String>of());
    Collection<MetricTimeSeries> query = metricStore.query(metricDataQuery);

    // initialize elements to zero
    for (String counterName : metricsToCounters.values()) {
      result.put(counterName, 0L);
    }

    for (MetricTimeSeries metricTimeSeries : query) {
      List<TimeValue> timeValues = metricTimeSeries.getTimeValues();
      TimeValue timeValue = Iterables.getOnlyElement(timeValues);
      result.put(metricsToCounters.get(metricTimeSeries.getMetricName()), timeValue.getValue());
    }
  }

  // queries MetricStore for one metric across all tasks of a certain TaskType, using GroupBy InstanceId
  private void queryGroupedAggregates(Map<String, String> tags, Table<String, String, Long> allTaskMetrics,
                                      Map<String, String> metricsToCounters) {
    Map<String, AggregationFunction> metrics = Maps.newHashMap();
    // all map-reduce metrics are gauges
    for (String metric : metricsToCounters.keySet()) {
      metrics.put(metric, AggregationFunction.LATEST);
    }
    MetricDataQuery metricDataQuery =
      new MetricDataQuery(0, Integer.MAX_VALUE, Integer.MAX_VALUE, metrics, tags,
                          ImmutableList.of(Constants.Metrics.Tag.INSTANCE_ID));
    Collection<MetricTimeSeries> query = metricStore.query(metricDataQuery);

    for (MetricTimeSeries metricTimeSeries : query) {
      List<TimeValue> timeValues = metricTimeSeries.getTimeValues();
      TimeValue timeValue = Iterables.getOnlyElement(timeValues);
      String taskId = metricTimeSeries.getTagValues().get(Constants.Metrics.Tag.INSTANCE_ID);
      allTaskMetrics.put(taskId, metricsToCounters.get(metricTimeSeries.getMetricName()), timeValue.getValue());
    }
  }
}
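// Illustrative usage (hypothetical wiring; the Guice injector and accessor names are assumptions,
// not part of this file):
//   MRJobInfoFetcher fetcher = injector.getInstance(LocalMRJobInfoFetcher.class);
//   MRJobInfo jobInfo = fetcher.getMRJobInfo(runId);  // runId: Id.Run of a MAPREDUCE program run
// jobInfo then carries the map/reduce progress fractions, aggregate counters keyed by
// TaskCounter names, and the per-task MRTaskInfo lists built above.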