/** * Copyright 2011-2014 Scale Unlimited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.scaleunlimited.cascading; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import cascading.flow.Flow; import cascading.stats.FlowStats; import cascading.stats.FlowStepStats; @SuppressWarnings("rawtypes") public class FlowCounters { static final Logger LOGGER = LoggerFactory.getLogger(FlowCounters.class); /** * Run the flow, and return back a Map that has entries for every requested * counter *group* where that counter has been set during the Flow execution. * Note that a Flow with multiple steps (Hadoop jobs) will sum the counter * values for all jobs, but warn when that happens. * * Note we have to have the funky counterGroup and extraGroups to avoid * a collision with the existing run(flow) method, if only a Flow is passed. * * @param flow Flow to be run & counted * @param counterGroup Counter group to return. * @param extraGroups Additional counter groups to return. * @return Map of counter enum => counts. */ public static Map<Enum, Long> run(Flow flow, Class<? extends Enum> counterGroup, Class<? extends Enum>... extraGroups) { Map<Enum, Long> result = new HashMap<Enum, Long>(); flow.complete(); List<Class<? extends Enum>> enums = new ArrayList<Class<? extends Enum>>(Arrays.asList(extraGroups)); enums.add(counterGroup); FlowStats stats = flow.getFlowStats(); List<FlowStepStats> stepStats = stats.getFlowStepStats(); for (FlowStepStats stepStat : stepStats) { for (Class<? extends Enum> group : enums) { for (Enum counter : group.getEnumConstants()) { long counterValue = stepStat.getCounterValue(counter); if (counterValue != 0) { if (result.containsKey(counter)) { counterValue += result.get(counter); } result.put(counter, counterValue); } } } } return result; } /** * Run the flow, and return back a Map that has entries for every requested * counter. Note that a Flow with multiple steps (Hadoop jobs) will sum the * counter values for all jobs, but warn when that happens. * * If no counters are passed, we'll return all available counters that are * defined using Enums. This means you won't get a counter back if it was * logged using a group name/counter name instead of an Enum. * * @param flow Flow to be run & counted * @param counters Which counters to return in the map. * @return Map of counter enum to counts. */ public static Map<Enum, Long> run(Flow flow, Enum... counters) { flow.complete(); Map<Enum, Long> result = new HashMap<Enum, Long>(); FlowStats stats = flow.getFlowStats(); List<FlowStepStats> stepStats = stats.getFlowStepStats(); for (FlowStepStats stepStat : stepStats) { if (counters.length == 0) { // We want all counters. for (String groupName : stepStat.getCounterGroups()) { Class<? extends Enum> groupClass; try { groupClass = (Class<? extends Enum>)FlowCounters.class.forName(groupName); } catch (ClassNotFoundException e) { // Probably a counter defined using strings vs. Enum, so skip it continue; } for (String counterName : stepStat.getCountersFor(groupClass)) { Enum counterEnum = Enum.valueOf(groupClass, counterName); long counterValue = stepStat.getCounterValue(counterEnum); if (counterValue != 0) { if (result.containsKey(counterEnum)) { counterValue += result.get(counterEnum); } result.put(counterEnum, counterValue); } } } } else { for (Enum counter : counters) { long counterValue = stepStat.getCounterValue(counter); if (counterValue != 0) { if (result.containsKey(counter)) { counterValue += result.get(counter); } result.put(counter, counterValue); } } } } // Make sure every enum is represented, so callers don't have to check for nulls. for (Enum counter : counters) { if (result.get(counter) == null) { result.put(counter, 0L); } } return result; } /** * Run the flow, and return back a Map that has String-keyed entries for every counter. * Note that a Flow with multiple steps (Hadoop jobs) will return back the sum of counter * values for all jobs. * * The map's keys are <group name>.<counter name> * * @param flow Flow to be run & counted * @return Map of counter name to counts. */ public static Map<String, Long> runAndReturnAllCounters(Flow flow) { flow.complete(); Map<String, Long> result = new HashMap<String, Long>(); FlowStats stats = flow.getFlowStats(); List<FlowStepStats> stepStats = stats.getFlowStepStats(); for (FlowStepStats stepStat : stepStats) { for (String counterGroup : stepStat.getCounterGroups()) { Collection<String> counterNames = stepStat.getCountersFor(counterGroup); for (String counterName : counterNames) { String keyName = String.format("%s.%s", counterGroup, counterName); long counterValue = stepStat.getCounterValue(counterGroup, counterName); if (counterValue != 0) { if (result.containsKey(keyName)) { counterValue += result.get(keyName); } result.put(keyName, counterValue); } } } } return result; } // This is how LocalStepStats.increment(Enum) and LocalStepStats.getCounterValue(Enum) // are currently implemented and seems to match the Hadoop internal implementation as well. public static String getCounterKey(Enum counter) { return getCounterKey(counter.getDeclaringClass().getName(), counter.name()); } // This is how we store grouped counters in the map returned by getCounters. public static String getCounterKey(String groupName, String counterName) { return groupName + "." + counterName; } public static boolean isCounterKeyInGroup(String counterKey, String groupName) { return (counterKey.startsWith(getCounterKey(groupName, ""))); } public static String getCounterNameFromCounterKey(String counterKey, String groupName) { String groupCounterKeyPrefix = getCounterKey(groupName, ""); if (counterKey.startsWith(groupCounterKeyPrefix)) { int prefixLength = groupCounterKeyPrefix.length(); return counterKey.substring(prefixLength); } return null; } // TODO Use this routine with the above code? Would need to map from Enum to name, // compare against what we get back here. public static Map<String, Long> getCounters(Flow flow) { Map<String, Long> result = new HashMap<String, Long>(); FlowStats stats = flow.getFlowStats(); List<FlowStepStats> stepStats = stats.getFlowStepStats(); for (FlowStepStats stepStat : stepStats) { Collection<String> counterGroups = stepStat.getCounterGroups(); for (String counterGroup : counterGroups) { Collection<String> counters = stepStat.getCountersFor(counterGroup); for (String counter : counters) { long counterValue = stepStat.getCounterValue(counterGroup, counter); String counterKey = getCounterKey(counterGroup, counter); if (result.containsKey(counterKey)) { counterValue += result.get(counterKey); } result.put(counterKey, counterValue); } } } return result; } }