/** * Copyright 2010-2013 Scale Unlimited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.scaleunlimited.cascading; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Counters; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.scaleunlimited.cascading.hadoop.HadoopUtils; import cascading.flow.FlowProcess; import cascading.flow.FlowProcessWrapper; import cascading.flow.hadoop.HadoopFlowProcess; public class LoggingFlowProcess<Config> extends FlowProcessWrapper<Config> { private static final Logger LOGGER = LoggerFactory.getLogger(LoggingFlowProcess.class); // enum used for counting number of occurrences of each type of msg. public static enum LoggingLevels { TRACE, DEBUG, INFO, WARN, ERROR; public static LoggingLevels fromLevel(Level level) { switch (level) { case SLF4J_TRACE: return TRACE; case SLF4J_DEBUG: return DEBUG; case SLF4J_ERROR: return ERROR; case SLF4J_INFO: return INFO; case SLF4J_WARN: return WARN; default: throw new RuntimeException("Unknown level: " + level); } } } private class HadoopFlowReporter implements IFlowReporter { private Reporter _reporter; public HadoopFlowReporter(Reporter reporter) { _reporter = reporter; } // TODO VMa: fix for slf4j @Override public void setStatus(Level level, String msg) { if ((_reporter != null) && level.isGreaterOrEqual(Level.SLF4J_INFO)) { _reporter.setStatus("Cascading " + level + ": " + msg); } } @Override public void setStatus(String msg, Throwable t) { // TODO KKr - add stringified <t> (maybe from Nutch?) to msg. if (_reporter != null) { _reporter.setStatus("Cascading " + Level.SLF4J_ERROR + ": " + msg); } } } private boolean _isLocal; private List<IFlowReporter> _reporters; private Map<Enum, AtomicLong> _localCounters; public LoggingFlowProcess(FlowProcess<Config> baseProcess, IFlowReporter reporter) { super(baseProcess); init(HadoopUtils.undelegate(baseProcess), reporter); } public LoggingFlowProcess(FlowProcess<Config> baseProcess) { super(baseProcess); FlowProcess dfp = HadoopUtils.undelegate(baseProcess); if (dfp instanceof HadoopFlowProcess) { init(dfp, new HadoopFlowReporter(((HadoopFlowProcess)dfp).getReporter())); } else { init(dfp, new LoggingFlowReporter()); } } public LoggingFlowProcess(HadoopFlowProcess baseProcess) { super(baseProcess); IFlowReporter reporter = new HadoopFlowReporter(baseProcess.getReporter()); init(HadoopUtils.undelegate(baseProcess), reporter); } /** * A no-argument constructor for use during testing, when we don't have a * real Cascading FlowProcess to use. */ public LoggingFlowProcess() { super(FlowProcess.NULL); init(HadoopUtils.undelegate(FlowProcess.NULL), new LoggingFlowReporter()); } /** * @param delegateProcess that might have been hidden inside a * FlowProcessWrapper, where the latter was passed as baseProcess to the * constructor (i.e., you should pass the result of * {@link HadoopUtils#undelegate(FlowProcess)}) to this method. * @param reporter where logging will be directed. */ private void init(FlowProcess delegateProcess, IFlowReporter reporter) { _isLocal = ( (!(delegateProcess instanceof HadoopFlowProcess)) || HadoopUtils.isJobLocal(((HadoopFlowProcess) delegateProcess).getJobConf())); _localCounters = new HashMap<Enum, AtomicLong>(); _reporters = new ArrayList<IFlowReporter>(); addReporter(reporter); } public void addReporter(IFlowReporter reporter) { _reporters.add(reporter); } // @SuppressWarnings("deprecation") // public JobConf getJobConf() throws IOException { // if (getDelegate() instanceof HadoopFlowProcess) { // return ((HadoopFlowProcess)getDelegate()).getJobConf(); // } else { // return new JobConf(); // } // } // public void setStatus(String msg) { setStatus(Level.SLF4J_INFO, msg); } public void setStatus(String msg, Throwable t) { super.setStatus(msg); for (IFlowReporter reporter : _reporters) { reporter.setStatus(msg, t); } increment(LoggingLevels.ERROR, 1); } public void setStatus(Level level, String msg) { super.setStatus(msg); for (IFlowReporter reporter : _reporters) { reporter.setStatus(level, msg); } increment(LoggingLevels.fromLevel(level), 1); } @Override public void increment(Enum counter, long amount) { super.increment(counter, amount); // TODO KKr - decide if I really want to track stuff locally if (true || _isLocal) { synchronized (_localCounters) { if (_localCounters.get(counter) == null) { _localCounters.put(counter, new AtomicLong()); } } AtomicLong curCount = _localCounters.get(counter); long newValue = curCount.addAndGet(amount); if (LOGGER.isTraceEnabled()) { LOGGER.trace("Cascading counter: " + counter + (amount > 0 ? " + " : " - ") + Math.abs(amount) + " = " + newValue); } } } @Override public void increment(String group, String counter, long amount) { super.increment(group, counter, amount); // TODO KKr - get my local counters in sync? } public void decrement(Enum counter, long amount) { increment(counter, -amount); } /** * @param counter whose value should be returned * @return current value of the counter, local to the task * <br/><br/><b>Note:</b> Only the JobTracker aggregates task counter values * to report the job-wide total. */ public long getCounter(Enum counter) { if (_isLocal) { AtomicLong count = _localCounters.get(counter); if (count != null) { return count.get(); } else { return 0; } } else { Counters counters = new Counters(); Counter hadoopCounter = counters.findCounter(counter); if (hadoopCounter != null) { return (int)hadoopCounter.getValue(); } else { return 0; } } } /** * If we're running in local mode, log current counter values. */ public void dumpCounters() { if (_isLocal) { for (Enum theEnum : _localCounters.keySet()) { LOGGER.info(String.format("Cascading counter: %s = %d", theEnum, _localCounters .get(theEnum).get())); } } // FUTURE KKr - also dump Hadoop counters to Logger? } }