/**
* (c) Copyright 2013 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.util;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.text.NumberFormat;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.apache.hadoop.util.StringUtils;
import org.aspectj.lang.Aspects;
import org.aspectj.lang.JoinPoint;
import org.aspectj.lang.annotation.AfterReturning;
import org.aspectj.lang.annotation.Aspect;
import org.aspectj.lang.annotation.Pointcut;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.schema.util.LogTimerAspect;
import org.kiji.schema.util.LoggingInfo;
/**
 * This aspect is invoked after the cleanup function in a mapreduce job. It
 * accesses logging information gathered by the LogTimerAspect in kiji schema and
 * serializes it to a local file.
 */
@ApiAudience.Framework
@ApiStability.Experimental
@Aspect
public class SerializeLoggerAspect {
  private MRLogTimerAspect mMRLogTimerAspect;
  private LogTimerAspect mLogTimerAspect;
  private static final Logger LOG = LoggerFactory.getLogger(SerializeLoggerAspect.class);
  /**
   * Output directory (relative to the MapReduce job's working directory on HDFS, that is
   * context.getWorkingDirectory()) to store profiling results.
   */
  private static final String STATS_DIR = "kijistats";
  /**
   * Default constructor. Initializes the singleton LogTimerAspect and MRLogTimerAspect
   * for this JVM instance.
   *
   * @throws RuntimeException If either required aspect has not been woven into this JVM.
   */
  protected SerializeLoggerAspect() {
    if (Aspects.hasAspect(LogTimerAspect.class)) {
      mLogTimerAspect = Aspects.aspectOf(LogTimerAspect.class);
    } else {
      throw new RuntimeException("Log Timer aspect not found!");
    }
    if (Aspects.hasAspect(MRLogTimerAspect.class)) {
      mMRLogTimerAspect = Aspects.aspectOf(MRLogTimerAspect.class);
    } else {
      throw new RuntimeException("MR Log Timer aspect not found!");
    }
  }
  /**
   * Pointcut attached to cleanup code in MapReduce tasks. We can have a number of classes
   * that inherit from KijiMapper/Reducer. We would like to match these only once, so that
   * the code is not executed once for the parent and once for the child class. This along
   * with the advice below that uses cflowbelow was the cleanest way I could find to do this.
   */
  @Pointcut("execution(* org.kiji.mapreduce.KijiMapper.cleanup(..)) ||"
      + "execution(* org.kiji.mapreduce.KijiReducer.cleanup(..))")
  protected void mrCleanupPoint() {
  }
  /**
   * Logic to serialize collected profiling content to a file on HDFS. The files are stored
   * in the current working directory for this context, in a folder specified by STATS_DIR. The per
   * task file is named by the task attempt id.
   * We obtain the profiling stats collected by the LogTimerAspect in KijiSchema. The format of the
   * file is as follows: Job Name, Job ID, Task Attempt, Function Signature,
   * Aggregate Time (nanoseconds), Number of Invocations, Time per call (nanoseconds)'\n'
   *
   * @param context The {@link TaskInputOutputContext} for this job.
   * @throws IOException If the stats directory cannot be created or the writes to HDFS fail.
   */
  private void serializeToFile(TaskInputOutputContext context) throws IOException {
    Path parentPath = new Path(context.getWorkingDirectory(), STATS_DIR);
    FileSystem fs = FileSystem.get(context.getConfiguration());
    // Fail fast with a clear message instead of letting the subsequent create() fail obscurely.
    if (!fs.mkdirs(parentPath)) {
      throw new IOException("Unable to create stats directory: " + parentPath);
    }
    Path path = new Path(parentPath, context.getTaskAttemptID().toString());
    OutputStreamWriter out = new OutputStreamWriter(fs.create(path, true), "UTF-8");
    try {
      // Header row matching the per-signature lines written by writeProfileInformation().
      out.write("Job Name, Job ID, Task Attempt, Function Signature, Aggregate Time (nanoseconds), "
          + "Number of Invocations, Time per call (nanoseconds)\n");
      // First dump the stats gathered by the KijiSchema-side timer aspect...
      ConcurrentHashMap<String, LoggingInfo> signatureTimeMap =
          mLogTimerAspect.getSignatureTimeMap();
      for (Map.Entry<String, LoggingInfo> entrySet: signatureTimeMap.entrySet()) {
        writeProfileInformation(out, context, entrySet.getKey(), entrySet.getValue());
      }
      // ...then the stats gathered by the MapReduce-side timer aspect.
      signatureTimeMap = mMRLogTimerAspect.getSignatureTimeMap();
      for (Map.Entry<String, LoggingInfo> entrySet: signatureTimeMap.entrySet()) {
        writeProfileInformation(out, context, entrySet.getKey(), entrySet.getValue());
      }
    } finally {
      out.close();
    }
  }
  /**
   * Logic to write a profiling content for a single method signature to a file on HDFS.
   * The format of the file is as follows: Job Name, Job ID, Task Attempt, Function Signature,
   * Aggregate Time (nanoseconds), Number of Invocations, Time per call (nanoseconds)'\n'
   *
   * @param out The {@link OutputStreamWriter} for writing to the file.
   * @param context The {@link TaskInputOutputContext} for this job.
   * @param signature The method signature for the profile.
   * @param loggingInfo The profiling information for the method.
   * @throws IOException If the writes to HDFS fail.
   */
  private void writeProfileInformation(OutputStreamWriter out,
      TaskInputOutputContext context,
      String signature,
      LoggingInfo loggingInfo)
      throws IOException {
    // Ensure that files do not end up with x.yzE7 format for floats. Instead of 1.0E3, we want
    // 1000.000.
    NumberFormat nf = NumberFormat.getInstance();
    nf.setGroupingUsed(false);
    nf.setMinimumFractionDigits(1);
    nf.setMaximumFractionDigits(3);
    out.write(context.getJobName() + ", "
        + context.getJobID() + ", "
        + context.getTaskAttemptID() + ", "
        + signature + ", "
        + loggingInfo.toString() + ", "
        + nf.format(loggingInfo.perCallTime()) + "\n");
  }
  /**
   * Advice for running after any functions that match PointCut "mrCleanupPoint", such as
   * the cleanup call in some KijiMapper. This also ensures that the serialize code is
   * called only once, not in both the parent and child class. The serializeToFile function
   * serializes the profiling stats collected by the LogTimerAspect.
   *
   * @param thisJoinPoint The joinpoint that matched the pointcut.
   */
  @AfterReturning("mrCleanupPoint() && !cflowbelow(mrCleanupPoint())")
  public void afterMRCleanup(final JoinPoint thisJoinPoint) {
    // cleanup(Context) takes the task context as its single argument.
    TaskInputOutputContext context = (TaskInputOutputContext) thisJoinPoint.getArgs()[0];
    try {
      serializeToFile(context);
    } catch (IOException ioe) {
      // Pass the Throwable itself so SLF4J logs the full stack trace. The previous code passed
      // a stringified exception as a format argument to a message with no '{}' placeholder,
      // which silently dropped the exception details.
      LOG.error("Failure writing profiling results", ioe);
    }
  }
}