/**
 * (c) Copyright 2013 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kiji.mapreduce.util;

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.text.NumberFormat;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.apache.hadoop.util.StringUtils;
import org.aspectj.lang.Aspects;
import org.aspectj.lang.JoinPoint;
import org.aspectj.lang.annotation.AfterReturning;
import org.aspectj.lang.annotation.Aspect;
import org.aspectj.lang.annotation.Pointcut;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.schema.util.LogTimerAspect;
import org.kiji.schema.util.LoggingInfo;

/**
 * This aspect is invoked after the cleanup function in a MapReduce job. It
 * accesses logging information gathered by the LogTimerAspect in KijiSchema and
 * serializes it to a file on HDFS.
 */
@ApiAudience.Framework
@ApiStability.Experimental
@Aspect
public class SerializeLoggerAspect {
  private MRLogTimerAspect mMRLogTimerAspect;
  private LogTimerAspect mLogTimerAspect;
  private static final Logger LOG = LoggerFactory.getLogger(SerializeLoggerAspect.class);

  /**
   * Output directory (relative to the MapReduce job's working directory on HDFS, that is,
   * context.getWorkingDirectory()) in which to store profiling results.
   */
  private static final String STATS_DIR = "kijistats";

  /**
   * Default constructor. Initializes the singleton LogTimerAspect and MRLogTimerAspect
   * for this JVM instance.
   */
  protected SerializeLoggerAspect() {
    if (Aspects.hasAspect(LogTimerAspect.class)) {
      mLogTimerAspect = Aspects.aspectOf(LogTimerAspect.class);
    } else {
      throw new RuntimeException("Log Timer aspect not found!");
    }
    if (Aspects.hasAspect(MRLogTimerAspect.class)) {
      mMRLogTimerAspect = Aspects.aspectOf(MRLogTimerAspect.class);
    } else {
      throw new RuntimeException("MR Log Timer aspect not found!");
    }
  }

  /**
   * Pointcut attached to cleanup code in MapReduce tasks. We can have a number of classes
   * that inherit from KijiMapper/KijiReducer. We would like to match these only once, so
   * that the code is not executed once for the parent and once for the child class. This,
   * along with the advice below that uses cflowbelow, was the cleanest way I could find
   * to do this.
   */
  @Pointcut("execution(* org.kiji.mapreduce.KijiMapper.cleanup(..)) ||"
      + "execution(* org.kiji.mapreduce.KijiReducer.cleanup(..))")
  protected void mrCleanupPoint() {
  }
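  // Illustrative sketch (not part of the original source): why the advice further below
  // guards with !cflowbelow(mrCleanupPoint()). A subclass that overrides cleanup() and
  // calls super.cleanup() produces two matching executions of this pointcut, e.g.:
  //
  //   public class MyMapper extends SomeKijiMapper {      // hypothetical subclass
  //     @Override
  //     protected void cleanup(Context context) throws IOException, InterruptedException {
  //       super.cleanup(context);  // this nested execution also matches mrCleanupPoint()
  //     }
  //   }
  //
  // Without the cflowbelow guard, the profiling stats would be serialized once for the
  // child's cleanup() and once more for the parent's. The guard excludes join points that
  // occur below an already-matched cleanup() in the control flow, so the advice runs once.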
  /**
   * Logic to serialize collected profiling content to a file on HDFS. The files are stored
   * in the current working directory for this context, in a folder specified by STATS_DIR.
   * The per-task file is named by the task attempt id.
   * We obtain the profiling stats collected by the LogTimerAspect in KijiSchema.
   * The format of the file is as follows:
   * Job Name, Job ID, Task Attempt, Function Signature,
   * Aggregate Time (nanoseconds), Number of Invocations, Time per call (nanoseconds)'\n'
   *
   * @param context The {@link TaskInputOutputContext} for this job.
   * @throws IOException If the writes to HDFS fail.
   */
  private void serializeToFile(TaskInputOutputContext context) throws IOException {
    Path parentPath = new Path(context.getWorkingDirectory(), STATS_DIR);
    FileSystem fs = FileSystem.get(context.getConfiguration());
    fs.mkdirs(parentPath);
    Path path = new Path(parentPath, context.getTaskAttemptID().toString());
    OutputStreamWriter out = new OutputStreamWriter(fs.create(path, true), "UTF-8");
    try {
      out.write("Job Name, Job ID, Task Attempt, Function Signature, "
          + "Aggregate Time (nanoseconds), Number of Invocations, Time per call (nanoseconds)\n");
      ConcurrentHashMap<String, LoggingInfo> signatureTimeMap =
          mLogTimerAspect.getSignatureTimeMap();
      for (Map.Entry<String, LoggingInfo> entrySet : signatureTimeMap.entrySet()) {
        writeProfileInformation(out, context, entrySet.getKey(), entrySet.getValue());
      }
      signatureTimeMap = mMRLogTimerAspect.getSignatureTimeMap();
      for (Map.Entry<String, LoggingInfo> entrySet : signatureTimeMap.entrySet()) {
        writeProfileInformation(out, context, entrySet.getKey(), entrySet.getValue());
      }
    } finally {
      out.close();
    }
  }

  /**
   * Logic to write profiling content for a single method signature to a file on HDFS.
   * The format of the file is as follows: Job Name, Job ID, Task Attempt, Function Signature,
   * Aggregate Time (nanoseconds), Number of Invocations, Time per call (nanoseconds)'\n'
   *
   * @param out The {@link OutputStreamWriter} for writing to the file.
   * @param context The {@link TaskInputOutputContext} for this job.
   * @param signature The method signature for the profile.
   * @param loggingInfo The profiling information for the method.
   * @throws IOException If the writes to HDFS fail.
   */
  private void writeProfileInformation(OutputStreamWriter out,
      TaskInputOutputContext context,
      String signature,
      LoggingInfo loggingInfo)
      throws IOException {
    // Ensure that files do not end up with x.yzE7 format for floats. Instead of 1.0E3,
    // we want 1000.000.
    NumberFormat nf = NumberFormat.getInstance();
    nf.setGroupingUsed(false);
    nf.setMinimumFractionDigits(1);
    nf.setMaximumFractionDigits(3);

    out.write(context.getJobName() + ", "
        + context.getJobID() + ", "
        + context.getTaskAttemptID() + ", "
        + signature + ", "
        + loggingInfo.toString() + ", "
        + nf.format(loggingInfo.perCallTime()) + "\n");
  }

  /**
   * Advice that runs after any function matching the pointcut "mrCleanupPoint", such as
   * the cleanup call in some KijiMapper. This also ensures that the serialize code is
   * called only once, not in both the parent and the child class. The serializeToFile
   * function serializes the profiling stats collected by the LogTimerAspect.
   *
   * @param thisJoinPoint The joinpoint that matched the pointcut.
   */
  @AfterReturning("mrCleanupPoint() && !cflowbelow(mrCleanupPoint())")
  public void afterMRCleanup(final JoinPoint thisJoinPoint) {
    TaskInputOutputContext context = (TaskInputOutputContext) thisJoinPoint.getArgs()[0];
    try {
      serializeToFile(context);
    } catch (IOException ioe) {
      // Use a {} placeholder so the stringified exception is actually logged; SLF4J
      // silently ignores a non-Throwable argument when the message has no placeholder.
      LOG.error("Failure writing profiling results: {}", StringUtils.stringifyException(ioe));
    }
  }
}
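// Illustrative example only (hypothetical job and task attempt ids): given the header
// written by serializeToFile(), a single row of a per-task stats file might look like:
//
//   myjob, job_201310211200_0001, attempt_201310211200_0001_m_000000_0,
//       void org.kiji.schema.KijiTable.release(), 4500000, 3, 1500000.0
//
// assuming LoggingInfo.toString() renders "<aggregate time>, <number of invocations>"
// to match the documented column order; the last column is 4500000 / 3 nanoseconds
// per call as formatted by writeProfileInformation().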