ProcessLogs.java example

Explorer

white-elephant-master
- hadoop
  - src
    - java
      - com
        linkedin
        whiteelephant
        ProcessLogs.java
        analysis
        ComputeUsagePerHour.java
        mapreduce
        MyAvroMultipleOutputs.java
        lib
        input
        CombineDocumentFileFormat.java
        CombinedTextInputFormat.java
        job
        StagedOutputJob.java
        StagedOutputJobExecutor.java
        parsing
        LineParsing.java
        ParseJobConfs.java
        ParseJobsFromLogs.java
        util
        JobStatsProcessing.java
- server
  - src
    - java
      - com
        linkedin
        whiteelephant
        TimeZoneConversion.java

/*
 * Copyright 2012 LinkedIn, Inc
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.linkedin.whiteelephant;

import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.Properties;
import java.util.concurrent.ExecutionException;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import com.linkedin.whiteelephant.analysis.ComputeUsagePerHour;
import com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJobExecutor;
import com.linkedin.whiteelephant.parsing.ParseJobConfs;
import com.linkedin.whiteelephant.parsing.ParseJobsFromLogs;

/**
 * Command-line entry point and {@link Runnable} that drives three processing steps
 * ({@link ParseJobConfs}, {@link ParseJobsFromLogs} and {@link ComputeUsagePerHour})
 * through one shared {@link StagedOutputJobExecutor}.
 *
 * <p>The only configuration key read directly by this class is {@code job.concurrency};
 * the same {@link Properties} object is also handed to each of the three steps.</p>
 */
public class ProcessLogs implements Runnable
{
  /** Property holding the integer passed to the {@link StagedOutputJobExecutor} constructor. */
  private static final String JOB_CONCURRENCY_KEY = "job.concurrency";

  private final Logger _log;
  private final Properties _props;

  // Never assigned anywhere in this class, so it keeps its default value of 0.0.
  private double _progress;

  private final int _jobConcurrency;
  private final StagedOutputJobExecutor _executor;

  private final ParseJobsFromLogs _parseJobs;
  private final ComputeUsagePerHour _usagePerHour;
  private final ParseJobConfs _parseJobConfs;

  /**
   * Builds the executor and the three processing steps from the given configuration.
   *
   * @param name  used as the logger name and passed to each processing step
   * @param props configuration; must contain {@code job.concurrency}
   * @throws IllegalArgumentException if {@code job.concurrency} is missing
   * @throws NumberFormatException if {@code job.concurrency} is not a valid integer
   * @throws IOException declared by this constructor; presumably raised while constructing
   *         one of the processing steps (not visible from this file)
   */
  public ProcessLogs(String name, Properties props) throws IOException
  {
    _log = Logger.getLogger(name);
    _props = props;

    // getProperty (unlike get) also consults the Properties defaults chain and never
    // triggers a ClassCastException for a non-String value.
    String concurrency = _props.getProperty(JOB_CONCURRENCY_KEY);
    if (concurrency == null)
    {
      throw new IllegalArgumentException("job.concurrency is not specified.");
    }

    // set log level for these classes to error to suppress spewing warnings about splits
    org.apache.log4j.Logger.getLogger("org.apache.hadoop.mapreduce.split.JobSplitWriter").setLevel(Level.ERROR);
    org.apache.log4j.Logger.getLogger("org.apache.hadoop.mapreduce.split.SplitMetaInfoReader").setLevel(Level.ERROR);

    // Properties.load keeps trailing whitespace in values, which Integer.parseInt rejects.
    _jobConcurrency = Integer.parseInt(concurrency.trim());
    _executor = new StagedOutputJobExecutor(_jobConcurrency);

    _parseJobs = new ParseJobsFromLogs(name, props);
    _usagePerHour = new ComputeUsagePerHour(name, props);
    _parseJobConfs = new ParseJobConfs(name, props);
  }

  /**
   * Hands the three processing steps to the executor, in the order job confs, jobs,
   * usage per hour, then waits for completion and shuts the executor down.
   *
   * <p>Failures are logged rather than propagated, because {@link Runnable#run()} cannot
   * throw checked exceptions. If the thread is interrupted, its interrupt status is
   * restored so the caller can still observe it.</p>
   */
  @Override
  public void run()
  {
    _log.info(String.format("Starting %s", getClass().getSimpleName()));

    try
    {
      System.out.println("Parsing logs");

      _parseJobConfs.execute(_executor);
      _parseJobs.execute(_executor);
      _usagePerHour.execute(_executor);

      _executor.waitForCompletionThenShutdown();

      System.out.println("All tasks have completed!");
    }
    catch (IOException e)
    {
      reportFailure(e);
    }
    catch (InterruptedException e)
    {
      // Catching InterruptedException clears the flag; set it again for code further up.
      Thread.currentThread().interrupt();
      reportFailure(e);
    }
    catch (ExecutionException e)
    {
      reportFailure(e);
    }
    // NOTE(review): on failure the executor is not shut down here; confirm whether
    // cancel() should be invoked in that case.
  }

  /**
   * Records a failure of {@link #run()} through this instance's logger, including the
   * stack trace.
   */
  private void reportFailure(Exception e)
  {
    _log.error(String.format("%s failed", getClass().getSimpleName()), e);
  }

  /**
   * Returns the progress value held by this instance.
   *
   * <p>No code in this class assigns the backing field, so as written this always
   * returns {@code 0.0}.</p>
   *
   * @return the current progress value
   */
  public double getProgress()
  {
    return _progress;
  }

  /**
   * Requests cancellation by calling {@code shutdownNow()} on the executor.
   */
  public void cancel()
  {
    _executor.shutdownNow();
  }

  /**
   * Loads the properties file at {@code fileName} into {@code props}; keys already
   * present are overwritten by the file's values.
   *
   * @param props    destination for the loaded entries
   * @param fileName path of the properties file to read
   * @throws IOException if the file cannot be opened or read
   */
  private static void loadProperties(Properties props, String fileName) throws IOException
  {
    FileInputStream propStream = new FileInputStream(fileName);
    try
    {
      props.load(propStream);
    }
    finally
    {
      // Close even when load() throws so the file handle is not leaked.
      propStream.close();
    }
  }

  /**
   * Command-line entry point. Expects exactly one argument, the job file name.
   *
   * <p>Every {@code *.properties} file in the current directory is loaded first, then the
   * job file, so values in the job file take precedence. The process exits with status 1
   * when the argument count is wrong or the job file does not exist.</p>
   *
   * @param args a single element: the job file name
   * @throws IOException if a configuration file cannot be read or the job cannot be set up
   */
  public static void main(String[] args) throws IOException
  {
    if (args.length == 0)
    {
      System.out.println("The job file name is required");
      System.exit(1);
    }
    else if (args.length > 1)
    {
      System.out.println("Too many arguments.  Only the job file name is required");
      System.exit(1);
    }

    String jobName = args[0];
    File jobFile = new File(jobName);

    File currentDir = new File(".");
    File[] propFiles = currentDir.listFiles(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String name)
      {
        return name.endsWith(".properties");
      }
    });

    Properties props = new Properties();

    if (propFiles == null)
    {
      // listFiles returns null when the directory cannot be read.
      System.out.println("Could not list configuration files in " + currentDir.getAbsolutePath());
    }
    else
    {
      // listFiles guarantees no ordering; sort so that which file wins on a duplicate
      // key is the same on every run.
      java.util.Arrays.sort(propFiles);

      for (File propFile : propFiles)
      {
        System.out.println("Loading configuration from " + propFile.getAbsolutePath());
        loadProperties(props, propFile.getAbsolutePath());
      }
    }

    if (jobFile.exists())
    {
      System.out.println("Loading configuration from " + jobFile.getAbsolutePath());
      loadProperties(props, jobFile.getAbsolutePath());
    }
    else
    {
      System.out.println("File " + jobName + " not found");
      System.exit(1);
    }

    new ProcessLogs(jobName, props).run();
  }
}