/*
 * Copyright 2012 LinkedIn, Inc
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.linkedin.whiteelephant;

import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.Arrays;
import java.util.Properties;
import java.util.concurrent.ExecutionException;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import com.linkedin.whiteelephant.analysis.ComputeUsagePerHour;
import com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJobExecutor;
import com.linkedin.whiteelephant.parsing.ParseJobConfs;
import com.linkedin.whiteelephant.parsing.ParseJobsFromLogs;

/**
 * Command-line entry point and {@link Runnable} that submits three jobs
 * ({@link ParseJobConfs}, {@link ParseJobsFromLogs} and
 * {@link ComputeUsagePerHour}, in that order) to a shared
 * {@link StagedOutputJobExecutor} and then waits for the executor to finish.
 *
 * <p>Configuration is supplied as a {@link Properties} object; the
 * {@code job.concurrency} property is mandatory and is passed to the executor's
 * constructor.
 */
public class ProcessLogs implements Runnable
{
  /** Name of the mandatory property holding the executor concurrency. */
  private static final String JOB_CONCURRENCY_KEY = "job.concurrency";

  private final Logger _log;

  private final Properties _props;

  // NOTE(review): nothing in this file ever assigns _progress, so getProgress()
  // always reports the initial value 0.0.
  private double _progress;

  private final int _jobConcurrency;
  private final StagedOutputJobExecutor _executor;

  private final ParseJobsFromLogs _parseJobs;
  private final ComputeUsagePerHour _usagePerHour;
  private final ParseJobConfs _parseJobConfs;

  /**
   * Creates the executor and the three jobs from the given configuration.
   *
   * @param name  logger name; also handed to each job's constructor
   * @param props configuration; must contain {@code job.concurrency}
   * @throws IllegalArgumentException if {@code job.concurrency} is missing, or
   *         (as {@link NumberFormatException}) if it is not an integer
   * @throws IOException declared by the original signature; kept for callers
   */
  public ProcessLogs(String name, Properties props) throws IOException
  {
    _log = Logger.getLogger(name);
    _props = props;

    // getProperty (unlike get) also consults the Properties defaults and never
    // needs a cast.
    String concurrency = _props.getProperty(JOB_CONCURRENCY_KEY);
    if (concurrency == null)
    {
      throw new IllegalArgumentException("job.concurrency is not specified.");
    }

    // set log level for these classes to error to suppress spewing warnings about splits
    Logger.getLogger("org.apache.hadoop.mapreduce.split.JobSplitWriter").setLevel(Level.ERROR);
    Logger.getLogger("org.apache.hadoop.mapreduce.split.SplitMetaInfoReader").setLevel(Level.ERROR);

    // Properties.load keeps trailing whitespace on values, which parseInt rejects.
    _jobConcurrency = Integer.parseInt(concurrency.trim());
    _executor = new StagedOutputJobExecutor(_jobConcurrency);

    _parseJobs = new ParseJobsFromLogs(name, props);
    _usagePerHour = new ComputeUsagePerHour(name, props);
    _parseJobConfs = new ParseJobConfs(name, props);
  }

  /**
   * Submits the three jobs to the executor and blocks until the executor
   * reports completion. Failures are logged rather than propagated, since
   * {@link Runnable#run()} cannot throw checked exceptions.
   */
  @Override
  public void run()
  {
    _log.info(String.format("Starting %s", getClass().getSimpleName()));

    try
    {
      System.out.println("Parsing logs");
      _parseJobConfs.execute(_executor);
      _parseJobs.execute(_executor);
      _usagePerHour.execute(_executor);
      _executor.waitForCompletionThenShutdown();
      System.out.println("All tasks have completed!");
    }
    catch (IOException e)
    {
      _log.error("I/O failure while processing logs", e);
    }
    catch (InterruptedException e)
    {
      // Restore the interrupt flag so code further up the stack can see that
      // this thread was asked to stop.
      Thread.currentThread().interrupt();
      _log.error("Interrupted while processing logs", e);
    }
    catch (ExecutionException e)
    {
      _log.error("A job failed while processing logs", e);
    }
  }

  /**
   * Returns the current value of the progress field.
   *
   * @return the progress value; this class never updates it, so it is 0.0
   */
  public double getProgress()
  {
    return _progress;
  }

  /** Cancels processing by calling {@code shutdownNow()} on the executor. */
  public void cancel()
  {
    _executor.shutdownNow();
  }

  /**
   * Loads the properties stored in {@code fileName} into {@code props},
   * overriding any keys already present.
   *
   * @param props    destination properties
   * @param fileName path of the properties file to read
   * @throws IOException if the file cannot be opened or read
   */
  private static void loadProperties(Properties props, String fileName) throws IOException
  {
    FileInputStream propStream = new FileInputStream(fileName);
    try
    {
      props.load(propStream);
    }
    finally
    {
      // Close even when load() throws, so the file handle is not leaked.
      propStream.close();
    }
  }

  /**
   * Loads every {@code *.properties} file in the current working directory,
   * then the job file given as the single argument (so the job file's values
   * win), and runs {@link ProcessLogs} with the merged configuration.
   *
   * <p>Exits with status 1 when the argument count is wrong or the job file
   * does not exist.
   *
   * @param args exactly one element: the job file name
   * @throws IOException if a properties file cannot be read
   */
  public static void main(String[] args) throws IOException
  {
    if (args.length == 0)
    {
      System.out.println("The job file name is required");
      System.exit(1);
    }
    else if (args.length > 1)
    {
      System.out.println("Too many arguments. Only the job file name is required");
      System.exit(1);
    }

    String jobName = args[0];
    File jobFile = new File(jobName);

    File[] propFiles = new File(".").listFiles(new FilenameFilter()
    {
      @Override
      public boolean accept(File dir, String name)
      {
        // Skip directories that merely happen to be named *.properties.
        return name.endsWith(".properties") && new File(dir, name).isFile();
      }
    });

    // listFiles returns null when the directory cannot be listed.
    if (propFiles == null)
    {
      propFiles = new File[0];
    }

    // listFiles gives no ordering guarantee; sort so that the override order
    // between property files is the same on every run.
    Arrays.sort(propFiles);

    Properties props = new Properties();

    for (File propFile : propFiles)
    {
      System.out.println("Loading configuration from " + propFile.getAbsolutePath());
      loadProperties(props, propFile.getAbsolutePath());
    }

    if (jobFile.exists())
    {
      System.out.println("Loading configuration from " + jobFile.getAbsolutePath());
      loadProperties(props, jobFile.getAbsolutePath());
    }
    else
    {
      System.out.println("File " + jobName + " not found");
      System.exit(1);
    }

    new ProcessLogs(jobName, props).run();
  }
}