/*
* Copyright 2011-2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.data.hadoop.impala.mapreduce;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.reflect.Array;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.hadoop.impala.common.ConfigurationAware;
import org.springframework.data.hadoop.impala.common.util.SecurityUtil;
import org.springframework.data.hadoop.impala.common.util.SecurityUtil.ExitTrappedException;
import org.springframework.shell.core.ExecutionProcessor;
import org.springframework.shell.core.annotation.CliCommand;
import org.springframework.shell.core.annotation.CliOption;
import org.springframework.shell.event.ParseResult;
import org.springframework.stereotype.Component;
/**
* Commands to submit and interact with MapReduce jobs
*
* @author Jarred Li
* @author Author of <code>org.apache.hadoop.util.RunJar</code>
*/
@Component
public class MapReduceCommands extends ConfigurationAware implements ExecutionProcessor{
/** Client that executes the job commands; assigned by {@code init()} and used by {@code run(String[])}. */
private JobClient jobClient;
/** Common prefix of the job-related shell command names declared in this class. */
private static final String PREFIX = "mr job ";
/** Used by {@code jar(...)} to forbid {@code System.exit} calls while a user jar runs and to re-enable them afterwards. */
@Autowired
private SecurityUtil securityUtil;
/**
 * Checks that a job tracker is configured before a command is invoked.
 * <p>
 * After delegating to the superclass, the {@code mapred.job.tracker} property is read from
 * the Hadoop configuration. When it is missing or empty the problem is logged and a
 * {@link RuntimeException} is thrown. Otherwise the job client is created if it does not
 * exist yet and, when the OS name contains "win", {@code fromShort} is called on the
 * job directory / job file permission constants of {@code JobSubmissionFiles}.
 *
 * @param invocationContext the parse result of the command about to run
 * @return the parse result returned by the superclass
 */
@Override
public ParseResult beforeInvocation(ParseResult invocationContext) {
    invocationContext = super.beforeInvocation(invocationContext);

    final String trackerAddress = getHadoopConfiguration().get("mapred.job.tracker");
    final boolean trackerMissing = (trackerAddress == null) || (trackerAddress.length() == 0);
    if (trackerMissing) {
        LOG.severe("You must set Job Tracker URL before run Map Reduce commands");
        throw new RuntimeException("You must set Job Tracker URL before run Map Reduce commands");
    }

    // Create the client lazily on the first command.
    if (jobClient == null) {
        init();
    }

    final String osName = System.getProperty("os.name").toLowerCase();
    if (osName.contains("win")) {
        org.apache.hadoop.mapreduce.JobSubmissionFiles.JOB_DIR_PERMISSION.fromShort((short) 0700);
        org.apache.hadoop.mapreduce.JobSubmissionFiles.JOB_FILE_PERMISSION.fromShort((short) 0644);
    }
    return invocationContext;
}
/**
 * (Re)creates the {@link JobClient} from the current Hadoop configuration.
 * <p>
 * When the client cannot be created the failure is logged and the client reference is
 * cleared, so that a client built from an earlier configuration is never used against the
 * new one; {@code beforeInvocation} creates a client again when it finds none.
 */
public void init() {
    try {
        jobClient = new JobClient(new JobConf(getHadoopConfiguration()));
    } catch (IOException ex) {
        // Do not keep a client that still points at the previous configuration.
        jobClient = null;
        LOG.severe("Cannot create job client: " + ex.getMessage());
    }
}
/**
 * Names this component as "Map/Reduce".
 * NOTE(review): presumably used by the base class when reporting failures - confirm
 * against {@code ConfigurationAware}.
 *
 * @return the fixed component name
 */
@Override
protected String failedComponentName() {
    final String componentName = "Map/Reduce";
    return componentName;
}
/**
 * Re-creates the job client after a configuration change. An informational message is
 * logged only when a client already existed.
 *
 * @return always {@code true}
 * @throws Exception declared by the overridden method; not thrown by this implementation directly
 */
@Override
protected boolean configurationChanged() throws Exception {
    final boolean clientAlreadyExists = (jobClient != null);
    if (clientAlreadyExists) {
        LOG.info("Hadoop configuration changed, re-initializing MR...");
    }
    init();
    return true;
}
/**
 * Runs the job client with {@code -submit <jobFile>}.
 *
 * @param jobFile the configuration file of the job
 */
@CliCommand(value = PREFIX + "submit", help = "Submit a Map Reduce job defined in the job file")
public void submit(@CliOption(key = { "jobfile" }, mandatory = true, help = "the configuration file for MR job") final String jobFile) {
    final String[] commandLine = { "-submit", jobFile };
    run(commandLine);
}
/**
 * Runs the job client with {@code -status <jobid>}.
 *
 * @param jobid the job id
 */
@CliCommand(value = PREFIX + "status", help = "Query Map Reduce job status.")
public void status(@CliOption(key = { "jobid" }, mandatory = true, help = "the job Id") final String jobid) {
    final String[] commandLine = { "-status", jobid };
    run(commandLine);
}
/**
 * Runs the job client with {@code -counter <jobid> <groupName> <counterName>}.
 *
 * @param jobid the job id
 * @param groupName the counter group name
 * @param counterName the counter name
 */
@CliCommand(value = PREFIX + "counter", help = "Print the counter value of the MR job")
public void counter(
        @CliOption(key = { "jobid" }, mandatory = true, help = "the job Id") final String jobid,
        @CliOption(key = { "groupname" }, mandatory = true, help = "the counter group name") final String groupName,
        @CliOption(key = { "countername" }, mandatory = true, help = "the counter name") final String counterName) {
    List<String> argv = new ArrayList<String>();
    argv.add("-counter");
    argv.add(jobid);
    argv.add(groupName);
    argv.add(counterName);
    run(argv.toArray(new String[0]));
}
/**
 * Runs the job client with {@code -kill <jobid>}.
 *
 * @param jobid the job id
 */
@CliCommand(value = PREFIX + "kill", help = "Kill the Map Reduce job")
public void kill(@CliOption(key = { "jobid" }, mandatory = true, help = "the job Id") final String jobid) {
    final String[] commandLine = { "-kill", jobid };
    run(commandLine);
}
/**
 * Runs the job client with {@code -events <jobid> <from> <number>}.
 *
 * @param jobid the job id
 * @param from the first event number
 * @param number the total number of events
 */
@CliCommand(value = PREFIX + "events", help = "Print the events' detail received by jobtracker for the given range")
public void events(
        @CliOption(key = { "jobid" }, mandatory = true, help = "the job Id") final String jobid,
        @CliOption(key = { "from" }, mandatory = true, help = "from event number") final String from,
        @CliOption(key = { "number" }, mandatory = true, help = "total number of events") final String number) {
    final String[] commandLine = { "-events", jobid, from, number };
    run(commandLine);
}
/**
 * Runs the job client with {@code -history [all] <outputDir>}.
 *
 * @param all when {@code true} the literal argument {@code all} is inserted before the directory
 * @param outputDir the job output directory
 */
@CliCommand(value = PREFIX + "history", help = "Print job details, failed and killed job details")
public void history(@CliOption(key = { "all" }, mandatory = false, specifiedDefaultValue = "true", unspecifiedDefaultValue = "false", help = "Whether print all information") final boolean all,
        @CliOption(key = { "" }, mandatory = true, help = "job output directory") final String outputDir) {
    final String[] commandLine = all
            ? new String[] { "-history", "all", outputDir }
            : new String[] { "-history", outputDir };
    run(commandLine);
}
/**
 * Runs the job client with {@code -list [all]}.
 *
 * @param all when {@code true} the literal argument {@code all} is appended
 */
@CliCommand(value = PREFIX + "list", help = "List the Map Reduce jobs")
public void list(@CliOption(key = { "all" }, mandatory = false, specifiedDefaultValue = "true", unspecifiedDefaultValue = "false", help = "Whether list all jobs") final boolean all) {
    final String[] commandLine = all
            ? new String[] { "-list", "all" }
            : new String[] { "-list" };
    run(commandLine);
}
/**
 * Runs the job client with {@code -kill-task <taskid>}.
 *
 * @param taskid the task id
 */
@CliCommand(value = "mr task kill", help = "Kill the Map Reduce task")
public void killTask(@CliOption(key = { "taskid" }, mandatory = true, help = "the task Id") final String taskid) {
    final String[] commandLine = { "-kill-task", taskid };
    run(commandLine);
}
/**
 * Runs the job client with {@code -fail-task <taskid>}.
 *
 * @param taskid the task id
 */
@CliCommand(value = "mr task fail", help = "Fail the Map Reduce task")
public void failTask(@CliOption(key = { "taskid" }, mandatory = true, help = "the task Id") final String taskid) {
    final String[] commandLine = { "-fail-task", taskid };
    run(commandLine);
}
/**
 * Runs the job client with {@code -set-priority <jobid> <priority>}, where the priority
 * argument is the value of the given {@link JobPriority}.
 *
 * @param jobid the job id
 * @param priority the new priority of the job
 */
@CliCommand(value = PREFIX + "set priority", help = "Change the priority of the job")
public void setPriority(
        @CliOption(key = { "jobid" }, mandatory = true, help = "the job Id") final String jobid,
        @CliOption(key = { "priority" }, mandatory = true, help = "the job priority") final JobPriority priority) {
    final String[] commandLine = { "-set-priority", jobid, priority.getValue() };
    run(commandLine);
}
/**
 * Job priorities accepted by the "set priority" command. Each constant carries the
 * string that is passed to the job client.
 */
public enum JobPriority {
    VERY_HIGH("VERY_HIGH"),
    HIGH("HIGH"),
    NORMAL("NORMAL"),
    /**
     * Misspelled alias of {@link #NORMAL}, kept so existing usages keep working.
     *
     * @deprecated use {@link #NORMAL}
     */
    @Deprecated
    NORML("NORMAL"),
    LOW("LOW"),
    VERY_LOW("VERY_LOW");

    private final String val;

    JobPriority(String v) {
        this.val = v;
    }

    /**
     * @return the priority string handed to the job client
     */
    public String getValue() {
        return val;
    }
}
/**
 * Runs the main class of a jar inside the shell process.
 * <p>
 * {@code System.exit} calls are forbidden through the security utility for the duration
 * of the run and re-enabled afterwards, whatever the outcome. A trapped exit is ignored;
 * any other failure is logged.
 *
 * @param jarFileName the jar file name
 * @param mainClassName the main class name
 * @param args the arguments handed to {@code runJar}; may be absent
 */
@CliCommand(value = "mr jar", help = "Run Map Reduce job in the jar")
public void jar(
        @CliOption(key = { "jarfile" }, mandatory = true, help = "jar file name") final String jarFileName,
        @CliOption(key = "mainclass", mandatory = true, help = "main class name") final String mainClassName,
        @CliOption(key = "args", mandatory = false, help = "input path") final String args) {
    securityUtil.forbidSystemExitCall();
    try {
        runJar(jarFileName, mainClassName, args);
    } catch (Throwable failure) {
        // The job calling System.exit is prevented on purpose and is not reported as an error.
        final boolean exitTrapped = failure instanceof ExitTrappedException;
        if (!exitTrapped) {
            LOG.severe("run MR job failed. Failed Message:" + failure.getMessage());
        }
    } finally {
        securityUtil.enableSystemExitCall();
    }
}
/**
 * Runs the static {@code main(String[])} method of a class packaged in a jar file.
 * <p>
 * The jar is unpacked into a fresh working directory (deleted by a JVM shutdown hook) and
 * the current Hadoop configuration is written into a sub-directory of it. A class loader is
 * then built over that configuration directory, the unpacked content, the jar itself,
 * the {@code classes} directory and every file under {@code lib}. That loader is the thread
 * context class loader while the main method runs; the previous one is restored afterwards.
 *
 * @param jarFileName path of the jar file to run
 * @param mainClassName name of the class whose {@code main} method is invoked
 * @param args arguments for the main method, separated by whitespace;
 *        {@code null} or blank means no arguments
 * @throws Throwable any failure while preparing the class path, or the exception thrown by
 *         the invoked main method itself (including {@link ExitTrappedException})
 */
public void runJar(final String jarFileName, final String mainClassName, final String args) throws Throwable {
    File file = new File(jarFileName);
    File tmpDir = new File(new Configuration().get("hadoop.tmp.dir"));
    String os = System.getProperty("os.name").toLowerCase();
    if (os.contains("win")) {
        tmpDir = new File(System.getProperty("java.io.tmpdir"), "impala");
    }
    tmpDir.mkdirs();
    if (!tmpDir.isDirectory()) {
        LOG.severe("Mkdirs failed to create " + tmpDir);
        return;
    }

    // createTempFile only reserves a unique name; the file is replaced by a directory.
    final File workDir = File.createTempFile("hadoop-unjar", "", tmpDir);
    workDir.delete();
    workDir.mkdirs();
    if (!workDir.isDirectory()) {
        LOG.severe("Mkdirs failed to create " + workDir);
        return;
    }
    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            try {
                FileUtil.fullyDelete(workDir);
            } catch (IOException ignored) {
                // Best-effort cleanup while the JVM is shutting down; nothing useful can be done here.
            }
        }
    });

    final Thread currentThread = Thread.currentThread();
    final ClassLoader previousLoader = currentThread.getContextClassLoader();
    try {
        unJar(file, workDir);

        List<URL> classPath = new ArrayList<URL>();
        // This is to add hadoop configuration dir to classpath so that
        // user's configuration can be accessed when running the jar
        File hadoopConfigurationDir = new File(workDir, "impala-hadoop-configuration");
        writeHadoopConfiguration(hadoopConfigurationDir, this.getHadoopConfiguration());
        // toURI().toURL() escapes characters (e.g. spaces) that the deprecated File.toURL() leaves raw.
        classPath.add(hadoopConfigurationDir.toURI().toURL());
        classPath.add(workDir.toURI().toURL());
        classPath.add(file.toURI().toURL());
        classPath.add(new File(workDir, "classes").toURI().toURL());
        File[] libs = new File(workDir, "lib").listFiles();
        if (libs != null) {
            for (File lib : libs) {
                classPath.add(lib.toURI().toURL());
            }
        }

        ClassLoader loader = new URLClassLoader(classPath.toArray(new URL[0]), this.getClass().getClassLoader());
        currentThread.setContextClassLoader(loader);
        Class<?> mainClass = Class.forName(mainClassName, true, loader);
        Method main = mainClass.getMethod("main", String[].class);
        main.invoke(null, new Object[] { splitArguments(args) });
    } catch (InvocationTargetException e) {
        // Surface what the job's main method actually threw instead of the reflection wrapper,
        // so that job failures are reported and trapped exits can be recognized by the caller.
        final Throwable cause = e.getCause();
        throw (cause != null) ? cause : e;
    } finally {
        // Do not leave the job's class loader installed on the shell thread.
        currentThread.setContextClassLoader(previousLoader);
    }
}

/**
 * Splits a command-line style argument string on runs of whitespace.
 *
 * @param args the raw argument string, may be {@code null}
 * @return the individual arguments; an empty array when {@code args} is {@code null} or blank
 */
private static String[] splitArguments(final String args) {
    if (args == null) {
        return new String[0];
    }
    final String trimmed = args.trim();
    if (trimmed.length() == 0) {
        return new String[0];
    }
    return trimmed.split("\\s+");
}
/**
 * Writes the Hadoop configuration into a directory. The same configuration is written
 * three times, as "core-site.xml", "hdfs-site.xml" and "mapred-site.xml".
 * <p>
 * The directory is created when missing. A failure is logged and stops the writing of the
 * remaining files; it is not propagated to the caller.
 *
 * @param configDir the directory the files are written to
 * @param config the Hadoop configuration to write
 */
public void writeHadoopConfiguration(File configDir, Configuration config) {
    configDir.mkdirs();
    final String[] fileNames = { "core-site.xml", "hdfs-site.xml", "mapred-site.xml" };
    try {
        for (String fileName : fileNames) {
            FileOutputStream fos = new FileOutputStream(new File(configDir, fileName));
            try {
                config.writeXml(fos);
            } finally {
                // Always release the file handle, even when writing fails.
                fos.close();
            }
        }
    } catch (Exception e) {
        LOG.severe("Save user's configuration failed. Message:" + e.getMessage());
    }
}
/**
 * Extracts every non-directory entry of a jar file below the given directory, creating
 * parent directories as needed.
 *
 * @param jarFile the jar file to unpack
 * @param toDir the directory to unpack into
 * @throws IOException if the jar cannot be read, a directory or file cannot be created, or
 *         an entry name would resolve to a location outside of {@code toDir}
 */
private void unJar(File jarFile, File toDir) throws IOException {
    // Canonical root used to reject entries such as "../../x" that would escape toDir.
    final String targetRoot = toDir.getCanonicalPath() + File.separator;
    JarFile jar = new JarFile(jarFile);
    try {
        Enumeration<JarEntry> entries = jar.entries();
        while (entries.hasMoreElements()) {
            JarEntry entry = entries.nextElement();
            if (entry.isDirectory()) {
                continue;
            }
            File file = new File(toDir, entry.getName());
            if (!file.getCanonicalPath().startsWith(targetRoot)) {
                throw new IOException("Expanding " + entry.getName() + " would create a file outside of " + toDir);
            }
            File parent = file.getParentFile();
            // mkdirs() also returns false when the directory already exists, hence the second check.
            if (!parent.mkdirs() && !parent.isDirectory()) {
                throw new IOException("Mkdirs failed to create " + parent.toString());
            }
            InputStream in = jar.getInputStream(entry);
            try {
                OutputStream out = new FileOutputStream(file);
                try {
                    byte[] buffer = new byte[8192];
                    int read;
                    while ((read = in.read(buffer)) != -1) {
                        out.write(buffer, 0, read);
                    }
                } finally {
                    out.close();
                }
            } finally {
                in.close();
            }
        }
    } finally {
        jar.close();
    }
}
/**
 * Hands the given arguments to the job client and logs failures instead of propagating them.
 * <p>
 * A missing job client, a non-zero exit code and any exception thrown by the client are
 * each reported through the log.
 *
 * @param argv the job client arguments, e.g. {@code -list all}
 */
private void run(String[] argv) {
    if (jobClient == null) {
        // Without this guard the failure would surface as "Failed Message:null".
        LOG.severe("run MR job failed. Failed Message:job client is not initialized");
        return;
    }
    try {
        int exitCode = jobClient.run(argv);
        if (exitCode != 0) {
            LOG.severe("run MR job failed. Exit code:" + exitCode);
        }
    } catch (Throwable t) {
        LOG.severe("run MR job failed. Failed Message:" + t.getMessage());
    }
}
}