package org.apache.sqoop.spark;

import java.io.Serializable;
import java.util.ListIterator;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.sqoop.common.Direction;
import org.apache.sqoop.core.ConfigurationConstants;
import org.apache.sqoop.core.SqoopServer;
import org.apache.sqoop.driver.JobManager;
import org.apache.sqoop.driver.JobRequest;
import org.apache.sqoop.job.spark.SparkDestroyerUtil;
import org.apache.sqoop.model.MJob;
import org.apache.sqoop.model.MSubmission;
import org.apache.sqoop.request.HttpEventContext;
import org.apache.sqoop.submission.spark.SqoopSparkDriver;

public class SqoopSparkJob implements Serializable {

  private MJob job;

  // Initialized statically so the static parseArgs() works even before an
  // instance of SqoopSparkJob has been constructed.
  private static final CommandLineParser parser = new SqoopGnuParser(true);

  /**
   * A GnuParser variant that can skip unrecognized options, so the command
   * line can be parsed once for the command and then again for the
   * command-specific options.
   */
  static class SqoopGnuParser extends GnuParser {
    private final boolean ignoreUnrecognizedOption;

    public SqoopGnuParser(final boolean ignoreUnrecognizedOption) {
      this.ignoreUnrecognizedOption = ignoreUnrecognizedOption;
    }

    @SuppressWarnings("rawtypes")
    @Override
    protected void processOption(final String arg, final ListIterator iter) throws ParseException {
      boolean hasOption = getOptions().hasOption(arg);
      // This allows us to parse the options for the command and then parse
      // again based on the command.
      if (hasOption || !ignoreUnrecognizedOption) {
        super.processOption(arg, iter);
      }
    }
  }

  SqoopSparkJob() {
  }

  public void setJob(MJob job) {
    this.job = job;
  }

  public static CommandLine parseArgs(Options options, String[] args) {
    CommandLine commandLineArgs;
    try {
      // Parse the command line arguments.
      commandLineArgs = parser.parse(options, args, false);
    } catch (ParseException pe) {
      throw new RuntimeException("Parsing failed for command option:", pe);
    }
    return commandLineArgs;
  }

  @SuppressWarnings("static-access")
  public static void addCommonOptions(Options options) {
    options.addOption(OptionBuilder.withLongOpt("numL").withDescription("loader parallelism")
        .hasArg().withArgName("numLoaders").create());
    options.addOption(OptionBuilder.withLongOpt("numE").withDescription("extractor parallelism")
        .hasArg().withArgName("numExtractors").create());
    options.addOption(OptionBuilder.withLongOpt("defaultExtractors")
        .withDescription("default extractor parallelism")
        .hasArg().withArgName("defaultExtractors").create());
    options.addOption(OptionBuilder.withLongOpt("confDir").withDescription("config dir for sqoop")
        .hasArg().isRequired().withArgName("confDir").create());
  }

  public SparkConf init(CommandLine cArgs) throws ClassNotFoundException {
    System.setProperty(ConfigurationConstants.SYSPROP_CONFIG_DIR, cArgs.getOptionValue("confDir"));
    // The master defaults to local; override it via the submit parameter.
    SparkConf conf = new SparkConf().setAppName("sqoop-spark").setMaster("local");
    if (cArgs.getOptionValue("defaultExtractors") != null) {
      conf.set(SqoopSparkDriver.DEFAULT_EXTRACTORS, cArgs.getOptionValue("defaultExtractors"));
    }
    if (cArgs.getOptionValue("numL") != null) {
      conf.set(SqoopSparkDriver.NUM_LOADERS, cArgs.getOptionValue("numL"));
    }
    // Hack to load extra classes directly.
    Class.forName("com.mysql.jdbc.Driver");
    SqoopServer.initialize();
    return conf;
  }

  public void execute(SparkConf conf, JavaSparkContext context) throws Exception {
    if (job == null) {
      throw new RuntimeException("Job not set for spark execution");
    }
    HttpEventContext ctx = new HttpEventContext();
    // TODO: use standard username
    ctx.setUsername("spark-sqoop");
    MSubmission mSubmission = JobManager.getInstance().createJobSubmission(ctx,
        job.getPersistenceId());
    JobRequest jobRequest = JobManager.getInstance().createJobRequest(job.getPersistenceId(),
        mSubmission);
    JobManager.getInstance().prepareJob(jobRequest);
    SqoopSparkDriver.execute(jobRequest, conf, context);
    SparkDestroyerUtil.executeDestroyer(true, jobRequest, Direction.FROM);
    SparkDestroyerUtil.executeDestroyer(true, jobRequest, Direction.TO);
    SqoopServer.destroy();
  }
}