package org.apache.sqoop.spark;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.sqoop.common.Direction;
import org.apache.sqoop.driver.Driver;
import org.apache.sqoop.model.MConfigList;
import org.apache.sqoop.model.MConnector;
import org.apache.sqoop.model.MDriverConfig;
import org.apache.sqoop.model.MFromConfig;
import org.apache.sqoop.model.MJob;
import org.apache.sqoop.model.MLink;
import org.apache.sqoop.model.MLinkConfig;
import org.apache.sqoop.model.MToConfig;
import org.apache.sqoop.repository.RepositoryManager;

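/**
 * Sample Spark job that copies a JDBC table into HDFS through Sqoop's
 * generic-jdbc-connector (FROM) and hdfs-connector (TO).
 *
 * Illustrative invocation (the jar name and argument values are placeholders;
 * adjust to your build and environment):
 *
 *   spark-submit --class org.apache.sqoop.spark.SqoopJDBCHDFSJob sqoop-spark.jar \
 *     --jdbcString jdbc:mysql://host:3306/db --u user --p secret \
 *     --table employees --partitionCol id --outputDir /tmp/sqoop-out
 */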
public class SqoopJDBCHDFSJob {
@SuppressWarnings("static-access")
static Options createOptions() {
Options options = new Options();
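// --p (password) and --partitionCol are optional; all other options are required.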
options.addOption(OptionBuilder.withLongOpt("jdbcString")
.withDescription("jdbc connection string").hasArg().isRequired()
.withArgName("jdbcConnectionString").create());
options.addOption(OptionBuilder.withLongOpt("u").withDescription("jdbc username").hasArg()
.isRequired().withArgName("username").create());
options.addOption(OptionBuilder.withLongOpt("p").withDescription("jdbc password").hasArg()
.withArgName("password").create());
options.addOption(OptionBuilder.withLongOpt("table").withDescription("jdbc table").hasArg()
.isRequired().withArgName("table").create());
options.addOption(OptionBuilder.withLongOpt("partitionCol")
.withDescription("jdbc table parition column").hasArg().withArgName("pc").create());
options.addOption(OptionBuilder.withLongOpt("outputDir").withDescription("hdfs output dir")
.hasArg().isRequired().withArgName("outputDir").create());
SqoopSparkJob.addCommonOptions(options);
return options;
}

public static void main(String[] args) throws Exception {
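// Parse the command line and bootstrap Spark from the parsed options.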
final SqoopSparkJob sparkJob = new SqoopSparkJob();
CommandLine cArgs = SqoopSparkJob.parseArgs(createOptions(), args);
SparkConf conf = sparkJob.init(cArgs);
JavaSparkContext context = new JavaSparkContext(conf);
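// Resolve the source (generic JDBC) and target (HDFS) connectors from the Sqoop repository.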
MConnector fromConnector = RepositoryManager.getInstance().getRepository()
.findConnector("generic-jdbc-connector");
MConnector toConnector = RepositoryManager.getInstance().getRepository()
.findConnector("hdfs-connector");
MLinkConfig fromLinkConfig = fromConnector.getLinkConfig();
MLinkConfig toLinkConfig = toConnector.getLinkConfig();
MLink fromLink = new MLink(fromConnector.getPersistenceId(), fromLinkConfig);
fromLink.setName("jdbcLink-" + System.currentTimeMillis());
fromLink.getConnectorLinkConfig().getStringInput("linkConfig.jdbcDriver")
.setValue("com.mysql.jdbc.Driver");
fromLink.getConnectorLinkConfig().getStringInput("linkConfig.connectionString")
.setValue(cArgs.getOptionValue("jdbcString"));
fromLink.getConnectorLinkConfig().getStringInput("linkConfig.username")
.setValue(cArgs.getOptionValue("u"));
fromLink.getConnectorLinkConfig().getStringInput("linkConfig.password")
.setValue(cArgs.getOptionValue("p"));
RepositoryManager.getInstance().getRepository().createLink(fromLink);
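// Create and persist the TO link for the HDFS connector.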
MLink toLink = new MLink(toConnector.getPersistenceId(), toLinkConfig);
toLink.setName("hdfsLink-" + System.currentTimeMillis());
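// Note: linkConfig.confDir is meant to be a Hadoop configuration directory for the
// HDFS connector; this sample passes the output directory instead.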
toLink.getConnectorLinkConfig().getStringInput("linkConfig.confDir")
.setValue(cArgs.getOptionValue("outputDir"));
RepositoryManager.getInstance().getRepository().createLink(toLink);
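// Assemble the job from the two links plus the driver's own configuration.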
MFromConfig fromJobConfig = fromConnector.getFromConfig();
MToConfig toJobConfig = toConnector.getToConfig();
MJob sqoopJob = new MJob(fromConnector.getPersistenceId(), toConnector.getPersistenceId(),
fromLink.getPersistenceId(), toLink.getPersistenceId(), fromJobConfig, toJobConfig,
Driver.getInstance().getDriver().getDriverConfig());
MConfigList fromConfig = sqoopJob.getJobConfig(Direction.FROM);
fromConfig.getStringInput("fromJobConfig.tableName").setValue(cArgs.getOptionValue("table"));
fromConfig.getStringInput("fromJobConfig.partitionColumn").setValue(
cArgs.getOptionValue("paritionCol"));
MToConfig toConfig = sqoopJob.getToJobConfig();
toConfig.getStringInput("toJobConfig.outputDirectory").setValue(
cArgs.getOptionValue("outputDir") + System.currentTimeMillis());
MDriverConfig driverConfig = sqoopJob.getDriverConfig();
if (cArgs.getOptionValue("numE") != null) {
driverConfig.getIntegerInput("throttlingConfig.numExtractors").setValue(
Integer.valueOf(cArgs.getOptionValue("numE")));
}
if (cArgs.getOptionValue("numL") != null) {
driverConfig.getIntegerInput("throttlingConfig.numLoaders").setValue(
Integer.valueOf(cArgs.getOptionValue("numL")));
}
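// Persist the job in the repository, then hand it to the Spark job wrapper to run.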
RepositoryManager.getInstance().getRepository().createJob(sqoopJob);
sparkJob.setJob(sqoopJob);
sparkJob.execute(conf, context);
}
}