package com.thinkbiganalytics.nifi.v2.sqoop.utils;

/*-
 * #%L
 * thinkbig-nifi-hadoop-processors
 * %%
 * Copyright (C) 2017 ThinkBig Analytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.thinkbiganalytics.nifi.v2.sqoop.PasswordMode;
import com.thinkbiganalytics.nifi.v2.sqoop.enums.CompressionAlgorithm;
import com.thinkbiganalytics.nifi.v2.sqoop.enums.ExtractDataFormat;
import com.thinkbiganalytics.nifi.v2.sqoop.enums.HiveDelimStrategy;
import com.thinkbiganalytics.nifi.v2.sqoop.enums.HiveNullEncodingStrategy;
import com.thinkbiganalytics.nifi.v2.sqoop.enums.SqoopLoadStrategy;
import com.thinkbiganalytics.nifi.v2.sqoop.enums.TargetHdfsDirExistsStrategy;
import com.thinkbiganalytics.nifi.v2.sqoop.security.DecryptPassword;

import org.apache.nifi.logging.ComponentLog;

import java.util.Arrays;
import java.util.List;

import javax.annotation.Nonnull;

/**
 * A class to build a sqoop command that can be run on the command line.
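 *
 * <p>Illustrative usage (a sketch, not taken from the original source; the connection
 * details, table name, and paths below are hypothetical placeholders, and {@code logger}
 * is assumed to be an available {@link org.apache.nifi.logging.ComponentLog}):</p>
 * <pre>{@code
 * String command = new SqoopBuilder()
 *     .setLogger(logger)
 *     .setSourceConnectionString("jdbc:mysql://dbhost:3306/sales")
 *     .setSourceUserName("etl_user")
 *     .setPasswordMode(PasswordMode.CLEAR_TEXT_ENTRY)
 *     .setSourceEnteredPassword("secret")
 *     .setSourceTableName("ORDERS")
 *     .setSourceTableFields("*")
 *     .setSourceLoadStrategy(SqoopLoadStrategy.FULL_LOAD)
 *     .setTargetHdfsDirectory("/data/landing/orders")
 *     .setTargetExtractDataFormat(ExtractDataFormat.TEXT)
 *     .setTargetHdfsFileFieldDelimiter(",")
 *     .setTargetHdfsFileRecordDelimiter("\n")
 *     .setTargetHiveDelimStrategy(HiveDelimStrategy.DROP)
 *     .setClusterMapTasks(4)
 *     .build();
 * }</pre>
 * <p>The resulting command is roughly of the form
 * {@code sqoop import --connect "..." --username "..." --password "..." --table "ORDERS"
 * --autoreset-to-one-mapper --target-dir "/data/landing/orders" --as-textfile ... --num-mappers "4"}.</p>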
 */
public class SqoopBuilder {

    private static final String SPACE_STRING = " ";
    private static final String START_SPACE_QUOTE = " \"";
    private static final String END_QUOTE_SPACE = "\" ";
    private static final String QUOTE = "\"";
    private static final String EQUAL_STRING = "=";
    private static final String STAR_STRING = "*";
    private static final String MASK_STRING = "*****";
    private static final String UNABLE_TO_DECRYPT_STRING = "UNABLE_TO_DECRYPT_ENCRYPTED_PASSWORD";

    private static final String sourcePasswordLoaderClassLabel = "-Dorg.apache.sqoop.credentials.loader.class";
    private static final String sourcePasswordLoaderClassValue = "org.apache.sqoop.util.password.CryptoFileLoader";
    private static final String sourcePasswordPassphraseLabel = "-Dorg.apache.sqoop.credentials.loader.crypto.passphrase";
    private static final String sourceConnectionStringLabel = "--connect";
    private static final String sourceUserNameLabel = "--username";
    private static final String sourcePasswordHdfsFileLabel = "--password-file";
    private static final String sourcePasswordClearTextLabel = "--password";
    private static final String sourceConnectionManagerLabel = "--connection-manager";
    private static final String sourceDriverLabel = "--driver";
    private static final String sourceTableNameLabel = "--table";
    private static final String sourceTableFieldsLabel = "--columns";
    private static final String sourceTableWhereClauseLabel = "--where";
    private static final String sourceTableSplitFieldLabel = "--split-by";
    private static final String targetHdfsDirectoryLabel = "--target-dir";
    private static final String targetHdfsDirDeleteLabel = "--delete-target-dir";
    private static final String extractDataFormatTextLabel = "--as-textfile";
    private static final String extractDataFormatAvroLabel = "--as-avrodatafile";
    private static final String extractDataFormatSequenceFileLabel = "--as-sequencefile";
    private static final String extractDataFormatParquetLabel = "--as-parquetfile";
    private static final String sourceAutoSetToOneMapperLabel = "--autoreset-to-one-mapper";
    private static final String clusterMapTasksLabel = "--num-mappers";
    private static final String incrementalStrategyLabel = "--incremental";
    private static final String incrementalAppendStrategyLabel = "append";
    private static final String incrementalLastModifiedStrategyLabel = "lastmodified";
    private static final String sourceCheckColumnNameLabel = "--check-column";
    private static final String sourceCheckColumnLastValueLabel = "--last-value";
    private static final String sourceBoundaryQueryLabel = "--boundary-query";
    private static final String clusterUIJobNameLabel = "--mapreduce-job-name";
    private static final String targetHiveDropDelimLabel = "--hive-drop-import-delims";
    private static final String targetHiveReplaceDelimLabel = "--hive-delims-replacement";
    private static final String targetHiveNullEncodingStrategyNullStringLabel = "--null-string '\\\\N'";
    private static final String targetHiveNullEncodingStrategyNullNonStringLabel = "--null-non-string '\\\\N'";
    private static final String targetHdfsFileFieldDelimiterLabel = "--fields-terminated-by";
    private static final String targetHdfsFileRecordDelimiterLabel = "--lines-terminated-by";
    private static final String targetCompressLabel = "--compress";
    private static final String targetCompressionCodecLabel = "--compression-codec";
    private static final String targetColumnTypeMappingLabel = "--map-column-java";
    private static final String sqoopCodeGenDirectoryLabel = "--outdir";
    private static final String sourceSpecificOptionsLabel = "--";
    private static final String sourceSpecificSqlServerSchemaLabel = "--schema";
    private static final String operationName = "sqoop";
    private static final String operationType = "import";

    private static final Integer DEFAULT_CLUSTER_MAP_TASKS = 4;

    private final HiveNullEncodingStrategy targetHiveNullEncodingStrategy = HiveNullEncodingStrategy.ENCODE_STRING_AND_NONSTRING;

    private String sourcePasswordPassphrase;
    private String sourceConnectionString;
    private String sourceUserName;
    private PasswordMode passwordMode;
    private String sourcePasswordHdfsFile;
    private String sourceEnteredPassword;
    private String sourceConnectionManager;
    private String sourceDriver;
    private String sourceTableName;
    private String sourceTableFields;
    private String sourceTableWhereClause;
    private String sourceTableSplitField;
    private String targetHdfsDirectory;
    private TargetHdfsDirExistsStrategy targetHdfsDirExistsStrategy;
    private ExtractDataFormat extractDataFormat;
    private Integer clusterMapTasks = DEFAULT_CLUSTER_MAP_TASKS;
    private SqoopLoadStrategy sourceLoadStrategy;
    private String sourceCheckColumnName;
    private String sourceCheckColumnLastValue;
    private String sourceBoundaryQuery;
    private String clusterUIJobName;
    private HiveDelimStrategy targetHiveDelimStrategy;
    private String targetHiveReplaceDelim;
    private String targetHdfsFileFieldDelimiter;
    private String targetHdfsFileRecordDelimiter;
    private String targetCompressionCodec;
    private Boolean targetCompressFlag = false;
    private String targetColumnTypeMapping;
    private String sqoopCodeGenDirectory;
    private Boolean sourceSpecificOptions = false;
    private String sourceSpecificSqlServerSchema;
    private ComponentLog logger = null;

    /**
     * Set logger
     *
     * @param logger Logger
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setLogger(ComponentLog logger) {
        this.logger = logger;
        logger.info("Logger set to {}", new Object[]{this.logger});
        return this;
    }

    /**
     * Set connection string for source system
     *
     * @param sourceConnectionString source connection string
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceConnectionString(String sourceConnectionString) {
        this.sourceConnectionString = sourceConnectionString;
        logMessage("info", "Source Connection String", this.sourceConnectionString);
        return this;
    }

    /**
     * Set user name for connecting to source system
     *
     * @param sourceUserName user name
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceUserName(String sourceUserName) {
        this.sourceUserName = sourceUserName;
        logMessage("info", "Source User Name", this.sourceUserName);
        return this;
    }

    /**
     * Set password mode for providing password to connect to source system
     *
     * @param passwordMode {@link PasswordMode}
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setPasswordMode(PasswordMode passwordMode) {
        this.passwordMode = passwordMode;
        logMessage("info", "Source Password Mode", this.passwordMode);
        return this;
    }

    /**
     * Set location of password file on HDFS
     *
     * @param sourcePasswordHdfsFile location on HDFS
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourcePasswordHdfsFile(String sourcePasswordHdfsFile) {
        this.sourcePasswordHdfsFile = sourcePasswordHdfsFile;
        logMessage("info", "Source Password File (HDFS)", MASK_STRING);
        return this;
    }

    /**
     * Set passphrase used to generate encrypted password
     *
     * @param sourcePasswordPassphrase passphrase
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourcePasswordPassphrase(String sourcePasswordPassphrase) {
        this.sourcePasswordPassphrase = sourcePasswordPassphrase;
        logMessage("info", "Source Password Passphrase", MASK_STRING);
        return this;
    }

    /**
     * Set password entered (clear text / encrypted)
     *
     * @param sourceEnteredPassword password string
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceEnteredPassword(String sourceEnteredPassword) {
        this.sourceEnteredPassword = sourceEnteredPassword;
        logMessage("info", "Source Entered Password", MASK_STRING);
        return this;
    }

    /**
     * Set Connection Manager class for source system
     *
     * @param sourceConnectionManager connection manager class
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceConnectionManager(String sourceConnectionManager) {
        this.sourceConnectionManager = sourceConnectionManager;
        logMessage("info", "Source Connection Manager", this.sourceConnectionManager);
        return this;
    }

    /**
     * Set JDBC driver for source system
     *
     * @param sourceDriver source driver
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceDriver(String sourceDriver) {
        this.sourceDriver = sourceDriver;
        logMessage("info", "Source Driver", this.sourceDriver);
        return this;
    }

    /**
     * Set source table name to extract from
     *
     * @param sourceTableName table name
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceTableName(String sourceTableName) {
        this.sourceTableName = sourceTableName;
        logMessage("info", "Source Table Name", this.sourceTableName);
        return this;
    }

    /**
     * Set fields to get from source table (comma-separated). Use * to indicate all fields.
     *
     * @param sourceTableFields comma-separated source fields, or * for all fields
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceTableFields(String sourceTableFields) {
        if (sourceTableFields.trim().equals(STAR_STRING)) {
            // all fields
            this.sourceTableFields = sourceTableFields.trim();
        } else {
            // selected fields
            List<String> fieldList = Arrays.asList(sourceTableFields.split(","));
            this.sourceTableFields = getQueryFieldsForStatement(fieldList);
        }
        logMessage("info", "Source Table Fields", this.sourceTableFields);
        return this;
    }

    /**
     * Set WHERE clause to filter extract from source table
     *
     * @param sourceTableWhereClause WHERE clause
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceTableWhereClause(String sourceTableWhereClause) {
        this.sourceTableWhereClause = sourceTableWhereClause;
        logMessage("info", "Source Table Where Clause", sourceTableWhereClause);
        return this;
    }

    /**
     * Set load strategy for source system (full/incremental)
     *
     * @param sourceLoadStrategy {@link SqoopLoadStrategy}
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceLoadStrategy(SqoopLoadStrategy sourceLoadStrategy) {
        this.sourceLoadStrategy = sourceLoadStrategy;
        logMessage("info", "Source Load Strategy", this.sourceLoadStrategy);
        return this;
    }

    /**
     * Set column to check for last modified time / id for incremental load.<br>
     * Not needed for full load.
     *
     * @param sourceCheckColumnName column name to check
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceCheckColumnName(String sourceCheckColumnName) {
        this.sourceCheckColumnName = sourceCheckColumnName;
        logMessage("info", "Source Check Column Name", this.sourceCheckColumnName);
        return this;
    }

    /**
     * Set the last value extracted for incremental load mode.<br>
     * Not needed for full load.
     *
     * @param sourceCheckColumnLastValue last value extracted
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceCheckColumnLastValue(String sourceCheckColumnLastValue) {
        this.sourceCheckColumnLastValue = sourceCheckColumnLastValue;
        logMessage("info", "Source Check Column Last Value", this.sourceCheckColumnLastValue);
        return this;
    }

    /**
     * Set the field used for splitting units of work in parallel. By default, the single-field primary key is used if available.
     *
     * @param sourceSplitByField field name
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceSplitByField(String sourceSplitByField) {
        this.sourceTableSplitField = sourceSplitByField;
        logMessage("info", "Source Split By Field", this.sourceTableSplitField);
        return this;
    }

    /**
     * Set boundary query to get max and min values for split-by-field column
     *
     * @param sourceBoundaryQuery boundary query
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceBoundaryQuery(String sourceBoundaryQuery) {
        this.sourceBoundaryQuery = sourceBoundaryQuery;
        logMessage("info", "Source Boundary Query", this.sourceBoundaryQuery);
        return this;
    }

    /**
     * Set number of mappers to use for extract
     *
     * @param clusterMapTasks number of mappers
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setClusterMapTasks(Integer clusterMapTasks) {
        if (clusterMapTasks > 0) {
            this.clusterMapTasks = clusterMapTasks;
            logMessage("info", "Number of Cluster Map Tasks", this.clusterMapTasks);
        }
        return this;
    }

    /**
     * Set job name to show in cluster UI
     *
     * @param clusterUIJobName job name
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setClusterUIJobName(String clusterUIJobName) {
        this.clusterUIJobName = clusterUIJobName;
        logMessage("info", "Cluster UI Job Name", this.clusterUIJobName);
        return this;
    }

    /**
     * Set target directory to land the extracted data in
     *
     * @param targetHdfsDirectory HDFS directory
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setTargetHdfsDirectory(String targetHdfsDirectory) {
        this.targetHdfsDirectory = targetHdfsDirectory;
        logMessage("info", "Target HDFS Directory", this.targetHdfsDirectory);
        return this;
    }

    /**
     * Set strategy for handling the case where target HDFS directory exists
     *
     * @param targetHdfsDirExistsStrategy strategy {@link TargetHdfsDirExistsStrategy}
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setTargetHdfsDirExistsStrategy(TargetHdfsDirExistsStrategy targetHdfsDirExistsStrategy) {
        this.targetHdfsDirExistsStrategy = targetHdfsDirExistsStrategy;
        logMessage("info", "Target HDFS Directory - If Exists?", this.targetHdfsDirExistsStrategy);
        return this;
    }

    /**
     * Set format to land the extracted data in on HDFS
     *
     * @param extractDataFormat {@link ExtractDataFormat}
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setTargetExtractDataFormat(ExtractDataFormat extractDataFormat) {
        this.extractDataFormat = extractDataFormat;
        logMessage("info", "Extract Data Format", this.extractDataFormat);
        return this;
    }

    /**
     * Set field delimiter when landing data in HDFS
     *
     * @param targetHdfsFileFieldDelimiter field delimiter
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setTargetHdfsFileFieldDelimiter(String targetHdfsFileFieldDelimiter) {
        this.targetHdfsFileFieldDelimiter = targetHdfsFileFieldDelimiter;
        logMessage("info", "Target HDFS File Field Delimiter", this.targetHdfsFileFieldDelimiter);
        return this;
    }

    /**
     * Set record delimiter when landing data in HDFS
     *
     * @param targetHdfsFileRecordDelimiter record delimiter
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setTargetHdfsFileRecordDelimiter(String targetHdfsFileRecordDelimiter) {
        this.targetHdfsFileRecordDelimiter = targetHdfsFileRecordDelimiter;
        logMessage("info", "Target HDFS File Record Delimiter", this.targetHdfsFileRecordDelimiter);
        return this;
    }

    /**
     * Set strategy to handle Hive-specific delimiters (\n, \r, \01)
     *
     * @param targetHiveDelimStrategy delimiter strategy {@link HiveDelimStrategy}
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setTargetHiveDelimStrategy(HiveDelimStrategy targetHiveDelimStrategy) {
        this.targetHiveDelimStrategy = targetHiveDelimStrategy;
        logMessage("info", "Target Hive Delimiter Strategy", this.targetHiveDelimStrategy);
        return this;
    }

    /**
     * Set replacement delimiter for Hive
     *
     * @param targetHiveReplaceDelim replacement delimiter
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setTargetHiveReplaceDelim(String targetHiveReplaceDelim) {
        this.targetHiveReplaceDelim = targetHiveReplaceDelim;
        logMessage("info", "Target Hive Replace Delim", this.targetHiveReplaceDelim);
        return this;
    }

    /**
     * Set compression algorithm for data landing in HDFS
     *
     * @param targetCompressionAlgorithm compression algorithm
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setTargetCompressionAlgorithm(CompressionAlgorithm targetCompressionAlgorithm) {
        this.targetCompressionCodec = getCompressionCodecClass(targetCompressionAlgorithm);
        logMessage("info", "Target Compression Algorithm", targetCompressionAlgorithm);
        return this;
    }

    /**
     * Set the mapping from source columns (SQL type) to target (Java type).
     *
     * @param targetColumnTypeMapping mapping as COLUMN=Type pairs separated by comma. Example: PO_ID=Integer,PO_DETAILS=String
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setTargetColumnTypeMapping(String targetColumnTypeMapping) {
        this.targetColumnTypeMapping = targetColumnTypeMapping;
        logMessage("info", "Target Column Type Mapping", this.targetColumnTypeMapping);
        return this;
    }

    /**
     * Set output directory where Sqoop should create the generated code artifacts
     *
     * @param sqoopCodeGenDirectory full directory path
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSqoopCodeGenDirectory(String sqoopCodeGenDirectory) {
        this.sqoopCodeGenDirectory = sqoopCodeGenDirectory;
        logMessage("info", "Sqoop Code Generation Directory", this.sqoopCodeGenDirectory);
        return this;
    }

    /**
     * Set source specific (SQL Server) option - schema
     *
     * @param sourceSpecificSqlServerSchema schema in SQL Server (default is generally 'dbo')
     * @return {@link SqoopBuilder}
     */
    public SqoopBuilder setSourceSpecificSqlServerSchema(String sourceSpecificSqlServerSchema) {
        if ((sourceSpecificSqlServerSchema != null) && (!sourceSpecificSqlServerSchema.isEmpty())) {
            this.sourceSpecificSqlServerSchema = sourceSpecificSqlServerSchema;
            sourceSpecificOptions = true;
            logMessage("info", "Source Specific - SQL Server Schema", this.sourceSpecificSqlServerSchema);
        } else {
            sourceSpecificOptions = false;
        }
        return this;
    }

    /*
     * Get the compression codec class
     */
    private String getCompressionCodecClass(CompressionAlgorithm compressionAlgorithm) {
        String compressionCodecClass = null;

        if (compressionAlgorithm != null) {
            switch (compressionAlgorithm) {
                case NONE:
                    compressionCodecClass = null;
                    break;
                case GZIP:
                    compressionCodecClass = "org.apache.hadoop.io.compress.GzipCodec";
                    break;
                case SNAPPY:
                    compressionCodecClass = "org.apache.hadoop.io.compress.SnappyCodec";
                    break;
                case BZIP2:
                    compressionCodecClass = "org.apache.hadoop.io.compress.BZip2Codec";
                    break;
                case LZO:
                    compressionCodecClass = "com.hadoop.compression.lzo.LzoCodec";
                    break;
            }
        }

        // Track whether output compression is enabled. Assigning (rather than only
        // setting to true) ensures a later call with NONE turns compression back off.
        targetCompressFlag = (compressionCodecClass != null);

        // Log the codec computed by this call; the targetCompressionCodec field is
        // only assigned by the caller after this method returns, so logging the
        // field here would report a stale value.
        logMessage("info", "Compression Codec", compressionCodecClass);
        logMessage("info", "Compress output", this.targetCompressFlag);

        return compressionCodecClass;
    }

    /*
     * Log message as per level
     */
    private void logMessage(@Nonnull String level, @Nonnull String property, @Nonnull Object value) {
        if (logger == null) {
            return;
        }
        switch (level) {
            case "debug":
                logger.debug("{} set to: {}", new Object[]{property, value});
                break;
            case "info":
                logger.info("{} set to: {}", new Object[]{property, value});
                break;
            case "trace":
                logger.trace("{} set to: {}", new Object[]{property, value});
                break;
            case "warn":
                logger.warn("{} set to: {}", new Object[]{property, value});
                break;
            case "error":
                logger.error("{} set to: {}", new Object[]{property, value});
                break;
            default:
                logger.info("{} set to: {}", new Object[]{property, value});
        }
    }

    /**
     * Build a sqoop command
     *
     * @return sqoop command
     */
    public String build() {
        return buildSqoopCommand();
    }

    /*
     * Build the sqoop command
     */
    private String buildSqoopCommand() {
        StringBuffer commandStringBuffer = new StringBuffer();

        /* Identify operation */
        commandStringBuffer.append(operationName)                       //sqoop
            .append(SPACE_STRING)
            .append(operationType)                                      //import
            .append(SPACE_STRING);

        /* Handle encrypted password file */
        if (passwordMode == PasswordMode.ENCRYPTED_ON_HDFS_FILE) {
            commandStringBuffer.append(sourcePasswordLoaderClassLabel)  //-Dorg.apache.sqoop.credentials.loader.class
                .append(EQUAL_STRING)
                .append(QUOTE)
                .append(sourcePasswordLoaderClassValue)                 //org.apache.sqoop.util.password.CryptoFileLoader
                .append(END_QUOTE_SPACE)
                .append(sourcePasswordPassphraseLabel)                  //-Dorg.apache.sqoop.credentials.loader.crypto.passphrase
                .append(EQUAL_STRING)
                .append(QUOTE)
                .append(sourcePasswordPassphrase)                       //"user provided"
                .append(END_QUOTE_SPACE);
        }

        /* Handle authentication */
        commandStringBuffer
            .append(sourceConnectionStringLabel)                        //--connect
            .append(START_SPACE_QUOTE)
            .append(sourceConnectionString)                             //"user provided"
            .append(END_QUOTE_SPACE)
            .append(sourceUserNameLabel)                                //--username
            .append(START_SPACE_QUOTE)
            .append(sourceUserName)                                     //"user provided"
            .append(END_QUOTE_SPACE);

        /* Handle password modes */
        if (passwordMode == PasswordMode.ENCRYPTED_ON_HDFS_FILE) {
            commandStringBuffer.append(sourcePasswordHdfsFileLabel)     //--password-file
                .append(START_SPACE_QUOTE)
                .append(sourcePasswordHdfsFile)                         //"user provided"
                .append(END_QUOTE_SPACE);
        } else if (passwordMode == PasswordMode.CLEAR_TEXT_ENTRY || passwordMode == PasswordMode.ENCRYPTED_TEXT_ENTRY) {
            if (passwordMode == PasswordMode.ENCRYPTED_TEXT_ENTRY) {
                try {
                    sourceEnteredPassword = DecryptPassword.decryptPassword(sourceEnteredPassword, sourcePasswordPassphrase);
                    if (logger != null) {
                        logger.info("Entered encrypted password was decrypted successfully.");
                    }
                } catch (Exception e) {
                    sourceEnteredPassword = UNABLE_TO_DECRYPT_STRING;
                    if (logger != null) {
                        logger.error("Unable to decrypt entered password (encrypted, Base 64). [{}]", new Object[]{e.getMessage()});
                    }
                }
            }
            commandStringBuffer.append(sourcePasswordClearTextLabel)    //--password
                .append(START_SPACE_QUOTE)
                .append(sourceEnteredPassword)                          //"user provided"
                .append(END_QUOTE_SPACE);
        }

        if ((sourceConnectionManager != null) && (!sourceConnectionManager.isEmpty())) {
            commandStringBuffer.append(sourceConnectionManagerLabel)    //--connection-manager
                .append(START_SPACE_QUOTE)
                .append(sourceConnectionManager)                        //"user provided"
                .append(END_QUOTE_SPACE);
        }

        if ((sourceDriver != null) && (!sourceDriver.isEmpty())) {
            commandStringBuffer.append(sourceDriverLabel)               //--driver
                .append(START_SPACE_QUOTE)
                .append(sourceDriver)                                   //"user provided"
                .append(END_QUOTE_SPACE);
        }

        /* Handle table details */
        commandStringBuffer.append(sourceTableNameLabel)                //--table
            .append(START_SPACE_QUOTE)
            .append(sourceTableName)                                    //"user provided"
            .append(END_QUOTE_SPACE);

        if (!sourceTableFields.trim().equals(STAR_STRING)) {
            commandStringBuffer
                .append(sourceTableFieldsLabel)                         //--columns
                .append(START_SPACE_QUOTE)
                .append(sourceTableFields)                              //"generated from user provided value"
                .append(END_QUOTE_SPACE);
        }

        if (sourceTableWhereClause != null) {
            commandStringBuffer
                .append(sourceTableWhereClauseLabel)                    //--where
                .append(START_SPACE_QUOTE)
                .append(sourceTableWhereClause)                         //"user provided"
                .append(END_QUOTE_SPACE);
        }

        /* Handle splits */
        if (sourceTableSplitField != null) {
            commandStringBuffer
                .append(sourceTableSplitFieldLabel)                     //--split-by
                .append(START_SPACE_QUOTE)
                .append(sourceTableSplitField)                          //"user provided"
                .append(END_QUOTE_SPACE);
        } else {
            commandStringBuffer
                .append(sourceAutoSetToOneMapperLabel)                  //--autoreset-to-one-mapper
                .append(SPACE_STRING);
        }

        /* Handle HDFS landing data parameters */
        commandStringBuffer
            .append(targetHdfsDirectoryLabel)                           //--target-dir
            .append(START_SPACE_QUOTE)
            .append(targetHdfsDirectory)                                //"user provided"
            .append(END_QUOTE_SPACE);

        if (targetHdfsDirExistsStrategy == TargetHdfsDirExistsStrategy.DELETE_DIR_AND_IMPORT) {
            commandStringBuffer
                .append(targetHdfsDirDeleteLabel)                       //--delete-target-dir
                .append(SPACE_STRING);
        }

        switch (extractDataFormat) {
            case TEXT:
                commandStringBuffer.append(extractDataFormatTextLabel);             //--as-textfile
                break;
            case AVRO:
                commandStringBuffer.append(extractDataFormatAvroLabel);             //--as-avrodatafile
                break;
            case SEQUENCE_FILE:
                commandStringBuffer.append(extractDataFormatSequenceFileLabel);     //--as-sequencefile
                break;
            case PARQUET:
                commandStringBuffer.append(extractDataFormatParquetLabel);          //--as-parquetfile
                break;
            default:
                commandStringBuffer.append(extractDataFormatTextLabel);             //--as-textfile
                break;
        }

        commandStringBuffer.append(SPACE_STRING);

        commandStringBuffer
            .append(targetHdfsFileFieldDelimiterLabel)                  //--fields-terminated-by
            .append(START_SPACE_QUOTE)
            .append(targetHdfsFileFieldDelimiter)                       //"user provided"
            .append(END_QUOTE_SPACE);

        commandStringBuffer
            .append(targetHdfsFileRecordDelimiterLabel)                 //--lines-terminated-by
            .append(START_SPACE_QUOTE)
            .append(targetHdfsFileRecordDelimiter)                      //"user provided"
            .append(END_QUOTE_SPACE);

        /* Handle incremental load parameters */
        if (sourceLoadStrategy != SqoopLoadStrategy.FULL_LOAD) {
            commandStringBuffer
                .append(incrementalStrategyLabel)                       //--incremental
                .append(SPACE_STRING);

            if (sourceLoadStrategy == SqoopLoadStrategy.INCREMENTAL_APPEND) {
                commandStringBuffer.append(incrementalAppendStrategyLabel);         //append
            } else if (sourceLoadStrategy == SqoopLoadStrategy.INCREMENTAL_LASTMODIFIED) {
                commandStringBuffer.append(incrementalLastModifiedStrategyLabel);   //lastmodified
            }

            commandStringBuffer
                .append(SPACE_STRING)
                .append(sourceCheckColumnNameLabel)                     //--check-column
                .append(START_SPACE_QUOTE)
                .append(sourceCheckColumnName)                          //"user provided"
                .append(END_QUOTE_SPACE)
                .append(sourceCheckColumnLastValueLabel)                //--last-value
                .append(START_SPACE_QUOTE)
                .append(sourceCheckColumnLastValue)                     //"user provided/watermark service"
                .append(END_QUOTE_SPACE);
        }

        /* Handle Hive related parameters */
        if (targetHiveDelimStrategy == HiveDelimStrategy.DROP) {
            commandStringBuffer
                .append(targetHiveDropDelimLabel)                       //--hive-drop-import-delims
                .append(SPACE_STRING);
        }
        /* else if (targetHiveDelimStrategy == HiveDelimStrategy.KEEP) {
            // Do nothing. Keeping for readability.
        } */
        else if (targetHiveDelimStrategy == HiveDelimStrategy.REPLACE) {
            commandStringBuffer
                .append(targetHiveReplaceDelimLabel)                    //--hive-delims-replacement
                .append(START_SPACE_QUOTE)
                .append(targetHiveReplaceDelim)                         //"user provided"
                .append(END_QUOTE_SPACE);
        }

        if (targetHiveNullEncodingStrategy == HiveNullEncodingStrategy.ENCODE_STRING_AND_NONSTRING) {
            commandStringBuffer
                .append(targetHiveNullEncodingStrategyNullStringLabel)      //--null-string '\\N'
                .append(SPACE_STRING)
                .append(targetHiveNullEncodingStrategyNullNonStringLabel)   //--null-non-string '\\N'
                .append(SPACE_STRING);
        }
        /* else if (targetHiveNullEncodingStrategy == HiveNullEncodingStrategy.DO_NOT_ENCODE) {
            // Do nothing. Keeping for readability.
        } */
        else if (targetHiveNullEncodingStrategy == HiveNullEncodingStrategy.ENCODE_ONLY_STRING) {
            commandStringBuffer
                .append(targetHiveNullEncodingStrategyNullStringLabel)      //--null-string '\\N'
                .append(SPACE_STRING);
        } else if (targetHiveNullEncodingStrategy == HiveNullEncodingStrategy.ENCODE_ONLY_NONSTRING) {
            commandStringBuffer
                .append(targetHiveNullEncodingStrategyNullNonStringLabel)   //--null-non-string '\\N'
                .append(SPACE_STRING);
        }

        /* Handle other job parameters */
        if (sourceBoundaryQuery != null) {
            commandStringBuffer
                .append(sourceBoundaryQueryLabel)                       //--boundary-query
                .append(START_SPACE_QUOTE)
                .append(sourceBoundaryQuery)                            //"user provided"
                .append(END_QUOTE_SPACE);
        }

        commandStringBuffer.append(clusterMapTasksLabel)                //--num-mappers
            .append(START_SPACE_QUOTE)
            .append(clusterMapTasks)                                    //"user-provided-value"
            .append(END_QUOTE_SPACE);

        if (targetCompressFlag) {
            commandStringBuffer
                .append(targetCompressLabel)                            //--compress
                .append(SPACE_STRING)
                .append(targetCompressionCodecLabel)                    //--compression-codec
                .append(START_SPACE_QUOTE)
                .append(targetCompressionCodec)                         //"user provided"
                .append(END_QUOTE_SPACE);
        }

        if ((targetColumnTypeMapping != null) && (!targetColumnTypeMapping.isEmpty())) {
            commandStringBuffer
                .append(targetColumnTypeMappingLabel)                   //--map-column-java
                .append(START_SPACE_QUOTE)
                .append(targetColumnTypeMapping)                        //"user provided"
                .append(END_QUOTE_SPACE);
        }

        if ((sqoopCodeGenDirectory != null) && (!sqoopCodeGenDirectory.isEmpty())) {
            commandStringBuffer
                .append(sqoopCodeGenDirectoryLabel)                     //--outdir
                .append(START_SPACE_QUOTE)
                .append(sqoopCodeGenDirectory)                          //"user provided"
                .append(END_QUOTE_SPACE);
        }

        if (clusterUIJobName != null) {
            commandStringBuffer
                .append(clusterUIJobNameLabel)                          //--mapreduce-job-name
                .append(START_SPACE_QUOTE)
                .append(clusterUIJobName)                               //"user-provided-value"
                .append(QUOTE);
        }

        /* Handle source specific options */
        if (sourceSpecificOptions) {
            commandStringBuffer
                .append(SPACE_STRING)
                .append(sourceSpecificOptionsLabel)                     //--
                .append(SPACE_STRING);

            if (sourceSpecificSqlServerSchema != null) {
                commandStringBuffer
                    .append(sourceSpecificSqlServerSchemaLabel)         //--schema
                    .append(START_SPACE_QUOTE)
                    .append(sourceSpecificSqlServerSchema)              //"user-provided-value"
                    .append(END_QUOTE_SPACE);
            }
        }

        return commandStringBuffer.toString();
    }

    /*
     * Get the list of fields as a string separated by commas
     */
    private String getQueryFieldsForStatement(List<String> fields) {
        int totalFields = fields.size();
        StringBuilder queryFieldsBuilder = new StringBuilder();

        for (int i = 0; i < totalFields; i++) {
            queryFieldsBuilder.append(fields.get(i).trim());
            if (i != (totalFields - 1)) {
                queryFieldsBuilder.append(",");
            }
        }

        return queryFieldsBuilder.toString();
    }
}
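/*
 * Minimal sketch of executing a built command (an illustration, not part of the
 * original class; the shell path and the sqoop CLI being on the PATH are
 * assumptions). The generated string embeds shell-style quoting, so it is handed
 * to a shell for tokenization rather than split into arguments directly:
 *
 *     String command = sqoopBuilder.build();  // sqoopBuilder configured as in the class javadoc
 *     Process process = new ProcessBuilder("/bin/bash", "-c", command)
 *         .redirectErrorStream(true)          // merge stderr into stdout
 *         .start();
 *     int exitCode = process.waitFor();       // 0 indicates the sqoop job succeeded
 */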