package com.thinkbiganalytics.nifi.v2.sqoop.utils;

/*-
 * #%L
 * thinkbig-nifi-hadoop-processors
 * %%
 * Copyright (C) 2017 ThinkBig Analytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.thinkbiganalytics.nifi.v2.sqoop.process.SqoopProcessResult;

import org.apache.nifi.logging.ComponentLog;

import java.util.HashMap;
import java.util.Map;

import javax.annotation.Nonnull;

/**
 * Helpful methods to use with Sqoop processors
 */
public class SqoopUtils {

    private static final String NO_NEW_ROWS_IDENTIFIER = "No new rows detected since last import";

    /**
     * Get credentials/parameters for which values must be masked.
     *
     * @return credentials/parameters map.<br>Key = credential, Value = delimiter between credential and value
     */
    public Map<String, String> getCredentialsToMask() {
        final String EQUAL_STRING = "=";
        final String SPACE_STRING = " ";
        final String PASSPHRASE_IDENTIFIER = "-Dorg.apache.sqoop.credentials.loader.crypto.passphrase";
        final String PASSWORD_FILE_IDENTIFIER = "--password-file";
        final String PASSWORD_IDENTIFIER = "--password";

        Map<String, String> credentialsToMask = new HashMap<>();
        credentialsToMask.put(PASSPHRASE_IDENTIFIER, EQUAL_STRING);
        credentialsToMask.put(PASSWORD_FILE_IDENTIFIER, SPACE_STRING);
        credentialsToMask.put(PASSWORD_IDENTIFIER, SPACE_STRING);
        return credentialsToMask;
    }

    /**
     * Mask credentials in a sqoop command.
     *
     * @param sqoopCommand          sqoop command executed
     * @param credentialIdentifiers map of credentials for which values must be masked.<br>Key = credential, Value = delimiter between credential and value
     * @return sqoop command with credentials masked
     */
    public String maskCredentials(String sqoopCommand, Map<String, String> credentialIdentifiers) {
        final String MASK = "*****";

        for (Map.Entry<String, String> credentialIdentifier : credentialIdentifiers.entrySet()) {
            if (!sqoopCommand.contains(credentialIdentifier.getKey())) {
                continue;
            }
            int startPosCredentialIdentifier = sqoopCommand.indexOf(credentialIdentifier.getKey());
            int startPosValue = sqoopCommand.indexOf(credentialIdentifier.getValue(), startPosCredentialIdentifier);
            int endPosValue = sqoopCommand.indexOf(" ", startPosValue + 1);
            if (endPosValue == -1) {
                // The credential value is the last token of the command; mask through the end
                // instead of calling substring(-1) and throwing StringIndexOutOfBoundsException
                endPosValue = sqoopCommand.length();
            }
            sqoopCommand = sqoopCommand.substring(0, startPosValue + 1) + MASK + sqoopCommand.substring(endPosValue);
        }
        return sqoopCommand;
    }

    /**
     * Get count of records extracted.
     *
     * @param sqoopProcessResult {@link SqoopProcessResult}
     * @param logger             Logger
     * @return extraction record count
     */
    public long getSqoopRecordCount(SqoopProcessResult sqoopProcessResult, ComponentLog logger) {
        String[] logLines = sqoopProcessResult.getLogLines();
        if ((sqoopProcessResult.getExitValue() != 0) || (logLines.length == 0) || (logLines[0] == null)) {
            logger.warn("Skipping attempt to retrieve number of records extracted");
            return -1;
        }

        // Example of logLines[0]:
        // 16/10/12 21:50:03 INFO mapreduce.ImportJobBase: Retrieved 2 records.
        // 16/10/21 02:05:41 INFO tool.ImportTool: No new rows detected since last import.
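        // Parsing approach: the count is read as the token between "Retrieved" and
        // "records." on the summary line. Note (assumption, not stated in the original
        // source): this depends on the exact wording emitted by Sqoop's ImportJobBase;
        // a Sqoop version that changes the message would make this method return -1.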
        String recordCountLogLine = logLines[0];

        if (recordCountLogLine.contains(NO_NEW_ROWS_IDENTIFIER)) {
            return 0;
        }

        final String START_RECORD_COUNT_IDENTIFIER = "Retrieved";
        final String END_RECORD_COUNT_IDENTIFIER = "records.";
        int start = recordCountLogLine.indexOf(START_RECORD_COUNT_IDENTIFIER);
        int end = recordCountLogLine.indexOf(END_RECORD_COUNT_IDENTIFIER);

        try {
            // The substring call is inside the try so that a missing identifier
            // (indexOf returning -1) is reported as a parse failure rather than
            // escaping as an unhandled StringIndexOutOfBoundsException
            String numberString = recordCountLogLine.substring(start + START_RECORD_COUNT_IDENTIFIER.length(), end).trim();
            return Long.parseLong(numberString);
        } catch (Exception e) {
            logger.warn("Unable to parse number of records extracted. " + e.getMessage());
            return -1;
        }
    }

    /**
     * Get count of records exported (for a sqoop export job).
     *
     * @param sqoopExportProcessResult {@link SqoopProcessResult}
     * @param logger                   Logger
     * @return export record count
     */
    public long getSqoopExportRecordCount(SqoopProcessResult sqoopExportProcessResult, ComponentLog logger) {
        String[] logLines = sqoopExportProcessResult.getLogLines();
        if ((sqoopExportProcessResult.getExitValue() != 0) || (logLines.length == 0) || (logLines[0] == null)) {
            logger.warn("Skipping attempt to retrieve number of records exported");
            return -1;
        }

        // Example of logLines[0]:
        // 16/11/17 00:25:14 INFO mapreduce.ExportJobBase: Exported 4 records.
        // In case of no records to export, the above will report: Exported 0 records.
        String recordExportCountLogLine = logLines[0];

        final String START_EXPORT_RECORD_COUNT_IDENTIFIER = "Exported";
        final String END_EXPORT_RECORD_COUNT_IDENTIFIER = "records.";
        int start = recordExportCountLogLine.indexOf(START_EXPORT_RECORD_COUNT_IDENTIFIER);
        int end = recordExportCountLogLine.indexOf(END_EXPORT_RECORD_COUNT_IDENTIFIER);

        try {
            String numberString = recordExportCountLogLine.substring(start + START_EXPORT_RECORD_COUNT_IDENTIFIER.length(), end).trim();
            return Long.parseLong(numberString);
        } catch (Exception e) {
            logger.warn("Unable to parse number of records exported. " + e.getMessage());
            return -1;
        }
    }

    /**
     * Get the next high watermark value for an incremental load.
     *
     * @param sqoopProcessResult {@link SqoopProcessResult}
     * @return new high watermark value
     */
    public String getNewHighWatermark(SqoopProcessResult sqoopProcessResult) {
        String[] logLines = sqoopProcessResult.getLogLines();
        final String NO_UPDATE = "NO_UPDATE";
        final String LAST_VALUE_IDENTIFIER = "--last-value";

        if ((sqoopProcessResult.getExitValue() != 0) || (logLines.length <= 1)) {
            return NO_UPDATE;
        } else if ((logLines[0] != null) && (logLines[0].contains(NO_NEW_ROWS_IDENTIFIER))) {
            return NO_UPDATE;
        } else {
            if (logLines[1] == null) {
                return NO_UPDATE;
            }
            // Example of logLines[1]:
            // 16/10/18 23:37:11 INFO tool.ImportTool: --last-value 1006
            String newHighWaterMarkLogLine = logLines[1];
            int start = newHighWaterMarkLogLine.indexOf(LAST_VALUE_IDENTIFIER);
            if (start == -1) {
                // Expected identifier not found in the log line; leave the watermark unchanged
                return NO_UPDATE;
            }
            return newHighWaterMarkLogLine.substring(start + LAST_VALUE_IDENTIFIER.length()).trim();
        }
    }

    /**
     * Check if the source relational system is Teradata.
     *
     * @param sourceConnectionString Connection string for source relational system
     * @return true/false indicating if source system is Teradata
     */
    public Boolean isTeradataDatabase(@Nonnull String sourceConnectionString) {
        final String TERADATA_IDENTIFIER = "jdbc:teradata";
        return sourceConnectionString.toLowerCase().contains(TERADATA_IDENTIFIER);
    }

    /**
     * Check target column type mappings input format. Expected format is key=value pairs separated by commas, with no spaces.
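     * <p>Illustrative inputs and results (examples added here; the values are made up):
     * <pre>{@code
     * checkMappingInput("id=INTEGER,name=VARCHAR")   // true:  two pairs, one comma, no spaces
     * checkMappingInput("id=INTEGER, name=VARCHAR")  // false: contains a space
     * checkMappingInput("id=INTEGER,name")           // false: second pair has no '='
     * }</pre>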
     *
     * @param valueToCheck A value to check
     * @return true/false indicating if value is correctly formatted
     */
    public Boolean checkMappingInput(@Nonnull String valueToCheck) {
        final String SPACE_STRING = " ";
        final char COMMA_CHAR = ',';
        final char EQUAL_CHAR = '=';

        if (valueToCheck.contains(SPACE_STRING)) {
            return false;
        }

        int commaCount = 0;
        int equalCount = 0;
        for (char c : valueToCheck.toCharArray()) {
            if (c == COMMA_CHAR) {
                commaCount++;
            } else if (c == EQUAL_CHAR) {
                equalCount++;
            }
        }
        // A well-formed list of n key=value pairs has n '=' characters and n-1 ',' separators
        return (equalCount - commaCount - 1) == 0;
    }
}
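
/*
 * Minimal usage sketch (added for illustration; not part of the original source).
 * The command string, connection string, and mappings below are made-up values.
 * It exercises the masking, database-detection, and mapping-validation helpers above.
 */
class SqoopUtilsDemo {

    public static void main(String[] args) {
        SqoopUtils utils = new SqoopUtils();

        // Mask the value that follows --password in a sqoop command
        String command = "sqoop import --connect jdbc:teradata://dw-host/db --username etl --password s3cret --table customers";
        System.out.println(utils.maskCredentials(command, utils.getCredentialsToMask()));
        // -> ... --password ***** --table customers

        System.out.println(utils.isTeradataDatabase("jdbc:teradata://dw-host/db"));  // true
        System.out.println(utils.checkMappingInput("id=INTEGER,name=VARCHAR"));      // true
        System.out.println(utils.checkMappingInput("id=INTEGER, name=VARCHAR"));     // false (contains a space)
    }
}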