/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.source.extractor.extract.jdbc;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.WorkUnitState;
import gobblin.source.extractor.DataRecordException;
import gobblin.source.extractor.exception.HighWatermarkException;
import gobblin.source.extractor.exception.RecordCountException;
import gobblin.source.extractor.exception.SchemaException;
import gobblin.source.extractor.extract.Command;
import gobblin.source.extractor.extract.CommandOutput;
import gobblin.source.extractor.schema.Schema;
import gobblin.source.extractor.utils.Utils;
import gobblin.source.extractor.watermark.Predicate;
import gobblin.source.extractor.watermark.WatermarkType;
import gobblin.source.workunit.WorkUnit;
/**
* Teradata extractor using JDBC protocol
*
* @author ypopov
*/
@Slf4j
public class TeradataExtractor extends JdbcExtractor {
private static final String TERADATA_TIMESTAMP_FORMAT = "yyyy-MM-dd HH:mm:ss";
private static final String TERADATA_DATE_FORMAT = "yyyy-MM-dd";
private static final String TERADATA_HOUR_FORMAT = "HH";
private static final long SAMPLE_RECORD_COUNT = -1;
private static final String ELEMENT_DATA_TYPE = "string";
private static final String TERADATA_SAMPLE_CLAUSE = " sample ";
private static final Gson gson = new Gson();
public TeradataExtractor(WorkUnitState workUnitState) {
super(workUnitState);
}
@Override
public List<Command> getSchemaMetadata(String schema, String entity) throws SchemaException {
log.debug("Build query to get schema");
List<Command> commands = new ArrayList<Command>();
String inputQuery = this.workUnit.getProp(ConfigurationKeys.SOURCE_QUERYBASED_QUERY);
String metadataSql, predicate = "1=0";
if(isNullOrEmpty(inputQuery)) {
metadataSql = "select * from " + schema + "." + entity;
} else {
metadataSql = this.removeSampleClauseFromQuery(inputQuery);
}
metadataSql = SqlQueryUtils.addPredicate(metadataSql, predicate);
commands.add(JdbcExtractor.getCommand(metadataSql, JdbcCommand.JdbcCommandType.QUERY));
return commands;
}
@Override
public List<Command> getHighWatermarkMetadata(String schema, String entity, String watermarkColumn,
List<Predicate> predicateList) throws HighWatermarkException {
log.debug("Build query to get high watermark");
List<Command> commands = new ArrayList<Command>();
String columnProjection = "max(" + Utils.getCoalesceColumnNames(watermarkColumn) + ")";
String watermarkFilter = this.concatPredicates(predicateList);
String query = this.getExtractSql();
if (isNullOrEmpty(watermarkFilter)) {
watermarkFilter = "1=1";
}
query = query.replace(this.getOutputColumnProjection(), columnProjection)
.replace(ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_WATERMARK_PREDICATE_SYMBOL, watermarkFilter);
commands.add(JdbcExtractor.getCommand(query, JdbcCommand.JdbcCommandType.QUERY));
return commands;
}
@Override
public List<Command> getCountMetadata(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList)
throws RecordCountException {
log.debug("Build query to get source record count");
List<Command> commands = new ArrayList<Command>();
String columnProjection = "CAST(COUNT(1) AS BIGINT)";
String watermarkFilter = this.concatPredicates(predicateList);
String query = this.getExtractSql();
if (isNullOrEmpty(watermarkFilter)) {
watermarkFilter = "1=1";
}
query = query.replace(this.getOutputColumnProjection(), columnProjection)
.replace(ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_WATERMARK_PREDICATE_SYMBOL, watermarkFilter);
String sampleFilter = this.constructSampleClause();
query = query + sampleFilter;
if (!isNullOrEmpty(sampleFilter)) {
query = "SELECT " + columnProjection + " FROM (" + query.replace(columnProjection, "1 as t") + ") temp";
}
commands.add(JdbcExtractor.getCommand(query, JdbcCommand.JdbcCommandType.QUERY));
return commands;
}
@Override
public List<Command> getDataMetadata(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList)
throws DataRecordException {
log.debug("Build query to extract data");
List<Command> commands = new ArrayList<Command>();
int fetchSize = this.workUnitState.getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_JDBC_RESULTSET_FETCH_SIZE,
ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_JDBC_RESULTSET_FETCH_SIZE);
String watermarkFilter = this.concatPredicates(predicateList);
String query = this.getExtractSql();
if (isNullOrEmpty(watermarkFilter)) {
watermarkFilter = "1=1";
}
query = query.replace(ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_WATERMARK_PREDICATE_SYMBOL, watermarkFilter);
String sampleFilter = this.constructSampleClause();
query = query + sampleFilter;
commands.add(JdbcExtractor.getCommand(query, JdbcCommand.JdbcCommandType.QUERY));
commands.add(JdbcExtractor.getCommand(fetchSize, JdbcCommand.JdbcCommandType.FETCHSIZE));
return commands;
}
@Override
public Map<String, String> getDataTypeMap() {
Map<String, String> dataTypeMap =
ImmutableMap.<String, String>builder()
.put("byteint", "int")
.put("smallint", "int")
.put("integer", "int")
.put("bigint", "long")
.put("float", "float")
.put("decimal", "double")
.put("char", "string")
.put("varchar", "string")
.put("byte", "bytes")
.put("varbyte", "bytes")
.put("date", "date")
.put("time", "time")
.put("timestamp", "timestamp")
.put("clob", "string")
.put("blob", "string")
.put("structured udt", "array")
.put("double precision", "float")
.put("numeric", "double")
.put("real", "float")
.put("character", "string")
.put("char varying", "string")
.put("character varying", "string")
.put("long varchar", "string")
.put("interval", "string")
.build();
return dataTypeMap;
}
@Override
public Iterator<JsonElement> getRecordSetFromSourceApi(String schema, String entity, WorkUnit workUnit,
List<Predicate> predicateList) throws IOException {
return null;
}
@Override
public String getConnectionUrl() {
String urlPrefix = "jdbc:teradata://";
String host = this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_HOST_NAME);
checkArgument(!isNullOrEmpty(host), "Connectionn host cannot be null or empty at %s", ConfigurationKeys.SOURCE_CONN_HOST_NAME);
String port = this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_PORT,"1025");
String database = this.workUnit.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SCHEMA);
String defaultUrl = urlPrefix + host.trim() + "/TYPE=FASTEXPORT,DATABASE=" + database.trim() + ",DBS_PORT=" + port.trim() ;
// use custom url from source.conn.host if Teradata jdbc url available
return host.contains(urlPrefix) ? host.trim() : defaultUrl;
}
@Override
public long exractSampleRecordCountFromQuery(String query) {
if (isNullOrEmpty(query)) {
return SAMPLE_RECORD_COUNT;
}
long recordcount = SAMPLE_RECORD_COUNT;
String limit = null;
String inputQuery = query.toLowerCase();
int limitIndex = inputQuery.indexOf(TERADATA_SAMPLE_CLAUSE);
if (limitIndex > 0) {
limit = query.substring(limitIndex + TERADATA_SAMPLE_CLAUSE.length()).trim();
}
if (!isNullOrEmpty(limit)) {
try {
recordcount = Long.parseLong(limit);
} catch (Exception e) {
log.error("Ignoring incorrect limit value in input query: {}", limit);
}
}
return recordcount;
}
@Override
public String removeSampleClauseFromQuery(String query) {
if (isNullOrEmpty(query)) {
return null;
}
String limitString = "";
String inputQuery = query.toLowerCase();
int limitIndex = inputQuery.indexOf(TERADATA_SAMPLE_CLAUSE);
if (limitIndex > 0) {
limitString = query.substring(limitIndex);
}
return query.replace(limitString, "");
}
@Override
public String constructSampleClause() {
long sampleRowCount = this.getSampleRecordCount();
if (sampleRowCount >= 0) {
return TERADATA_SAMPLE_CLAUSE + sampleRowCount;
}
return "";
}
@Override
public String getWatermarkSourceFormat(WatermarkType watermarkType) {
String columnFormat = null;
switch (watermarkType) {
case TIMESTAMP:
columnFormat = TERADATA_TIMESTAMP_FORMAT;
break;
case DATE:
columnFormat = TERADATA_DATE_FORMAT;
break;
case HOUR:
columnFormat = TERADATA_HOUR_FORMAT;
break;
case SIMPLE:
break;
default:
log.error("Watermark type {} not recognized", watermarkType.toString());
}
return columnFormat;
}
@Override
public String getHourPredicateCondition(String column, long value, String valueFormat, String operator) {
log.debug("Getting hour predicate for Teradata");
String formattedvalue = Utils.toDateTimeFormat(Long.toString(value), valueFormat, TERADATA_HOUR_FORMAT);
return Utils.getCoalesceColumnNames(column) + " " + operator + " '" + formattedvalue + "'";
}
@Override
public String getDatePredicateCondition(String column, long value, String valueFormat, String operator) {
log.debug("Getting date predicate for Teradata");
String formattedvalue = Utils.toDateTimeFormat(Long.toString(value), valueFormat, TERADATA_DATE_FORMAT);
return Utils.getCoalesceColumnNames(column) + " " + operator + " '" + formattedvalue + "'";
}
@Override
public String getTimestampPredicateCondition(String column, long value, String valueFormat, String operator) {
log.debug("Getting timestamp predicate for Teradata");
String formattedvalue = Utils.toDateTimeFormat(Long.toString(value), valueFormat, TERADATA_TIMESTAMP_FORMAT);
return Utils.getCoalesceColumnNames(column) + " " + operator + " '" + formattedvalue + "'";
}
@Override
public JsonArray getSchema(CommandOutput<?, ?> response) throws SchemaException, IOException {
log.debug("Extract schema from resultset");
ResultSet resultset = null;
Iterator<ResultSet> itr = (Iterator<ResultSet>) response.getResults().values().iterator();
if (itr.hasNext()) {
resultset = itr.next();
} else {
throw new SchemaException("Failed to get schema from Teradata - empty schema resultset");
}
JsonArray fieldJsonArray = new JsonArray();
try {
Schema schema = new Schema();
ResultSetMetaData rsmd = resultset.getMetaData();
String columnName, columnTypeName;
for (int i = 1; i <= rsmd.getColumnCount(); i++) {
columnName = rsmd.getColumnName(i);
columnTypeName = rsmd.getColumnTypeName(i);
schema.setColumnName(columnName);
List<String> mapSymbols = null;
JsonObject newDataType = this.convertDataType(columnName, columnTypeName, ELEMENT_DATA_TYPE, mapSymbols);
schema.setDataType(newDataType);
schema.setLength(rsmd.getColumnDisplaySize(i));
schema.setPrecision(rsmd.getPrecision(i));
schema.setScale(rsmd.getScale(i));
schema.setNullable(rsmd.isNullable(i) == ResultSetMetaData.columnNullable);
schema.setComment(rsmd.getColumnLabel(i));
String jsonStr = gson.toJson(schema);
JsonObject obj = gson.fromJson(jsonStr, JsonObject.class).getAsJsonObject();
fieldJsonArray.add(obj);
}
} catch (Exception e) {
throw new SchemaException("Failed to get schema from Teradaa; error - " + e.getMessage(), e);
}
return fieldJsonArray;
}
}