package com.thinkbiganalytics.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.google.common.base.Preconditions;
import com.thinkbiganalytics.nifi.thrift.api.RowVisitor;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.SchemaBuilder.FieldAssembler;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.commons.lang3.CharUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.sql.Date;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Time;
import java.sql.Timestamp;
import java.sql.Types;
import javax.annotation.Nonnull;
import static java.sql.Types.ARRAY;
import static java.sql.Types.BIGINT;
import static java.sql.Types.BINARY;
import static java.sql.Types.BIT;
import static java.sql.Types.BLOB;
import static java.sql.Types.BOOLEAN;
import static java.sql.Types.CHAR;
import static java.sql.Types.CLOB;
import static java.sql.Types.DATE;
import static java.sql.Types.DECIMAL;
import static java.sql.Types.DOUBLE;
import static java.sql.Types.FLOAT;
import static java.sql.Types.INTEGER;
import static java.sql.Types.LONGNVARCHAR;
import static java.sql.Types.LONGVARBINARY;
import static java.sql.Types.LONGVARCHAR;
import static java.sql.Types.NCHAR;
import static java.sql.Types.NUMERIC;
import static java.sql.Types.NVARCHAR;
import static java.sql.Types.REAL;
import static java.sql.Types.ROWID;
import static java.sql.Types.SMALLINT;
import static java.sql.Types.TIME;
import static java.sql.Types.TIMESTAMP;
import static java.sql.Types.TINYINT;
import static java.sql.Types.VARBINARY;
import static java.sql.Types.VARCHAR;
/**
* JDBC / SQL common functions.
*/
public class JdbcCommon {
// Shared SLF4J logger for all conversion utilities in this class.
// NOTE(review): left public and non-final for source compatibility with existing callers;
// ideally this would be private static final.
public static Logger logger = LoggerFactory.getLogger(JdbcCommon.class);
/**
 * Converts the specified SQL result set to a delimited text file written to the specified output stream.
 *
 * <p>The first line written is a header of escaped column names. The caller retains ownership of
 * {@code outStream}; this method flushes but does not close it.
 *
 * @param rs the SQL result set
 * @param outStream the output stream for the delimited text file
 * @param visitor records position of the result set; may be {@code null}
 * @param delimiter the column delimiter for the delimited text file
 * @return the number of rows written
 * @throws SQLException if a SQL error occurs while reading the result set
 * @throws IOException if an I/O error occurs while writing to the output stream
 */
public static long convertToDelimitedStream(final ResultSet rs, final OutputStream outStream, final RowVisitor visitor, String delimiter) throws SQLException, IOException {
    // avoid overflowing log with redundant messages
    int dateConversionWarning = 0;
    if (rs == null || rs.getMetaData() == null) {
        logger.warn("Received empty resultset or no metadata.");
        return 0;
    }
    // Fix: use an explicit charset so the produced file does not depend on the JVM's platform default
    final OutputStreamWriter writer = new OutputStreamWriter(outStream, StandardCharsets.UTF_8);
    final ResultSetMetaData meta = rs.getMetaData();
    final DelimiterEscaper escaper = new DelimiterEscaper(delimiter);
    // Joda formatters are immutable and thread-safe; create once instead of once per value
    final DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateTime().withZoneUTC();
    final DateTimeFormatter timeFormatter = ISODateTimeFormat.time().withZoneUTC();
    // Write header row of escaped column names
    final int nrOfColumns = meta.getColumnCount();
    StringBuilder sb = new StringBuilder();
    for (int i = 1; i <= nrOfColumns; i++) {
        sb.append(escaper.translate(meta.getColumnName(i)));
        sb.append(i != nrOfColumns ? delimiter : "\n");
    }
    writer.append(sb.toString());
    long nrOfRows = 0;
    while (rs.next()) {
        if (visitor != null) {
            visitor.visitRow(rs);
        }
        sb = new StringBuilder();
        nrOfRows++;
        for (int i = 1; i <= nrOfColumns; i++) {
            String val = null;
            final int colType = meta.getColumnType(i);
            if (colType == Types.DATE || colType == Types.TIMESTAMP) {
                Timestamp sqlDate = null;
                try {
                    // Extract timestamp (falls back to getDate internally)
                    sqlDate = extractSqlDate(rs, i);
                } catch (Exception e) {
                    // Still failed, maybe exotic date type; warn only for the first few occurrences
                    if (dateConversionWarning++ < 10) {
                        logger.warn("{} is not convertible to timestamp or date", meta.getColumnName(i));
                    }
                }
                if (visitor != null) {
                    visitor.visitColumn(meta.getColumnName(i), colType, sqlDate);
                }
                if (sqlDate != null) {
                    val = dateTimeFormatter.print(new DateTime(sqlDate.getTime()));
                }
            } else if (colType == Types.TIME) {
                Time time = rs.getTime(i);
                if (visitor != null) {
                    visitor.visitColumn(meta.getColumnName(i), colType, time);
                }
                // Fix: guard against SQL NULL time values (previously threw NullPointerException);
                // a null time is rendered as an empty field, consistent with the date branch above
                if (time != null) {
                    val = timeFormatter.print(new DateTime(time.getTime()));
                }
            } else {
                val = rs.getString(i);
                if (visitor != null) {
                    visitor.visitColumn(meta.getColumnName(i), colType, val);
                }
            }
            sb.append(val == null ? "" : escaper.translate(val));
            sb.append(i != nrOfColumns ? delimiter : "\n");
        }
        writer.append(sb.toString());
    }
    // Flush but do not close: the caller owns outStream
    writer.flush();
    return nrOfRows;
}
/**
 * Extracts a result set column to a SQL timestamp.
 *
 * @param rs  the result set positioned at the row to read
 * @param col the 1-based column index
 * @return the column value as a timestamp, or {@code null} if the value is SQL NULL
 * @throws SQLException if the value can be read neither as a timestamp nor as a date
 */
private static Timestamp extractSqlDate(ResultSet rs, int col) throws SQLException {
    try {
        // Preferred path: read the column directly as a timestamp
        return rs.getTimestamp(col);
    } catch (SQLException e) {
        // Some drivers reject getTimestamp for DATE columns; fall back to getDate.
        // (The original getTimestamp failure is intentionally not rethrown here.)
        Date sqlDateDate = rs.getDate(col);
        // Fix: use the primitive millisecond value directly (was needlessly boxed as Long)
        return (sqlDateDate != null) ? new Timestamp(sqlDateDate.getTime()) : null;
    }
}
/**
 * Converts the specified SQL result set to an Avro data file written to the specified output stream.
 *
 * @param rs the SQL result set
 * @param outStream the output stream for the Avro data file
 * @param visitor records position of the result set; may be {@code null}
 * @param schema the Avro schema matching the result set columns (see {@link #createSchema(ResultSet)})
 * @return the number of rows written
 * @throws SQLException if a SQL error occurs while reading the result set
 * @throws IOException if an I/O error occurs while writing to the output stream
 */
public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, final RowVisitor visitor, final Schema schema) throws SQLException, IOException {
    // avoid overflowing log with redundant date-conversion warnings
    int dateConversionWarning = 0;
    final GenericRecord rec = new GenericData.Record(schema);
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, outStream);
        final ResultSetMetaData meta = rs.getMetaData();
        final int nrOfColumns = meta.getColumnCount();
        long nrOfRows = 0;
        while (rs.next()) {
            if (visitor != null) {
                visitor.visitRow(rs);
            }
            for (int i = 1; i <= nrOfColumns; i++) {
                final int javaSqlType = meta.getColumnType(i);
                final Object value = rs.getObject(i);
                if (value == null) {
                    rec.put(i - 1, null);
                } else if (javaSqlType == BINARY || javaSqlType == VARBINARY || javaSqlType == LONGVARBINARY || javaSqlType == ARRAY || javaSqlType == BLOB || javaSqlType == CLOB) {
                    // bytes require slightly different handling for Avro's "bytes" type.
                    // Fix: guard against drivers that return null from getBytes even though
                    // getObject was non-null (previously a NullPointerException in ByteBuffer.wrap)
                    byte[] bytes = rs.getBytes(i);
                    rec.put(i - 1, (bytes != null) ? ByteBuffer.wrap(bytes) : null);
                } else if (value instanceof Byte) {
                    // tinyint(1) is reported by JDBC as java.sql.Types.TINYINT but the value
                    // arrives as java.lang.Byte (at least with the H2 driver); a direct put
                    // would throw org.apache.avro.AvroRuntimeException: Unknown datum type java.lang.Byte
                    rec.put(i - 1, ((Byte) value).intValue());
                } else if (value instanceof BigDecimal || value instanceof BigInteger) {
                    // Avro can't handle BigDecimal/BigInteger as numbers - it would throw an
                    // AvroRuntimeException such as: "Unknown datum type: java.math.BigDecimal: 38"
                    rec.put(i - 1, value.toString());
                } else if (value instanceof Number || value instanceof Boolean) {
                    rec.put(i - 1, value);
                } else if (value instanceof Date) {
                    final DateTimeFormatter formatter = ISODateTimeFormat.dateTime().withZoneUTC();
                    rec.put(i - 1, formatter.print(new DateTime(((Date) value).getTime())));
                } else if (value instanceof Time) {
                    final DateTimeFormatter formatter = ISODateTimeFormat.time().withZoneUTC();
                    rec.put(i - 1, formatter.print(new DateTime(((Time) value).getTime())));
                } else if (value instanceof Timestamp) {
                    final DateTimeFormatter formatter = ISODateTimeFormat.dateTime().withZoneUTC();
                    rec.put(i - 1, formatter.print(new DateTime(((Timestamp) value).getTime())));
                } else {
                    // The supported types are numbers (int, long, double, float), booleans and
                    // Strings. Avro provides no timestamp type, so anything else is stored via toString().
                    rec.put(i - 1, value.toString());
                }
                // notify the visitor with a type-appropriate representation of the value
                if (javaSqlType == Types.DATE || javaSqlType == Types.TIMESTAMP) {
                    Timestamp sqlDate = null;
                    try {
                        // Extract timestamp (falls back to getDate internally)
                        sqlDate = extractSqlDate(rs, i);
                    } catch (Exception e) {
                        if (dateConversionWarning++ < 10) {
                            logger.warn("{} is not convertible to timestamp or date", meta.getColumnName(i));
                        }
                    }
                    if (visitor != null) {
                        visitor.visitColumn(meta.getColumnName(i), javaSqlType, sqlDate);
                    }
                } else if (javaSqlType == Types.TIME) {
                    Time time = rs.getTime(i);
                    if (visitor != null) {
                        visitor.visitColumn(meta.getColumnName(i), javaSqlType, time);
                    }
                } else {
                    if (visitor != null) {
                        visitor.visitColumn(meta.getColumnName(i), javaSqlType, (value != null) ? value.toString() : null);
                    }
                }
            }
            dataFileWriter.append(rec);
            nrOfRows += 1;
        }
        return nrOfRows;
    }
}
/**
 * Builds an Avro schema describing the columns of the given result set.
 *
 * <p>The record name is taken from the table of the first column when available, otherwise
 * "NiFi_ExecuteSQL_Record". Every field is declared as a union of null and the mapped Avro
 * type, since any column may hold SQL NULL.
 *
 * @param rs the result set whose metadata is converted
 * @return the generated Avro schema
 * @throws SQLException if the result set metadata cannot be read
 * @throws IllegalArgumentException if a column has a SQL type with no Avro mapping
 */
public static Schema createSchema(final ResultSet rs) throws SQLException {
    final ResultSetMetaData meta = rs.getMetaData();
    final int columnCount = meta.getColumnCount();
    String recordName = "";
    try {
        recordName = meta.getTableName(1);
    } catch (SQLException e) {
        // table name is optional metadata; fall through to the default below
    }
    if (StringUtils.isBlank(recordName)) {
        recordName = "NiFi_ExecuteSQL_Record";
    }
    final FieldAssembler<Schema> builder = SchemaBuilder.record(recordName).namespace("any.data").fields();
    // Some missing Avro types - Decimal, Date types. May need some additional work.
    for (int col = 1; col <= columnCount; col++) {
        final String fieldName = meta.getColumnName(col);
        switch (meta.getColumnType(col)) {
            // Character data maps to string.
            case CHAR:
            case LONGNVARCHAR:
            case LONGVARCHAR:
            case NCHAR:
            case NVARCHAR:
            case VARCHAR:
            // java.sql.RowId is an interface and database-implementation specific; use string.
            case ROWID:
            // No exact Avro type for fixed-point numbers; use string.
            case DECIMAL:
            case NUMERIC:
            // No Avro temporal types used here; use string.
            case DATE:
            case TIME:
            case TIMESTAMP:
                builder.name(fieldName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
                break;
            case BIT:
            case BOOLEAN:
                builder.name(fieldName).type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault();
                break;
            case INTEGER:
                // Unsigned 32-bit values do not fit an Avro int; widen to long.
                if (meta.isSigned(col)) {
                    builder.name(fieldName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault();
                } else {
                    builder.name(fieldName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault();
                }
                break;
            case SMALLINT:
            case TINYINT:
                builder.name(fieldName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault();
                break;
            case BIGINT:
                builder.name(fieldName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault();
                break;
            case FLOAT:
            case REAL:
                builder.name(fieldName).type().unionOf().nullBuilder().endNull().and().floatType().endUnion().noDefault();
                break;
            case DOUBLE:
                builder.name(fieldName).type().unionOf().nullBuilder().endNull().and().doubleType().endUnion().noDefault();
                break;
            case BINARY:
            case VARBINARY:
            case LONGVARBINARY:
            case ARRAY:
            case BLOB:
            case CLOB:
                builder.name(fieldName).type().unionOf().nullBuilder().endNull().and().bytesType().endUnion().noDefault();
                break;
            default:
                throw new IllegalArgumentException("createSchema: Unknown SQL type " + meta.getColumnType(col) + " cannot be converted to Avro type");
        }
    }
    return builder.endRecord();
}
/**
 * Escapes values in delimited text files.
 *
 * <p>A value is left untouched unless it contains the delimiter, a double quote, or a line break.
 * In that case the whole value is wrapped in double quotes and the special characters inside it
 * are replaced according to {@code replacementStrings}.
 */
static class DelimiterEscaper extends CharSequenceTranslator {
    /**
     * Character for quoting values
     */
    private static final char QUOTE = '"';
    /**
     * String for quoting values
     */
    private static final String QUOTE_STR = String.valueOf(QUOTE);
    /**
     * Character for escaping values
     */
    private static final char BACKSLASH = '\\';
    /**
     * String for escaping values
     */
    private static final String BACKSLASH_STR = String.valueOf(BACKSLASH);
    /**
     * Strings that, if found, require a value to be escaped
     */
    private final String[] searchStrings;
    /**
     * Replacements for the search strings found
     */
    private final String[] replacementStrings;
    /**
     * Constructs a {@code DelimiterEscaper} with the specified delimiter.
     *
     * @param delimiter the delimiter
     */
    DelimiterEscaper(@Nonnull final String delimiter) {
        // NOTE(review): the delimiter is "replaced" by itself — finding it only triggers quoting.
        // CR/LF become the literal sequences backslash-backslash-n / backslash-backslash-r
        // ("\\\\n" is the Java literal for \\n); presumably the downstream ingest expects the
        // doubled backslash — confirm before changing.
        searchStrings = new String[]{delimiter, QUOTE_STR, Character.toString('\n'), Character.toString('\r')};
        replacementStrings = new String[]{delimiter, BACKSLASH_STR + QUOTE_STR, "\\\\n", "\\\\r"};
    }
    /**
     * Translates the whole input in a single call, quoting and escaping it when needed.
     *
     * @param input the value to escape
     * @param index must be 0; this translator always consumes the entire input at once
     * @param out the writer receiving the (possibly quoted) value
     * @return the number of code points consumed (always the whole input)
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public int translate(@Nonnull final CharSequence input, final int index, @Nonnull final Writer out) throws IOException {
        // Fix: Guava's Preconditions message formatting only supports %s placeholders;
        // the previous %d was emitted literally instead of being substituted.
        Preconditions.checkState(index == 0, "Unsupported translation index %s", index);
        final String inputString = input.toString();
        if (StringUtils.containsAny(inputString, searchStrings)) {
            out.write(QUOTE);
            out.write(StringUtils.replaceEach(inputString, searchStrings, replacementStrings));
            out.write(QUOTE);
        } else {
            out.write(inputString);
        }
        return Character.codePointCount(input, 0, input.length());
    }
}
/**
 * Get schema in the format for setting up the feed table.
 *
 * <p>Each field becomes one pipe-delimited line of the form
 * {@code name|hiveType|description|primaryKey|createdTracker|updatedTracker}, where the
 * description is empty and the three trailing flags are always "0". Lines are separated
 * by newlines with no trailing newline.
 *
 * @param schema Avro Schema
 * @return formatted schema for setting up feed table, or "" when {@code schema} is null
 */
public static String getAvroSchemaForFeedSetup(Schema schema) {
    if (schema == null) {
        return "";
    }
    final String PIPE = "|";
    final String description = "";
    final String primaryKey = "0";
    final String createdTracker = "0";
    final String updatedTracker = "0";
    final String newLine = "\n";
    // Fix: StringBuilder avoids StringBuffer's unnecessary synchronization in this single-threaded use
    final StringBuilder retVal = new StringBuilder();
    final int totalFields = schema.getFields().size();
    int counter = 1;
    for (Schema.Field field : schema.getFields()) {
        String name = field.name().toLowerCase();
        String dataType = field.schema().getType().name().toLowerCase();
        if (dataType.equals("union")) {
            // Use the first non-null branch of the union as the effective type
            for (Schema fieldSchemaType : field.schema().getTypes()) {
                if (!fieldSchemaType.getName().toLowerCase().equals("null")) {
                    dataType = getHiveTypeForAvroType(fieldSchemaType.getName().toLowerCase());
                    break;
                }
            }
            // Union contained only null branches
            if (dataType.equals("union")) {
                dataType = "void";
            }
        } else {
            dataType = getHiveTypeForAvroType(dataType);
        }
        retVal.append(name)
            .append(PIPE)
            .append(dataType)
            .append(PIPE)
            .append(description)
            .append(PIPE)
            .append(primaryKey)
            .append(PIPE)
            .append(createdTracker)
            .append(PIPE)
            .append(updatedTracker);
        // No newline after the last field
        if (counter++ < totalFields) {
            retVal.append(newLine);
        }
    }
    return retVal.toString();
}
/**
 * Maps an Avro type name to the corresponding Hive data type.
 * Unrecognized type names default to "string".
 *
 * @param avroType the lower-case Avro type name
 * @return the Hive type name
 */
private static String getHiveTypeForAvroType(String avroType) {
    switch (avroType) {
        case "null":
            return "void";
        // Types whose Hive name matches the Avro name exactly
        case "boolean":
        case "int":
        case "float":
        case "double":
        case "string":
        case "map":
            return avroType;
        case "long":
            return "bigint";
        case "bytes":
        case "fixed":
            return "binary";
        case "record":
            return "struct";
        case "list":
            return "array";
        case "enum":
        default:
            // Enums and anything unrecognized are stored as strings
            return "string";
    }
}
}