/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.transforms;
import org.apache.kafka.common.cache.Cache;
import org.apache.kafka.common.cache.LRUCache;
import org.apache.kafka.common.cache.SynchronizedCache;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.connect.connector.ConnectRecord;
import org.apache.kafka.connect.data.ConnectSchema;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.errors.DataException;
import org.apache.kafka.connect.transforms.util.SchemaUtil;
import org.apache.kafka.connect.transforms.util.SimpleConfig;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import static org.apache.kafka.connect.transforms.util.Requirements.requireMap;
import static org.apache.kafka.connect.transforms.util.Requirements.requireStruct;
public abstract class Cast<R extends ConnectRecord<R>> implements Transformation<R> {
// TODO: Currently we only support top-level field casting. Ideally we could use a dotted notation in the spec to
// allow casting nested fields.
/**
 * Human-readable overview of this transformation. The text embeds the fully-qualified class
 * names of the {@link Key} and {@link Value} variants.
 */
public static final String OVERVIEW_DOC =
"Cast fields or the entire key or value to a specific type, e.g. to force an integer field to a smaller "
+ "width. Only simple primitive types are supported -- integers, floats, boolean, and string. "
+ "<p/>Use the concrete transformation type designed for the record key (<code>" + Key.class.getName() + "</code>) "
+ "or value (<code>" + Value.class.getName() + "</code>).";
/** Name of the configuration property that holds the cast specification list. */
public static final String SPEC_CONFIG = "spec";
/**
 * Configuration definition for this transformation: a single required LIST property
 * ({@link #SPEC_CONFIG}) whose validator rejects a null or empty list and otherwise delegates to
 * {@code parseFieldTypes} so that malformed specs are reported at validation time.
 */
public static final ConfigDef CONFIG_DEF = new ConfigDef()
.define(SPEC_CONFIG, ConfigDef.Type.LIST, ConfigDef.NO_DEFAULT_VALUE, new ConfigDef.Validator() {
@SuppressWarnings("unchecked")
@Override
public void ensureValid(String name, Object valueObject) {
// The property is declared as ConfigDef.Type.LIST above, hence the unchecked cast.
List<String> value = (List<String>) valueObject;
if (value == null || value.isEmpty()) {
throw new ConfigException("Must specify at least one field to cast.");
}
// Parsed only for its validation side effect; the resulting map is discarded here.
parseFieldTypes(value);
}
@Override
public String toString() {
return "list of colon-delimited pairs, e.g. <code>foo:bar,abc:xyz</code>";
}
},
ConfigDef.Importance.HIGH,
"List of fields and the type to cast them to of the form field1:type,field2:type to cast fields of "
+ "Maps or Structs. A single type to cast the entire value. Valid types are int8, int16, int32, "
+ "int64, float32, float64, boolean, and string.");
// Purpose string handed to requireMap/requireStruct when the record value is checked.
private static final String PURPOSE = "cast types";
// Schema types accepted both as the source and as the target of a cast; consulted by validCastType.
private static final Set<Schema.Type> SUPPORTED_CAST_TYPES = new HashSet<>(
Arrays.asList(Schema.Type.INT8, Schema.Type.INT16, Schema.Type.INT32, Schema.Type.INT64,
Schema.Type.FLOAT32, Schema.Type.FLOAT64, Schema.Type.BOOLEAN, Schema.Type.STRING)
);
// As a special case for casting the entire value (e.g. the incoming key is an int64 but you know it could be an
// int32 and want the smaller width), we use an otherwise invalid field name (null) as the key in the cast map
// to track this.
private static final String WHOLE_VALUE_CAST = null;
// Field name -> target type, as parsed from the spec in configure(); holds the WHOLE_VALUE_CAST key
// when the spec is a single bare type.
private Map<String, Schema.Type> casts;
// Target type for a whole-value cast, or null when the spec casts individual fields.
private Schema.Type wholeValueCastType;
// Maps an incoming schema to the schema produced for it by getOrBuildSchema.
private Cache<Schema, Schema> schemaUpdateCache;
/**
 * Reads the cast spec from {@code props}, parses it into the field-to-type map, records the
 * whole-value target type (if any), and creates a fresh synchronized LRU schema cache holding up
 * to 16 entries.
 *
 * @param props raw transformation configuration
 */
@Override
public void configure(Map<String, ?> props) {
    final SimpleConfig parsedConfig = new SimpleConfig(CONFIG_DEF, props);
    final List<String> spec = parsedConfig.getList(SPEC_CONFIG);
    casts = parseFieldTypes(spec);
    // A bare type in the spec is stored under the WHOLE_VALUE_CAST key.
    wholeValueCastType = casts.get(WHOLE_VALUE_CAST);
    final Cache<Schema, Schema> lru = new LRUCache<>(16);
    schemaUpdateCache = new SynchronizedCache<>(lru);
}
/**
 * Applies the configured casts to the record, choosing the schemaless or schema-aware path
 * depending on whether the operating schema (key or value schema) is present.
 *
 * @param record the record to transform
 * @return a new record carrying the cast key or value
 */
@Override
public R apply(R record) {
    final boolean hasSchema = operatingSchema(record) != null;
    return hasSchema ? applyWithSchema(record) : applySchemaless(record);
}
/**
 * Returns the shared configuration definition of this transformation.
 *
 * @return {@link #CONFIG_DEF}
 */
@Override
public ConfigDef config() {
return CONFIG_DEF;
}
/** No-op: this method body is intentionally empty. */
@Override
public void close() {
}
/**
 * Casts a record that has no schema.
 *
 * <p>For a whole-value cast the operating value itself is converted. Otherwise the value must
 * be a Map (enforced by {@code requireMap}); it is copied and every field named in the spec is
 * overwritten with its cast value. A field named in the spec but absent from the map therefore
 * ends up present in the result with a {@code null} value.
 *
 * @param record the schemaless record
 * @return a new record with a null schema and the cast value
 */
private R applySchemaless(R record) {
    final Object rawValue = operatingValue(record);
    if (wholeValueCastType != null) {
        return newRecord(record, null, castValueToType(rawValue, wholeValueCastType));
    }

    final Map<String, Object> source = requireMap(rawValue, PURPOSE);
    final Map<String, Object> result = new HashMap<>(source);
    for (Map.Entry<String, Schema.Type> cast : casts.entrySet()) {
        final String name = cast.getKey();
        final Schema.Type target = cast.getValue();
        result.put(name, castValueToType(source.get(name), target));
    }
    return newRecord(record, null, result);
}
/**
 * Casts a record that carries a schema.
 *
 * <p>The target schema is obtained from {@code getOrBuildSchema}. For a whole-value cast the
 * operating value is converted directly; otherwise the value must be a Struct (enforced by
 * {@code requireStruct}) and a new Struct is populated field by field, converting only the
 * fields named in the spec.
 *
 * @param record the record with a non-null operating schema
 * @return a new record with the updated schema and value
 */
private R applyWithSchema(R record) {
    final Schema castSchema = getOrBuildSchema(operatingSchema(record));

    // Whole-record casting
    if (wholeValueCastType != null) {
        return newRecord(record, castSchema, castValueToType(operatingValue(record), wholeValueCastType));
    }

    // Casting within a struct
    final Struct original = requireStruct(operatingValue(record), PURPOSE);
    final Struct result = new Struct(castSchema);
    for (Field field : original.schema().fields()) {
        final String name = field.name();
        Object fieldValue = original.get(field);
        final Schema.Type target = casts.get(name);
        if (target != null) {
            fieldValue = castValueToType(fieldValue, target);
        }
        result.put(castSchema.field(name), fieldValue);
    }
    return newRecord(record, castSchema, result);
}
/**
 * Returns the schema that results from applying the configured casts to {@code valueSchema},
 * building it on a cache miss and storing it in {@code schemaUpdateCache}.
 *
 * <p>For a whole-value cast the result is a primitive schema of the target type with the basics
 * of the original copied over by {@code SchemaUtil.copySchemaBasics}. Otherwise the result is a
 * struct schema in which each field named in the spec is replaced by a schema of its target type
 * (keeping optionality and a cast default value), while every other field keeps its original
 * schema unchanged. Reusing the original schema for untouched fields means that fields of types
 * this transformation cannot build (anything {@code convertFieldType} rejects) no longer cause a
 * {@link DataException}, and their name, version, doc and parameters are not dropped.
 *
 * @param valueSchema the schema of the incoming key or value
 * @return the cached or newly built target schema
 * @throws DataException if a default value cannot be cast to the target type
 */
private Schema getOrBuildSchema(Schema valueSchema) {
    Schema updatedSchema = schemaUpdateCache.get(valueSchema);
    if (updatedSchema != null) {
        return updatedSchema;
    }

    final SchemaBuilder builder;
    if (wholeValueCastType != null) {
        builder = SchemaUtil.copySchemaBasics(valueSchema, convertFieldType(wholeValueCastType));
    } else {
        builder = SchemaUtil.copySchemaBasics(valueSchema, SchemaBuilder.struct());
        for (Field field : valueSchema.fields()) {
            final Schema fieldSchema = field.schema();
            final Schema.Type targetType = casts.get(field.name());
            if (targetType == null) {
                // Not part of the spec: keep the field's schema exactly as it was.
                builder.field(field.name(), fieldSchema);
                continue;
            }
            final SchemaBuilder fieldBuilder = convertFieldType(targetType);
            if (fieldSchema.isOptional()) {
                fieldBuilder.optional();
            }
            if (fieldSchema.defaultValue() != null) {
                fieldBuilder.defaultValue(castValueToType(fieldSchema.defaultValue(), fieldBuilder.type()));
            }
            builder.field(field.name(), fieldBuilder.build());
        }
    }

    if (valueSchema.isOptional()) {
        builder.optional();
    }
    // NOTE(review): in the struct case builder.type() is STRUCT, which castValueToType appears to
    // reject, so a struct schema that declares a default value would fail here -- confirm intent.
    if (valueSchema.defaultValue() != null) {
        builder.defaultValue(castValueToType(valueSchema.defaultValue(), builder.type()));
    }

    updatedSchema = builder.build();
    schemaUpdateCache.put(valueSchema, updatedSchema);
    return updatedSchema;
}
/**
 * Creates a fresh {@link SchemaBuilder} for one of the supported primitive target types.
 *
 * @param type the target schema type
 * @return a new builder of that type
 * @throws DataException if {@code type} is not one of the eight types handled below
 */
private SchemaBuilder convertFieldType(Schema.Type type) {
    final SchemaBuilder primitiveBuilder;
    switch (type) {
        case INT8:
            primitiveBuilder = SchemaBuilder.int8();
            break;
        case INT16:
            primitiveBuilder = SchemaBuilder.int16();
            break;
        case INT32:
            primitiveBuilder = SchemaBuilder.int32();
            break;
        case INT64:
            primitiveBuilder = SchemaBuilder.int64();
            break;
        case FLOAT32:
            primitiveBuilder = SchemaBuilder.float32();
            break;
        case FLOAT64:
            primitiveBuilder = SchemaBuilder.float64();
            break;
        case BOOLEAN:
            primitiveBuilder = SchemaBuilder.bool();
            break;
        case STRING:
            primitiveBuilder = SchemaBuilder.string();
            break;
        default:
            throw new DataException("Unexpected type in Cast transformation: " + type);
    }
    return primitiveBuilder;
}
/**
 * Converts {@code value} to the Java representation of {@code targetType}.
 *
 * <p>A {@code null} value is passed through unchanged. Otherwise the value's class must map to a
 * Connect schema type, and that type must be one of the supported cast types, before the
 * conversion is dispatched to the matching {@code castToXxx} helper.
 *
 * @param value      the value to convert; may be null
 * @param targetType the type to convert to
 * @return the converted value (boxed), or null if {@code value} was null
 * @throws DataException if the source or target type is unsupported, or if a string could not
 *                       be parsed as the requested numeric type
 */
private static Object castValueToType(Object value, Schema.Type targetType) {
    if (value == null) {
        return null;
    }
    try {
        final Schema.Type sourceType = ConnectSchema.schemaType(value.getClass());
        if (sourceType == null) {
            throw new DataException("Cast transformation was passed a value of type " + value.getClass()
                + " which is not supported by Connect's data API");
        }
        // Ensure the type we are trying to cast from is supported
        validCastType(sourceType, FieldType.INPUT);

        final Object converted;
        switch (targetType) {
            case INT8:
                converted = castToInt8(value);
                break;
            case INT16:
                converted = castToInt16(value);
                break;
            case INT32:
                converted = castToInt32(value);
                break;
            case INT64:
                converted = castToInt64(value);
                break;
            case FLOAT32:
                converted = castToFloat32(value);
                break;
            case FLOAT64:
                converted = castToFloat64(value);
                break;
            case BOOLEAN:
                converted = castToBoolean(value);
                break;
            case STRING:
                converted = castToString(value);
                break;
            default:
                throw new DataException(targetType.toString() + " is not supported in the Cast transformation.");
        }
        return converted;
    } catch (NumberFormatException e) {
        // Raised by the parseXxx calls in the helpers when given a String input.
        throw new DataException("Value (" + value.toString() + ") was out of range for requested data type", e);
    }
}
/**
 * Converts a Number (via {@code byteValue()}), Boolean (true is 1, false is 0) or String (via
 * {@code Byte.parseByte}) to a {@code byte}.
 *
 * @param value the non-null value to convert
 * @return the converted byte
 * @throws DataException         if {@code value} is none of the handled types
 * @throws NumberFormatException if a String value cannot be parsed as a byte
 */
private static byte castToInt8(Object value) {
    if (value instanceof Number) {
        return ((Number) value).byteValue();
    }
    if (value instanceof Boolean) {
        final boolean flag = (Boolean) value;
        return (byte) (flag ? 1 : 0);
    }
    if (value instanceof String) {
        return Byte.parseByte((String) value);
    }
    throw new DataException("Unexpected type in Cast transformation: " + value.getClass());
}
/**
 * Converts a Number (via {@code shortValue()}), Boolean (true is 1, false is 0) or String (via
 * {@code Short.parseShort}) to a {@code short}.
 *
 * @param value the non-null value to convert
 * @return the converted short
 * @throws DataException         if {@code value} is none of the handled types
 * @throws NumberFormatException if a String value cannot be parsed as a short
 */
private static short castToInt16(Object value) {
    if (value instanceof Number) {
        return ((Number) value).shortValue();
    }
    if (value instanceof Boolean) {
        final boolean flag = (Boolean) value;
        return (short) (flag ? 1 : 0);
    }
    if (value instanceof String) {
        return Short.parseShort((String) value);
    }
    throw new DataException("Unexpected type in Cast transformation: " + value.getClass());
}
/**
 * Converts a Number (via {@code intValue()}), Boolean (true is 1, false is 0) or String (via
 * {@code Integer.parseInt}) to an {@code int}.
 *
 * @param value the non-null value to convert
 * @return the converted int
 * @throws DataException         if {@code value} is none of the handled types
 * @throws NumberFormatException if a String value cannot be parsed as an int
 */
private static int castToInt32(Object value) {
    if (value instanceof Number) {
        return ((Number) value).intValue();
    }
    if (value instanceof Boolean) {
        final boolean flag = (Boolean) value;
        return flag ? 1 : 0;
    }
    if (value instanceof String) {
        return Integer.parseInt((String) value);
    }
    throw new DataException("Unexpected type in Cast transformation: " + value.getClass());
}
/**
 * Converts a Number (via {@code longValue()}), Boolean (true is 1, false is 0) or String (via
 * {@code Long.parseLong}) to a {@code long}.
 *
 * @param value the non-null value to convert
 * @return the converted long
 * @throws DataException         if {@code value} is none of the handled types
 * @throws NumberFormatException if a String value cannot be parsed as a long
 */
private static long castToInt64(Object value) {
    if (value instanceof Number) {
        return ((Number) value).longValue();
    }
    if (value instanceof Boolean) {
        final boolean flag = (Boolean) value;
        return flag ? 1L : 0L;
    }
    if (value instanceof String) {
        return Long.parseLong((String) value);
    }
    throw new DataException("Unexpected type in Cast transformation: " + value.getClass());
}
/**
 * Converts a Number (via {@code floatValue()}), Boolean (true is 1.0, false is 0.0) or String
 * (via {@code Float.parseFloat}) to a {@code float}.
 *
 * @param value the non-null value to convert
 * @return the converted float
 * @throws DataException         if {@code value} is none of the handled types
 * @throws NumberFormatException if a String value cannot be parsed as a float
 */
private static float castToFloat32(Object value) {
    if (value instanceof Number) {
        return ((Number) value).floatValue();
    }
    if (value instanceof Boolean) {
        final boolean flag = (Boolean) value;
        return flag ? 1.0f : 0.0f;
    }
    if (value instanceof String) {
        return Float.parseFloat((String) value);
    }
    throw new DataException("Unexpected type in Cast transformation: " + value.getClass());
}
/**
 * Converts a Number (via {@code doubleValue()}), Boolean (true is 1.0, false is 0.0) or String
 * (via {@code Double.parseDouble}) to a {@code double}.
 *
 * @param value the non-null value to convert
 * @return the converted double
 * @throws DataException         if {@code value} is none of the handled types
 * @throws NumberFormatException if a String value cannot be parsed as a double
 */
private static double castToFloat64(Object value) {
    if (value instanceof Number) {
        return ((Number) value).doubleValue();
    }
    if (value instanceof Boolean) {
        final boolean flag = (Boolean) value;
        return flag ? 1.0 : 0.0;
    }
    if (value instanceof String) {
        return Double.parseDouble((String) value);
    }
    throw new DataException("Unexpected type in Cast transformation: " + value.getClass());
}
/**
 * Converts a Number (true when its {@code longValue()} is non-zero), Boolean (returned as is)
 * or String (via {@code Boolean.parseBoolean}) to a {@code boolean}.
 *
 * @param value the non-null value to convert
 * @return the converted boolean
 * @throws DataException if {@code value} is none of the handled types
 */
private static boolean castToBoolean(Object value) {
    if (value instanceof Number) {
        final long asLong = ((Number) value).longValue();
        return asLong != 0L;
    }
    if (value instanceof Boolean) {
        return (Boolean) value;
    }
    if (value instanceof String) {
        return Boolean.parseBoolean((String) value);
    }
    throw new DataException("Unexpected type in Cast transformation: " + value.getClass());
}
/**
 * Converts a value to its {@code toString()} representation.
 *
 * @param value the value to convert; must not be null, since {@code toString()} is called on it
 * @return the string form of {@code value}
 */
private static String castToString(Object value) {
return value.toString();
}
/** Returns the schema this transformation operates on: the key schema in {@link Key}, the value schema in {@link Value}. */
protected abstract Schema operatingSchema(R record);
/** Returns the object this transformation operates on: the record key in {@link Key}, the record value in {@link Value}. */
protected abstract Object operatingValue(R record);
/** Creates a copy of {@code record} in which the operated-on part (key or value) and its schema are replaced. */
protected abstract R newRecord(R record, Schema updatedSchema, Object updatedValue);
/**
 * Parses the cast spec into a map from field name to target type.
 *
 * <p>Each entry is either {@code field:type} (cast one field) or a bare {@code type} (cast the
 * whole key or value, stored under the {@code WHOLE_VALUE_CAST} key). Type names are trimmed and
 * matched case-insensitively against {@link Schema.Type}. A whole-value cast must be the only
 * entry in the spec.
 *
 * <p>Every malformed entry is reported as a {@link ConfigException}, including entries with no
 * usable parts (such as {@code ":"}) and unknown type names in either entry form.
 *
 * @param mappings the raw spec entries
 * @return the parsed field-to-type map
 * @throws ConfigException if an entry is malformed, names an unknown or unsupported type, or a
 *                         whole-value cast is combined with other entries
 */
private static Map<String, Schema.Type> parseFieldTypes(List<String> mappings) {
    final Map<String, Schema.Type> m = new HashMap<>();
    boolean isWholeValueCast = false;
    for (String mapping : mappings) {
        final String[] parts = mapping.split(":");
        // String.split drops trailing empty strings, so an entry such as ":" yields no parts at all.
        if (parts.length == 0 || parts.length > 2) {
            throw new ConfigException(SPEC_CONFIG, mappings, "Invalid cast mapping: " + mapping);
        }

        // The type name is the last part in both the "type" and the "field:type" form.
        final String typeName = parts[parts.length - 1].trim();
        final Schema.Type type;
        try {
            type = Schema.Type.valueOf(typeName.toUpperCase(Locale.ROOT));
        } catch (IllegalArgumentException e) {
            // ConfigException has no cause-accepting constructor; use the (name, value, message) form.
            throw new ConfigException(SPEC_CONFIG, mappings, "Invalid type found in casting spec: " + typeName);
        }

        if (parts.length == 1) {
            m.put(WHOLE_VALUE_CAST, validCastType(type, FieldType.OUTPUT));
            isWholeValueCast = true;
        } else {
            m.put(parts[0].trim(), validCastType(type, FieldType.OUTPUT));
        }
    }
    if (isWholeValueCast && mappings.size() > 1) {
        throw new ConfigException("Cast transformations that specify a type to cast the entire value to "
            + "may only specify a single cast in their spec");
    }
    return m;
}
// Tells validCastType which side of a cast is being checked, which in turn selects the exception
// it throws: INPUT (type of a runtime value) -> DataException, OUTPUT (configured target type) ->
// ConfigException.
private enum FieldType {
INPUT, OUTPUT
}
/**
 * Checks that {@code type} is one of the supported cast types and returns it unchanged.
 *
 * @param type      the schema type to check
 * @param fieldType whether the type is the source of a cast (INPUT) or its target (OUTPUT);
 *                  selects which exception is raised on failure
 * @return {@code type}, when it is supported
 * @throws DataException   if an INPUT type is unsupported
 * @throws ConfigException if an OUTPUT type is unsupported
 */
private static Schema.Type validCastType(Schema.Type type, FieldType fieldType) {
    if (SUPPORTED_CAST_TYPES.contains(type)) {
        return type;
    }
    final String message = "Cast transformation does not support casting to/from " + type
        + "; supported types are " + SUPPORTED_CAST_TYPES;
    switch (fieldType) {
        case INPUT:
            throw new DataException(message);
        case OUTPUT:
            throw new ConfigException(message);
        default:
            return type;
    }
}
/**
 * Variant of {@link Cast} that operates on the record key and key schema, leaving the value
 * untouched.
 */
public static final class Key<R extends ConnectRecord<R>> extends Cast<R> {

    /** Returns the record's key schema. */
    @Override
    protected Schema operatingSchema(R record) {
        return record.keySchema();
    }

    /** Returns the record's key. */
    @Override
    protected Object operatingValue(R record) {
        return record.key();
    }

    /** Builds a new record with the key and key schema replaced; all other parts are copied from {@code record}. */
    @Override
    protected R newRecord(R record, Schema updatedSchema, Object updatedValue) {
        return record.newRecord(
            record.topic(),
            record.kafkaPartition(),
            updatedSchema,
            updatedValue,
            record.valueSchema(),
            record.value(),
            record.timestamp());
    }
}
/**
 * Variant of {@link Cast} that operates on the record value and value schema, leaving the key
 * untouched.
 */
public static final class Value<R extends ConnectRecord<R>> extends Cast<R> {

    /** Returns the record's value schema. */
    @Override
    protected Schema operatingSchema(R record) {
        return record.valueSchema();
    }

    /** Returns the record's value. */
    @Override
    protected Object operatingValue(R record) {
        return record.value();
    }

    /** Builds a new record with the value and value schema replaced; all other parts are copied from {@code record}. */
    @Override
    protected R newRecord(R record, Schema updatedSchema, Object updatedValue) {
        return record.newRecord(
            record.topic(),
            record.kafkaPartition(),
            record.keySchema(),
            record.key(),
            updatedSchema,
            updatedValue,
            record.timestamp());
    }
}
}