/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.transforms;
import org.apache.kafka.common.cache.Cache;
import org.apache.kafka.common.cache.LRUCache;
import org.apache.kafka.common.cache.SynchronizedCache;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.ConnectRecord;
import org.apache.kafka.connect.data.ConnectSchema;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.errors.DataException;
import org.apache.kafka.connect.transforms.util.SchemaUtil;
import org.apache.kafka.connect.transforms.util.SimpleConfig;
import java.util.LinkedHashMap;
import java.util.Map;
import static org.apache.kafka.connect.transforms.util.Requirements.requireMap;
import static org.apache.kafka.connect.transforms.util.Requirements.requireStruct;
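/**
 * Flattens nested {@link Struct}s (for records with schemas) or {@link Map}s (for schemaless records) into a
 * single level, joining nested field names with a configurable delimiter.
 * <p>As a sketch of typical usage, a connector configuration might apply the value variant like this (the
 * transform alias {@code flatten} and the delimiter are illustrative, not required values):
 * <pre>
 * transforms=flatten
 * transforms.flatten.type=org.apache.kafka.connect.transforms.Flatten$Value
 * transforms.flatten.delimiter=_
 * </pre>
 */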
public abstract class Flatten<R extends ConnectRecord<R>> implements Transformation<R> {
public static final String OVERVIEW_DOC =
"Flatten a nested data structure, generating names for each field by concatenating the field names at each "
+ "level with a configurable delimiter character. Applies to Struct when schema present, or a Map "
+ "in the case of schemaless data. The default delimiter is '.'."
+ "<p/>Use the concrete transformation type designed for the record key (<code>" + Key.class.getName() + "</code>) "
+ "or value (<code>" + Value.class.getName() + "</code>).";
private static final String DELIMITER_CONFIG = "delimiter";
private static final String DELIMITER_DEFAULT = ".";
public static final ConfigDef CONFIG_DEF = new ConfigDef()
.define(DELIMITER_CONFIG, ConfigDef.Type.STRING, DELIMITER_DEFAULT, ConfigDef.Importance.MEDIUM,
"Delimiter to insert between field names from the input record when generating field names for the "
+ "output record");
private static final String PURPOSE = "flattening";
private String delimiter;
private Cache<Schema, Schema> schemaUpdateCache;
@Override
public void configure(Map<String, ?> props) {
final SimpleConfig config = new SimpleConfig(CONFIG_DEF, props);
delimiter = config.getString(DELIMITER_CONFIG);
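        // Flattened schema translations are cached per input schema; a small LRU bound keeps memory use modest
        // while still serving the common case of a task seeing only a handful of distinct schemas.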
        schemaUpdateCache = new SynchronizedCache<>(new LRUCache<>(16));
}
@Override
public R apply(R record) {
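        // Records without a schema carry their data as Maps; records with a schema carry Structs.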
if (operatingSchema(record) == null) {
return applySchemaless(record);
} else {
return applyWithSchema(record);
}
}
@Override
public void close() {
}
@Override
public ConfigDef config() {
return CONFIG_DEF;
}
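    // Template methods: the Key and Value subclasses below choose which half of the record is flattened.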
protected abstract Schema operatingSchema(R record);
protected abstract Object operatingValue(R record);
protected abstract R newRecord(R record, Schema updatedSchema, Object updatedValue);
    private R applySchemaless(R record) {
        if (operatingValue(record) == null) {
            // Tombstones and other null values carry nothing to flatten; pass them through unchanged.
            return newRecord(record, null, null);
        }
        final Map<String, Object> value = requireMap(operatingValue(record), PURPOSE);
final Map<String, Object> newValue = new LinkedHashMap<>();
applySchemaless(value, "", newValue);
return newRecord(record, null, newValue);
}
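    /**
     * Recursively copy entries from {@code originalRecord} into {@code newRecord}, prefixing each key with the
     * delimiter-joined names of its ancestor keys. With the default delimiter, for example, {@code {"a": {"b": 1}}}
     * flattens to {@code {"a.b": 1}}.
     */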
private void applySchemaless(Map<String, Object> originalRecord, String fieldNamePrefix, Map<String, Object> newRecord) {
for (Map.Entry<String, Object> entry : originalRecord.entrySet()) {
final String fieldName = fieldName(fieldNamePrefix, entry.getKey());
            Object value = entry.getValue();
            if (value == null) {
                // A null value has no nested structure to flatten; emit it under the flattened name and move on.
                // (Returning here instead would silently drop all remaining entries in this map.)
                newRecord.put(fieldName, null);
                continue;
            }
Schema.Type inferredType = ConnectSchema.schemaType(value.getClass());
if (inferredType == null) {
throw new DataException("Flatten transformation was passed a value of type " + value.getClass()
+ " which is not supported by Connect's data API");
}
switch (inferredType) {
case INT8:
case INT16:
case INT32:
case INT64:
case FLOAT32:
case FLOAT64:
case BOOLEAN:
case STRING:
case BYTES:
                    newRecord.put(fieldName, value);
                    break;
                case MAP:
                    final Map<String, Object> fieldValue = requireMap(value, PURPOSE);
                    applySchemaless(fieldValue, fieldName, newRecord);
                    break;
                default:
                    throw new DataException("Flatten transformation does not support " + value.getClass()
                            + " for records without schemas (for field " + fieldName + ").");
}
}
}
    private R applyWithSchema(R record) {
        if (operatingValue(record) == null) {
            // Tombstones and other null values carry nothing to flatten; pass them through with their schema intact.
            return newRecord(record, operatingSchema(record), null);
        }
        final Struct value = requireStruct(operatingValue(record), PURPOSE);
Schema updatedSchema = schemaUpdateCache.get(value.schema());
if (updatedSchema == null) {
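            // Cache miss: derive the flattened schema once; subsequent records with the same input schema reuse it.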
final SchemaBuilder builder = SchemaUtil.copySchemaBasics(value.schema(), SchemaBuilder.struct());
Struct defaultValue = (Struct) value.schema().defaultValue();
buildUpdatedSchema(value.schema(), "", builder, value.schema().isOptional(), defaultValue);
updatedSchema = builder.build();
schemaUpdateCache.put(value.schema(), updatedSchema);
}
final Struct updatedValue = new Struct(updatedSchema);
buildWithSchema(value, "", updatedValue);
return newRecord(record, updatedSchema, updatedValue);
}
/**
     * Build an updated Struct Schema that flattens all nested fields into a single Struct, handling cases where
     * optionality and default values of the flattened fields are affected by the optionality and default values of
     * parent/ancestor schemas (e.g. a flattened field is optional because its parent schema was optional, even if
     * the field's own schema is marked as required).
* @param schema the schema to translate
* @param fieldNamePrefix the prefix to use on field names, i.e. the delimiter-joined set of ancestor field names
* @param newSchema the flattened schema being built
* @param optional true if any ancestor schema is optional
* @param defaultFromParent the default value, if any, included via the parent/ancestor schemas
*/
private void buildUpdatedSchema(Schema schema, String fieldNamePrefix, SchemaBuilder newSchema, boolean optional, Struct defaultFromParent) {
for (Field field : schema.fields()) {
final String fieldName = fieldName(fieldNamePrefix, field.name());
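            // Optionality is inherited: if any ancestor schema is optional, the flattened field must also be
            // optional, because the entire subtree may be absent at runtime.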
final boolean fieldIsOptional = optional || field.schema().isOptional();
Object fieldDefaultValue = null;
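            // Prefer the field's own default value; otherwise fall back to the value this field takes in the
            // parent schema's default Struct, if one was provided.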
if (field.schema().defaultValue() != null) {
fieldDefaultValue = field.schema().defaultValue();
} else if (defaultFromParent != null) {
fieldDefaultValue = defaultFromParent.get(field);
}
switch (field.schema().type()) {
case INT8:
case INT16:
case INT32:
case INT64:
case FLOAT32:
case FLOAT64:
case BOOLEAN:
case STRING:
case BYTES:
newSchema.field(fieldName, convertFieldSchema(field.schema(), fieldIsOptional, fieldDefaultValue));
break;
case STRUCT:
buildUpdatedSchema(field.schema(), fieldName, newSchema, fieldIsOptional, (Struct) fieldDefaultValue);
break;
                default:
                    throw new DataException("Flatten transformation does not support " + field.schema().type()
                            + " for records with schemas (for field " + fieldName + ").");
}
}
}
/**
     * Convert the schema for a field of a Struct with a primitive schema into the schema to be used for the
     * flattened version, overriding optionality and default values where necessary to reflect the optionality and
     * default values of parent/ancestor schemas.
     * @param orig the original schema for the field
     * @param optional whether the new flattened field should be optional
     * @param defaultFromParent the default value either taken from the existing field or provided by the parent
     * @return the schema to use for the flattened version of the field
     */
private Schema convertFieldSchema(Schema orig, boolean optional, Object defaultFromParent) {
// Note that we don't use the schema translation cache here. It might save us a bit of effort, but we really
// only care about caching top-level schema translations.
final SchemaBuilder builder = SchemaUtil.copySchemaBasics(orig);
if (optional)
builder.optional();
if (defaultFromParent != null)
builder.defaultValue(defaultFromParent);
return builder.build();
}
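    /**
     * Recursively copy field values from {@code record} into the flattened {@code newRecord}, naming each output
     * field with the delimiter-joined path of ancestor field names.
     */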
    private void buildWithSchema(Struct record, String fieldNamePrefix, Struct newRecord) {
        if (record == null) {
            // An optional ancestor Struct may be null at runtime; its flattened fields were already marked
            // optional when the schema was built, so leaving them unset is valid.
            return;
        }
        for (Field field : record.schema().fields()) {
final String fieldName = fieldName(fieldNamePrefix, field.name());
switch (field.schema().type()) {
case INT8:
case INT16:
case INT32:
case INT64:
case FLOAT32:
case FLOAT64:
case BOOLEAN:
case STRING:
case BYTES:
newRecord.put(fieldName, record.get(field));
break;
case STRUCT:
buildWithSchema(record.getStruct(field.name()), fieldName, newRecord);
break;
                default:
                    throw new DataException("Flatten transformation does not support " + field.schema().type()
                            + " for records with schemas (for field " + fieldName + ").");
}
}
}
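    // Top-level fields (empty prefix) keep their original names; nested fields are prefixed with the
    // delimiter-joined names of their ancestors.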
private String fieldName(String prefix, String fieldName) {
return prefix.isEmpty() ? fieldName : (prefix + delimiter + fieldName);
}
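    /** Concrete transformation that operates on the record key. */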
public static class Key<R extends ConnectRecord<R>> extends Flatten<R> {
@Override
protected Schema operatingSchema(R record) {
return record.keySchema();
}
@Override
protected Object operatingValue(R record) {
return record.key();
}
@Override
protected R newRecord(R record, Schema updatedSchema, Object updatedValue) {
return record.newRecord(record.topic(), record.kafkaPartition(), updatedSchema, updatedValue, record.valueSchema(), record.value(), record.timestamp());
}
}
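    /** Concrete transformation that operates on the record value. */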
public static class Value<R extends ConnectRecord<R>> extends Flatten<R> {
@Override
protected Schema operatingSchema(R record) {
return record.valueSchema();
}
@Override
protected Object operatingValue(R record) {
return record.value();
}
@Override
protected R newRecord(R record, Schema updatedSchema, Object updatedValue) {
return record.newRecord(record.topic(), record.kafkaPartition(), record.keySchema(), record.key(), updatedSchema, updatedValue, record.timestamp());
}
}
}