/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.nifi.processors.kite;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.IndexedRecord;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
/**
* Responsible for converting records of one Avro type to another. Supports
* syntax like "record.field" to unpack fields and will try to do simple type
* conversion.
*/
public class AvroRecordConverter {
    private static final Locale DEFAULT_LOCALE = Locale.getDefault();

    private final Schema inputSchema;
    private final Schema outputSchema;
    // Keyed by output field name, valued by input field path, so that the
    // source of each output field can be looked up directly. This is the
    // reverse of the map handed to the constructor.
    private final Map<String, String> fieldMapping;
    // Locale handed to the Scanner that parses numeric values.
    private final Locale locale;

    /**
     * Creates a converter that parses numbers using the JVM default locale
     * (captured when this class was loaded).
     *
     * @param inputSchema
     *            Schema of input record objects
     * @param outputSchema
     *            Schema of output record objects
     * @param fieldMapping
     *            Map from field name in input record to field name in output
     *            record. Input names may use "record.field" syntax to reach
     *            into nested records.
     */
    public AvroRecordConverter(Schema inputSchema, Schema outputSchema,
            Map<String, String> fieldMapping) {
        this(inputSchema, outputSchema, fieldMapping, DEFAULT_LOCALE);
    }

    /**
     * Creates a converter that parses numbers using the given locale.
     *
     * @param inputSchema
     *            Schema of input record objects
     * @param outputSchema
     *            Schema of output record objects
     * @param fieldMapping
     *            Map from field name in input record to field name in output
     *            record. Input names may use "record.field" syntax to reach
     *            into nested records. If several input fields map to the same
     *            output field, only one of them is kept.
     * @param locale
     *            Locale to use when parsing numeric values
     */
    public AvroRecordConverter(Schema inputSchema, Schema outputSchema,
            Map<String, String> fieldMapping, Locale locale) {
        this.inputSchema = inputSchema;
        this.outputSchema = outputSchema;
        // Callers supply input -> output; store output -> input instead.
        this.fieldMapping = Maps
                .newHashMapWithExpectedSize(fieldMapping.size());
        for (Map.Entry<String, String> entry : fieldMapping.entrySet()) {
            this.fieldMapping.put(entry.getValue(), entry.getKey());
        }
        this.locale = locale;
    }

    /**
     * Lists the output fields that cannot be populated from the input schema.
     * An output field is resolved through its explicit mapping if it has one,
     * otherwise by looking for an input field with the same name. A mapping
     * path that steps through something that is not a record is reported as
     * unmapped.
     *
     * @return Any fields in the output schema that are not mapped or are mapped
     *         by a non-existent input field.
     */
    public Collection<String> getUnmappedFields() {
        List<String> result = Lists.newArrayList();
        for (Field f : outputSchema.getFields()) {
            String fieldName = inputPathFor(f.name());
            Schema currentSchema = inputSchema;
            while (fieldName.contains(".")) {
                // Walk down the schema one path segment at a time.
                int dotIndex = fieldName.indexOf('.');
                String entityName = fieldName.substring(0, dotIndex);
                // In case we had an optional record, look at just the record.
                Field entity = findField(getNonNullSchema(currentSchema),
                        entityName);
                if (entity == null) {
                    // Tried to step into a schema that doesn't exist. The
                    // remaining (still dotted) name is checked below.
                    break;
                }
                currentSchema = entity.schema();
                fieldName = fieldName.substring(dotIndex + 1);
            }
            if (currentSchema == null
                    || findField(getNonNullSchema(currentSchema), fieldName) == null) {
                result.add(f.name());
            }
        }
        return result;
    }

    /**
     * Converts one record to another given a input and output schema plus
     * explicit mappings for certain target fields.
     *
     * @param input
     *            Input record to convert conforming to the inputSchema this
     *            converter was created with.
     * @return Record converted to the outputSchema this converter was created
     *         with.
     * @throws AvroConversionException
     *             When schemas do not match (including when an output field
     *             has no corresponding input field) or illegal conversions
     *             are attempted, such as when numeric data fails to parse.
     */
    public Record convert(Record input) throws AvroConversionException {
        Record result = new Record(outputSchema);
        for (Field outputField : outputSchema.getFields()) {
            // Default to matching by name unless an explicit mapping exists.
            String inputPath = inputPathFor(outputField.name());
            String inputFieldName = inputPath;
            IndexedRecord currentRecord = input;
            Schema currentSchema = getNonNullSchema(inputSchema);
            while (inputFieldName.contains(".")) {
                // Walk down the record one path segment at a time.
                int dotIndex = inputFieldName.indexOf('.');
                String entityName = inputFieldName.substring(0, dotIndex);
                Field entity = findField(currentSchema, entityName);
                if (entity == null) {
                    // Report a bad mapping as a conversion failure rather
                    // than failing with a NullPointerException.
                    throw new AvroConversionException("Cannot resolve "
                            + entityName + " while reading input field "
                            + inputPath);
                }
                Object innerRecord = currentRecord.get(entity.pos());
                if (innerRecord == null) {
                    // Probably hit a null record here. Stop descending so
                    // that a null value is converted below.
                    currentRecord = null;
                    break;
                }
                if (!(innerRecord instanceof IndexedRecord)) {
                    throw new AvroConversionException(inputFieldName
                            + " stepped through a non-record");
                }
                currentRecord = (IndexedRecord) innerRecord;
                // In case we had an optional record, choose just the record.
                currentSchema = getNonNullSchema(entity.schema());
                inputFieldName = inputFieldName.substring(dotIndex + 1);
            }
            if (currentRecord == null) {
                // We stepped into a null value, so the result is null. The
                // input schema is irrelevant when converting null content.
                result.put(outputField.name(),
                        convertData(null, null, outputField.schema()));
            } else {
                // Current should now be in the right place to read the field.
                Field f = findField(currentSchema, inputFieldName);
                if (f == null) {
                    throw new AvroConversionException("Input field "
                            + inputPath + " does not exist in the input schema");
                }
                result.put(
                        outputField.name(),
                        convertData(currentRecord.get(f.pos()), f.schema(),
                                outputField.schema()));
            }
        }
        return result;
    }

    /**
     * @return The input schema this converter was created with.
     */
    public Schema getInputSchema() {
        return inputSchema;
    }

    /**
     * @return The output schema this converter was created with.
     */
    public Schema getOutputSchema() {
        return outputSchema;
    }

    /**
     * Returns the input field path that feeds the given output field: the
     * explicitly mapped path if there is one, otherwise the output field's
     * own name.
     */
    private String inputPathFor(String outputFieldName) {
        String mapped = fieldMapping.get(outputFieldName);
        return mapped != null ? mapped : outputFieldName;
    }

    /**
     * Looks up a field by name, returning null when the field is missing or
     * when the schema is not a record (getField is only supported on record
     * schemas).
     */
    private static Field findField(Schema schema, String name) {
        if (schema.getType() != Schema.Type.RECORD) {
            return null;
        }
        return schema.getField(name);
    }

    /**
     * Converts the data from one schema to another. If the types are the same,
     * no change will be made, but simple conversions will be attempted for
     * other types: anything can become a string via toString(), and long,
     * int, double and float outputs are parsed from the content's string form.
     *
     * @param content
     *            The data to convert, generally taken from a field in an input
     *            Record.
     * @param inputSchema
     *            The schema of the content object. Not consulted when content
     *            is null.
     * @param outputSchema
     *            The schema to convert to.
     * @return The content object, converted to the output schema.
     * @throws AvroConversionException
     *             When conversion is impossible, either because the output type
     *             is not supported or because numeric data failed to parse, or
     *             because content is null and the output does not allow null.
     */
    private Object convertData(Object content, Schema inputSchema,
            Schema outputSchema) throws AvroConversionException {
        if (content == null) {
            // No conversion can happen here.
            if (supportsNull(outputSchema)) {
                return null;
            }
            throw new AvroConversionException("Output schema " + outputSchema
                    + " does not support null");
        }
        Schema nonNillInput = getNonNullSchema(inputSchema);
        Schema nonNillOutput = getNonNullSchema(outputSchema);
        if (nonNillInput.getType().equals(nonNillOutput.getType())) {
            return content;
        }
        if (nonNillOutput.getType() == Schema.Type.STRING) {
            return content.toString();
        }
        // For the non-string cases, convert through string using Scanner to
        // validate types. Only the first token is read, so a String that
        // starts with a number but then contains other content may yield
        // questionable results.
        Scanner scanner = new Scanner(content.toString());
        try {
            scanner.useLocale(locale);
            switch (nonNillOutput.getType()) {
            case LONG:
                if (scanner.hasNextLong()) {
                    return scanner.nextLong();
                } else {
                    throw new AvroConversionException("Cannot convert "
                            + content + " to long");
                }
            case INT:
                if (scanner.hasNextInt()) {
                    return scanner.nextInt();
                } else {
                    throw new AvroConversionException("Cannot convert "
                            + content + " to int");
                }
            case DOUBLE:
                if (scanner.hasNextDouble()) {
                    return scanner.nextDouble();
                } else {
                    throw new AvroConversionException("Cannot convert "
                            + content + " to double");
                }
            case FLOAT:
                if (scanner.hasNextFloat()) {
                    return scanner.nextFloat();
                } else {
                    throw new AvroConversionException("Cannot convert "
                            + content + " to float");
                }
            default:
                throw new AvroConversionException("Cannot convert to type "
                        + nonNillOutput.getType());
            }
        } finally {
            // Release the scanner on every exit path, including failures.
            scanner.close();
        }
    }

    /**
     * If s is a union schema of some type with null, returns that type.
     * Otherwise just return schema itself.
     *
     * Does not handle unions of schemas with anything except null and one type.
     *
     * @param s
     *            Schema to remove nillable from.
     * @return The Schema of the non-null part of the union, if the input was
     *         a union type containing a non-null type. Otherwise returns the
     *         input schema.
     * @throws IllegalArgumentException
     *             If s is a union containing more than one non-null type.
     */
    protected static Schema getNonNullSchema(Schema s) {
        if (s.getType() != Schema.Type.UNION) {
            return s;
        }
        // Assert that this must be a union that only includes one non-null
        // type. A union with no non-null member is returned unchanged.
        boolean foundOne = false;
        Schema result = s;
        for (Schema type : s.getTypes()) {
            if (!type.getType().equals(Schema.Type.NULL)) {
                Preconditions.checkArgument(!foundOne,
                        "Cannot handle union of two non-null types");
                foundOne = true;
                result = type;
            }
        }
        return result;
    }

    /**
     * Reports whether a null value is valid for the given schema.
     *
     * @param s
     *            Schema to inspect.
     * @return true if s is the null type or a union with a null member,
     *         false otherwise.
     */
    protected static boolean supportsNull(Schema s) {
        if (s.getType() == Schema.Type.NULL) {
            return true;
        } else if (s.getType() == Schema.Type.UNION) {
            for (Schema type : s.getTypes()) {
                if (type.getType() == Schema.Type.NULL) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Exception thrown when Avro conversion fails.
     */
    // NOTE(review): this is a non-static inner class, so each instance keeps
    // a reference to the enclosing converter. Left as-is to avoid changing
    // how existing callers construct it.
    public class AvroConversionException extends Exception {
        public AvroConversionException(String string, IOException e) {
            super(string, e);
        }

        public AvroConversionException(String string) {
            super(string);
        }
    }
}