/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nifi.serialization.record.util; import java.math.BigInteger; import java.sql.Date; import java.sql.Time; import java.sql.Timestamp; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.TimeZone; import java.util.function.Consumer; import java.util.function.Supplier; import org.apache.nifi.serialization.SimpleRecordSchema; import org.apache.nifi.serialization.record.DataType; import org.apache.nifi.serialization.record.MapRecord; import org.apache.nifi.serialization.record.Record; import org.apache.nifi.serialization.record.RecordField; import org.apache.nifi.serialization.record.RecordFieldType; import org.apache.nifi.serialization.record.RecordSchema; import org.apache.nifi.serialization.record.type.ChoiceDataType; import org.apache.nifi.serialization.record.type.RecordDataType; public class DataTypeUtils { private static final TimeZone gmt = TimeZone.getTimeZone("gmt"); public static Object convertType(final Object value, final DataType dataType, final String fieldName) { return convertType(value, dataType, () -> getDateFormat(RecordFieldType.DATE.getDefaultFormat()), () -> getDateFormat(RecordFieldType.TIME.getDefaultFormat()), () -> getDateFormat(RecordFieldType.TIMESTAMP.getDefaultFormat()), fieldName); } public static DateFormat getDateFormat(final RecordFieldType fieldType, final Supplier<DateFormat> dateFormat, final Supplier<DateFormat> timeFormat, final Supplier<DateFormat> timestampFormat) { switch (fieldType) { case DATE: return dateFormat.get(); case TIME: return timeFormat.get(); case TIMESTAMP: return timestampFormat.get(); } return null; } public static Object convertType(final Object value, final DataType dataType, final Supplier<DateFormat> dateFormat, final Supplier<DateFormat> timeFormat, final Supplier<DateFormat> timestampFormat, final String fieldName) { if (value == null) { return null; } switch (dataType.getFieldType()) { case BIGINT: return toBigInt(value, fieldName); case BOOLEAN: return toBoolean(value, fieldName); case BYTE: return toByte(value, fieldName); case CHAR: return toCharacter(value, fieldName); case DATE: return toDate(value, dateFormat, fieldName); case DOUBLE: return toDouble(value, fieldName); case FLOAT: return toFloat(value, fieldName); case INT: return toInteger(value, fieldName); case LONG: return toLong(value, fieldName); case SHORT: return toShort(value, fieldName); case STRING: return toString(value, () -> getDateFormat(dataType.getFieldType(), dateFormat, timeFormat, timestampFormat)); case TIME: return toTime(value, timeFormat, fieldName); case TIMESTAMP: return toTimestamp(value, timestampFormat, fieldName); case ARRAY: return toArray(value, fieldName); case MAP: return toMap(value, fieldName); case RECORD: final RecordDataType recordType = (RecordDataType) dataType; final RecordSchema childSchema = recordType.getChildSchema(); return toRecord(value, childSchema, fieldName); case CHOICE: { final ChoiceDataType choiceDataType = (ChoiceDataType) dataType; final DataType chosenDataType = chooseDataType(value, choiceDataType); if (chosenDataType == null) { throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " for field " + fieldName + " to any of the following available Sub-Types for a Choice: " + choiceDataType.getPossibleSubTypes()); } return convertType(value, chosenDataType, fieldName); } } return null; } public static boolean isCompatibleDataType(final Object value, final DataType dataType) { switch (dataType.getFieldType()) { case ARRAY: return isArrayTypeCompatible(value); case BIGINT: return isBigIntTypeCompatible(value); case BOOLEAN: return isBooleanTypeCompatible(value); case BYTE: return isByteTypeCompatible(value); case CHAR: return isCharacterTypeCompatible(value); case DATE: return isDateTypeCompatible(value, dataType.getFormat()); case DOUBLE: return isDoubleTypeCompatible(value); case FLOAT: return isFloatTypeCompatible(value); case INT: return isIntegerTypeCompatible(value); case LONG: return isLongTypeCompatible(value); case RECORD: return isRecordTypeCompatible(value); case SHORT: return isShortTypeCompatible(value); case TIME: return isTimeTypeCompatible(value, dataType.getFormat()); case TIMESTAMP: return isTimestampTypeCompatible(value, dataType.getFormat()); case STRING: return isStringTypeCompatible(value); case MAP: return isMapTypeCompatible(value); case CHOICE: { final DataType chosenDataType = chooseDataType(value, (ChoiceDataType) dataType); return chosenDataType != null; } } return false; } public static DataType chooseDataType(final Object value, final ChoiceDataType choiceType) { for (final DataType subType : choiceType.getPossibleSubTypes()) { if (isCompatibleDataType(value, subType)) { if (subType.getFieldType() == RecordFieldType.CHOICE) { return chooseDataType(value, (ChoiceDataType) subType); } return subType; } } return null; } public static Record toRecord(final Object value, final RecordSchema recordSchema, final String fieldName) { if (value == null) { return null; } if (value instanceof Record) { return ((Record) value); } if (value instanceof Map) { if (recordSchema == null) { throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Record for field " + fieldName + " because the value is a Map but no Record Schema was provided"); } final Map<?, ?> map = (Map<?, ?>) value; final Map<String, Object> coercedValues = new HashMap<>(); for (final Map.Entry<?, ?> entry : map.entrySet()) { final Object keyValue = entry.getKey(); if (keyValue == null) { continue; } final String key = keyValue.toString(); final Optional<DataType> desiredTypeOption = recordSchema.getDataType(key); if (!desiredTypeOption.isPresent()) { continue; } final Object rawValue = entry.getValue(); final Object coercedValue = convertType(rawValue, desiredTypeOption.get(), fieldName); coercedValues.put(key, coercedValue); } return new MapRecord(recordSchema, coercedValues); } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Record for field " + fieldName); } public static boolean isRecordTypeCompatible(final Object value) { return value != null && value instanceof Record; } public static Object[] toArray(final Object value, final String fieldName) { if (value == null) { return null; } if (value instanceof Object[]) { return (Object[]) value; } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Object Array for field " + fieldName); } public static boolean isArrayTypeCompatible(final Object value) { return value != null && value instanceof Object[]; } @SuppressWarnings("unchecked") public static Map<String, Object> toMap(final Object value, final String fieldName) { if (value == null) { return null; } if (value instanceof Map) { final Map<?, ?> original = (Map<?, ?>) value; boolean keysAreStrings = true; for (final Object key : original.keySet()) { if (!(key instanceof String)) { keysAreStrings = false; } } if (keysAreStrings) { return (Map<String, Object>) value; } final Map<String, Object> transformed = new HashMap<>(); for (final Map.Entry<?, ?> entry : original.entrySet()) { final Object key = entry.getKey(); if (key == null) { transformed.put(null, entry.getValue()); } else { transformed.put(key.toString(), entry.getValue()); } } return transformed; } if (value instanceof Record) { final Record record = (Record) value; final RecordSchema recordSchema = record.getSchema(); if (recordSchema == null) { throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type Record to Map for field " + fieldName + " because Record does not have an associated Schema"); } final Map<String, Object> map = new HashMap<>(); for (final String recordFieldName : recordSchema.getFieldNames()) { map.put(recordFieldName, record.getValue(recordFieldName)); } return map; } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Map for field " + fieldName); } public static boolean isMapTypeCompatible(final Object value) { return value != null && value instanceof Map; } public static String toString(final Object value, final Supplier<DateFormat> format) { if (value == null) { return null; } if (value instanceof String) { return (String) value; } if (format == null && value instanceof java.util.Date) { return String.valueOf(((java.util.Date) value).getTime()); } if (value instanceof java.util.Date) { return formatDate((java.util.Date) value, format); } return value.toString(); } private static String formatDate(final java.util.Date date, final Supplier<DateFormat> formatSupplier) { final DateFormat dateFormat = formatSupplier.get(); if (dateFormat == null) { return String.valueOf((date).getTime()); } return dateFormat.format(date); } public static String toString(final Object value, final String format) { if (value == null) { return null; } if (value instanceof String) { return (String) value; } if (format == null && value instanceof java.util.Date) { return String.valueOf(((java.util.Date) value).getTime()); } if (value instanceof java.sql.Date) { return getDateFormat(format).format((java.util.Date) value); } if (value instanceof java.sql.Time) { return getDateFormat(format).format((java.util.Date) value); } if (value instanceof java.sql.Timestamp) { return getDateFormat(format).format((java.util.Date) value); } if (value instanceof java.util.Date) { return getDateFormat(format).format((java.util.Date) value); } if (value instanceof Object[]) { return Arrays.toString((Object[]) value); } return value.toString(); } public static boolean isStringTypeCompatible(final Object value) { return value != null; } public static java.sql.Date toDate(final Object value, final Supplier<DateFormat> format, final String fieldName) { if (value == null) { return null; } if (value instanceof Date) { return (Date) value; } if (value instanceof Number) { final long longValue = ((Number) value).longValue(); return new Date(longValue); } if (value instanceof String) { try { final String string = ((String) value).trim(); if (string.isEmpty()) { return null; } if (format == null) { return new Date(Long.parseLong(string)); } final DateFormat dateFormat = format.get(); if (dateFormat == null) { return new Date(Long.parseLong(string)); } final java.util.Date utilDate = dateFormat.parse(string); return new Date(utilDate.getTime()); } catch (final ParseException | NumberFormatException e) { throw new IllegalTypeConversionException("Could not convert value [" + value + "] of type java.lang.String to Date because the value is not in the expected date format: " + format + " for field " + fieldName); } } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Date for field " + fieldName); } public static boolean isDateTypeCompatible(final Object value, final String format) { if (value == null) { return false; } if (value instanceof java.util.Date || value instanceof Number) { return true; } if (value instanceof String) { if (format == null) { return isInteger((String) value); } try { getDateFormat(format).parse((String) value); return true; } catch (final ParseException e) { return false; } } return false; } private static boolean isInteger(final String value) { if (value == null || value.isEmpty()) { return false; } for (int i = 0; i < value.length(); i++) { if (!Character.isDigit(value.charAt(i))) { return false; } } return true; } public static Time toTime(final Object value, final Supplier<DateFormat> format, final String fieldName) { if (value == null) { return null; } if (value instanceof Time) { return (Time) value; } if (value instanceof Number) { final long longValue = ((Number) value).longValue(); return new Time(longValue); } if (value instanceof String) { try { final String string = ((String) value).trim(); if (string.isEmpty()) { return null; } if (format == null) { return new Time(Long.parseLong(string)); } final DateFormat dateFormat = format.get(); if (dateFormat == null) { return new Time(Long.parseLong(string)); } final java.util.Date utilDate = dateFormat.parse(string); return new Time(utilDate.getTime()); } catch (final ParseException e) { throw new IllegalTypeConversionException("Could not convert value [" + value + "] of type java.lang.String to Time for field " + fieldName + " because the value is not in the expected date format: " + format); } } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Time for field " + fieldName); } public static DateFormat getDateFormat(final String format) { if (format == null) { return null; } final DateFormat df = new SimpleDateFormat(format); df.setTimeZone(gmt); return df; } public static boolean isTimeTypeCompatible(final Object value, final String format) { return isDateTypeCompatible(value, format); } public static Timestamp toTimestamp(final Object value, final Supplier<DateFormat> format, final String fieldName) { if (value == null) { return null; } if (value instanceof Timestamp) { return (Timestamp) value; } if (value instanceof Number) { final long longValue = ((Number) value).longValue(); return new Timestamp(longValue); } if (value instanceof String) { try { final String string = ((String) value).trim(); if (string.isEmpty()) { return null; } if (format == null) { return new Timestamp(Long.parseLong(string)); } final DateFormat dateFormat = format.get(); if (dateFormat == null) { return new Timestamp(Long.parseLong(string)); } final java.util.Date utilDate = dateFormat.parse(string); return new Timestamp(utilDate.getTime()); } catch (final ParseException e) { throw new IllegalTypeConversionException("Could not convert value [" + value + "] of type java.lang.String to Timestamp for field " + fieldName + " because the value is not in the expected date format: " + format); } } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Timestamp for field " + fieldName); } public static boolean isTimestampTypeCompatible(final Object value, final String format) { return isDateTypeCompatible(value, format); } public static BigInteger toBigInt(final Object value, final String fieldName) { if (value == null) { return null; } if (value instanceof BigInteger) { return (BigInteger) value; } if (value instanceof Long) { return BigInteger.valueOf((Long) value); } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to BigInteger for field " + fieldName); } public static boolean isBigIntTypeCompatible(final Object value) { return value == null && (value instanceof BigInteger || value instanceof Long); } public static Boolean toBoolean(final Object value, final String fieldName) { if (value == null) { return null; } if (value instanceof Boolean) { return (Boolean) value; } if (value instanceof String) { final String string = (String) value; if (string.equalsIgnoreCase("true")) { return Boolean.TRUE; } else if (string.equalsIgnoreCase("false")) { return Boolean.FALSE; } } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Boolean for field " + fieldName); } public static boolean isBooleanTypeCompatible(final Object value) { if (value == null) { return false; } if (value instanceof Boolean) { return true; } if (value instanceof String) { final String string = (String) value; return string.equalsIgnoreCase("true") || string.equalsIgnoreCase("false"); } return false; } public static Double toDouble(final Object value, final String fieldName) { if (value == null) { return null; } if (value instanceof Number) { return ((Number) value).doubleValue(); } if (value instanceof String) { return Double.parseDouble((String) value); } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Double for field " + fieldName); } public static boolean isDoubleTypeCompatible(final Object value) { return isNumberTypeCompatible(value, s -> Double.parseDouble(s)); } private static boolean isNumberTypeCompatible(final Object value, final Consumer<String> stringValueVerifier) { if (value == null) { return false; } if (value instanceof Number) { return true; } if (value instanceof String) { try { stringValueVerifier.accept((String) value); return true; } catch (final NumberFormatException nfe) { return false; } } return false; } public static Float toFloat(final Object value, final String fieldName) { if (value == null) { return null; } if (value instanceof Number) { return ((Number) value).floatValue(); } if (value instanceof String) { return Float.parseFloat((String) value); } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Float for field " + fieldName); } public static boolean isFloatTypeCompatible(final Object value) { return isNumberTypeCompatible(value, s -> Float.parseFloat(s)); } public static Long toLong(final Object value, final String fieldName) { if (value == null) { return null; } if (value instanceof Number) { return ((Number) value).longValue(); } if (value instanceof String) { return Long.parseLong((String) value); } if (value instanceof java.util.Date) { return ((java.util.Date) value).getTime(); } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Long for field " + fieldName); } public static boolean isLongTypeCompatible(final Object value) { if (value == null) { return false; } if (value instanceof Number) { return true; } if (value instanceof java.util.Date) { return true; } if (value instanceof String) { try { Long.parseLong((String) value); return true; } catch (final NumberFormatException nfe) { return false; } } return false; } public static Integer toInteger(final Object value, final String fieldName) { if (value == null) { return null; } if (value instanceof Number) { return ((Number) value).intValue(); } if (value instanceof String) { return Integer.parseInt((String) value); } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Integer for field " + fieldName); } public static boolean isIntegerTypeCompatible(final Object value) { return isNumberTypeCompatible(value, s -> Integer.parseInt(s)); } public static Short toShort(final Object value, final String fieldName) { if (value == null) { return null; } if (value instanceof Number) { return ((Number) value).shortValue(); } if (value instanceof String) { return Short.parseShort((String) value); } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Short for field " + fieldName); } public static boolean isShortTypeCompatible(final Object value) { return isNumberTypeCompatible(value, s -> Short.parseShort(s)); } public static Byte toByte(final Object value, final String fieldName) { if (value == null) { return null; } if (value instanceof Number) { return ((Number) value).byteValue(); } if (value instanceof String) { return Byte.parseByte((String) value); } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Byte for field " + fieldName); } public static boolean isByteTypeCompatible(final Object value) { return isNumberTypeCompatible(value, s -> Byte.parseByte(s)); } public static Character toCharacter(final Object value, final String fieldName) { if (value == null) { return null; } if (value instanceof Character) { return ((Character) value); } if (value instanceof CharSequence) { final CharSequence charSeq = (CharSequence) value; if (charSeq.length() == 0) { throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Character because it has a length of 0 for field " + fieldName); } return charSeq.charAt(0); } throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Character for field " + fieldName); } public static boolean isCharacterTypeCompatible(final Object value) { return value != null && (value instanceof Character || (value instanceof CharSequence && ((CharSequence) value).length() > 0)); } public static RecordSchema merge(final RecordSchema thisSchema, final RecordSchema otherSchema) { if (thisSchema == null) { return otherSchema; } if (otherSchema == null) { return thisSchema; } final List<RecordField> otherFields = otherSchema.getFields(); if (otherFields.isEmpty()) { return thisSchema; } final List<RecordField> thisFields = thisSchema.getFields(); if (thisFields.isEmpty()) { return otherSchema; } final Map<String, Integer> fieldIndices = new HashMap<>(); final List<RecordField> fields = new ArrayList<>(); for (int i = 0; i < thisFields.size(); i++) { final RecordField field = thisFields.get(i); final Integer index = Integer.valueOf(i); fieldIndices.put(field.getFieldName(), index); for (final String alias : field.getAliases()) { fieldIndices.put(alias, index); } fields.add(field); } for (final RecordField otherField : otherFields) { Integer fieldIndex = fieldIndices.get(otherField.getFieldName()); // Find the field in 'thisSchema' that corresponds to 'otherField', // if one exists. if (fieldIndex == null) { for (final String alias : otherField.getAliases()) { fieldIndex = fieldIndices.get(alias); if (fieldIndex != null) { break; } } } // If there is no field with the same name then just add 'otherField'. if (fieldIndex == null) { fields.add(otherField); continue; } // Merge the two fields, if necessary final RecordField thisField = fields.get(fieldIndex); if (isMergeRequired(thisField, otherField)) { final RecordField mergedField = merge(thisField, otherField); fields.set(fieldIndex, mergedField); } } return new SimpleRecordSchema(fields); } private static boolean isMergeRequired(final RecordField thisField, final RecordField otherField) { if (!thisField.getDataType().equals(otherField.getDataType())) { return true; } if (!thisField.getAliases().equals(otherField.getAliases())) { return true; } if (!Objects.equals(thisField.getDefaultValue(), otherField.getDefaultValue())) { return true; } return false; } public static RecordField merge(final RecordField thisField, final RecordField otherField) { final String fieldName = thisField.getFieldName(); final Set<String> aliases = new HashSet<>(); aliases.addAll(thisField.getAliases()); aliases.addAll(otherField.getAliases()); final Object defaultValue; if (thisField.getDefaultValue() == null && otherField.getDefaultValue() != null) { defaultValue = otherField.getDefaultValue(); } else { defaultValue = thisField.getDefaultValue(); } final DataType dataType; if (thisField.getDataType().equals(otherField.getDataType())) { dataType = thisField.getDataType(); } else { dataType = RecordFieldType.CHOICE.getChoiceDataType(thisField.getDataType(), otherField.getDataType()); } return new RecordField(fieldName, dataType, defaultValue, aliases); } public static boolean isScalarValue(final DataType dataType, final Object value) { final RecordFieldType fieldType = dataType.getFieldType(); final RecordFieldType chosenType; if (fieldType == RecordFieldType.CHOICE) { final ChoiceDataType choiceDataType = (ChoiceDataType) dataType; final DataType chosenDataType = chooseDataType(value, choiceDataType); if (chosenDataType == null) { return false; } chosenType = chosenDataType.getFieldType(); } else { chosenType = fieldType; } switch (chosenType) { case ARRAY: case MAP: case RECORD: return false; } return true; } }