/* * Copyright 2016 Hortonworks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.hortonworks.registries.schemaregistry.avro; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.hortonworks.registries.schemaregistry.SchemaResolver; import com.hortonworks.registries.schemaregistry.SchemaVersionKey; import com.hortonworks.registries.schemaregistry.SchemaVersionRetriever; import com.hortonworks.registries.schemaregistry.errors.CyclicSchemaDependencyException; import com.hortonworks.registries.schemaregistry.errors.InvalidSchemaException; import com.hortonworks.registries.schemaregistry.errors.SchemaNotFoundException; import org.apache.avro.Schema; import org.codehaus.jackson.node.NullNode; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import static org.apache.avro.Schema.Type.RECORD; /** * Avro implementation of {@link SchemaResolver} which resolves all the dependent schemas and builds an effective schema. * * List of dependent schemas can be added with `includeSchemas` attribute in avro schema. This contains name and version * of each schema as mentioned below. * - name : unique name of that schema in schema registry which is {@link com.hortonworks.registries.schemaregistry.SchemaMetadata#name}. * - version : version number of the schema being used which is {@link SchemaVersionKey#version}. * When this property is not mentioned then it is considered to be latest version of that schema when * it builds effective schema. * * Example of Avro schema containing dependencies is given below. * * account schema: * <pre> {@code * { "name": "account", "namespace": "com.hortonworks.types", "includeSchemas": [ { "name": "utils", "version": "2", } ], "type": "record", "fields": [ { "name": "name", "type": "string" }, { "name": "id", "type": "com.hortonworks.datatypes.uuid" } ] } * }</pre> * * dependent utils schema: * <pre> {@code * { "name": "uuid", "type": "record", "namespace": "com.hortonworks.datatypes", "doc": "A Universally Unique Identifier, in canonical form in lowercase. This is generated from java.util.UUID Example: de305d54-75b4-431b-adb2-eb6b9e546014", "fields": [ { "name": "value", "type": "string", "default": "" } ] } * * }</pre> * */ public class AvroSchemaResolver implements SchemaResolver { private enum SchemaParsingState { PARSING, PARSED } private final SchemaVersionRetriever schemaVersionRetriever; public AvroSchemaResolver(SchemaVersionRetriever schemaVersionRetriever) { this.schemaVersionRetriever = schemaVersionRetriever; } @Override public String resolveSchema(SchemaVersionKey schemaVersionKey) throws InvalidSchemaException, SchemaNotFoundException { Map<String, SchemaParsingState> schemaParsingStates = new HashMap<>(); schemaParsingStates.put(schemaVersionKey.getSchemaName(), SchemaParsingState.PARSING); return getResultantSchema(schemaVersionKey, schemaParsingStates); } public String resolveSchema(String schemaText) throws InvalidSchemaException, SchemaNotFoundException { Map<String, SchemaParsingState> schemaParsingStates = new HashMap<>(); return getResultantSchema(schemaText, schemaParsingStates); } private String getResultantSchema(SchemaVersionKey schemaVersionKey, Map<String, SchemaParsingState> schemaParsingStates) throws InvalidSchemaException, SchemaNotFoundException { String schemaText = schemaVersionRetriever.retrieveSchemaVersion(schemaVersionKey).getSchemaText(); return getResultantSchema(schemaText, schemaParsingStates); } private String getResultantSchema(String schemaText, Map<String, SchemaParsingState> schemaParsingStates) throws InvalidSchemaException, SchemaNotFoundException { Map<String, Schema> complexTypes = traverseIncludedSchemaTypes(schemaText, schemaParsingStates); Schema.Parser parser = new Schema.Parser(); parser.addTypes(complexTypes); Schema schema = parser.parse(schemaText); Set<String> visitingTypes = new HashSet<>(); Schema updatedSchema = handleUnionFieldsWithNull(schema, visitingTypes); return (schema == updatedSchema && complexTypes.isEmpty()) ? schemaText : updatedSchema.toString(); } public Schema handleUnionFieldsWithNull(Schema schema, Set<String> visitingTypes) { if (visitingTypes.contains(schema.getFullName())) { return schema; } visitingTypes.add(schema.getFullName()); Schema updatedRootSchema = schema; if (schema.getType() == RECORD) { List<Schema.Field> fields = updatedRootSchema.getFields(); List<Schema.Field> updatedFields = new ArrayList<>(fields.size()); boolean hasUnionType = false; for (Schema.Field field : fields) { Schema fieldSchema = field.schema(); // check for union boolean currentFieldTypeIsUnion = fieldSchema.getType() == Schema.Type.UNION; if (currentFieldTypeIsUnion) { // check for the fields with in union // if it is union and first type is null then set default value as null if (fieldSchema.getTypes().get(0).getType() == Schema.Type.NULL) { hasUnionType = true; } } else { // go through non-union fields, which may be records Schema updatedFieldSchema = handleUnionFieldsWithNull(fieldSchema, visitingTypes); if (fieldSchema != updatedFieldSchema) { hasUnionType = true; } } updatedFields.add(new Schema.Field(field.name(), fieldSchema, field.doc(), currentFieldTypeIsUnion ? NullNode.getInstance() : null, field.order())); } if (hasUnionType) { updatedRootSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError()); updatedRootSchema.setFields(updatedFields); for (String alias : schema.getAliases()) { updatedRootSchema.addAlias(alias); } for (Map.Entry<String, org.codehaus.jackson.JsonNode> nodeEntry : schema.getJsonProps().entrySet()) { updatedRootSchema.addProp(nodeEntry.getKey(), nodeEntry.getValue()); } } } return updatedRootSchema; } private Schema updateUnionFields(Schema schema) { Schema updatedSchema = schema; List<Schema.Field> fields = schema.getFields(); boolean hasUnionType = false; List<Schema.Field> updatedFields = new ArrayList<>(fields.size()); for (Schema.Field field : fields) { Schema fieldSchema = field.schema(); Schema.Field updatedField = field; // if it is union and first type is null then set default value as null if (fieldSchema.getType() == Schema.Type.UNION && fieldSchema.getTypes().get(0).getType() == Schema.Type.NULL) { updatedField = new Schema.Field(field.name(), fieldSchema, field.doc(), NullNode.getInstance(), field.order()); hasUnionType = true; } updatedFields.add(updatedField); } if (hasUnionType) { updatedSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError()); updatedSchema.setFields(updatedFields); for (String alias : schema.getAliases()) { updatedSchema.addAlias(alias); } for (Map.Entry<String, org.codehaus.jackson.JsonNode> nodeEntry : schema.getJsonProps().entrySet()) { updatedSchema.addProp(nodeEntry.getKey(), nodeEntry.getValue()); } } return updatedSchema; } private Map<String, Schema> traverseIncludedSchemaTypes(String schemaText, Map<String, SchemaParsingState> schemaParsingStates) throws InvalidSchemaException, SchemaNotFoundException { List<SchemaVersionKey> includedSchemaVersions = getIncludedSchemaVersions(schemaText); if (includedSchemaVersions == null || includedSchemaVersions.isEmpty()) { return Collections.emptyMap(); } Map<String, Schema> schemaTypes = new HashMap<>(); for (SchemaVersionKey schemaVersionKey : includedSchemaVersions) { Map<String, Schema> collectedSchemas = collectSchemaTypes(schemaVersionKey, schemaParsingStates); if (collectedSchemas != null) { schemaTypes.putAll(collectedSchemas); } } return schemaTypes; } private Map<String, Schema> collectSchemaTypes(SchemaVersionKey schemaVersionKey, Map<String, SchemaParsingState> schemaParsingStates) throws SchemaNotFoundException, InvalidSchemaException { String schemaName = schemaVersionKey.getSchemaName(); SchemaParsingState schemaParsingState = schemaParsingStates.putIfAbsent(schemaName, SchemaParsingState.PARSING); // if it is already parsed then the respective schema types would have been already collected. if (SchemaParsingState.PARSED == schemaParsingState) { return null; } // if it is in parsing state earlier and it is visted again then ther eis circular dependency!! if (SchemaParsingState.PARSING == schemaParsingState) { throw new CyclicSchemaDependencyException("Cyclic dependency of schema imports with schema [" + schemaName + "]"); } // this schema is not yet parsed till now if (schemaParsingState == null) { Schema.Parser parser = new Schema.Parser(); Schema schema = parser.parse(getResultantSchema(schemaVersionKey, schemaParsingStates)); Map<String, Schema> complexTypes = new HashMap<>(); collectComplexTypes(schema, complexTypes); schemaParsingStates.put(schemaName, SchemaParsingState.PARSED); return complexTypes; } throw new IllegalStateException("Schema parsing with schema version " + schemaVersionKey + " is in invalid state!!"); } private void collectComplexTypes(Schema schema, Map<String, Schema> complexTypes) { switch (schema.getType()) { case RECORD: complexTypes.put(schema.getFullName(), schema); List<Schema.Field> fields = schema.getFields(); for (Schema.Field field : fields) { Schema fieldSchema = field.schema(); collectComplexTypes(fieldSchema, complexTypes); } break; case ARRAY: complexTypes.put(schema.getFullName(), schema); collectComplexTypes(schema.getElementType(), complexTypes); break; case UNION: complexTypes.put(schema.getFullName(), schema); List<Schema> unionSchemas = schema.getTypes(); for (Schema schemaEntry : unionSchemas) { collectComplexTypes(schemaEntry, complexTypes); } break; case MAP: complexTypes.put(schema.getFullName(), schema); collectComplexTypes(schema.getValueType(), complexTypes); break; default: } } private List<SchemaVersionKey> getIncludedSchemaVersions(String schemaText) throws InvalidSchemaException { JsonNode jsonNode = null; try { jsonNode = new ObjectMapper().readTree(schemaText); } catch (IOException e) { throw new InvalidSchemaException(e); } JsonNode includeSchemaNodes = jsonNode.get("includeSchemas"); List<SchemaVersionKey> includedSchemaVersions = new ArrayList<>(); if (includeSchemaNodes != null) { if (!includeSchemaNodes.isArray()) { throw new InvalidSchemaException("includeSchemas should be an array of strings"); } for (JsonNode includeSchema : includeSchemaNodes) { String name = includeSchema.get("name").asText(); JsonNode versionNode = includeSchema.get("version"); int version = versionNode != null ? versionNode.asInt() : SchemaVersionKey.LATEST_VERSION; includedSchemaVersions.add(new SchemaVersionKey(name, version)); } } return includedSchemaVersions; } }