/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.converter.filter;

import java.util.List;
import java.util.Map;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;

import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;


/**
 * A class that removes specific fields from a (possibly recursive) Avro schema.
 *
 * <p>
 * An instance is a node in a tree built from the dot-separated field names: each key of {@link #children}
 * is one name component, and its value handles the components below it. A field is removed when its name
 * maps to a child that has no children of its own.
 * </p>
 *
 * <p>
 * Notes on behavior that follows from the implementation:
 * </p>
 * <ul>
 *   <li>If both a field and one of its sub-fields are listed (e.g., "header,header.memberId"), the "header"
 *       node ends up with a child, so only "header.memberId" is removed and "header" itself is kept.</li>
 *   <li>Within one call to {@link #removeFields(Schema)}, rewritten records are cached by full name. A record
 *       type that occurs at several places in the schema is rewritten at its first occurrence, and every later
 *       occurrence reuses that result.</li>
 *   <li>Rebuilt records and fields keep their doc, default value, sort order, aliases and string-valued
 *       custom properties. Rebuilt arrays and maps keep their string-valued custom properties.</li>
 * </ul>
 *
 * @author Ziyang Liu
 */
public class AvroSchemaFieldRemover {

  private static final Splitter SPLITTER_ON_COMMA = Splitter.on(',').trimResults().omitEmptyStrings();
  private static final Splitter SPLITTER_ON_DOT = Splitter.on('.').trimResults().omitEmptyStrings();

  // A remover without children: it removes nothing from the record it is applied to, but it still rebuilds
  // the schema so that nested references to already-rewritten records are resolved through the cache.
  private static final AvroSchemaFieldRemover DO_NOTHING_INSTANCE = new AvroSchemaFieldRemover();

  private final Map<String, AvroSchemaFieldRemover> children = Maps.newHashMap();

  /**
   * @param fieldNames Field names to be removed from the Avro schema. Contains comma-separated fully-qualified
   * field names, e.g., "header.memberId,mobileHeader.osVersion". Whitespace around names is trimmed and empty
   * entries are ignored.
   * @throws NullPointerException if {@code fieldNames} is null.
   */
  public AvroSchemaFieldRemover(String fieldNames) {
    Preconditions.checkNotNull(fieldNames, "fieldNames must not be null");
    this.addChildren(fieldNames);
  }

  private AvroSchemaFieldRemover() {
    this("");
  }

  /**
   * Parses the comma-separated list and registers every non-empty dotted name in the tree.
   */
  private void addChildren(String fieldNames) {
    for (String fieldName : SPLITTER_ON_COMMA.splitToList(fieldNames)) {
      List<String> fieldNameComponents = SPLITTER_ON_DOT.splitToList(fieldName);
      if (!fieldNameComponents.isEmpty()) {
        this.addChildren(fieldNameComponents, 0);
      }
    }
  }

  /**
   * Registers the component at {@code level} as a child of this node and recurses into the remaining
   * components, if any.
   */
  private void addChildren(List<String> fieldNameComponents, int level) {
    Preconditions.checkArgument(fieldNameComponents.size() > level);

    String component = fieldNameComponents.get(level);
    AvroSchemaFieldRemover child = this.children.get(component);
    if (child == null) {
      child = new AvroSchemaFieldRemover();
      this.children.put(component, child);
    }

    if (level < fieldNameComponents.size() - 1) {
      child.addChildren(fieldNameComponents, level + 1);
    }
  }

  /**
   * @param schema The Avro schema where the specified fields should be removed from.
   * @return A new Avro schema with the specified fields removed.
   * @throws NullPointerException if {@code schema} is null.
   */
  public Schema removeFields(Schema schema) {
    Preconditions.checkNotNull(schema, "schema must not be null");
    return removeFields(schema, Maps.<String, Schema> newHashMap());
  }

  /**
   * Dispatches on the schema type. Records, unions, arrays and maps are rebuilt; every other type is
   * returned as is.
   *
   * @param schemaMap records already rewritten (or being rewritten) during this call, keyed by full name.
   */
  private Schema removeFields(Schema schema, Map<String, Schema> schemaMap) {
    switch (schema.getType()) {
      case RECORD:
        if (schemaMap.containsKey(schema.getFullName())) {
          return schemaMap.get(schema.getFullName());
        }
        return this.removeFieldsFromRecords(schema, schemaMap);
      case UNION:
        return this.removeFieldsFromUnion(schema, schemaMap);
      case ARRAY:
        return this.removeFieldsFromArray(schema, schemaMap);
      case MAP:
        return this.removeFieldsFromMap(schema, schemaMap);
      default:
        return schema;
    }
  }

  /**
   * Rebuilds a record without the fields selected for removal, recursing into the schemas of the fields
   * that are kept.
   */
  private Schema removeFieldsFromRecords(Schema schema, Map<String, Schema> schemaMap) {

    Schema newRecord = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
    for (String alias : schema.getAliases()) {
      newRecord.addAlias(alias);
    }
    copyStringProps(schema, newRecord);

    // Put an incomplete schema into schemaMap to avoid re-processing a recursive field.
    // The fields in the incomplete schema will be populated once the current schema is completely processed.
    schemaMap.put(schema.getFullName(), newRecord);

    List<Field> newFields = Lists.newArrayList();
    for (Field field : schema.getFields()) {
      if (this.shouldRemove(field)) {
        continue;
      }

      // A field that is an intermediate component of a specified name is processed by its own child node;
      // any other field is rebuilt without removing anything directly under it.
      AvroSchemaFieldRemover child = this.children.get(field.name());
      AvroSchemaFieldRemover remover = child != null ? child : DO_NOTHING_INSTANCE;

      // Pass the sort order explicitly; the constructor without it falls back to the default order.
      Field newField = new Field(field.name(), remover.removeFields(field.schema(), schemaMap), field.doc(),
          field.defaultValue(), field.order());
      for (String alias : field.aliases()) {
        newField.addAlias(alias);
      }
      copyStringProps(field, newField);

      newFields.add(newField);
    }

    newRecord.setFields(newFields);
    return newRecord;
  }

  private boolean shouldRemove(Field field) {
    // A field should be removed if it is the last component in a specified field name,
    // e.g., "memberId" in "header.memberId".
    AvroSchemaFieldRemover child = this.children.get(field.name());
    return child != null && child.children.isEmpty();
  }

  /**
   * Rebuilds a union by processing each of its member types in order.
   */
  private Schema removeFieldsFromUnion(Schema schema, Map<String, Schema> schemaMap) {
    List<Schema> newUnion = Lists.newArrayList();
    for (Schema unionType : schema.getTypes()) {
      newUnion.add(this.removeFields(unionType, schemaMap));
    }
    return Schema.createUnion(newUnion);
  }

  /**
   * Rebuilds an array schema around its processed element type.
   */
  private Schema removeFieldsFromArray(Schema schema, Map<String, Schema> schemaMap) {
    Schema newArray = Schema.createArray(this.removeFields(schema.getElementType(), schemaMap));
    copyStringProps(schema, newArray);
    return newArray;
  }

  /**
   * Rebuilds a map schema around its processed value type.
   */
  private Schema removeFieldsFromMap(Schema schema, Map<String, Schema> schemaMap) {
    Schema newMap = Schema.createMap(this.removeFields(schema.getValueType(), schemaMap));
    copyStringProps(schema, newMap);
    return newMap;
  }

  /**
   * Copies the string-valued custom properties of one schema onto another.
   * Properties with non-string values are not returned by {@code getProps()} and are therefore not copied.
   */
  private static void copyStringProps(Schema from, Schema to) {
    for (Map.Entry<String, String> prop : from.getProps().entrySet()) {
      to.addProp(prop.getKey(), prop.getValue());
    }
  }

  /**
   * Copies the string-valued custom properties of one field onto another.
   * Properties with non-string values are not returned by {@code getProps()} and are therefore not copied.
   */
  private static void copyStringProps(Field from, Field to) {
    for (Map.Entry<String, String> prop : from.getProps().entrySet()) {
      to.addProp(prop.getKey(), prop.getValue());
    }
  }
}