/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.trevni.avro; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.util.IdentityHashMap; import org.apache.trevni.ColumnMetaData; import org.apache.trevni.ValueType; import org.apache.trevni.TrevniRuntimeException; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; /** Utility that computes the column layout of a schema. */ class AvroColumnator { private Schema schema; private List<ColumnMetaData> columns = new ArrayList<ColumnMetaData>(); private List<Integer> arrayWidths = new ArrayList<Integer>(); public AvroColumnator(Schema schema) { this.schema = schema; columnize(null, schema, null, false); } /** Return columns for the schema. */ public ColumnMetaData[] getColumns() { return columns.toArray(new ColumnMetaData[columns.size()]); } /** Return array giving the number of columns immediately following each * column that are descendents of that column. */ public int[] getArrayWidths() { int[] result = new int[arrayWidths.size()]; int i = 0; for (Integer width : arrayWidths) result[i++] = width; return result; } private Map<Schema,Schema> seen = new IdentityHashMap<Schema,Schema>(); private void columnize(String path, Schema s, ColumnMetaData parent, boolean isArray) { if (isSimple(s)) { if (path == null) path = s.getFullName(); addColumn(path, simpleValueType(s), parent, isArray); return; } if (seen.containsKey(s)) // catch recursion throw new TrevniRuntimeException("Cannot shred recursive schemas: "+s); seen.put(s, s); switch (s.getType()) { case MAP: path = path == null ? ">" : path+">"; int start = columns.size(); ColumnMetaData p = addColumn(path, ValueType.NULL, parent, true); addColumn(p(path,"key", ""), ValueType.STRING, p, false); columnize(p(path,"value", ""), s.getValueType(), p, false); arrayWidths.set(start, columns.size()-start); // fixup with actual width break; case RECORD: for (Field field : s.getFields()) // flatten fields to columns columnize(p(path, field.name(), "#"), field.schema(), parent, isArray); break; case ARRAY: path = path == null ? "[]" : path+"[]"; addArrayColumn(path, s.getElementType(), parent); break; case UNION: for (Schema branch : s.getTypes()) // array per non-null branch if (branch.getType() != Schema.Type.NULL) addArrayColumn(p(path, branch, "/"), branch, parent); break; default: throw new TrevniRuntimeException("Unknown schema: "+s); } seen.remove(s); } private String p(String parent, Schema child, String sep) { if (child.getType() == Schema.Type.UNION) return parent; return p(parent, child.getFullName(), sep); } private String p(String parent, String child, String sep) { return parent == null ? child : parent + sep + child; } private ColumnMetaData addColumn(String path, ValueType type, ColumnMetaData parent, boolean isArray) { ColumnMetaData column = new ColumnMetaData(path, type); if (parent != null) column.setParent(parent); column.isArray(isArray); columns.add(column); arrayWidths.add(1); // placeholder return column; } private void addArrayColumn(String path, Schema element, ColumnMetaData parent) { if (path == null) path = element.getFullName(); if (isSimple(element)) { // optimize simple arrays addColumn(path, simpleValueType(element), parent, true); return; } // complex array: insert a parent column with lengths int start = columns.size(); ColumnMetaData array = addColumn(path, ValueType.NULL, parent, true); columnize(path, element, array, false); arrayWidths.set(start, columns.size()-start); // fixup with actual width } static boolean isSimple(Schema s) { switch (s.getType()) { case NULL: case BOOLEAN: case INT: case LONG: case FLOAT: case DOUBLE: case BYTES: case STRING: case ENUM: case FIXED: return true; default: return false; } } private ValueType simpleValueType(Schema s) { switch (s.getType()) { case NULL: return ValueType.NULL; case BOOLEAN:return ValueType.BOOLEAN; case INT: return ValueType.INT; case LONG: return ValueType.LONG; case FLOAT: return ValueType.FLOAT; case DOUBLE: return ValueType.DOUBLE; case BYTES: return ValueType.BYTES; case STRING: return ValueType.STRING; case ENUM: return ValueType.INT; case FIXED: return ValueType.BYTES; default: throw new TrevniRuntimeException("Unknown schema: "+s); } } }