/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.test.rowSet;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
import org.apache.drill.exec.vector.accessor.TupleAccessor.TupleSchema;
import org.apache.drill.exec.record.MaterializedField;

/**
 * Schema of a row set, exposed through several "views" of the same
 * underlying batch schema:
 * <ul>
 * <li>Batch schema: the schema used by a VectorContainer.</li>
 * <li>Physical schema: a hierarchy of tuples in which the top tuple
 * is the row and each nested tuple is a map.</li>
 * <li>Access schema: a flattened view that places every non-map column
 * in one name space and every map column in a second, separate name
 * space. The flattened view is the one used to write to, and read from,
 * the row set.</li>
 * </ul>
 * One schema can be used to create many row sets. All views are built
 * once, in the constructor, which suits tests that want known inputs
 * and outputs.
 */
public class RowSetSchema {

  /**
   * Logical description of a column: a materialized field plus its
   * position information. For a map column, also carries the schema of
   * the map's direct members.
   */
  public static class LogicalColumn {
    /** Name given at construction; the schema expander passes the dotted path from the row. */
    protected final String fullName;
    /** Index given at construction; the schema expander passes the position within the enclosing tuple. */
    protected final int accessIndex;
    /** Index assigned by {@code updateStructure}; the expander passes the slot in the flattened column (or map) name space. */
    protected int flatIndex;
    /** The field this column describes. */
    protected final MaterializedField field;

    /**
     * Schema of the map, or null when no map schema has been assigned.
     * Holds only the fields directly inside the map; members of nested
     * tuples are not included.
     */
    protected PhysicalSchema mapSchema;

    /**
     * Creates a column description. The flat index and map schema are
     * filled in afterwards by the schema expander.
     *
     * @param fullName name recorded for the column
     * @param accessIndex index recorded for the column
     * @param field the field being described
     */
    public LogicalColumn(String fullName, int accessIndex, MaterializedField field) {
      this.field = field;
      this.accessIndex = accessIndex;
      this.fullName = fullName;
    }

    /**
     * Second construction step: records the flattened index and, for
     * maps, the schema of the map's members.
     *
     * @param index index of this column in the flattened name space
     * @param children member schema for a map, null otherwise
     */
    private void updateStructure(int index, PhysicalSchema children) {
      mapSchema = children;
      flatIndex = index;
    }

    /** @return the index recorded at construction */
    public int accessIndex() {
      return accessIndex;
    }

    /** @return the index assigned when the column was added to a flattened name space */
    public int flatIndex() {
      return flatIndex;
    }

    /** @return true if a map schema has been assigned to this column */
    public boolean isMap() {
      return mapSchema != null;
    }

    /** @return the schema of the map's direct members, or null if none was assigned */
    public PhysicalSchema mapSchema() {
      return mapSchema;
    }

    /** @return the field described by this column */
    public MaterializedField field() {
      return field;
    }

    /** @return the name recorded at construction */
    public String fullName() {
      return fullName;
    }
  }

  /**
   * Name space for the members of a tuple. Members are stored in
   * insertion order and can be looked up either by position or by name.
   *
   * @param <T> the type of object representing each column
   */
  public static class NameSpace<T> {
    /** Maps a member name to its position in {@link #members}. */
    private final Map<String,Integer> indexByName = new HashMap<>();
    /** Members in insertion order. */
    private final List<T> members = new ArrayList<>();

    /**
     * Appends a member and registers its name.
     *
     * @param key name under which the member can be looked up
     * @param value the member to append
     * @return the position assigned to the new member
     */
    public int add(String key, T value) {
      final int position = members.size();
      members.add(value);
      indexByName.put(key, position);
      return position;
    }

    /**
     * @param index position of the member
     * @return the member stored at that position
     */
    public T get(int index) {
      return members.get(index);
    }

    /**
     * @param key name of the member
     * @return the member registered under the name, or null if the name is unknown
     */
    public T get(String key) {
      final int position = getIndex(key);
      return position == -1 ? null : get(position);
    }

    /**
     * @param key name of the member
     * @return the position registered for the name, or -1 if the name is unknown
     */
    public int getIndex(String key) {
      final Integer position = indexByName.get(key);
      if (position != null) {
        return position;
      }
      return -1;
    }

    /** @return the number of members added so far */
    public int count() {
      return members.size();
    }
  }

  /**
   * Tuple schema backed by a name space of logical columns. Used both for
   * the non-flattened view, in which the top-level row includes maps and
   * maps expand to a nested tuple schema (corresponding, more or less, to
   * the physical storage of vectors in a vector accessible or vector
   * container), and as the base of the flattened view.
   */
  private static class TupleSchemaImpl implements TupleSchema {

    private NameSpace<LogicalColumn> columns;

    public TupleSchemaImpl(NameSpace<LogicalColumn> ns) {
      columns = ns;
    }

    @Override
    public MaterializedField column(int index) {
      final LogicalColumn logical = logicalColumn(index);
      return logical.field();
    }

    /**
     * @param index position of the column in this tuple
     * @return the logical column stored at that position
     */
    public LogicalColumn logicalColumn(int index) {
      return columns.get(index);
    }

    @Override
    public MaterializedField column(String name) {
      final LogicalColumn logical = columns.get(name);
      if (logical == null) {
        // Unknown name: report "no such column" rather than failing.
        return null;
      }
      return logical.field();
    }

    @Override
    public int columnIndex(String name) {
      return columns.getIndex(name);
    }

    @Override
    public int count() {
      return columns.count();
    }
  }

  /**
   * Flattened view of the schema used to get and set columns. Columns
   * appear in the order of a left-to-right, depth-first traversal of the
   * row and its maps. The inherited column methods cover only non-map
   * columns; maps are reachable through the separate {@code map} methods,
   * offered mainly for completeness.
   */
  public static class FlattenedSchema extends TupleSchemaImpl {
    protected final TupleSchemaImpl maps;

    /**
     * @param cols name space of the non-map columns
     * @param maps name space of the map columns
     */
    public FlattenedSchema(NameSpace<LogicalColumn> cols, NameSpace<LogicalColumn> maps) {
      super(cols);
      this.maps = new TupleSchemaImpl(maps);
    }

    /**
     * @param index position of the map in the map name space
     * @return the logical column of that map
     */
    public LogicalColumn logicalMap(int index) {
      return maps.logicalColumn(index);
    }

    /**
     * @param index position of the map in the map name space
     * @return the field of that map
     */
    public MaterializedField map(int index) {
      return maps.column(index);
    }

    /**
     * @param name name under which the map was registered
     * @return the field of that map, or null if the name is unknown
     */
    public MaterializedField map(String name) {
      return maps.column(name);
    }

    /**
     * @param name name under which the map was registered
     * @return the position of the map, or -1 if the name is unknown
     */
    public int mapIndex(String name) {
      return maps.columnIndex(name);
    }

    /** @return the number of maps in the map name space */
    public int mapCount() {
      return maps.count();
    }
  }

  /**
   * Physical schema of a row set: the logical hierarchy of fields in
   * which maps are first-class fields and map members hang beneath their
   * map, much as they do in the physical value-vector implementation.
   */
  public static class PhysicalSchema {
    protected final NameSpace<LogicalColumn> schema = new NameSpace<>();

    /**
     * @param index position of the column within this tuple
     * @return the column at that position
     */
    public LogicalColumn column(int index) {
      return schema.get(index);
    }

    /**
     * @param name name of the column within this tuple
     * @return the column with that name, or null if there is none
     */
    public LogicalColumn column(String name) {
      return schema.get(name);
    }

    /** @return the number of columns directly within this tuple */
    public int count() {
      return schema.count();
    }

    /** @return the name space that backs this schema */
    public NameSpace<LogicalColumn> nameSpace() {
      return schema;
    }
  }

  /**
   * Walks a batch schema once and builds the physical schema tree while
   * filling the flattened column and map name spaces as it goes.
   */
  private static class SchemaExpander {
    private final PhysicalSchema physicalSchema;
    private final NameSpace<LogicalColumn> cols = new NameSpace<>();
    private final NameSpace<LogicalColumn> maps = new NameSpace<>();

    public SchemaExpander(BatchSchema schema) {
      physicalSchema = expand("", schema);
    }

    /**
     * Builds the physical schema for one tuple and recurses into maps.
     *
     * @param prefix text prepended to each field name to form its full
     * name: empty for the row, the map's full name plus "." for a map
     * @param fields the fields of the tuple
     * @return the physical schema of the tuple
     */
    private PhysicalSchema expand(String prefix, Iterable<MaterializedField> fields) {
      final PhysicalSchema tuple = new PhysicalSchema();
      for (MaterializedField member : fields) {
        final String qualifiedName = prefix + member.getName();

        // Within the tuple the column is keyed by its simple name; the
        // flattened name spaces below are keyed by the qualified name.
        final LogicalColumn logical = new LogicalColumn(qualifiedName, tuple.count(), member);
        tuple.schema.add(member.getName(), logical);

        final int flatPosition;
        final PhysicalSchema memberSchema;
        if (member.getType().getMinorType() != MinorType.MAP) {
          flatPosition = cols.add(qualifiedName, logical);
          memberSchema = null;
        } else {
          // Register the map before expanding its members so that a map
          // always precedes the maps nested inside it.
          flatPosition = maps.add(qualifiedName, logical);
          memberSchema = expand(qualifiedName + ".", member.getChildren());
        }
        logical.updateStructure(flatPosition, memberSchema);
      }
      return tuple;
    }
  }

  private final BatchSchema batchSchema;
  private final TupleSchemaImpl accessSchema;
  private final FlattenedSchema flatSchema;
  private final PhysicalSchema physicalSchema;

  /**
   * Builds all views of the given batch schema.
   *
   * @param schema the batch schema to describe
   */
  public RowSetSchema(BatchSchema schema) {
    batchSchema = schema;
    final SchemaExpander expansion = new SchemaExpander(schema);
    physicalSchema = expansion.physicalSchema;
    accessSchema = new TupleSchemaImpl(physicalSchema.nameSpace());
    flatSchema = new FlattenedSchema(expansion.cols, expansion.maps);
  }

  /**
   * A hierarchical schema that includes maps, with maps expanding
   * to a nested tuple schema. Not used at present; this is intended
   * to be the basis of non-flattened accessors if we find the need.
   * @return the hierarchical access schema
   */
  public TupleSchema hierarchicalAccess() {
    return accessSchema;
  }

  /**
   * A flattened (left-to-right, depth-first traversal) view of the
   * non-map columns in the row. Used to define the column indexes in the
   * get methods for row readers and the set methods for row writers.
   * @return the flattened access schema
   */
  public FlattenedSchema flatAccess() {
    return flatSchema;
  }

  /**
   * Internal physical schema in hierarchical order. Mostly used to create
   * the other schemas, but may be of use in special cases. Has the same
   * structure as the batch schema, but with additional information.
   * @return a tree-structured physical schema
   */
  public PhysicalSchema physical() {
    return physicalSchema;
  }

  /**
   * The batch schema used by the Drill runtime. Represents a tree-structured
   * list of top-level fields, including maps. Maps contain a nested schema.
   * @return the batch schema used by the Drill runtime
   */
  public BatchSchema batch() {
    return batchSchema;
  }

  /**
   * Convert this schema to a new batch schema that includes the specified
   * selection vector mode.
   * @param svMode selection vector mode for the new schema
   * @return the new batch schema
   */
  public BatchSchema toBatchSchema(SelectionVectorMode svMode) {
    final List<MaterializedField> topLevelFields = new ArrayList<>();
    for (MaterializedField topLevel : batchSchema) {
      topLevelFields.add(topLevel);
    }
    return new BatchSchema(svMode, topLevelFields);
  }
}