/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.drill.test.rowSet; import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.record.BatchSchema; import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; import org.apache.drill.exec.record.HyperVectorWrapper; import org.apache.drill.exec.record.VectorAccessible; import org.apache.drill.exec.record.VectorContainer; import org.apache.drill.exec.record.selection.SelectionVector2; import org.apache.drill.exec.record.selection.SelectionVector4; import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.accessor.ColumnReader; import org.apache.drill.exec.vector.accessor.ColumnWriter; import org.apache.drill.exec.vector.accessor.TupleReader; import org.apache.drill.exec.vector.accessor.TupleWriter; /** * A row set is a collection of rows stored as value vectors. Elsewhere in * Drill we call this a "record batch", but that term has been overloaded to * mean the runtime implementation of an operator... * <p> * A row set encapsulates a set of vectors and provides access to Drill's * various "views" of vectors: {@link VectorContainer}, * {@link VectorAccessible}, etc. * <p> * A row set is defined by a {@link RowSetSchema}. For testing purposes, a row * set has a fixed schema; we don't allow changing the set of vectors * dynamically. * <p> * The row set also provides a simple way to write and read records using the * {@link RowSetWriter} and {@link RowSetReader} interfaces. As per Drill * conventions, a row set can be written (once), read many times, and finally * cleared. * <p> * Drill provides a large number of vector (data) types. Each requires a * type-specific way to set data. The row set writer uses a {@link ColumnWriter} * to set each value in a way unique to the specific data type. Similarly, the * row set reader provides a {@link ColumnReader} interface. In both cases, * columns can be accessed by index number (as defined in the schema) or * by name. * <p> * A row set follows a schema. The schema starts as a * {@link BatchSchema}, but is parsed and restructured into a variety of * forms. In the original form, maps contain their value vectors. In the * flattened form, all vectors for all maps (and the top-level tuple) are * collected into a single structure. Since this structure is for testing, * this somewhat-static structure works just file; we don't need the added * complexity that comes from building the schema and data dynamically. * <p> * Putting this all together, the typical life-cycle flow is: * <ul> * <li>Define the schema using {@link RowSetSchema#builder()}.</li> * <li>Create the row set from the schema.</li> * <li>Populate the row set using a writer from {@link #writer(int)}.</li> * <li>Optionally add a selection vector: {@link #makeSv2()}.</li> * <li>Process the vector container using the code under test.</li> * <li>Retrieve the results using a reader from {@link #reader()}.</li> * <li>Dispose of vector memory with {@link #clear()}.</li> * </ul> */ public interface RowSet { /** * Interface for writing values to a row set. Only available * for newly-created, single, direct row sets. Eventually, if * we want to allow updating a row set, we have to create a * new row set with the updated columns, then merge the new * and old row sets to create a new immutable row set. */ public interface RowSetWriter extends TupleWriter { void setRow(Object...values); boolean valid(); int index(); void save(); void done(); } /** * Reader for all types of row sets. */ public interface RowSetReader extends TupleReader { /** * Total number of rows in the row set. * @return total number of rows */ int size(); boolean next(); int index(); void set(int index); /** * Batch index: 0 for a single batch, batch for the current * row is a hyper-batch. * @return index of the batch for the current row */ int batchIndex(); /** * The index of the underlying row which may be indexed by an * Sv2 or Sv4. * * @return */ int rowIndex(); boolean valid(); } boolean isExtendable(); boolean isWritable(); VectorAccessible vectorAccessible(); VectorContainer container(); int rowCount(); RowSetWriter writer(); RowSetReader reader(); void clear(); RowSetSchema schema(); BufferAllocator allocator(); SelectionVectorMode indirectionType(); void print(); /** * Return the size in memory of this record set, including indirection * vectors, null vectors, offset vectors and the entire (used and unused) * data vectors. * * @return memory size in bytes */ int size(); BatchSchema batchSchema(); /** * Row set that manages a single batch of rows. */ public interface SingleRowSet extends RowSet { ValueVector[] vectors(); SingleRowSet toIndirect(); SelectionVector2 getSv2(); } /** * Single row set which is empty and allows writing. * Once writing is complete, the row set becomes an * immutable direct row set. */ public interface ExtendableRowSet extends SingleRowSet { void allocate(int recordCount); void setRowCount(int rowCount); RowSetWriter writer(int initialRowCount); } /** * Row set comprised of multiple single row sets, along with * an indirection vector (SV4). */ public interface HyperRowSet extends RowSet { SelectionVector4 getSv4(); HyperVectorWrapper<ValueVector> getHyperVector(int i); } }