/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.test.rowSet;
import java.util.ArrayList;
import java.util.List;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
import org.apache.drill.exec.record.HyperVectorWrapper;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.record.selection.SelectionVector4;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.accessor.AccessorUtilities;
import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader;
import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader.VectorAccessor;
import org.apache.drill.exec.vector.accessor.impl.ColumnAccessorFactory;
import org.apache.drill.exec.vector.complex.AbstractMapVector;
import org.apache.drill.test.rowSet.RowSet.HyperRowSet;
import org.apache.drill.test.rowSet.RowSetSchema.FlattenedSchema;
import org.apache.drill.test.rowSet.RowSetSchema.LogicalColumn;
import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema;
/**
* Implements a row set wrapper around a collection of "hyper vectors."
* A hyper-vector is a logical vector formed by a series of physical vectors
* stacked on top of one another. To make a row set, we have a hyper-vector
* for each column. Another way to visualize this is as a "hyper row set":
* a stacked collection of single row sets: each column is represented by a
* vector per row set, with each vector in a row set having the same number
* of rows. An SV4 then provides a uniform index into the rows in the
* hyper set. A hyper row set is read-only.
*/
public class HyperRowSetImpl extends AbstractRowSet implements HyperRowSet {
/**
 * Read-only row index for a hyper row set. Each position refers to one
 * entry of an SV4; that entry is decoded into a batch number and an
 * index value by the {@link AccessorUtilities} helpers.
 */
public static class HyperRowIndex extends BoundedRowIndex {

  /** Selection vector whose entries are decoded for each position. */
  private final SelectionVector4 sv4;

  /**
   * @param sv4 selection vector to read; its entry count is handed to
   * the bounded base index
   */
  public HyperRowIndex(SelectionVector4 sv4) {
    super(sv4.getCount());
    this.sv4 = sv4;
  }

  /**
   * @return the index value decoded from the SV4 entry at the
   * current position
   */
  @Override
  public int index() {
    final int entry = sv4.get(rowIndex);
    return AccessorUtilities.sv4Index(entry);
  }

  /**
   * @return the batch value decoded from the SV4 entry at the
   * current position
   */
  @Override
  public int batch() {
    final int entry = sv4.get(rowIndex);
    return AccessorUtilities.sv4Batch(entry);
  }
}
/**
 * Vector accessor handed to the column accessors so they can locate the
 * vector that holds the value for the current row. Consecutive positions
 * need not share a batch: position 0 might be batch 4, index 3, while
 * position 1 might be batch 1, index 7, and so on.
 */
public static class HyperVectorAccessor implements VectorAccessor {

  /** Index that reports the batch for the current position. */
  private final HyperRowIndex rowIndex;

  /** Vectors taken from the hyper vector wrapper, addressed by batch. */
  private final ValueVector[] vectors;

  /**
   * @param hvw hyper vector wrapper that supplies the vectors for one column
   * @param rowIndex index used to pick the batch on each access
   */
  public HyperVectorAccessor(HyperVectorWrapper<ValueVector> hvw, HyperRowIndex rowIndex) {
    this.vectors = hvw.getValueVectors();
    this.rowIndex = rowIndex;
  }

  /**
   * @return the vector at the batch position currently reported by
   * the row index
   */
  @Override
  public ValueVector vector() {
    final int batch = rowIndex.batch();
    return vectors[batch];
  }
}
/**
 * Builds a hyper row set by restructuring a hyper vector bundle into a
 * uniform shape. Consider this schema: <pre><code>
 * { a: 10, b: { c: 20, d: { e: 30 } } }</code></pre>
 * <p>
 * The hyper container, with two batches, has this structure:
 * <table border="1">
 * <tr><th>Batch</th><th>a</th><th>b</th></tr>
 * <tr><td>0</td><td>Int vector</td><td>Map Vector(Int vector, Map Vector(Int vector))</td></tr>
 * <tr><td>1</td><td>Int vector</td><td>Map Vector(Int vector, Map Vector(Int vector))</td></tr>
 * </table>
 * <p>
 * Top-level scalar vectors (such as the Int vector for column a) appear
 * "end-to-end" as a hyper-vector. Maps also appear end-to-end. The contents
 * of a map (column c), however, do not: they appear as contents of the map
 * vector. To get to c, one indexes into the map hyper-vector, steps inside
 * the map to find c, and then indexes to the right row.
 * <p>
 * Similarly, the maps for d do not appear end-to-end; one must step to the
 * right batch in b, then step to d.
 * <p>
 * Finally, to get to e, one must step into the hyper vector for b, step to
 * the proper batch, step to d, step to e, and finally step to the row within
 * e. This is a complex, costly indexing scheme that differs depending on map
 * nesting depth.
 * <p>
 * To simplify access, this class restructures the maps to flatten the scalar
 * vectors into end-to-end hyper vectors. For example, for the above:
 * <p>
 * <table border="1">
 * <tr><th>Batch</th><th>a</th><th>c</th><th>e</th></tr>
 * <tr><td>0</td><td>Int vector</td><td>Int vector</td><td>Int vector</td></tr>
 * <tr><td>1</td><td>Int vector</td><td>Int vector</td><td>Int vector</td></tr>
 * </table>
 * <p>
 * The maps are still available as hyper vectors, but separated into map
 * fields. (Scalar access no longer needs to access the maps.) The result is
 * a uniform addressing scheme for both top-level and nested vectors.
 */
public static class HyperVectorBuilder {

  /** Resulting hyper vectors for scalar columns. */
  protected final HyperVectorWrapper<?>[] valueVectors;

  /** Top-level map hyper vectors; null when the schema has no maps. */
  protected final HyperVectorWrapper<AbstractMapVector>[] mapVectors;

  /**
   * Per-column lists of the scalar vectors found inside maps, slotted by
   * the column's access index; null when the schema has no maps.
   */
  private final List<ValueVector>[] nestedScalars;

  private final PhysicalSchema physicalSchema;

  /** Next free slot in {@link #valueVectors} for a top-level scalar. */
  private int vectorIndex;

  /** Next free slot in {@link #mapVectors}. */
  private int mapIndex;

  /**
   * Allocates the working arrays from the counts reported by the schema.
   *
   * @param schema row set schema that supplies the physical, flattened
   * and hierarchical views
   */
  @SuppressWarnings("unchecked")
  public HyperVectorBuilder(RowSetSchema schema) {
    physicalSchema = schema.physical();
    final FlattenedSchema flatSchema = schema.flatAccess();

    // NOTE(review): this array is sized from the hierarchical schema, yet
    // buildNestedHyperVectors() writes to positions that range over the
    // flattened schema count -- confirm the two counts always agree.
    valueVectors = new HyperVectorWrapper<?>[schema.hierarchicalAccess().count()];

    final int mapCount = flatSchema.mapCount();
    if (mapCount != 0) {
      mapVectors = (HyperVectorWrapper<AbstractMapVector>[])
          new HyperVectorWrapper<?>[mapCount];
      nestedScalars = new ArrayList[flatSchema.count()];
    } else {
      // No maps: nothing to flatten, so the map bookkeeping is not needed.
      mapVectors = null;
      nestedScalars = null;
    }
  }

  /**
   * Walks the wrappers of a hyper container. Non-map wrappers are stored
   * directly; map wrappers are recorded and their nested scalar vectors
   * collected and then wrapped as hyper vectors of their own.
   * <p>
   * The slot counters are never reset, so a builder instance appears to be
   * meant for a single call.
   *
   * @param container container whose wrappers are all hyper vector wrappers
   * @return the array of scalar hyper vectors
   */
  @SuppressWarnings("unchecked")
  public HyperVectorWrapper<ValueVector>[] mapContainer(VectorContainer container) {
    int column = -1;
    for (VectorWrapper<?> wrapper : container) {
      column++;
      final HyperVectorWrapper<?> hyper = (HyperVectorWrapper<?>) wrapper;
      final MinorType type = wrapper.getField().getType().getMinorType();
      if (type != MinorType.MAP) {
        valueVectors[vectorIndex++] = hyper;
        continue;
      }
      final HyperVectorWrapper<AbstractMapVector> mapWrapper =
          (HyperVectorWrapper<AbstractMapVector>) hyper;
      mapVectors[mapIndex++] = mapWrapper;
      buildHyperMap(physicalSchema.column(column).mapSchema(), mapWrapper);
    }

    if (nestedScalars != null) {
      buildNestedHyperVectors();
    }
    return (HyperVectorWrapper<ValueVector>[]) valueVectors;
  }

  /**
   * Prepares the collection lists for one top-level map, then gathers the
   * nested scalar vectors from each map vector held by the wrapper.
   */
  private void buildHyperMap(PhysicalSchema mapSchema, HyperVectorWrapper<AbstractMapVector> mapWrapper) {
    createHyperVectors(mapSchema);
    final AbstractMapVector[] batches = mapWrapper.getValueVectors();
    for (AbstractMapVector batchMap : batches) {
      buildMap(mapSchema, batchMap);
    }
  }

  /**
   * Visits the members of one map vector: nested maps are descended into,
   * any other member is appended to the list for its column.
   */
  private void buildMap(PhysicalSchema mapSchema, AbstractMapVector mapVector) {
    for (ValueVector member : mapVector) {
      final LogicalColumn column = mapSchema.column(member.getField().getName());
      if (!column.isMap()) {
        nestedScalars[column.accessIndex()].add(member);
      } else {
        buildMap(column.mapSchema, (AbstractMapVector) member);
      }
    }
  }

  /**
   * Creates an empty collection list for every non-map column reachable
   * from the given map schema, descending into nested maps.
   */
  private void createHyperVectors(PhysicalSchema mapSchema) {
    final int columnCount = mapSchema.count();
    for (int pos = 0; pos < columnCount; pos++) {
      final LogicalColumn column = mapSchema.column(pos);
      if (!column.isMap()) {
        nestedScalars[column.accessIndex()] = new ArrayList<ValueVector>();
      } else {
        createHyperVectors(column.mapSchema);
      }
    }
  }

  /**
   * Converts each populated collection list into a hyper vector wrapper
   * stored at the same position in {@link #valueVectors}. The wrapper's
   * field is taken from the first collected vector.
   */
  private void buildNestedHyperVectors() {
    for (int slot = 0; slot < nestedScalars.length; slot++) {
      final List<ValueVector> collected = nestedScalars[slot];
      if (collected != null) {
        final ValueVector[] stacked = new ValueVector[collected.size()];
        collected.toArray(stacked);
        // A slot must not already hold a top-level scalar.
        assert valueVectors[slot] == null;
        valueVectors[slot] =
            new HyperVectorWrapper<ValueVector>(stacked[0].getField(), stacked, false);
      }
    }
  }
}
/**
 * Selection vector that indexes into the hyper vectors.
 */
private final SelectionVector4 sv4;

/**
 * Hyper vectors in flattened order: a left-to-right, depth-first ordering
 * of the vectors in maps. The order matches the column indexes used by
 * the row set reader.
 */
private final HyperVectorWrapper<ValueVector>[] hvw;

/**
 * Wraps a hyper container and its SV4 as a row set.
 *
 * @param allocator allocator passed through to the base row set
 * @param container container of hyper vectors; also supplies the schema
 * @param sv4 selection vector that indexes the rows of the hyper set
 */
public HyperRowSetImpl(BufferAllocator allocator, VectorContainer container, SelectionVector4 sv4) {
  super(allocator, container.getSchema(), container);
  this.sv4 = sv4;
  final HyperVectorBuilder builder = new HyperVectorBuilder(schema);
  this.hvw = builder.mapContainer(container);
}
/**
 * @return always {@code false} for this implementation
 */
@Override
public boolean isExtendable() {
  return false;
}
/**
 * @return always {@code false} for this implementation
 */
@Override
public boolean isWritable() {
  return false;
}
/**
 * Not supported: this implementation provides no writer.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public RowSetWriter writer() {
  throw new UnsupportedOperationException(
      "Cannot write to a hyper vector");
}
/**
 * Creates a reader over this row set, driven by a new row index built
 * on the SV4.
 *
 * @return a reader for this row set
 */
@Override
public RowSetReader reader() {
  final HyperRowIndex rowIndex = new HyperRowIndex(sv4);
  return buildReader(rowIndex);
}
/**
 * Internal method that builds the row set reader. One column reader is
 * created per column of the flattened access schema and bound to the
 * row index, the column's field, and an accessor over the hyper vector
 * at the same position.
 *
 * @param rowIndex object that points to the current row
 * @return a row set reader over the flattened schema, with column
 * readers in the same order as the (non-map) vectors
 */
protected RowSetReader buildReader(HyperRowIndex rowIndex) {
  final FlattenedSchema accessSchema = schema().flatAccess();
  final int columnCount = accessSchema.count();
  final AbstractColumnReader[] readers = new AbstractColumnReader[columnCount];
  for (int col = 0; col < columnCount; col++) {
    final MaterializedField field = accessSchema.column(col);
    final AbstractColumnReader columnReader = ColumnAccessorFactory.newReader(field.getType());
    readers[col] = columnReader;
    columnReader.bind(rowIndex, field,
        new HyperVectorAccessor(getHyperVector(col), rowIndex));
  }
  return new RowSetReaderImpl(accessSchema, rowIndex, readers);
}
/**
 * @return {@link SelectionVectorMode#FOUR_BYTE}, since rows are addressed
 * through an SV4
 */
@Override
public SelectionVectorMode indirectionType() {
  return SelectionVectorMode.FOUR_BYTE;
}
/**
 * @return the selection vector supplied at construction
 */
@Override
public SelectionVector4 getSv4() {
  return sv4;
}
/**
 * @param i position of the column in the flattened vector array
 * @return the hyper vector wrapper stored at that position
 */
@Override
public HyperVectorWrapper<ValueVector> getHyperVector(int i) {
  return hvw[i];
}
/**
 * @return the entry count reported by the SV4
 */
@Override
public int rowCount() {
  return sv4.getCount();
}
}