HyperRowSetImpl.java example

Explorer
drill-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.test.rowSet;

import java.util.ArrayList;
import java.util.List;

import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
import org.apache.drill.exec.record.HyperVectorWrapper;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.record.selection.SelectionVector4;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.accessor.AccessorUtilities;
import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader;
import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader.VectorAccessor;
import org.apache.drill.exec.vector.accessor.impl.ColumnAccessorFactory;
import org.apache.drill.exec.vector.complex.AbstractMapVector;
import org.apache.drill.test.rowSet.RowSet.HyperRowSet;
import org.apache.drill.test.rowSet.RowSetSchema.FlattenedSchema;
import org.apache.drill.test.rowSet.RowSetSchema.LogicalColumn;
import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema;

/**
 * Implements a row set wrapper around a collection of "hyper vectors."
 * A hyper-vector is a logical vector formed by a series of physical vectors
 * stacked on top of one another. To make a row set, we have a hyper-vector
 * for each column. Another way to visualize this is as a "hyper row set":
 * a stacked collection of single row sets: each column is represented by a
 * vector per row set, with each vector in a row set having the same number
 * of rows. An SV4 then provides a uniform index into the rows in the
 * hyper set. A hyper row set is read-only.
 */

public class HyperRowSetImpl extends AbstractRowSet implements HyperRowSet {

  /**
   * Read-only row index into the hyper row set with batch and index
   * values mapping via an SV4.
   */

  public static class HyperRowIndex extends BoundedRowIndex {

    private final SelectionVector4 sv4;

    public HyperRowIndex(SelectionVector4 sv4) {
      super(sv4.getCount());
      this.sv4 = sv4;
    }

    @Override
    public int index() {
      return AccessorUtilities.sv4Index(sv4.get(rowIndex));
    }

    @Override
    public int batch( ) {
      return AccessorUtilities.sv4Batch(sv4.get(rowIndex));
    }
  }

  /**
   * Vector accessor used by the column accessors to obtain the vector for
   * each column value. That is, position 0 might be batch 4, index 3,
   * while position 1 might be batch 1, index 7, and so on.
   */

  public static class HyperVectorAccessor implements VectorAccessor {

    private final HyperRowIndex rowIndex;
    private final ValueVector[] vectors;

    public HyperVectorAccessor(HyperVectorWrapper<ValueVector> hvw, HyperRowIndex rowIndex) {
      this.rowIndex = rowIndex;
      vectors = hvw.getValueVectors();
    }

    @Override
    public ValueVector vector() {
      return vectors[rowIndex.batch()];
    }
  }

  /**
   * Build a hyper row set by restructuring a hyper vector bundle into a uniform
   * shape. Consider this schema: <pre><code>
   * { a: 10, b: { c: 20, d: { e: 30 } } }</code></pre>
   * <p>
   * The hyper container, with two batches, has this structure:
   * <table border="1">
   * <tr><th>Batch</th><th>a</th><th>b</th></tr>
   * <tr><td>0</td><td>Int vector</td><td>Map Vector(Int vector, Map Vector(Int vector))</td></th>
   * <tr><td>1</td><td>Int vector</td><td>Map Vector(Int vector, Map Vector(Int vector))</td></th>
   * </table>
   * <p>
   * The above table shows that top-level scalar vectors (such as the Int Vector for column
   * a) appear "end-to-end" as a hyper-vector. Maps also appear end-to-end. But, the
   * contents of the map (column c) do not appear end-to-end. Instead, they appear as
   * contents in the map vector. To get to c, one indexes into the map vector, steps inside
   * the map to find c and indexes to the right row.
   * <p>
   * Similarly, the maps for d do not appear end-to-end, one must step to the right batch
   * in b, then step to d.
   * <p>
   * Finally, to get to e, one must step
   * into the hyper vector for b, then steps to the proper batch, steps to d, step to e
   * and finally step to the row within e. This is a very complex, costly indexing scheme
   * that differs depending on map nesting depth.
   * <p>
   * To simplify access, this class restructures the maps to flatten the scalar vectors
   * into end-to-end hyper vectors. For example, for the above:
   * <p>
   * <table border="1">
   * <tr><th>Batch</th><th>a</th><th>c</th><th>d</th></tr>
   * <tr><td>0</td><td>Int vector</td><td>Int vector</td><td>Int vector</td></th>
   * <tr><td>1</td><td>Int vector</td><td>Int vector</td><td>Int vector</td></th>
   * </table>
   *
   * The maps are still available as hyper vectors, but separated into map fields.
   * (Scalar access no longer needs to access the maps.) The result is a uniform
   * addressing scheme for both top-level and nested vectors.
   */

  public static class HyperVectorBuilder {

    protected final HyperVectorWrapper<?> valueVectors[];
    protected final HyperVectorWrapper<AbstractMapVector> mapVectors[];
    private final List<ValueVector> nestedScalars[];
    private int vectorIndex;
    private int mapIndex;
    private final PhysicalSchema physicalSchema;

    @SuppressWarnings("unchecked")
    public HyperVectorBuilder(RowSetSchema schema) {
      physicalSchema = schema.physical();
      FlattenedSchema flatSchema = schema.flatAccess();
      valueVectors = new HyperVectorWrapper<?>[schema.hierarchicalAccess().count()];
      if (flatSchema.mapCount() == 0) {
        mapVectors = null;
        nestedScalars = null;
      } else {
        mapVectors = (HyperVectorWrapper<AbstractMapVector>[])
            new HyperVectorWrapper<?>[flatSchema.mapCount()];
        nestedScalars = new ArrayList[flatSchema.count()];
      }
    }

    @SuppressWarnings("unchecked")
    public HyperVectorWrapper<ValueVector>[] mapContainer(VectorContainer container) {
      int i = 0;
      for (VectorWrapper<?> w : container) {
        HyperVectorWrapper<?> hvw = (HyperVectorWrapper<?>) w;
        if (w.getField().getType().getMinorType() == MinorType.MAP) {
          HyperVectorWrapper<AbstractMapVector> mw = (HyperVectorWrapper<AbstractMapVector>) hvw;
          mapVectors[mapIndex++] = mw;
          buildHyperMap(physicalSchema.column(i).mapSchema(), mw);
        } else {
          valueVectors[vectorIndex++] = hvw;
        }
        i++;
      }
      if (nestedScalars != null) {
        buildNestedHyperVectors();
      }
      return (HyperVectorWrapper<ValueVector>[]) valueVectors;
    }

    private void buildHyperMap(PhysicalSchema mapSchema, HyperVectorWrapper<AbstractMapVector> mapWrapper) {
      createHyperVectors(mapSchema);
      for (AbstractMapVector mapVector : mapWrapper.getValueVectors()) {
        buildMap(mapSchema, mapVector);
      }
    }

    private void buildMap(PhysicalSchema mapSchema, AbstractMapVector mapVector) {
      for (ValueVector v : mapVector) {
        LogicalColumn col = mapSchema.column(v.getField().getName());
        if (col.isMap()) {
          buildMap(col.mapSchema, (AbstractMapVector) v);
        } else {
          nestedScalars[col.accessIndex()].add(v);
        }
      }
    }

    private void createHyperVectors(PhysicalSchema mapSchema) {
      for (int i = 0; i < mapSchema.count(); i++) {
        LogicalColumn col = mapSchema.column(i);
        if (col.isMap()) {
          createHyperVectors(col.mapSchema);
        } else {
          nestedScalars[col.accessIndex()] = new ArrayList<ValueVector>();
        }
      }
    }

    private void buildNestedHyperVectors() {
      for (int i = 0;  i < nestedScalars.length; i++) {
        if (nestedScalars[i] == null) {
          continue;
        }
        ValueVector vectors[] = new ValueVector[nestedScalars[i].size()];
        nestedScalars[i].toArray(vectors);
        assert valueVectors[i] == null;
        valueVectors[i] = new HyperVectorWrapper<ValueVector>(vectors[0].getField(), vectors, false);
      }
    }
  }

  /**
   * Selection vector that indexes into the hyper vectors.
   */

  private final SelectionVector4 sv4;

  /**
   * Collection of hyper vectors in flattened order: a left-to-right,
   * depth first ordering of vectors in maps. Order here corresponds to
   * the order used for column indexes in the row set reader.
   */

  private final HyperVectorWrapper<ValueVector> hvw[];

  public HyperRowSetImpl(BufferAllocator allocator, VectorContainer container, SelectionVector4 sv4) {
    super(allocator, container.getSchema(), container);
    this.sv4 = sv4;
    hvw = new HyperVectorBuilder(schema).mapContainer(container);
  }

  @Override
  public boolean isExtendable() { return false; }

  @Override
  public boolean isWritable() { return false; }

  @Override
  public RowSetWriter writer() {
    throw new UnsupportedOperationException("Cannot write to a hyper vector");
  }

  @Override
  public RowSetReader reader() {
    return buildReader(new HyperRowIndex(sv4));
  }

  /**
   * Internal method to build the set of column readers needed for
   * this row set. Used when building a row set reader.
   * @param rowIndex object that points to the current row
   * @return an array of column readers: in the same order as the
   * (non-map) vectors.
   */

  protected RowSetReader buildReader(HyperRowIndex rowIndex) {
    FlattenedSchema accessSchema = schema().flatAccess();
    AbstractColumnReader readers[] = new AbstractColumnReader[accessSchema.count()];
    for (int i = 0; i < readers.length; i++) {
      MaterializedField field = accessSchema.column(i);
      readers[i] = ColumnAccessorFactory.newReader(field.getType());
      HyperVectorWrapper<ValueVector> hvw = getHyperVector(i);
      readers[i].bind(rowIndex, field, new HyperVectorAccessor(hvw, rowIndex));
    }
    return new RowSetReaderImpl(accessSchema, rowIndex, readers);
  }

  @Override
  public SelectionVectorMode indirectionType() { return SelectionVectorMode.FOUR_BYTE; }

  @Override
  public SelectionVector4 getSv4() { return sv4; }

  @Override
  public HyperVectorWrapper<ValueVector> getHyperVector(int i) { return hvw[i]; }

  @Override
  public int rowCount() { return sv4.getCount(); }
}