CopierHolder.java example

Explorer
drill-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.impl.xsort.managed;

import java.util.List;
import java.util.concurrent.TimeUnit;

import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.physical.impl.xsort.managed.ExternalSortBatch.SortResults;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.VectorAccessible;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.ValueVector;

import com.google.common.base.Stopwatch;

/**
 * Manages a {@link PriorityQueueCopier} instance produced from code generation.
 * Provides a wrapper around a copier "session" to simplify reading batches
 * from the copier.
 */

public class CopierHolder {
  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(CopierHolder.class);

  private PriorityQueueCopier copier;

  private final FragmentContext context;
  private final BufferAllocator allocator;
  private OperatorCodeGenerator opCodeGen;

  public CopierHolder(FragmentContext context, BufferAllocator allocator, OperatorCodeGenerator opCodeGen) {
    this.context = context;
    this.allocator = allocator;
    this.opCodeGen = opCodeGen;
  }

  /**
   * Start a merge operation using a temporary vector container. Used for
   * intermediate merges.
   *
   * @param schema
   * @param batchGroupList
   * @param targetRecordCount
   * @return
   */

  public CopierHolder.BatchMerger startMerge(BatchSchema schema, List<? extends BatchGroup> batchGroupList, int targetRecordCount) {
    return new BatchMerger(this, schema, batchGroupList, targetRecordCount);
  }

  /**
   * Start a merge operation using the specified vector container. Used for
   * the final merge operation.
   *
   * @param schema
   * @param batchGroupList
   * @param outputContainer
   * @param targetRecordCount
   * @return
   */
  public CopierHolder.BatchMerger startFinalMerge(BatchSchema schema, List<? extends BatchGroup> batchGroupList, VectorContainer outputContainer, int targetRecordCount) {
    return new BatchMerger(this, schema, batchGroupList, outputContainer, targetRecordCount);
  }

  /**
   * Prepare a copier which will write a collection of vectors to disk. The copier
   * uses generated code to do the actual writes. If the copier has not yet been
   * created, generate code and create it. If it has been created, close it and
   * prepare it for a new collection of batches.
   *
   * @param batch the (hyper) batch of vectors to be copied
   * @param batchGroupList same batches as above, but represented as a list
   * of individual batches
   * @param outputContainer the container into which to copy the batches
   */

  @SuppressWarnings("unchecked")
  private void createCopier(VectorAccessible batch, List<? extends BatchGroup> batchGroupList, VectorContainer outputContainer) {
    if (copier != null) {
      opCodeGen.closeCopier();
    } else {
      copier = opCodeGen.getCopier(batch);
    }

    // Initialize the value vectors for the output container

    for (VectorWrapper<?> i : batch) {
      @SuppressWarnings("resource")
      ValueVector v = TypeHelper.getNewVector(i.getField(), allocator);
      outputContainer.add(v);
    }
    try {
      copier.setup(context, allocator, batch, (List<BatchGroup>) batchGroupList, outputContainer);
    } catch (SchemaChangeException e) {
      throw UserException.unsupportedError(e)
            .message("Unexpected schema change - likely code error.")
            .build(logger);
    }
  }

  public BufferAllocator getAllocator() { return allocator; }

  public void close() {
    opCodeGen.closeCopier();
    copier = null;
  }

  /**
   * We've gathered a set of batches, each of which has been sorted. The batches
   * may have passed through a filter and thus may have "holes" where rows have
   * been filtered out. We will spill records in blocks of targetRecordCount.
   * To prepare, copy that many records into an outputContainer as a set of
   * contiguous values in new vectors. The result is a single batch with
   * vectors that combine a collection of input batches up to the
   * given threshold.
   * <p>
   * Input. Here the top line is a selection vector of indexes.
   * The second line is a set of batch groups (separated by underscores)
   * with letters indicating individual records:<pre>
   * [3 7 4 8 0 6 1] [5 3 6 8 2 0]
   * [eh_ad_ibf]     [r_qm_kn_p]</pre>
   * <p>
   * Output, assuming blocks of 5 records. The brackets represent
   * batches, the line represents the set of batches copied to the
   * spill file.<pre>
   * [abcde] [fhikm] [npqr]</pre>
   * <p>
   * The copying operation does a merge as well: copying
   * values from the sources in ordered fashion. Consider a different example,
   * we want to merge two input batches to produce a single output batch:
   * <pre>
   * Input:  [aceg] [bdfh]
   * Output: [abcdefgh]</pre>
   * <p>
   * In the above, the input consists of two sorted batches. (In reality,
   * the input batches have an associated selection vector, but that is omitted
   * here and just the sorted values shown.) The output is a single batch
   * with the merged records (indicated by letters) from the two input batches.
   * <p>
   * Here we bind the copier to the batchGroupList of sorted, buffered batches
   * to be merged. We bind the copier output to outputContainer: the copier will write its
   * merged "batches" of records to that container.
   * <p>
   * Calls to the {@link #next()} method sequentially return merged batches
   * of the desired row count.
    */

  public static class BatchMerger implements SortResults, AutoCloseable {

    private CopierHolder holder;
    private VectorContainer hyperBatch;
    private VectorContainer outputContainer;
    private int targetRecordCount;
    private int copyCount;
    private int batchCount;
    private long estBatchSize;

    /**
     * Creates a merger with an temporary output container.
     *
     * @param holder the copier that does the work
     * @param schema schema for the input and output batches
     * @param batchGroupList the input batches
     * @param targetRecordCount number of records for each output batch
     */
    private BatchMerger(CopierHolder holder, BatchSchema schema, List<? extends BatchGroup> batchGroupList,
                        int targetRecordCount) {
      this(holder, schema, batchGroupList, new VectorContainer(), targetRecordCount);
    }

    /**
     * Creates a merger with the specified output container
     *
     * @param holder the copier that does the work
     * @param schema schema for the input and output batches
     * @param batchGroupList the input batches
     * @param outputContainer merges output batch into the given output container
     * @param targetRecordCount number of records for each output batch
     */
    private BatchMerger(CopierHolder holder, BatchSchema schema, List<? extends BatchGroup> batchGroupList,
                        VectorContainer outputContainer, int targetRecordCount) {
      this.holder = holder;
      hyperBatch = constructHyperBatch(schema, batchGroupList);
      copyCount = 0;
      this.targetRecordCount = targetRecordCount;
      this.outputContainer = outputContainer;
      holder.createCopier(hyperBatch, batchGroupList, outputContainer);
    }

    /**
     * Return the output container.
     *
     * @return the output container
     */
    public VectorContainer getOutput() {
      return outputContainer;
    }

    /**
     * Read the next merged batch. The batch holds the specified row count, but
     * may be less if this is the last batch.
     *
     * @return the number of rows in the batch, or 0 if no more batches
     * are available
     */

    @Override
    public boolean next() {
      Stopwatch w = Stopwatch.createStarted();
      long start = holder.allocator.getAllocatedMemory();
      int count = holder.copier.next(targetRecordCount);
      copyCount += count;
      if (count > 0) {
        long t = w.elapsed(TimeUnit.MICROSECONDS);
        batchCount++;
        logger.trace("Took {} us to merge {} records", t, count);
        long size = holder.allocator.getAllocatedMemory() - start;
        estBatchSize = Math.max(estBatchSize, size);
      } else {
        logger.trace("copier returned 0 records");
      }

      // Identify the schema to be used in the output container. (Since
      // all merged batches have the same schema, the schema we identify
      // here should be the same as that which we already had.

      outputContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);

      // The copier does not set the record count in the output
      // container, so do that here.

      outputContainer.setRecordCount(count);

      return count > 0;
    }

    /**
     * Construct a vector container that holds a list of batches, each represented as an
     * array of vectors. The entire collection of vectors has a common schema.
     * <p>
     * To build the collection, we go through the current schema (which has been
     * devised to be common for all batches.) For each field in the schema, we create
     * an array of vectors. To create the elements, we iterate over all the incoming
     * batches and search for the vector that matches the current column.
     * <p>
     * Finally, we build a new schema for the combined container. That new schema must,
     * because of the way the container was created, match the current schema.
     *
     * @param schema schema for the hyper batch
     * @param batchGroupList list of batches to combine
     * @return a container where each column is represented as an array of vectors
     * (hence the "hyper" in the method name)
     */

    private VectorContainer constructHyperBatch(BatchSchema schema, List<? extends BatchGroup> batchGroupList) {
      VectorContainer cont = new VectorContainer();
      for (MaterializedField field : schema) {
        ValueVector[] vectors = new ValueVector[batchGroupList.size()];
        int i = 0;
        for (BatchGroup group : batchGroupList) {
          vectors[i++] = group.getValueAccessorById(
              field.getValueClass(),
              group.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds())
              .getValueVector();
        }
        cont.add(vectors);
      }
      cont.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
      return cont;
    }

    @Override
    public void close() {
      hyperBatch.clear();
      holder.close();
    }

    @Override
    public int getRecordCount() {
      return copyCount;
    }

    @Override
    public int getBatchCount() {
      return batchCount;
    }

    /**
     * Gets the estimated batch size, in bytes. Use for estimating the memory
     * needed to process the batches that this operator created.
     * @return the size of the largest batch created by this operation,
     * in bytes
     */

    public long getEstBatchSize() {
      return estBatchSize;
    }
  }
}