/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.impl.xsort.managed;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.physical.impl.xsort.managed.ExternalSortBatch.SortResults;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.VectorAccessible;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.ValueVector;
import com.google.common.base.Stopwatch;
/**
 * Manages a {@link PriorityQueueCopier} instance produced from code generation.
 * Provides a wrapper around a copier "session" to simplify reading batches
 * from the copier.
 * <p>
 * The copier is obtained from the {@link OperatorCodeGenerator} the first time a
 * merge is started and is dropped again by {@link #close()}.
 */
public class CopierHolder {
  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(CopierHolder.class);

  /** The current copier, or {@code null} if none has been obtained since the last {@link #close()}. */
  private PriorityQueueCopier copier;
  private final FragmentContext context;
  private final BufferAllocator allocator;
  private final OperatorCodeGenerator opCodeGen;

  /**
   * Creates a holder. No copier is obtained until a merge is started.
   *
   * @param context fragment context handed to the copier at setup time
   * @param allocator allocator used for the output vectors and handed to the copier
   * @param opCodeGen source of the copier instance; also used to close it
   */
  public CopierHolder(FragmentContext context, BufferAllocator allocator, OperatorCodeGenerator opCodeGen) {
    this.context = context;
    this.allocator = allocator;
    this.opCodeGen = opCodeGen;
  }

  /**
   * Start a merge operation using a temporary vector container. Used for
   * intermediate merges.
   *
   * @param schema schema for the input and output batches
   * @param batchGroupList the input batches
   * @param targetRecordCount number of records for each output batch
   * @return a merger from which the merged batches are read
   */
  public CopierHolder.BatchMerger startMerge(BatchSchema schema, List<? extends BatchGroup> batchGroupList, int targetRecordCount) {
    return new BatchMerger(this, schema, batchGroupList, targetRecordCount);
  }

  /**
   * Start a merge operation using the specified vector container. Used for
   * the final merge operation.
   *
   * @param schema schema for the input and output batches
   * @param batchGroupList the input batches
   * @param outputContainer the container that receives each merged batch
   * @param targetRecordCount number of records for each output batch
   * @return a merger from which the merged batches are read
   */
  public CopierHolder.BatchMerger startFinalMerge(BatchSchema schema, List<? extends BatchGroup> batchGroupList, VectorContainer outputContainer, int targetRecordCount) {
    return new BatchMerger(this, schema, batchGroupList, outputContainer, targetRecordCount);
  }

  /**
   * Prepare a copier which will merge a collection of vectors into the output
   * container. If the copier has not yet been created, obtain it from the code
   * generator. If it has been created, close it through the code generator and
   * prepare it for a new collection of batches.
   *
   * @param batch the (hyper) batch of vectors to be copied
   * @param batchGroupList same batches as above, but represented as a list
   * of individual batches
   * @param outputContainer the container into which to copy the batches; one
   * new vector per field of {@code batch} is added to it
   * @throws UserException if the copier setup reports a schema change
   */
  private void createCopier(VectorAccessible batch, List<? extends BatchGroup> batchGroupList, VectorContainer outputContainer) {
    if (copier == null) {
      copier = opCodeGen.getCopier(batch);
    } else {
      opCodeGen.closeCopier();
    }

    // Initialize the value vectors for the output container.
    for (VectorWrapper<?> wrapper : batch) {
      @SuppressWarnings("resource")
      ValueVector vector = TypeHelper.getNewVector(wrapper.getField(), allocator);
      outputContainer.add(vector);
    }

    // The copier API takes List<BatchGroup>; the list is only handed through.
    @SuppressWarnings("unchecked")
    List<BatchGroup> groups = (List<BatchGroup>) batchGroupList;
    try {
      copier.setup(context, allocator, batch, groups, outputContainer);
    } catch (SchemaChangeException e) {
      throw UserException.unsupportedError(e)
          .message("Unexpected schema change - likely code error.")
          .build(logger);
    }
  }

  /**
   * Returns the allocator given to this holder at construction.
   *
   * @return the allocator
   */
  public BufferAllocator getAllocator() {
    return allocator;
  }

  /**
   * Closes the copier through the code generator and forgets it, so that the
   * next merge obtains a copier again.
   */
  public void close() {
    opCodeGen.closeCopier();
    copier = null;
  }

  /**
   * We've gathered a set of batches, each of which has been sorted. The batches
   * may have passed through a filter and thus may have "holes" where rows have
   * been filtered out. We will spill records in blocks of targetRecordCount.
   * To prepare, copy that many records into an outputContainer as a set of
   * contiguous values in new vectors. The result is a single batch with
   * vectors that combine a collection of input batches up to the
   * given threshold.
   * <p>
   * Input. Here the top line is a selection vector of indexes.
   * The second line is a set of batch groups (separated by underscores)
   * with letters indicating individual records:<pre>
   * [3 7 4 8 0 6 1] [5 3 6 8 2 0]
   * [eh_ad_ibf]     [r_qm_kn_p]</pre>
   * <p>
   * Output, assuming blocks of 5 records. The brackets represent
   * batches, the line represents the set of batches copied to the
   * spill file.<pre>
   * [abcde] [fhikm] [npqr]</pre>
   * <p>
   * The copying operation does a merge as well: copying
   * values from the sources in ordered fashion. Consider a different example,
   * we want to merge two input batches to produce a single output batch:
   * <pre>
   * Input:  [aceg] [bdfh]
   * Output: [abcdefgh]</pre>
   * <p>
   * In the above, the input consists of two sorted batches. (In reality,
   * the input batches have an associated selection vector, but that is omitted
   * here and just the sorted values shown.) The output is a single batch
   * with the merged records (indicated by letters) from the two input batches.
   * <p>
   * Here we bind the copier to the batchGroupList of sorted, buffered batches
   * to be merged. We bind the copier output to outputContainer: the copier will write its
   * merged "batches" of records to that container.
   * <p>
   * Calls to the {@link #next()} method sequentially return merged batches
   * of the desired row count.
   */
  public static class BatchMerger implements SortResults, AutoCloseable {
    private final CopierHolder holder;
    private final VectorContainer hyperBatch;
    private final VectorContainer outputContainer;
    private final int targetRecordCount;

    /** Total number of records returned by the copier so far. */
    private int copyCount;
    /** Number of non-empty batches produced so far. */
    private int batchCount;
    /** Largest allocator growth seen across a single non-empty {@link #next()} call. */
    private long estBatchSize;

    /**
     * Creates a merger with a temporary output container.
     *
     * @param holder the copier that does the work
     * @param schema schema for the input and output batches
     * @param batchGroupList the input batches
     * @param targetRecordCount number of records for each output batch
     */
    private BatchMerger(CopierHolder holder, BatchSchema schema, List<? extends BatchGroup> batchGroupList,
                        int targetRecordCount) {
      this(holder, schema, batchGroupList, new VectorContainer(), targetRecordCount);
    }

    /**
     * Creates a merger with the specified output container.
     *
     * @param holder the copier that does the work
     * @param schema schema for the input and output batches
     * @param batchGroupList the input batches
     * @param outputContainer merges output batch into the given output container
     * @param targetRecordCount number of records for each output batch
     */
    private BatchMerger(CopierHolder holder, BatchSchema schema, List<? extends BatchGroup> batchGroupList,
                        VectorContainer outputContainer, int targetRecordCount) {
      this.holder = holder;
      this.hyperBatch = constructHyperBatch(schema, batchGroupList);
      this.targetRecordCount = targetRecordCount;
      this.outputContainer = outputContainer;
      holder.createCopier(hyperBatch, batchGroupList, outputContainer);
    }

    /**
     * Return the output container.
     *
     * @return the output container
     */
    public VectorContainer getOutput() {
      return outputContainer;
    }

    /**
     * Read the next merged batch into the output container. The copier is asked
     * for the target row count; the batch may hold fewer rows if this is the
     * last batch. The output container's schema is rebuilt and its record count
     * is set on every call, including when no rows were produced.
     *
     * @return {@code true} if the batch holds at least one row, {@code false}
     * if the copier returned no rows (no more batches are available)
     */
    @Override
    public boolean next() {
      Stopwatch watch = Stopwatch.createStarted();
      long memoryBefore = holder.allocator.getAllocatedMemory();
      int count = holder.copier.next(targetRecordCount);
      copyCount += count;
      boolean hasRows = count > 0;
      if (hasRows) {
        long elapsedUs = watch.elapsed(TimeUnit.MICROSECONDS);
        batchCount++;
        logger.trace("Took {} us to merge {} records", elapsedUs, count);
        // Track the largest growth in allocated memory as the batch size estimate.
        long size = holder.allocator.getAllocatedMemory() - memoryBefore;
        estBatchSize = Math.max(estBatchSize, size);
      } else {
        logger.trace("copier returned 0 records");
      }

      // Identify the schema to be used in the output container. (Since
      // all merged batches have the same schema, the schema we identify
      // here should be the same as that which we already had.)
      outputContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);

      // The copier does not set the record count in the output
      // container, so do that here.
      outputContainer.setRecordCount(count);
      return hasRows;
    }

    /**
     * Construct a vector container that holds a list of batches, each represented as an
     * array of vectors. The entire collection of vectors has a common schema.
     * <p>
     * To build the collection, we go through the current schema (which has been
     * devised to be common for all batches.) For each field in the schema, we create
     * an array of vectors. To create the elements, we iterate over all the incoming
     * batches and search for the vector that matches the current column.
     * <p>
     * Finally, we build a new schema for the combined container. That new schema must,
     * because of the way the container was created, match the current schema.
     *
     * @param schema schema for the hyper batch
     * @param batchGroupList list of batches to combine
     * @return a container where each column is represented as an array of vectors
     * (hence the "hyper" in the method name)
     */
    private static VectorContainer constructHyperBatch(BatchSchema schema, List<? extends BatchGroup> batchGroupList) {
      VectorContainer cont = new VectorContainer();
      for (MaterializedField field : schema) {
        // One slot per input batch, filled in list order.
        ValueVector[] vectors = new ValueVector[batchGroupList.size()];
        int i = 0;
        for (BatchGroup group : batchGroupList) {
          vectors[i++] = group.getValueAccessorById(
              field.getValueClass(),
              group.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds())
              .getValueVector();
        }
        cont.add(vectors);
      }
      cont.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
      return cont;
    }

    /**
     * Clears the hyper batch and closes the holder's copier. The holder is
     * closed even if clearing the hyper batch throws, so the copier is not
     * left open.
     */
    @Override
    public void close() {
      try {
        hyperBatch.clear();
      } finally {
        holder.close();
      }
    }

    /**
     * Gets the total number of records returned by the copier across all
     * calls to {@link #next()} so far.
     *
     * @return the cumulative record count
     */
    @Override
    public int getRecordCount() {
      return copyCount;
    }

    /**
     * Gets the number of non-empty batches produced by {@link #next()} so far.
     *
     * @return the batch count
     */
    @Override
    public int getBatchCount() {
      return batchCount;
    }

    /**
     * Gets the estimated batch size, in bytes. Use for estimating the memory
     * needed to process the batches that this operator created. The estimate is
     * the largest increase in the allocator's allocated memory observed across
     * a single non-empty {@link #next()} call.
     *
     * @return the size of the largest batch created by this operation,
     * in bytes
     */
    public long getEstBatchSize() {
      return estBatchSize;
    }
  }
}