/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.impl.flatten;

import java.util.List;

import javax.inject.Named;

import org.apache.drill.exec.exception.OversizedAllocationException;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.TransferPair;

import com.google.common.collect.ImmutableList;
import org.apache.drill.exec.vector.complex.RepeatedValueVector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for flatten implementations. Unrolls one repeated (array) field of each
 * incoming record into one output record per array element, carrying the rest of the
 * record along via {@link TransferPair#splitAndTransfer}. The abstract
 * {@link #doSetup} / {@link #doEval} methods use {@code @Named} parameters and are
 * presumably filled in by Drill's runtime code generation — confirm against the
 * Flattener interface's caller.
 *
 * <p>Flattening is resumable: a call to {@link #flattenRecords} may stop mid-record
 * when an output batch fills up, saving its position in instance fields so the next
 * call continues where it left off.</p>
 */
public abstract class FlattenTemplate implements Flattener {
  private static final Logger logger = LoggerFactory.getLogger(FlattenTemplate.class);

  /** Initial (and maximum) number of output records per batch. */
  private static final int OUTPUT_BATCH_SIZE = 4*1024;
  /** Allocator-memory threshold (bytes) that switches this operator into bigRecords mode. */
  private static final int OUTPUT_MEMORY_LIMIT = 512 * 1024 * 1024;

  // Transfer pairs that copy the non-flattened columns into the outgoing batch.
  private ImmutableList<TransferPair> transfers;
  // Allocator of the outgoing container; polled to detect excessive memory use.
  private BufferAllocator outputAllocator;
  // Selection-vector mode of the incoming batch; only NONE is supported.
  private SelectionVectorMode svMode;
  // The repeated (array) vector being flattened, and its accessor.
  private RepeatedValueVector fieldToFlatten;
  private RepeatedValueVector.RepeatedAccessor accessor;
  // Index of the current input record (outer position) in the incoming batch.
  private int valueIndex;

  /**
   * Set to true once allocated memory exceeds OUTPUT_MEMORY_LIMIT; from then on
   * batch sizes are limited by observed buffer usage rather than record count alone.
   */
  private boolean bigRecords = false;
  /**
   * Buffer size (per monitor.getBufferSizeFor()) observed when bigRecords mode was
   * entered; used as the memory budget for all subsequent batches.
   */
  private int bigRecordsBufferSize;

  /**
   * The output batch limit starts at OUTPUT_BATCH_SIZE, but may be decreased
   * if records are found to be large.
   */
  private int outputLimit = OUTPUT_BATCH_SIZE;

  // this allows for groups to be written between batches if we run out of space, for cases where we have finished
  // a batch on the boundary it will be set to 0
  private int innerValueIndex = -1;
  // Running count of flattened values emitted; the basis for splitAndTransfer ranges.
  private int currentInnerValueIndex;

  /**
   * Sets the repeated vector to be flattened and caches its repeated accessor.
   *
   * @param flattenField the repeated (array) value vector to unroll
   */
  @Override
  public void setFlattenField(RepeatedValueVector flattenField) {
    this.fieldToFlatten = flattenField;
    this.accessor = RepeatedValueVector.RepeatedAccessor.class.cast(flattenField.getAccessor());
  }

  /** @return the repeated vector currently being flattened */
  @Override
  public RepeatedValueVector getFlattenField() {
    return fieldToFlatten;
  }

  /**
   * Flattens as many inner values as fit into the current output batch, resuming
   * from the position saved by the previous call. Stops early when the record-count
   * limit is hit, when memory limits are exceeded, or when a vector allocation
   * overflows; position is saved in instance fields so the next call continues
   * from the same input record / inner element.
   *
   * @param recordCount unused here — iteration is driven by the accessor's value
   *        count, not this argument (NOTE(review): confirm callers rely on that)
   * @param firstOutputIndex index in the outgoing batch at which to write the
   *        first flattened record
   * @param monitor used to measure actual buffer usage of the outgoing vectors
   * @return the number of flattened records written by this call; the same range
   *         is split-and-transferred through all registered transfer pairs
   */
  @Override
  public final int flattenRecords(final int recordCount, final int firstOutputIndex,
      final Flattener.Monitor monitor) {
    switch (svMode) {
      case FOUR_BYTE:
        throw new UnsupportedOperationException("Flatten does not support selection vector inputs.");
      case TWO_BYTE:
        throw new UnsupportedOperationException("Flatten does not support selection vector inputs.");
      case NONE:
        // -1 is the "never started" sentinel; from here on it is a real inner index.
        if (innerValueIndex == -1) {
          innerValueIndex = 0;
        }
        final int initialInnerValueIndex = currentInnerValueIndex;
        // restore state to local stack
        int valueIndexLocal = valueIndex;
        int innerValueIndexLocal = innerValueIndex;
        int currentInnerValueIndexLocal = currentInnerValueIndex;
        outer: {
          int outputIndex = firstOutputIndex;
          int recordsThisCall = 0;
          final int valueCount = accessor.getValueCount();
          // Outer loop: input records. Inner loop: elements of the repeated field.
          // Both loops resume from the saved positions rather than from zero.
          for ( ; valueIndexLocal < valueCount; valueIndexLocal++) {
            final int innerValueCount = accessor.getInnerValueCountAt(valueIndexLocal);
            for ( ; innerValueIndexLocal < innerValueCount; innerValueIndexLocal++) {
              // If we've hit the batch size limit, stop and flush what we've got so far.
              if (recordsThisCall == outputLimit) {
                if (bigRecords) {
                  /*
                   * We got to the limit we used before, but did we go over
                   * the bigRecordsBufferSize in the second half of the batch? If
                   * so, we'll need to adjust the batch limits.
                   */
                  adjustBatchLimits(1, monitor, recordsThisCall);
                }
                // Flush this batch.
                break outer;
              }
              /*
               * At the moment, the output record includes the input record, so for very
               * large records that we're flattening, we're carrying forward the original
               * record as well as the flattened element. We've seen a case where flattening a 4MB
               * record with a 20,000 element array causing memory usage to explode. To avoid
               * that until we can push down the selected fields to operators like this, we
               * also limit the amount of memory in use at one time.
               *
               * We have to have written at least one record to be able to get a buffer that will
               * have a real allocator, so we have to do this lazily. We won't check the limit
               * for the first two records, but that keeps this simple.
               */
              if (bigRecords) {
                /*
                 * If we're halfway through the outputLimit, check on our memory
                 * usage so far.
                 */
                if (recordsThisCall == outputLimit / 2) {
                  /*
                   * If we've used more than half the space we've used for big records
                   * in the past, we've seen even bigger records than before, so stop and
                   * see if we need to flush here before we go over bigRecordsBufferSize
                   * memory usage, and reduce the outputLimit further before we continue
                   * with the next batch.
                   */
                  if (adjustBatchLimits(2, monitor, recordsThisCall)) {
                    break outer;
                  }
                }
              } else {
                if (outputAllocator.getAllocatedMemory() > OUTPUT_MEMORY_LIMIT) {
                  /*
                   * We're dealing with big records. Reduce the outputLimit to
                   * the current record count, and take note of how much space the
                   * vectors report using for that. We'll use those numbers as limits
                   * going forward in order to avoid allocating more memory.
                   */
                  bigRecords = true;
                  outputLimit = Math.min(recordsThisCall, outputLimit);
                  if (outputLimit < 1) {
                    throw new IllegalStateException("flatten outputLimit (" + outputLimit
                        + ") won't make progress");
                  }
                  /*
                   * This will differ from what the allocator reports because of
                   * overhead. But the allocator check is much cheaper to do, so we
                   * only compute this at selected times.
                   */
                  bigRecordsBufferSize = monitor.getBufferSizeFor(recordsThisCall);
                  // Stop and flush.
                  break outer;
                }
              }
              try {
                // Generated code: copies inner element [valueIndexLocal][innerValueIndexLocal]
                // (plus projected expressions) to output position outputIndex.
                doEval(valueIndexLocal, outputIndex);
              } catch (OversizedAllocationException ex) {
                // unable to flatten due to a soft buffer overflow. split the batch here and resume execution.
                logger.debug("Reached allocation limit. Splitting the batch at input index: {} - inner index: {} - current completed index: {}",
                    valueIndexLocal, innerValueIndexLocal, currentInnerValueIndexLocal);
                /*
                 * TODO
                 * We can't further reduce the output limits here because it won't have
                 * any effect. The vectors have already gotten large, and there's currently
                 * no way to reduce their size. Ideally, we could reduce the outputLimit,
                 * and reduce the size of the currently used vectors.
                 */
                break outer;
              } catch (SchemaChangeException e) {
                throw new UnsupportedOperationException(e);
              }
              outputIndex++;
              currentInnerValueIndexLocal++;
              ++recordsThisCall;
            }
            // Finished this input record's array; the next record starts at element 0.
            innerValueIndexLocal = 0;
          }
        }
        // save state to heap
        valueIndex = valueIndexLocal;
        innerValueIndex = innerValueIndexLocal;
        currentInnerValueIndex = currentInnerValueIndexLocal;
        // transfer the computed range
        final int delta = currentInnerValueIndexLocal - initialInnerValueIndex;
        for (TransferPair t : transfers) {
          t.splitAndTransfer(initialInnerValueIndex, delta);
        }
        return delta;
      default:
        throw new UnsupportedOperationException();
    }
  }

  /**
   * Determine if the current batch record limit needs to be adjusted (when handling
   * bigRecord mode). If so, adjust the limit, and return true, otherwise return false.
   *
   * <p>If the limit is adjusted, it will always be adjusted down, because we need to operate
   * based on the largest sized record we've ever seen.</p>
   *
   * <p>If the limit is adjusted, then the current batch should be flushed, because
   * continuing would lead to going over the large memory limit that has already been
   * established.</p>
   *
   * @param multiplier Multiply currently used memory (according to the monitor) before
   * checking against past memory limits. This allows for checking the currently used
   * memory after processing a fraction of the expected batch limit, but using that as
   * a predictor of the full batch's size. For example, if this is checked after half
   * the batch size limit's records are processed, then using a multiplier of two will
   * do the check under the assumption that processing the full batch limit will use
   * twice as much memory.
   * @param monitor the Flattener.Monitor instance to use for the current memory usage check
   * @param recordsThisCall the number of records processed so far during this call to
   * flattenRecords().
   * @return true if the batch size limit was adjusted, false otherwise
   */
  private boolean adjustBatchLimits(final int multiplier, final Flattener.Monitor monitor,
      final int recordsThisCall) {
    assert bigRecords : "adjusting batch limits when no big records";
    final int bufferSize = multiplier * monitor.getBufferSizeFor(recordsThisCall);
    /*
     * If the amount of space we've used so far is below the amount that triggered
     * the bigRecords mode, then no adjustment is needed.
     */
    if (bufferSize <= bigRecordsBufferSize) {
      return false;
    }
    /*
     * We've used more space than we've used for big records in the past, we've seen
     * even bigger records, so we need to adjust our limits, and flush what we've got so far.
     *
     * We should reduce the outputLimit proportionately to get the predicted
     * amount of memory used back down to bigRecordsBufferSize.
     *
     * The number of records to limit is therefore
     * outputLimit *
     *   (1 - (bufferSize - bigRecordsBufferSize) / bigRecordsBufferSize)
     *
     * Doing some algebra on the multiplier:
     * (bigRecordsBufferSize - (bufferSize - bigRecordsBufferSize)) / bigRecordsBufferSize
     * (bigRecordsBufferSize - bufferSize + bigRecordsBufferSize) / bigRecordsBufferSize
     * (2 * bigRecordsBufferSize - bufferSize) / bigRecordsBufferSize
     *
     * If bufferSize has gotten so big that this would be negative, we'll
     * just go down to one record per batch. We need to check for that on
     * outputLimit anyway, in order to make sure that we make progress.
     */
    final int newLimit = (int) (outputLimit
        * (2.0 * ((double) bigRecordsBufferSize) - bufferSize) / bigRecordsBufferSize);
    // Never let the limit drop below one record, or the operator would stall.
    outputLimit = Math.max(1, newLimit);
    return true;
  }

  /**
   * One-time setup: records the incoming selection-vector mode (rejecting SV2/SV4),
   * snapshots the transfer pairs, captures the outgoing container's allocator for
   * memory checks, and delegates remaining wiring to the generated doSetup().
   *
   * @param context fragment context, forwarded to doSetup()
   * @param incoming batch whose repeated field will be flattened
   * @param outgoing batch that receives the flattened records
   * @param transfers pairs that move the non-flattened columns to the output
   * @throws SchemaChangeException if the generated doSetup() rejects the schema
   */
  @Override
  public final void setup(FragmentContext context, RecordBatch incoming, RecordBatch outgoing,
      List<TransferPair> transfers) throws SchemaChangeException {
    this.svMode = incoming.getSchema().getSelectionVectorMode();
    switch (svMode) {
      case FOUR_BYTE:
        throw new UnsupportedOperationException("Flatten does not support selection vector inputs.");
      case TWO_BYTE:
        throw new UnsupportedOperationException("Flatten does not support selection vector inputs.");
    }
    this.transfers = ImmutableList.copyOf(transfers);
    outputAllocator = outgoing.getOutgoingContainer().getAllocator();
    doSetup(context, incoming, outgoing);
  }

  /**
   * Resets the outer input position and the completed-output counter for a new
   * incoming batch. NOTE(review): innerValueIndex is deliberately left as-is here,
   * apparently so a partially-flattened array can resume across batches (see the
   * comment on that field) — confirm against the operator that calls this.
   */
  @Override
  public void resetGroupIndex() {
    this.valueIndex = 0;
    this.currentInnerValueIndex = 0;
  }

  // Implemented by generated code: binds expression evaluation to the given batches.
  public abstract void doSetup(@Named("context") FragmentContext context,
      @Named("incoming") RecordBatch incoming,
      @Named("outgoing") RecordBatch outgoing) throws SchemaChangeException;

  // Implemented by generated code: writes one flattened record from input position
  // inIndex to output position outIndex.
  public abstract boolean doEval(@Named("inIndex") int inIndex,
      @Named("outIndex") int outIndex) throws SchemaChangeException;
}