/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.drill.exec.record; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.exec.ops.FragmentContext; import org.apache.drill.exec.record.selection.SelectionVector2; import org.apache.drill.exec.record.selection.SelectionVector4; /** * A record batch contains a set of field values for a particular range of * records. * <p> * In the case of a record batch composed of ValueVectors, ideally a batch * fits within L2 cache (~256kB per core). The set of value vectors does * not change except during a call to {@link #next()} that returns * {@link IterOutcome#OK_NEW_SCHEMA} value. * </p> * <p> * A key thing to know is that the Iterator provided by a record batch must * align with the rank positions of the field IDs provided using * {@link getValueVectorId}. * </p> */ public interface RecordBatch extends VectorAccessible { /** max batch size, limited by 2-byte length in SV2: 65536 = 2^16 */ public static final int MAX_BATCH_SIZE = 65536; /** * Describes the outcome of incrementing RecordBatch forward by a call to * {@link #next()}. * <p> * Key characteristics of the return value sequence: * </p> * <ul> * <li> * {@code OK_NEW_SCHEMA} always appears unless {@code STOP} appears. (A * batch returns {@code OK_NEW_SCHEMA} before returning {@code NONE} even * if the batch has zero rows.) * </li> * <li>{@code OK_NEW_SCHEMA} always appears before {@code OK} appears.</li> * <li> * The last value is always {@code NONE} or {@code STOP}, and {@code NONE} * and {@code STOP} appear only as the last value. * </li> * </ul> * <p> * <strong>Details</strong>: * </p> * <p> * For normal completion, the basic sequence of return values from calls to * {@code next()} on a {@code RecordBatch} is: * </p> * <ol> * <li> * an {@link #OK_NEW_SCHEMA} value followed by zero or more {@link #OK} * values, * </li> * <li> * zero or more subsequences each having an {@code OK_NEW_SCHEMA} value * followed by zero or more {@code OK} values, and then * </li> * <li> * a {@link #NONE} value. * </li> * </ol> * <p> * In addition to that basic sequence, {@link #NOT_YET} and * {@link #OUT_OF_MEMORY} values can appear anywhere in the subsequence * before the terminal value ({@code NONE} or {@code STOP}). * </p> * <p> * For abnormal termination, the sequence is truncated (before the * {@code NONE}) and ends with {@link #STOP}. That is, the sequence begins * with a subsequence that is some prefix of a normal-completion sequence * and that does not contain {@code NONE}, and ends with {@code STOP}. * </p> * <p> * (The normal-completion return sequence is matched by the following * regular-expression-style grammar: * <pre> * ( ( NOT_YET | OUT_OF_MEMORY )* OK_NEW_SCHEMA * ( NOT_YET | OUT_OF_MEMORY )* OK )* * )+ * ( NOT_YET | OUT_OF_+MEMORY )* NONE * </pre> * ) * </p> */ public static enum IterOutcome { /** * Normal completion of batch. * <p> * The call to {@link #next()} * read no records, * the batch has and will have no more results to return, * and {@code next()} must not be called again. * </p> * <p> * This value will be returned only after {@link #OK_NEW_SCHEMA} has been * returned at least once (not necessarily <em>immediately</em> after). * </p> */ NONE, /** * Zero or more records with same schema. * <p> * The call to {@link #next()} * read zero or more records, * the schema has not changed since the last time {@code OK_NEW_SCHEMA} * was returned, * and the batch will have more results to return (at least completion or * abnormal termination ({@code NONE} or {@code STOP})). * ({@code next()} should be called again.) * </p> * <p> * This will be returned only after {@link #OK_NEW_SCHEMA} has been * returned at least once (not necessarily <em>immediately</em> after). * </p> */ OK, /** * New schema, maybe with records. * <p> * The call to {@link #next()} * changed the schema and vector structures * and read zero or more records, * and the batch will have more results to return (at least completion or * abnormal termination ({@code NONE} or {@code STOP})). * ({@code next()} should be called again.) * </p> */ OK_NEW_SCHEMA, /** * Non-completion (abnormal) termination. * <p> * The call to {@link #next()} * reports that the query has terminated other than by normal completion, * and that the caller must not call any of the schema-access or * data-access methods nor call {@code next()} again. * </p> * <p> * The caller can consume its QueryContext to understand the current state * of things. * </p> */ STOP, /** * No data yet. * <p> * The call to {@link #next()} * read no data, * and the batch will have more results to return in the future (at least * completion or abnormal termination ({@code NONE} or {@code STOP})). * The caller should call {@code next()} again, but should do so later * (including by returning {@code NOT_YET} to its caller). * </p> * <p> * Normally, the caller should perform any locally available work while * waiting for incoming data from the callee, for example, doing partial * sorts on already received data while waiting for additional data to * sort. * </p> * <p> * Used by batches that haven't received incoming data yet. * </p> */ NOT_YET, /** * Out of memory (not fatal). * <p> * The call to {@link #next()}, * including upstream operators, was unable to allocate memory * and did not read any records, * and the batch will have more results to return (at least completion or * abnormal termination ({@code NONE} or {@code STOP})). * The caller should release memory if it can (including by returning * {@code OUT_OF_MEMORY} to its caller) and call {@code next()} again. * </p> */ OUT_OF_MEMORY } /** * Gets the FragmentContext of the current query fragment. Useful for * reporting failure information or other query-level information. */ public FragmentContext getContext(); /** * Gets the current schema of this record batch. * <p> * May be called only when the most recent call to {@link #next}, if any, * returned {@link #OK_NEW_SCHEMA} or {@link #OK}. * </p> * <p> * The schema changes when and only when {@link #next} returns * {@link #OK_NEW_SCHEMA}. * </p> */ @Override public BatchSchema getSchema(); /** * Gets the number of records that are within this record. */ @Override public int getRecordCount(); /** * Informs child nodes that this query should be terminated. Child nodes * should use the QueryContext to determine what has happened. */ public void kill(boolean sendUpstream); public VectorContainer getOutgoingContainer(); /** * Gets the value vector type and ID for the given schema path. The * TypedFieldId should store a fieldId which is the same as the ordinal * position of the field within the Iterator provided this class's * implementation of Iterable<ValueVector>. * * @param path * The path where the vector should be located. * @return The local field id associated with this vector. If no field matches this path, this will return a null * TypedFieldId */ @Override public abstract TypedFieldId getValueVectorId(SchemaPath path); @Override public abstract VectorWrapper<?> getValueAccessorById(Class<?> clazz, int... ids); /** * Updates the data in each Field reading interface for the next range of * records. * <p> * Once a RecordBatch's {@code next()} has returned {@link IterOutcome#NONE} * or {@link IterOutcome#STOP}, the consumer should no longer call * {@code next()}. Behavior at this point is undefined and likely to * throw an exception. * </p> * <p> * See {@link IterOutcome} for the protocol (possible sequences of return * values). * </p> * * * @return An IterOutcome describing the result of the iteration. */ public IterOutcome next(); /** * Gets a writable version of this batch. Takes over ownership of existing * buffers. */ public WritableBatch getWritableBatch(); }