/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.impl;

import io.netty.buffer.DrillBuf;

import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.map.CaseInsensitiveMap;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.ops.OperatorContext;
import org.apache.drill.exec.ops.OperatorExecContext;
import org.apache.drill.exec.physical.base.PhysicalOperator;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
import org.apache.drill.exec.record.CloseableRecordBatch;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.TypedFieldId;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.record.WritableBatch;
import org.apache.drill.exec.record.selection.SelectionVector2;
import org.apache.drill.exec.record.selection.SelectionVector4;
import org.apache.drill.exec.store.RecordReader;
import org.apache.drill.exec.testing.ControlsInjector;
import org.apache.drill.exec.testing.ControlsInjectorFactory;
import org.apache.drill.exec.util.CallBack;
import org.apache.drill.exec.vector.AllocationHelper;
import org.apache.drill.exec.vector.NullableVarCharVector;
import org.apache.drill.exec.vector.SchemaChangeCallBack;
import org.apache.drill.exec.vector.ValueVector;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Maps;

/**
 * Record batch used for a particular scan. Operates against one or more
 * {@link RecordReader}s, presenting their rows downstream as a sequence of
 * record batches.
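 * <p>
 * As a rough, illustrative sketch of how a downstream caller drives this
 * batch (the loop shape follows the {@link IterOutcome} values returned by
 * {@code next()}; {@code scan} and {@code consume} are hypothetical names,
 * not part of this class):
 * <pre>{@code
 * boolean finished = false;
 * while (!finished) {
 *   switch (scan.next()) {
 *     case OK_NEW_SCHEMA:  // schema (re)established; rebuild state, then
 *     case OK:             // consume scan.getRecordCount() rows
 *       consume(scan);
 *       break;
 *     case OUT_OF_MEMORY:  // allocation failed; caller decides how to react
 *     case NONE:           // all readers exhausted
 *     case STOP:           // fragment was failed; stop processing
 *     default:
 *       finished = true;
 *   }
 * }
 * }</pre>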
 */
public class ScanBatch implements CloseableRecordBatch {
  private static final org.slf4j.Logger logger =
      org.slf4j.LoggerFactory.getLogger(ScanBatch.class);
  private static final ControlsInjector injector =
      ControlsInjectorFactory.getInjector(ScanBatch.class);

  /** Main collection of fields' value vectors. */
  private final VectorContainer container = new VectorContainer();

  private int recordCount;
  private final FragmentContext context;
  private final OperatorContext oContext;
  private Iterator<RecordReader> readers;
  private RecordReader currentReader;
  private BatchSchema schema;
  private final Mutator mutator;
  private boolean done = false;
  private boolean hasReadNonEmptyFile = false;
  private Map<String, ValueVector> implicitVectors;
  private Iterator<Map<String, String>> implicitColumns;
  private Map<String, String> implicitValues;
  private final BufferAllocator allocator;

  public ScanBatch(PhysicalOperator subScanConfig, FragmentContext context,
                   OperatorContext oContext, Iterator<RecordReader> readers,
                   List<Map<String, String>> implicitColumns) throws ExecutionSetupException {
    this.context = context;
    this.readers = readers;
    if (!readers.hasNext()) {
      throw new ExecutionSetupException("A scan batch must contain at least one reader.");
    }
    currentReader = readers.next();
    this.oContext = oContext;
    allocator = oContext.getAllocator();
    mutator = new Mutator(oContext, allocator, container);

    boolean setup = false;
    try {
      oContext.getStats().startProcessing();
      currentReader.setup(oContext, mutator);
      setup = true;
    } finally {
      // If we had an exception during setup, make sure to release existing data.
      if (!setup) {
        try {
          currentReader.close();
        } catch (final Exception e) {
          throw new ExecutionSetupException(e);
        }
      }
      oContext.getStats().stopProcessing();
    }
    this.implicitColumns = implicitColumns.iterator();
    this.implicitValues = this.implicitColumns.hasNext() ? this.implicitColumns.next() : null;
    addImplicitVectors();
  }

  public ScanBatch(PhysicalOperator subScanConfig, FragmentContext context,
                   Iterator<RecordReader> readers) throws ExecutionSetupException {
    this(subScanConfig, context, context.newOperatorContext(subScanConfig),
        readers, Collections.<Map<String, String>>emptyList());
  }

  @Override
  public FragmentContext getContext() {
    return context;
  }

  @Override
  public BatchSchema getSchema() {
    return schema;
  }

  @Override
  public int getRecordCount() {
    return recordCount;
  }

  @Override
  public void kill(boolean sendUpstream) {
    if (sendUpstream) {
      done = true;
    } else {
      releaseAssets();
    }
  }

  private void releaseAssets() {
    container.zeroVectors();
  }

  private void clearFieldVectorMap() {
    for (final ValueVector v : mutator.fieldVectorMap().values()) {
      v.clear();
    }
  }
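  /**
   * Reads the next batch of rows from the current reader, advancing to the
   * next reader whenever the current one reports no more rows. Returns
   * {@link IterOutcome#OK_NEW_SCHEMA} when the mutator reports a schema
   * change, {@link IterOutcome#OK} for an ordinary non-empty batch,
   * {@link IterOutcome#NONE} once all readers are exhausted,
   * {@link IterOutcome#OUT_OF_MEMORY} if vector allocation fails, and
   * {@link IterOutcome#STOP} after failing the fragment on any other error.
   */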
  @Override
  public IterOutcome next() {
    if (done) {
      return IterOutcome.NONE;
    }
    oContext.getStats().startProcessing();
    try {
      try {
        injector.injectChecked(context.getExecutionControls(), "next-allocate",
            OutOfMemoryException.class);
        currentReader.allocate(mutator.fieldVectorMap());
      } catch (OutOfMemoryException e) {
        logger.debug("Caught Out of Memory Exception", e);
        clearFieldVectorMap();
        return IterOutcome.OUT_OF_MEMORY;
      }

      while ((recordCount = currentReader.next()) == 0) {
        try {
          if (!readers.hasNext()) {
            // We're on the last reader, and it has no (more) rows.
            currentReader.close();
            releaseAssets();
            done = true; // have any future call to next() return NONE

            if (mutator.isNewSchema()) {
              // This last reader has a new schema (e.g., we have a zero-row
              // file or other source). (Note that some sources have a non-
              // null/non-trivial schema even when there are no rows.)
              container.buildSchema(SelectionVectorMode.NONE);
              schema = container.getSchema();
              return IterOutcome.OK_NEW_SCHEMA;
            }
            return IterOutcome.NONE;
          }
          // At this point, the reader that hit its end is not the last reader.

          // If all the files we have read so far are just empty, the schema is
          // not useful.
          if (!hasReadNonEmptyFile) {
            container.clear();
            clearFieldVectorMap();
            mutator.clear();
          }

          currentReader.close();
          currentReader = readers.next();
          implicitValues = implicitColumns.hasNext() ? implicitColumns.next() : null;
          currentReader.setup(oContext, mutator);
          try {
            currentReader.allocate(mutator.fieldVectorMap());
          } catch (OutOfMemoryException e) {
            logger.debug("Caught OutOfMemoryException");
            clearFieldVectorMap();
            return IterOutcome.OUT_OF_MEMORY;
          }
          addImplicitVectors();
        } catch (ExecutionSetupException e) {
          this.context.fail(e);
          releaseAssets();
          return IterOutcome.STOP;
        }
      }
      // At this point, the current reader has read 1 or more rows.

      hasReadNonEmptyFile = true;
      populateImplicitVectors();

      for (VectorWrapper<?> w : container) {
        w.getValueVector().getMutator().setValueCount(recordCount);
      }

      // This is a slight misuse of this metric, but it allows readers to
      // report how many records they generated.
      final boolean isNewSchema = mutator.isNewSchema();
      oContext.getStats().batchReceived(0, getRecordCount(), isNewSchema);

      if (isNewSchema) {
        container.buildSchema(SelectionVectorMode.NONE);
        schema = container.getSchema();
        return IterOutcome.OK_NEW_SCHEMA;
      } else {
        return IterOutcome.OK;
      }
    } catch (OutOfMemoryException ex) {
      context.fail(UserException.memoryError(ex).build(logger));
      return IterOutcome.STOP;
    } catch (Exception ex) {
      logger.debug("Failed to read the batch. Stopping...", ex);
      context.fail(ex);
      return IterOutcome.STOP;
    } finally {
      oContext.getStats().stopProcessing();
    }
  }

  private void addImplicitVectors() throws ExecutionSetupException {
    try {
      if (implicitVectors != null) {
        for (ValueVector v : implicitVectors.values()) {
          v.clear();
        }
      }
      implicitVectors = Maps.newHashMap();

      if (implicitValues != null) {
        for (String column : implicitValues.keySet()) {
          final MaterializedField field =
              MaterializedField.create(column, Types.optional(MinorType.VARCHAR));
          @SuppressWarnings("resource")
          final ValueVector v = mutator.addField(field, NullableVarCharVector.class);
          implicitVectors.put(column, v);
        }
      }
    } catch (SchemaChangeException e) {
      throw new ExecutionSetupException(e);
    }
  }

  private void populateImplicitVectors() {
    if (implicitValues != null) {
      for (Map.Entry<String, String> entry : implicitValues.entrySet()) {
        @SuppressWarnings("resource")
        final NullableVarCharVector v =
            (NullableVarCharVector) implicitVectors.get(entry.getKey());
        String val;
        if ((val = entry.getValue()) != null) {
          AllocationHelper.allocate(v, recordCount, val.length());
          final byte[] bytes = val.getBytes();
          for (int j = 0; j < recordCount; j++) {
            v.getMutator().setSafe(j, bytes, 0, bytes.length);
          }
          v.getMutator().setValueCount(recordCount);
        } else {
          AllocationHelper.allocate(v, recordCount, 0);
          v.getMutator().setValueCount(recordCount);
        }
      }
    }
  }

  @Override
  public SelectionVector2 getSelectionVector2() {
    throw new UnsupportedOperationException();
  }

  @Override
  public SelectionVector4 getSelectionVector4() {
    throw new UnsupportedOperationException();
  }

  @Override
  public TypedFieldId getValueVectorId(SchemaPath path) {
    return container.getValueVectorId(path);
  }

  @Override
  public VectorWrapper<?> getValueAccessorById(Class<?> clazz, int... ids) {
    return container.getValueAccessorById(clazz, ids);
  }

  /**
   * Row set mutator implementation provided to record readers created by
   * this scan batch. Made visible so that tests can create this mutator
   * without also needing a ScanBatch instance. (This class is really
   * independent of the ScanBatch, but resides here for historical reasons.
   * This is, in turn, the only use of the generated vector readers in the
   * vector package.)
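   * <p>
   * A minimal sketch of how a {@link RecordReader} typically uses this
   * mutator during setup: it registers each column it will read and then
   * writes into the returned vector. (Illustrative only; the column name
   * {@code "myColumn"} is hypothetical.)
   * <pre>{@code
   * MaterializedField field =
   *     MaterializedField.create("myColumn", Types.optional(MinorType.VARCHAR));
   * NullableVarCharVector vector =
   *     mutator.addField(field, NullableVarCharVector.class);
   * // Per batch: allocate space and write values; the ScanBatch then sets
   * // each vector's value count from the reader's returned row count.
   * }</pre>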
   */
  @VisibleForTesting
  public static class Mutator implements OutputMutator {
    /** Whether schema has changed since last inquiry (via {@link #isNewSchema}).
     *  Is true before first inquiry. */
    private boolean schemaChanged = true;

    /** Fields' value vectors indexed by fields' keys. */
    private final CaseInsensitiveMap<ValueVector> fieldVectorMap =
        CaseInsensitiveMap.newHashMap();

    private final SchemaChangeCallBack callBack = new SchemaChangeCallBack();
    private final BufferAllocator allocator;
    private final VectorContainer container;
    private final OperatorExecContext oContext;

    public Mutator(OperatorExecContext oContext, BufferAllocator allocator,
                   VectorContainer container) {
      this.oContext = oContext;
      this.allocator = allocator;
      this.container = container;
    }

    public Map<String, ValueVector> fieldVectorMap() {
      return fieldVectorMap;
    }

    @SuppressWarnings("resource")
    @Override
    public <T extends ValueVector> T addField(MaterializedField field,
                                              Class<T> clazz) throws SchemaChangeException {
      // Check if the field exists.
      ValueVector v = fieldVectorMap.get(field.getPath());
      if (v == null || v.getClass() != clazz) {
        // Field does not exist--add it to the map and the output container.
        v = TypeHelper.getNewVector(field, allocator, callBack);
        if (!clazz.isAssignableFrom(v.getClass())) {
          throw new SchemaChangeException(
              String.format(
                  "The class that was provided, %s, does not correspond to the "
                  + "expected vector type of %s.",
                  clazz.getSimpleName(), v.getClass().getSimpleName()));
        }

        final ValueVector old = fieldVectorMap.put(field.getPath(), v);
        if (old != null) {
          old.clear();
          container.remove(old);
        }

        container.add(v);
        // Added new vectors to the container--mark that the schema has changed.
        schemaChanged = true;
      }

      return clazz.cast(v);
    }

    @Override
    public void allocate(int recordCount) {
      for (final ValueVector v : fieldVectorMap.values()) {
        AllocationHelper.allocate(v, recordCount, 50, 10);
      }
    }

    /**
     * Reports whether schema has changed (field was added or re-added) since
     * the last call to {@link #isNewSchema}. Returns true at first call.
     */
    @Override
    public boolean isNewSchema() {
      // Check if top-level schema or any of the deeper map schemas has changed.

      // Note: Callback's getSchemaChangedAndReset() must get called in order
      // to reset it and avoid false reports of schema changes in future. (Be
      // careful with short-circuit OR (||) operator.)
      final boolean deeperSchemaChanged = callBack.getSchemaChangedAndReset();
      if (schemaChanged || deeperSchemaChanged) {
        schemaChanged = false;
        return true;
      }
      return false;
    }

    @Override
    public DrillBuf getManagedBuffer() {
      return oContext.getManagedBuffer();
    }

    @Override
    public CallBack getCallBack() {
      return callBack;
    }

    public void clear() {
      fieldVectorMap.clear();
    }
  }

  @Override
  public Iterator<VectorWrapper<?>> iterator() {
    return container.iterator();
  }

  @Override
  public WritableBatch getWritableBatch() {
    return WritableBatch.get(this);
  }

  @Override
  public void close() throws Exception {
    container.clear();
    for (final ValueVector v : implicitVectors.values()) {
      v.clear();
    }
    mutator.clear();
    currentReader.close();
  }

  @Override
  public VectorContainer getOutgoingContainer() {
    throw new UnsupportedOperationException(
        String.format("You should not call getOutgoingContainer() for class %s",
            this.getClass().getCanonicalName()));
  }
}
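/*
 * Illustrative sketch (not part of the class above): because Mutator is
 * marked @VisibleForTesting, a test can exercise it without building a full
 * ScanBatch. This assumes a test harness that supplies an OperatorExecContext
 * and a BufferAllocator; the variable names below are hypothetical.
 *
 *   VectorContainer container = new VectorContainer();
 *   ScanBatch.Mutator mutator = new ScanBatch.Mutator(oContext, allocator, container);
 *   MaterializedField field =
 *       MaterializedField.create("example", Types.optional(MinorType.VARCHAR));
 *   NullableVarCharVector v = mutator.addField(field, NullableVarCharVector.class);
 *   assert mutator.isNewSchema();   // true: a field was just added
 *   assert !mutator.isNewSchema();  // false: the flag resets after each inquiry
 */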