/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.drill.exec.store.parquet.columnreaders; import java.io.IOException; import org.apache.drill.common.exceptions.ExecutionSetupException; import org.apache.drill.exec.vector.BaseDataValueVector; import org.apache.drill.exec.vector.NullableVectorDefinitionSetter; import org.apache.drill.exec.vector.ValueVector; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.format.SchemaElement; import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; abstract class NullableColumnReader<V extends ValueVector> extends ColumnReader<V>{ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(NullableColumnReader.class); protected BaseDataValueVector castedBaseVector; protected NullableVectorDefinitionSetter castedVectorMutator; private long definitionLevelsRead = 0; NullableColumnReader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v, SchemaElement schemaElement) throws ExecutionSetupException { super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement); castedBaseVector = (BaseDataValueVector) v; castedVectorMutator = (NullableVectorDefinitionSetter) v.getMutator(); } @Override public void processPages(long recordsToReadInThisPass) throws IOException { readStartInBytes = 0; readLength = 0; readLengthInBits = 0; recordsReadInThisIteration = 0; vectorData = castedBaseVector.getBuffer(); // values need to be spaced out where nulls appear in the column // leaving blank space for nulls allows for random access to values // to optimize copying data out of the buffered disk stream, runs of defined values // are located and copied together, rather than copying individual values int runLength = -1; // number of non-null records in this pass. int nullRunLength = -1; // number of consecutive null records that we read. int currentDefinitionLevel = -1; int readCount = 0; // the record number we last read. int writeCount = 0; // the record number we last wrote to the value vector. // This was previously the indexInOutputVector variable boolean haveMoreData; // true if we have more data and have not filled the vector while (readCount < recordsToReadInThisPass && writeCount < valueVec.getValueCapacity()) { // read a page if needed if (!pageReader.hasPage() || (definitionLevelsRead >= pageReader.currentPageCount)) { if (!pageReader.next()) { break; } //New page. Reset the definition level. currentDefinitionLevel = -1; definitionLevelsRead = 0; recordsReadInThisIteration = 0; readStartInBytes = 0; } nullRunLength = 0; runLength = 0; // // Let's skip the next run of nulls if any ... // // If we are reentering this loop, the currentDefinitionLevel has already been read if (currentDefinitionLevel < 0) { currentDefinitionLevel = pageReader.definitionLevels.readInteger(); } haveMoreData = readCount < recordsToReadInThisPass && writeCount + nullRunLength < valueVec.getValueCapacity() && definitionLevelsRead < pageReader.currentPageCount; while (haveMoreData && currentDefinitionLevel < columnDescriptor .getMaxDefinitionLevel()) { readCount++; nullRunLength++; definitionLevelsRead++; haveMoreData = readCount < recordsToReadInThisPass && writeCount + nullRunLength < valueVec.getValueCapacity() && definitionLevelsRead < pageReader.currentPageCount; if (haveMoreData) { currentDefinitionLevel = pageReader.definitionLevels.readInteger(); } } // // Write the nulls if any // if (nullRunLength > 0) { int writerIndex = ((BaseDataValueVector) valueVec).getBuffer().writerIndex(); castedBaseVector.getBuffer().setIndex(0, writerIndex + (int) Math .ceil(nullRunLength * dataTypeLengthInBits / 8.0)); writeCount += nullRunLength; valuesReadInCurrentPass += nullRunLength; recordsReadInThisIteration += nullRunLength; } // // Handle the run of non-null values // haveMoreData = readCount < recordsToReadInThisPass && writeCount + runLength < valueVec.getValueCapacity() // note: writeCount+runLength && definitionLevelsRead < pageReader.currentPageCount; while (haveMoreData && currentDefinitionLevel >= columnDescriptor .getMaxDefinitionLevel()) { readCount++; runLength++; definitionLevelsRead++; castedVectorMutator.setIndexDefined(writeCount + runLength - 1); //set the nullable bit to indicate a non-null value haveMoreData = readCount < recordsToReadInThisPass && writeCount + runLength < valueVec.getValueCapacity() && definitionLevelsRead < pageReader.currentPageCount; if (haveMoreData) { currentDefinitionLevel = pageReader.definitionLevels.readInteger(); } } // // Write the non-null values // if (runLength > 0) { // set up metadata // This _must_ be set so that the call to readField works correctly for all datatypes this.recordsReadInThisIteration += runLength; this.readStartInBytes = pageReader.readPosInBytes; this.readLengthInBits = runLength * dataTypeLengthInBits; this.readLength = (int) Math.ceil(readLengthInBits / 8.0); readField(runLength); writeCount += runLength; valuesReadInCurrentPass += runLength; pageReader.readPosInBytes = readStartInBytes + readLength; } pageReader.valuesRead += recordsReadInThisIteration; totalValuesRead += runLength + nullRunLength; logger.trace("" + "recordsToReadInThisPass: {} \t " + "Run Length: {} \t Null Run Length: {} \t readCount: {} \t writeCount: {} \t " + "recordsReadInThisIteration: {} \t valuesReadInCurrentPass: {} \t " + "totalValuesRead: {} \t readStartInBytes: {} \t readLength: {} \t pageReader.byteLength: {} \t " + "definitionLevelsRead: {} \t pageReader.currentPageCount: {}", recordsToReadInThisPass, runLength, nullRunLength, readCount, writeCount, recordsReadInThisIteration, valuesReadInCurrentPass, totalValuesRead, readStartInBytes, readLength, pageReader.byteLength, definitionLevelsRead, pageReader.currentPageCount); } valueVec.getMutator().setValueCount(valuesReadInCurrentPass); } @Override protected abstract void readField(long recordsToRead); }