/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.store.parquet.columnreaders;

import com.google.common.base.Stopwatch;
import com.google.common.collect.Lists;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.exec.vector.ValueVector;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class VarLenBinaryReader {

  ParquetRecordReader parentReader;
  final List<VarLengthColumn<? extends ValueVector>> columns;
  final boolean useAsyncTasks;
  private final long targetRecordCount;

  public VarLenBinaryReader(ParquetRecordReader parentReader,
                            List<VarLengthColumn<? extends ValueVector>> columns) {
    this.parentReader = parentReader;
    this.columns = columns;
    useAsyncTasks = parentReader.useAsyncColReader;

    // Can't read more records than the fixed-width fields will fit.
    // Note: this calculation is very likely wrong; it is a simplified
    // version of earlier code, but probably needs even more attention.
    int totalFixedFieldWidth = parentReader.getBitWidthAllFixedFields() / 8;
    if (totalFixedFieldWidth == 0) {
      targetRecordCount = 0;
    } else {
      targetRecordCount = parentReader.getBatchSize() / totalFixedFieldWidth;
    }
  }

  /**
   * Reads as many variable-length values as possible.
   *
   * @param recordsToReadInThisPass the number of records recommended for reading from the reader
   * @return the number of records actually read in this pass
   * @throws IOException if an error occurs while reading the underlying pages
   */
  public long readFields(long recordsToReadInThisPass) throws IOException {

    // Reset each column reader; this writes the first 0 offset.
    for (VarLengthColumn<?> columnReader : columns) {
      columnReader.reset();
    }
    Stopwatch timer = Stopwatch.createStarted();

    // Can't read any more records than the fixed-width fields will fit.
    if (targetRecordCount > 0) {
      recordsToReadInThisPass = Math.min(recordsToReadInThisPass, targetRecordCount);
    }
    long recordsReadInCurrentPass = determineSizesSerial(recordsToReadInThisPass);
    if (useAsyncTasks) {
      readRecordsParallel(recordsReadInCurrentPass);
    } else {
      readRecordsSerial(recordsReadInCurrentPass);
    }
    parentReader.parquetReaderStats.timeVarColumnRead.addAndGet(timer.elapsed(TimeUnit.NANOSECONDS));

    return recordsReadInCurrentPass;
  }

  /**
   * Determines, record by record, how many values each column can fit into
   * the current batch; stops once any column reports that it is done reading.
   */
  private long determineSizesSerial(long recordsToReadInThisPass) throws IOException {
    int recordsReadInCurrentPass = 0;
    top: do {
      for (VarLengthColumn<?> columnReader : columns) {
        // Return status is "done reading", meaning stop if true.
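        // The labeled break exits both the inner column loop and the outer
        // do-while. Positions are advanced only after every column has sized
        // the current record, so when we stop here all columns agree on how
        // many values are ready for the read pass below.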
        if (columnReader.determineSize(recordsReadInCurrentPass)) {
          break top;
        }
      }
      for (VarLengthColumn<?> columnReader : columns) {
        columnReader.updateReadyToReadPosition();
        columnReader.currDefLevel = -1;
      }
      recordsReadInCurrentPass++;
    } while (recordsReadInCurrentPass < recordsToReadInThisPass);

    return recordsReadInCurrentPass;
  }

  private void readRecordsSerial(long recordsReadInCurrentPass) {
    for (VarLengthColumn<?> columnReader : columns) {
      columnReader.readRecords(columnReader.pageReader.valuesReadyToRead);
    }
    for (VarLengthColumn<?> columnReader : columns) {
      columnReader.valueVec.getMutator().setValueCount((int) recordsReadInCurrentPass);
    }
  }

  private void readRecordsParallel(long recordsReadInCurrentPass) {
    ArrayList<Future<Integer>> futures = Lists.newArrayList();
    for (VarLengthColumn<?> columnReader : columns) {
      Future<Integer> f = columnReader.readRecordsAsync(columnReader.pageReader.valuesReadyToRead);
      futures.add(f);
    }
    // Wait for every task; once one fails, cancel the remainder.
    Exception exception = null;
    for (Future<Integer> f : futures) {
      if (exception != null) {
        f.cancel(true);
      } else {
        try {
          f.get();
        } catch (Exception e) {
          f.cancel(true);
          exception = e;
        }
      }
    }
    // Surface any failure rather than setting value counts on
    // partially read vectors.
    if (exception != null) {
      handleAndRaise("Failure while reading variable-length columns in parallel", exception);
    }
    for (VarLengthColumn<?> columnReader : columns) {
      columnReader.valueVec.getMutator().setValueCount((int) recordsReadInCurrentPass);
    }
  }

  protected void handleAndRaise(String s, Exception e) {
    String message = "Error in parquet record reader.\nMessage: " + s;
    throw new DrillRuntimeException(message, e);
  }
}
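// A minimal sketch of the calling sequence, assumed from this class alone
// (the actual driver is ParquetRecordReader elsewhere in this package;
// 'parentReader', 'varLenColumns', and 'recordsToRead' are placeholders):
//
//   VarLenBinaryReader varLenReader = new VarLenBinaryReader(parentReader, varLenColumns);
//   long recordsRead = varLenReader.readFields(recordsToRead);
//   // each variable-length column vector now holds 'recordsRead' values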