/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.store.parquet.columnreaders;

import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.exec.vector.BitVector;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.format.SchemaElement;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;

final class BitReader extends ColumnReader<BitVector> {

  BitReader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
            ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, BitVector v,
            SchemaElement schemaElement) throws ExecutionSetupException {
    super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  }

  @Override
  protected void readField(long recordsToReadInThisPass) {

    // Read no more values than remain in the current page, and no more than
    // are still needed to fill this pass.
    recordsReadInThisIteration =
        Math.min(pageReader.currentPageCount - pageReader.valuesRead,
            recordsToReadInThisPass - valuesReadInCurrentPass);

    // A more optimized reader for bit columns was removed to fix DRILL-2031.
    // It attempted to copy large runs of values directly from the decompressed
    // parquet stream into a BitVector. This was complicated by parquet not
    // always breaking a page on a row number divisible by 8. In that case the
    // batch would either have to be cut off early, or the next page would have
    // to be copied byte by byte with a bit shift to move the values into the
    // correct position (so that the value vector remains one contiguous buffer
    // of data). Because page boundaries do not line up across columns, cutting
    // off a batch at every page boundary of a bit column could be costly when
    // there are many such pages, so we opted to shift the bits when necessary.
    //
    // In the end, this was too much complexity for too little performance
    // benefit, so for now this reader uses the higher-level value-by-value
    // reader provided by the parquet library.
    for (int i = 0; i < recordsReadInThisIteration; i++) {
      valueVec.getMutator().setSafe(i + valuesReadInCurrentPass,
          pageReader.valueReader.readBoolean() ? 1 : 0);
    }
  }
}
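
// ---------------------------------------------------------------------------
// Illustrative sketch only, not Drill or parquet code: roughly the kind of
// shifted byte copy the optimization removed in DRILL-2031 would have needed
// when a page ended on a row count that was not a multiple of 8. Everything
// below (class name, method, preconditions) is hypothetical and exists only
// to clarify the comment in readField() above.
// ---------------------------------------------------------------------------
final class BitShiftCopySketch {

  /**
   * Appends {@code count} bits from {@code src} (packed LSB-first, starting at
   * bit 0) to {@code dst} at bit position {@code dstBitOffset}. When the offset
   * is not byte aligned, each source byte is split across two destination
   * bytes; this is the byte-by-byte shifting described above. Assumes
   * {@code dst} is zeroed past {@code dstBitOffset} and large enough to hold
   * {@code dstBitOffset + count} bits.
   */
  static void shiftedCopy(byte[] src, byte[] dst, int dstBitOffset, int count) {
    final int shift = dstBitOffset & 7;      // bits already used in the current dst byte
    final int dstByte = dstBitOffset >>> 3;  // first dst byte we write into
    final int srcBytes = (count + 7) >>> 3;  // whole source bytes covering 'count' bits
    for (int i = 0; i < srcBytes; i++) {
      int b = src[i] & 0xFF;
      if (i == srcBytes - 1 && (count & 7) != 0) {
        b &= (1 << (count & 7)) - 1;         // clear bits past 'count' in the final byte
      }
      dst[dstByte + i] |= b << shift;        // low part lands in the current dst byte
      if (shift != 0 && dstByte + i + 1 < dst.length) {
        // High part spills into the next byte. When dst is sized exactly, any
        // spill past the end is guaranteed zero because of the masking above.
        dst[dstByte + i + 1] |= b >>> (8 - shift);
      }
    }
  }
}
// A bit-at-a-time copy would be simpler but touches every value individually;
// the byte-wise shift keeps the copy proportional to bytes, at the cost of the
// boundary handling that ultimately made the removed optimization too complex.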