/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.hive.parquet.reader; import com.facebook.presto.hive.parquet.ParquetDataPage; import com.facebook.presto.hive.parquet.ParquetDataPageV1; import com.facebook.presto.hive.parquet.ParquetDataPageV2; import com.facebook.presto.hive.parquet.ParquetDictionaryPage; import parquet.hadoop.metadata.CompressionCodecName; import java.io.IOException; import java.util.LinkedList; import java.util.List; import static com.facebook.presto.hive.parquet.ParquetCompressionUtils.decompress; import static java.lang.Math.toIntExact; class ParquetPageReader { private final CompressionCodecName codec; private final long valueCount; private final List<ParquetDataPage> compressedPages; private final ParquetDictionaryPage compressedDictionaryPage; public ParquetPageReader(CompressionCodecName codec, List<ParquetDataPage> compressedPages, ParquetDictionaryPage compressedDictionaryPage) { this.codec = codec; this.compressedPages = new LinkedList<>(compressedPages); this.compressedDictionaryPage = compressedDictionaryPage; int count = 0; for (ParquetDataPage page : compressedPages) { count += page.getValueCount(); } this.valueCount = count; } public long getTotalValueCount() { return valueCount; } public ParquetDataPage readPage() { if (compressedPages.isEmpty()) { return null; } ParquetDataPage compressedPage = compressedPages.remove(0); try { if (compressedPage instanceof ParquetDataPageV1) { ParquetDataPageV1 dataPageV1 = (ParquetDataPageV1) compressedPage; return new ParquetDataPageV1( decompress(codec, dataPageV1.getSlice(), dataPageV1.getUncompressedSize()), dataPageV1.getValueCount(), dataPageV1.getUncompressedSize(), dataPageV1.getStatistics(), dataPageV1.getRepetitionLevelEncoding(), dataPageV1.getDefinitionLevelEncoding(), dataPageV1.getValueEncoding()); } else { ParquetDataPageV2 dataPageV2 = (ParquetDataPageV2) compressedPage; if (!dataPageV2.isCompressed()) { return dataPageV2; } int uncompressedSize = toIntExact(dataPageV2.getUncompressedSize() - dataPageV2.getDefinitionLevels().length() - dataPageV2.getRepetitionLevels().length()); return new ParquetDataPageV2( dataPageV2.getRowCount(), dataPageV2.getNullCount(), dataPageV2.getValueCount(), dataPageV2.getRepetitionLevels(), dataPageV2.getDefinitionLevels(), dataPageV2.getDataEncoding(), decompress(codec, dataPageV2.getSlice(), uncompressedSize), dataPageV2.getUncompressedSize(), dataPageV2.getStatistics(), false); } } catch (IOException e) { throw new RuntimeException("Could not decompress page", e); } } public ParquetDictionaryPage readDictionaryPage() { if (compressedDictionaryPage == null) { return null; } try { return new ParquetDictionaryPage( decompress(codec, compressedDictionaryPage.getSlice(), compressedDictionaryPage.getUncompressedSize()), compressedDictionaryPage.getDictionarySize(), compressedDictionaryPage.getEncoding()); } catch (IOException e) { throw new RuntimeException("Error reading dictionary page", e); } } }