/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package parquet.column.page.mem;

import static parquet.Log.DEBUG;
import static parquet.bytes.BytesInput.copy;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import parquet.Log;
import parquet.bytes.BytesInput;
import parquet.column.Encoding;
import parquet.column.page.DataPageV1;
import parquet.column.page.DataPageV2;
import parquet.column.page.DictionaryPage;
import parquet.column.page.DataPage;
import parquet.column.page.PageWriter;
import parquet.column.statistics.Statistics;
import parquet.io.ParquetEncodingException;

/**
 * A {@link PageWriter} that keeps everything it is given in memory.
 *
 * <p>Each written data page is copied and appended to an internal list, and at most one
 * dictionary page is retained. Running totals of the written byte sizes and value counts
 * are maintained alongside.
 */
public class MemPageWriter implements PageWriter {
  private static final Log LOG = Log.getLog(MemPageWriter.class);

  /** Data pages in the order they were written. */
  private final List<DataPage> pages = new ArrayList<DataPage>();
  /** The single dictionary page, or {@code null} if none has been written. */
  private DictionaryPage dictionaryPage;
  /** Sum of the byte sizes of every page (data and dictionary) stored so far. */
  private long memSize = 0;
  /** Sum of the value counts of every data page stored so far. */
  private long totalValueCount = 0;

  /**
   * Stores a copy of the given bytes as a V1 data page.
   *
   * @param bytesInput the page bytes; copied before being stored
   * @param valueCount number of values in the page; must not be 0
   * @param statistics statistics to attach to the page
   * @param rlEncoding encoding passed to the page as the repetition level encoding
   * @param dlEncoding encoding passed to the page as the definition level encoding
   * @param valuesEncoding encoding passed to the page as the values encoding
   * @throws ParquetEncodingException if {@code valueCount} is 0 or the page size does not fit in an int
   * @throws IOException if copying the bytes fails
   */
  @Override
  public void writePage(BytesInput bytesInput, int valueCount, Statistics statistics, Encoding rlEncoding, Encoding dlEncoding, Encoding valuesEncoding)
      throws IOException {
    if (valueCount == 0) {
      throw new ParquetEncodingException("illegal page of 0 values");
    }
    long size = bytesInput.size();
    // The page constructor takes the size as an int; refuse rather than silently truncate.
    if (size > Integer.MAX_VALUE) {
      throw new ParquetEncodingException("page of " + size + " bytes is too large: size must fit in an int");
    }
    // Build the page before touching any counter so that a failed copy leaves this writer unchanged.
    DataPage page = new DataPageV1(copy(bytesInput), valueCount, (int) size, statistics, rlEncoding, dlEncoding, valuesEncoding);
    pages.add(page);
    memSize += size;
    totalValueCount += valueCount;
    if (DEBUG) {
      LOG.debug("page written for " + size + " bytes and " + valueCount + " records");
    }
  }

  /**
   * Stores copies of the given level and data bytes as an uncompressed V2 data page.
   *
   * @param rowCount row count to record on the page
   * @param nullCount null count to record on the page
   * @param valueCount number of values in the page; must not be 0
   * @param repetitionLevels repetition level bytes; copied before being stored
   * @param definitionLevels definition level bytes; copied before being stored
   * @param dataEncoding encoding passed to the page as the data encoding
   * @param data the data bytes; copied before being stored
   * @param statistics statistics to attach to the page
   * @throws ParquetEncodingException if {@code valueCount} is 0
   * @throws IOException if copying any of the inputs fails
   */
  @Override
  public void writePageV2(int rowCount, int nullCount, int valueCount,
      BytesInput repetitionLevels, BytesInput definitionLevels,
      Encoding dataEncoding, BytesInput data, Statistics<?> statistics) throws IOException {
    if (valueCount == 0) {
      throw new ParquetEncodingException("illegal page of 0 values");
    }
    long size = repetitionLevels.size() + definitionLevels.size() + data.size();
    // Build the page before touching any counter so that a failed copy leaves this writer unchanged.
    DataPage page = DataPageV2.uncompressed(
        rowCount, nullCount, valueCount,
        copy(repetitionLevels), copy(definitionLevels),
        dataEncoding, copy(data), statistics);
    pages.add(page);
    memSize += size;
    totalValueCount += valueCount;
    if (DEBUG) {
      LOG.debug("page written for " + size + " bytes and " + valueCount + " records");
    }
  }

  /**
   * @return the total byte size of all pages stored so far
   */
  @Override
  public long getMemSize() {
    return memSize;
  }

  /**
   * @return the internal list of stored data pages, in write order (not a copy)
   */
  public List<DataPage> getPages() {
    return pages;
  }

  /**
   * @return the stored dictionary page, or {@code null} if none has been written
   */
  public DictionaryPage getDictionaryPage() {
    return dictionaryPage;
  }

  /**
   * @return the sum of the value counts of all data pages stored so far
   */
  public long getTotalValueCount() {
    return totalValueCount;
  }

  /**
   * @return the same value as {@link #getMemSize()}
   */
  @Override
  public long allocatedSize() {
    // this store keeps only the bytes written
    return memSize;
  }

  /**
   * Stores a copy of the given dictionary page. Only one dictionary page may be written.
   *
   * @param dictionaryPage the dictionary page to copy and retain
   * @throws ParquetEncodingException if a dictionary page has already been written
   * @throws IOException if copying the page fails
   */
  @Override
  public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
    if (this.dictionaryPage != null) {
      throw new ParquetEncodingException("Only one dictionary page per block");
    }
    long size = dictionaryPage.getBytes().size();
    // Copy before updating memSize so that a failed copy leaves this writer unchanged.
    DictionaryPage copied = dictionaryPage.copy();
    this.dictionaryPage = copied;
    this.memSize += size;
    if (DEBUG) {
      LOG.debug("dictionary page written for " + size + " bytes and " + dictionaryPage.getDictionarySize() + " records");
    }
  }

  /**
   * Formats the current memory usage for display.
   *
   * @param prefix text placed before the byte count
   * @return {@code prefix}, a space, the byte total with grouping separators, and {@code " bytes"}
   */
  @Override
  public String memUsageString(String prefix) {
    return String.format("%s %,d bytes", prefix, memSize);
  }
}