/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.common.io.encoded;

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A block of data for a given section of a file, similar to VRB but in encoded form.
 * Stores a set of buffers for each encoded stream that is a part of each column.
 *
 * @param <BatchKey> type of the key used to map a batch to its source location
 */
public class EncodedColumnBatch<BatchKey> {
  private static final Logger LOG = LoggerFactory.getLogger(EncodedColumnBatch.class);

  /**
   * Slice of the data for a stream for some column, stored inside MemoryBuffer's.
   * ColumnStreamData can be reused for many EncodedColumnBatch-es (e.g. dictionary stream), so
   * it tracks the number of such users via a refcount.
   */
  public static class ColumnStreamData {
    /** Buffers backing this stream slice; null until {@link #setCacheBuffers(List)} is called. */
    private List<MemoryBuffer> cacheBuffers;

    /** Base offset from the beginning of the indexable unit; for example, for ORC,
     * offset from the CB in a compressed file, from the stream in uncompressed file. */
    private int indexBaseOffset = 0;

    /** Reference count. */
    private final AtomicInteger refCount = new AtomicInteger(0);

    /**
     * Returns this object to its initial state: the buffer list is emptied and both the
     * reference count and the index base offset are set back to zero.
     *
     * <p>The list is cleared in place, so the list instance that was handed to
     * {@link #setCacheBuffers(List)} is itself emptied.
     */
    public void reset() {
      // The list is null until setCacheBuffers() is called; resetting such an instance
      // must not fail, there is simply nothing to clear.
      if (cacheBuffers != null) {
        cacheBuffers.clear();
      }
      refCount.set(0);
      indexBaseOffset = 0;
    }

    /** Increments the reference count by one. */
    public void incRef() {
      refCount.incrementAndGet();
    }

    /**
     * Decrements the reference count by one.
     *
     * @return the reference count after the decrement
     */
    public int decRef() {
      int remaining = refCount.decrementAndGet();
      // Only checked when assertions are enabled: a negative count means unbalanced decRef calls.
      assert remaining >= 0;
      return remaining;
    }

    /**
     * @return the buffer list as stored (not a copy); may be null if none has been set
     */
    public List<MemoryBuffer> getCacheBuffers() {
      return cacheBuffers;
    }

    /**
     * Stores the given list by reference; no copy is made.
     *
     * @param cacheBuffers the buffers that back this stream slice
     */
    public void setCacheBuffers(List<MemoryBuffer> cacheBuffers) {
      this.cacheBuffers = cacheBuffers;
    }

    /**
     * @return the base offset from the beginning of the indexable unit
     */
    public int getIndexBaseOffset() {
      return indexBaseOffset;
    }

    /**
     * @param indexBaseOffset the base offset from the beginning of the indexable unit
     */
    public void setIndexBaseOffset(int indexBaseOffset) {
      this.indexBaseOffset = indexBaseOffset;
    }

    /**
     * Describes each buffer by its simple class name and the number of bytes remaining in its
     * raw byte buffer, followed by the index base offset.
     */
    @Override
    public String toString() {
      StringBuilder text = new StringBuilder("ColumnStreamData [cacheBuffers=[");
      if (cacheBuffers != null) {
        for (MemoryBuffer mb : cacheBuffers) {
          text.append(mb.getClass().getSimpleName())
              .append(" with ")
              .append(mb.getByteBufferRaw().remaining())
              .append(" bytes, ");
        }
      }
      return text.append("], indexBaseOffset=").append(indexBaseOffset).append("]").toString();
    }
  }

  /** The key that is used to map this batch to source location. */
  protected BatchKey batchKey;

  /**
   * Stream data for each column that has true in the corresponding hasData position.
   * For each column, streams are indexed by kind (for ORC), with missing elements being null.
   */
  protected ColumnStreamData[][] columnData;

  /** Indicates which columns have data. Correspond to columnData elements. */
  protected boolean[] hasData;

  /**
   * Marks every column as having no data and nulls out every stream slot. The per-column
   * stream arrays themselves are kept, only their elements are cleared. Does nothing for
   * whichever of the two arrays has not been allocated yet.
   */
  public void reset() {
    if (hasData != null) {
      Arrays.fill(hasData, false);
    }
    if (columnData == null) {
      return;
    }
    for (ColumnStreamData[] streams : columnData) {
      if (streams != null) {
        Arrays.fill(streams, null);
      }
    }
  }

  /**
   * Marks the column as having data and makes sure its stream array has exactly
   * {@code streamCount} slots. An existing array of the right length is kept as is (its
   * elements are not cleared); otherwise a new, empty array is allocated.
   *
   * @param colIx index of the column
   * @param streamCount number of stream slots the column needs
   */
  public void initColumn(int colIx, int streamCount) {
    hasData[colIx] = true;
    if (columnData[colIx] == null || columnData[colIx].length != streamCount) {
      columnData[colIx] = new ColumnStreamData[streamCount];
    }
  }

  /**
   * Stores the stream data in the given slot of the given column. With assertions enabled,
   * fails if the column has not been marked as having data.
   *
   * @param colIx index of the column
   * @param streamIx index of the stream slot within the column
   * @param csd the stream data to store
   */
  public void setStreamData(int colIx, int streamIx, ColumnStreamData csd) {
    assert hasData[colIx];
    columnData[colIx][streamIx] = csd;
  }

  /**
   * @return the key that maps this batch to its source location
   */
  public BatchKey getBatchKey() {
    return batchKey;
  }

  /**
   * Returns the stream array of a column that has data.
   *
   * @param colIx index of the column
   * @return the column's stream array (not a copy)
   * @throws AssertionError if the column is not marked as having data
   */
  public ColumnStreamData[] getColumnData(int colIx) {
    if (!hasData[colIx]) {
      throw new AssertionError("No data for column " + colIx);
    }
    return columnData[colIx];
  }

  /**
   * @return the total number of columns, including the columns that have no data
   */
  public int getTotalColCount() {
    return columnData.length; // Includes the columns that have no data
  }

  /**
   * Prepares the column arrays for a batch with {@code columnCount} columns.
   *
   * <p>If {@code hasData} already has that length, it is only cleared and {@code columnData}
   * is left untouched. Otherwise both arrays are reallocated, and the per-column stream arrays
   * that fit into the new size are carried over so they can be reused.
   *
   * @param columnCount the number of columns the batch must accommodate
   */
  protected void resetColumnArrays(int columnCount) {
    if (hasData != null && columnCount == hasData.length) {
      Arrays.fill(hasData, false);
      return;
    }
    hasData = new boolean[columnCount];
    ColumnStreamData[][] resized = new ColumnStreamData[columnCount][];
    if (columnData != null) {
      // Keep the already allocated per-column arrays for the columns both sizes share.
      System.arraycopy(columnData, 0, resized, 0, Math.min(columnCount, columnData.length));
    }
    columnData = resized;
  }

  /**
   * @param colIx index of the column
   * @return whether the column is marked as having data
   */
  public boolean hasData(int colIx) {
    return hasData[colIx];
  }
}