/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.io.reader.impl.v1;

import com.linkedin.pinot.core.io.compression.ChunkDecompressor;
import com.linkedin.pinot.core.io.reader.impl.ChunkReaderContext;
import com.linkedin.pinot.core.io.writer.impl.v1.VarByteChunkSingleValueWriter;
import com.linkedin.pinot.core.segment.memory.PinotDataBuffer;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;


/**
 * Reader class for data written out by {@link VarByteChunkSingleValueWriter}.
 * For the data layout, please refer to the documentation for {@link VarByteChunkSingleValueWriter}.
 */
public class VarByteChunkSingleValueReader extends BaseChunkSingleValueReader {
  private static final Charset UTF_8 = Charset.forName("UTF-8");

  private final int _maxChunkSize;

  // Thread local (reusable) byte[] to read bytes from the data file.
  private final ThreadLocal<byte[]> _reusableBytes = new ThreadLocal<byte[]>() {
    @Override
    protected byte[] initialValue() {
      return new byte[_lengthOfLongestEntry];
    }
  };

  /**
   * Constructor for the class.
   *
   * @param pinotDataBuffer Data buffer to read from
   * @param uncompressor Chunk decompressor used to uncompress chunks
   * @throws IOException
   */
  public VarByteChunkSingleValueReader(PinotDataBuffer pinotDataBuffer, ChunkDecompressor uncompressor)
      throws IOException {
    super(pinotDataBuffer, uncompressor);

    // Each chunk starts with a header of one int (row offset) per document in the chunk.
    int chunkHeaderSize = _numDocsPerChunk * INT_SIZE;
    _maxChunkSize = chunkHeaderSize + (_lengthOfLongestEntry * _numDocsPerChunk);
  }

  @Override
  public String getString(int row, ChunkReaderContext context) {
    int chunkRowId = row % _numDocsPerChunk;
    ByteBuffer chunkBuffer = getChunkForRow(row, context);

    // Offset of this row's value within the chunk, read from the chunk header.
    int rowOffset = chunkBuffer.getInt(chunkRowId * INT_SIZE);
    int nextRowOffset;

    if (chunkRowId == _numDocsPerChunk - 1) {
      // Last row in this chunk.
      nextRowOffset = chunkBuffer.limit();
    } else {
      nextRowOffset = chunkBuffer.getInt((chunkRowId + 1) * INT_SIZE);
      // For incomplete chunks, the next row's offset will be 0, as the row offset for absent rows is 0.
      if (nextRowOffset == 0) {
        nextRowOffset = chunkBuffer.limit();
      }
    }

    int length = nextRowOffset - rowOffset;
    ByteBuffer byteBuffer = chunkBuffer.duplicate();
    byteBuffer.position(rowOffset);

    byte[] bytes = _reusableBytes.get();
    byteBuffer.get(bytes, 0, length);
    return new String(bytes, 0, length, UTF_8);
  }

  @Override
  public ChunkReaderContext createContext() {
    return new ChunkReaderContext(_maxChunkSize);
  }
}
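
// ---------------------------------------------------------------------------
// Minimal usage sketch (not part of the original source). How the
// PinotDataBuffer and ChunkDecompressor are obtained is an assumption here
// and depends on the surrounding segment-loading code; only the reader's own
// API (constructor, createContext(), getString()) is taken from the class above.
//
//   PinotDataBuffer dataBuffer = ...;       // e.g. the mmap'ed forward-index buffer for the column
//   ChunkDecompressor decompressor = ...;   // decompressor matching the compression used by the writer
//   VarByteChunkSingleValueReader reader = new VarByteChunkSingleValueReader(dataBuffer, decompressor);
//
//   // The context caches the last decompressed chunk, so sequential reads
//   // within the same chunk avoid repeated decompression. It is not
//   // thread-safe; create one per reading thread.
//   ChunkReaderContext context = reader.createContext();
//   for (int docId = 0; docId < numDocs; docId++) {
//     String value = reader.getString(docId, context);
//     // ... process value ...
//   }
// ---------------------------------------------------------------------------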