/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.orc.reader; import com.facebook.presto.orc.OrcCorruptionException; import com.facebook.presto.orc.StreamDescriptor; import com.facebook.presto.orc.metadata.ColumnEncoding; import com.facebook.presto.orc.stream.BooleanInputStream; import com.facebook.presto.orc.stream.InputStreamSource; import com.facebook.presto.orc.stream.InputStreamSources; import com.facebook.presto.orc.stream.LongInputStream; import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.block.BlockBuilderStatus; import com.facebook.presto.spi.type.Type; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.io.IOException; import java.util.Arrays; import java.util.List; import static com.facebook.presto.orc.metadata.Stream.StreamKind.DATA; import static com.facebook.presto.orc.metadata.Stream.StreamKind.DICTIONARY_DATA; import static com.facebook.presto.orc.metadata.Stream.StreamKind.IN_DICTIONARY; import static com.facebook.presto.orc.metadata.Stream.StreamKind.PRESENT; import static com.facebook.presto.orc.stream.MissingInputStreamSource.missingStreamSource; import static com.google.common.base.MoreObjects.toStringHelper; import static java.util.Objects.requireNonNull; public class LongDictionaryStreamReader implements StreamReader { private final StreamDescriptor streamDescriptor; private int readOffset; private int nextBatchSize; @Nonnull private InputStreamSource<BooleanInputStream> presentStreamSource = missingStreamSource(BooleanInputStream.class); @Nullable private BooleanInputStream presentStream; private boolean[] nullVector = new boolean[0]; @Nonnull private InputStreamSource<LongInputStream> dictionaryDataStreamSource = missingStreamSource(LongInputStream.class); private int dictionarySize; @Nonnull private long[] dictionary = new long[0]; @Nonnull private InputStreamSource<BooleanInputStream> inDictionaryStreamSource = missingStreamSource(BooleanInputStream.class); @Nullable private BooleanInputStream inDictionaryStream; private boolean[] inDictionary = new boolean[0]; @Nonnull private InputStreamSource<LongInputStream> dataStreamSource; @Nullable private LongInputStream dataStream; private long[] dataVector = new long[0]; private boolean dictionaryOpen; private boolean rowGroupOpen; public LongDictionaryStreamReader(StreamDescriptor streamDescriptor) { this.streamDescriptor = requireNonNull(streamDescriptor, "stream is null"); } @Override public void prepareNextRead(int batchSize) { readOffset += nextBatchSize; nextBatchSize = batchSize; } @Override public Block readBlock(Type type) throws IOException { if (!rowGroupOpen) { openRowGroup(); } if (readOffset > 0) { if (presentStream != null) { // skip ahead the present bit reader, but count the set bits // and use this as the skip size for the length reader readOffset = presentStream.countBitsSet(readOffset); } if (inDictionaryStream != null) { inDictionaryStream.skip(readOffset); } if (readOffset > 0) { if (dataStream == null) { throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present"); } dataStream.skip(readOffset); } } if (nullVector.length < nextBatchSize) { nullVector = new boolean[nextBatchSize]; } if (dataVector.length < nextBatchSize) { dataVector = new long[nextBatchSize]; } if (presentStream == null) { if (dataStream == null) { throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present"); } Arrays.fill(nullVector, false); dataStream.nextLongVector(nextBatchSize, dataVector); } else { int nullValues = presentStream.getUnsetBits(nextBatchSize, nullVector); if (nullValues != nextBatchSize) { if (dataStream == null) { throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present"); } dataStream.nextLongVector(nextBatchSize, dataVector, nullVector); } } if (inDictionary.length < nextBatchSize) { inDictionary = new boolean[nextBatchSize]; } if (inDictionaryStream == null) { Arrays.fill(inDictionary, true); } else { inDictionaryStream.getSetBits(nextBatchSize, inDictionary, nullVector); } BlockBuilder builder = type.createBlockBuilder(new BlockBuilderStatus(), nextBatchSize); for (int i = 0; i < nextBatchSize; i++) { if (nullVector[i]) { builder.appendNull(); } else if (inDictionary[i]) { type.writeLong(builder, dictionary[((int) dataVector[i])]); } else { type.writeLong(builder, dataVector[i]); } } readOffset = 0; nextBatchSize = 0; return builder.build(); } private void openRowGroup() throws IOException { // read the dictionary if (!dictionaryOpen && dictionarySize > 0) { if (dictionary.length < dictionarySize) { dictionary = new long[dictionarySize]; } LongInputStream dictionaryStream = dictionaryDataStreamSource.openStream(); if (dictionaryStream == null) { throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Dictionary is not empty but data stream is not present"); } dictionaryStream.nextLongVector(dictionarySize, dictionary); } dictionaryOpen = true; presentStream = presentStreamSource.openStream(); inDictionaryStream = inDictionaryStreamSource.openStream(); dataStream = dataStreamSource.openStream(); rowGroupOpen = true; } @Override public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding) throws IOException { dictionaryDataStreamSource = dictionaryStreamSources.getInputStreamSource(streamDescriptor, DICTIONARY_DATA, LongInputStream.class); dictionarySize = encoding.get(streamDescriptor.getStreamId()).getDictionarySize(); dictionaryOpen = false; inDictionaryStreamSource = missingStreamSource(BooleanInputStream.class); presentStreamSource = missingStreamSource(BooleanInputStream.class); dataStreamSource = missingStreamSource(LongInputStream.class); readOffset = 0; nextBatchSize = 0; presentStream = null; inDictionaryStream = null; dataStream = null; rowGroupOpen = false; } @Override public void startRowGroup(InputStreamSources dataStreamSources) throws IOException { presentStreamSource = dataStreamSources.getInputStreamSource(streamDescriptor, PRESENT, BooleanInputStream.class); inDictionaryStreamSource = dataStreamSources.getInputStreamSource(streamDescriptor, IN_DICTIONARY, BooleanInputStream.class); dataStreamSource = dataStreamSources.getInputStreamSource(streamDescriptor, DATA, LongInputStream.class); readOffset = 0; nextBatchSize = 0; presentStream = null; inDictionaryStream = null; dataStream = null; rowGroupOpen = false; } @Override public String toString() { return toStringHelper(this) .addValue(streamDescriptor) .toString(); } }