/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.llap.io.metadata; import com.google.common.annotations.VisibleForTesting; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator; import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator; import org.apache.hadoop.hive.llap.cache.EvictionDispatcher; import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer; import org.apache.hadoop.hive.ql.io.SyntheticFileId; import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.hive.ql.io.orc.encoded.OrcBatchKey; import org.apache.orc.DataReader; import org.apache.orc.OrcProto; import org.apache.orc.OrcProto.RowIndexEntry; import org.apache.orc.StripeInformation; import org.apache.orc.TypeDescription; import org.apache.orc.impl.OrcIndex; public class OrcStripeMetadata extends LlapCacheableBuffer implements ConsumerStripeMetadata { private final TypeDescription schema; private final OrcBatchKey stripeKey; private final List<OrcProto.ColumnEncoding> encodings; private final List<OrcProto.Stream> streams; private final String writerTimezone; private final long rowCount; private OrcIndex rowIndex; private OrcFile.WriterVersion writerVersion; private final int estimatedMemUsage; private final static HashMap<Class<?>, ObjectEstimator> SIZE_ESTIMATORS; private final static ObjectEstimator SIZE_ESTIMATOR; static { OrcStripeMetadata osm = createDummy(new SyntheticFileId()); SIZE_ESTIMATORS = IncrementalObjectSizeEstimator.createEstimators(osm); IncrementalObjectSizeEstimator.addEstimator( "com.google.protobuf.LiteralByteString", SIZE_ESTIMATORS); // Add long for the regular file ID estimation. IncrementalObjectSizeEstimator.createEstimators(Long.class, SIZE_ESTIMATORS); SIZE_ESTIMATOR = SIZE_ESTIMATORS.get(OrcStripeMetadata.class); } public OrcStripeMetadata(OrcBatchKey stripeKey, DataReader mr, StripeInformation stripe, boolean[] includes, boolean[] sargColumns, TypeDescription schema, OrcFile.WriterVersion writerVersion) throws IOException { this.schema = schema; this.stripeKey = stripeKey; OrcProto.StripeFooter footer = mr.readStripeFooter(stripe); streams = footer.getStreamsList(); encodings = footer.getColumnsList(); writerTimezone = footer.getWriterTimezone(); rowCount = stripe.getNumberOfRows(); rowIndex = mr.readRowIndex(stripe, schema, footer, true, includes, null, sargColumns, writerVersion, null, null); estimatedMemUsage = SIZE_ESTIMATOR.estimate(this, SIZE_ESTIMATORS); this.writerVersion = writerVersion; } private OrcStripeMetadata(Object id) { stripeKey = new OrcBatchKey(id, 0, 0); encodings = new ArrayList<>(); streams = new ArrayList<>(); writerTimezone = ""; schema = TypeDescription.fromString("struct<x:int>"); rowCount = estimatedMemUsage = 0; } @VisibleForTesting public static OrcStripeMetadata createDummy(Object id) { OrcStripeMetadata dummy = new OrcStripeMetadata(id); dummy.encodings.add(OrcProto.ColumnEncoding.getDefaultInstance()); dummy.streams.add(OrcProto.Stream.getDefaultInstance()); OrcProto.RowIndex ri = OrcProto.RowIndex.newBuilder().addEntry( OrcProto.RowIndexEntry.newBuilder().addPositions(1).setStatistics( OrcFileMetadata.createStatsDummy())).build(); OrcProto.BloomFilterIndex bfi = OrcProto.BloomFilterIndex.newBuilder().addBloomFilter( OrcProto.BloomFilter.newBuilder().addBitset(0)).build(); dummy.rowIndex = new OrcIndex( new OrcProto.RowIndex[] { ri }, new OrcProto.Stream.Kind[] { OrcProto.Stream.Kind.BLOOM_FILTER_UTF8 }, new OrcProto.BloomFilterIndex[] { bfi }); return dummy; } public boolean hasAllIndexes(boolean[] includes) { for (int i = 0; i < includes.length; ++i) { if (includes[i] && rowIndex.getRowGroupIndex()[i] == null) return false; } return true; } public void loadMissingIndexes(DataReader mr, StripeInformation stripe, boolean[] includes, boolean[] sargColumns) throws IOException { // Do not loose the old indexes. Create a super set includes OrcProto.RowIndex[] existing = getRowIndexes(); boolean superset[] = new boolean[Math.max(existing.length, includes.length)]; for (int i = 0; i < includes.length; i++) { superset[i] = includes[i]; } for (int i = 0; i < existing.length; i++) { superset[i] = superset[i] || (existing[i] != null); } // TODO: should we save footer to avoid a read here? rowIndex = mr.readRowIndex(stripe, schema, null, true, includes, rowIndex.getRowGroupIndex(), sargColumns, writerVersion, rowIndex.getBloomFilterKinds(), rowIndex.getBloomFilterIndex()); // TODO: theoretically, we should re-estimate memory usage here and update memory manager } public int getStripeIx() { return stripeKey.stripeIx; } public OrcProto.RowIndex[] getRowIndexes() { return rowIndex.getRowGroupIndex(); } public OrcProto.Stream.Kind[] getBloomFilterKinds() { return rowIndex.getBloomFilterKinds(); } public OrcProto.BloomFilterIndex[] getBloomFilterIndexes() { return rowIndex.getBloomFilterIndex(); } public List<OrcProto.ColumnEncoding> getEncodings() { return encodings; } public List<OrcProto.Stream> getStreams() { return streams; } public String getWriterTimezone() { return writerTimezone; } @Override public long getMemoryUsage() { return estimatedMemUsage; } @Override public void notifyEvicted(EvictionDispatcher evictionDispatcher) { evictionDispatcher.notifyEvicted(this); } @Override protected boolean invalidate() { return true; } @Override protected boolean isLocked() { return false; } public OrcBatchKey getKey() { return stripeKey; } public long getRowCount() { return rowCount; } @VisibleForTesting public void resetRowIndex() { rowIndex = null; } @Override public RowIndexEntry getRowIndexEntry(int colIx, int rgIx) { return rowIndex.getRowGroupIndex()[colIx].getEntry(rgIx); } @Override public boolean supportsRowIndexes() { return true; } }