/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.llap.io.metadata;
import com.google.common.annotations.VisibleForTesting;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator;
import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator;
import org.apache.hadoop.hive.llap.cache.EvictionDispatcher;
import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer;
import org.apache.hadoop.hive.ql.io.SyntheticFileId;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.orc.CompressionKind;
import org.apache.orc.FileMetadata;
import org.apache.orc.OrcProto;
import org.apache.orc.OrcUtils;
import org.apache.orc.StripeInformation;
import org.apache.orc.TypeDescription;
import org.apache.orc.impl.ReaderImpl;
/** ORC file metadata. Currently contains some duplicate info due to how different parts
* of ORC use different info. Ideally we would get rid of protobuf structs in code beyond reading,
* or instead use protobuf structs everywhere instead of the mix of things like now.
*/
public final class OrcFileMetadata extends LlapCacheableBuffer
implements FileMetadata, ConsumerFileMetadata {
private final List<StripeInformation> stripes;
private final List<Integer> versionList;
private final List<OrcProto.StripeStatistics> stripeStats;
private final List<OrcProto.Type> types;
private final List<OrcProto.ColumnStatistics> fileStats;
private final Object fileKey;
private final CompressionKind compressionKind;
private final int rowIndexStride;
private final int compressionBufferSize;
private final int metadataSize;
private final int writerVersionNum;
private final long contentLength;
private final long numberOfRows;
private final boolean isOriginalFormat;
private final int estimatedMemUsage;
private final static HashMap<Class<?>, ObjectEstimator> SIZE_ESTIMATORS;
private final static ObjectEstimator SIZE_ESTIMATOR;
static {
OrcFileMetadata ofm = createDummy(new SyntheticFileId());
SIZE_ESTIMATORS = IncrementalObjectSizeEstimator.createEstimators(ofm);
IncrementalObjectSizeEstimator.addEstimator(
"com.google.protobuf.LiteralByteString", SIZE_ESTIMATORS);
// Add long for the regular file ID estimation.
IncrementalObjectSizeEstimator.createEstimators(Long.class, SIZE_ESTIMATORS);
SIZE_ESTIMATOR = SIZE_ESTIMATORS.get(OrcFileMetadata.class);
}
@VisibleForTesting
public static OrcFileMetadata createDummy(Object fileKey) {
OrcFileMetadata ofm = new OrcFileMetadata(fileKey);
ofm.stripes.add(new ReaderImpl.StripeInformationImpl(
OrcProto.StripeInformation.getDefaultInstance()));
ofm.fileStats.add(OrcProto.ColumnStatistics.getDefaultInstance());
ofm.stripeStats.add(OrcProto.StripeStatistics.newBuilder().addColStats(createStatsDummy()).build());
ofm.types.add(OrcProto.Type.newBuilder().addFieldNames("a").addSubtypes(0).build());
ofm.versionList.add(0);
return ofm;
}
static OrcProto.ColumnStatistics.Builder createStatsDummy() {
return OrcProto.ColumnStatistics.newBuilder().setBucketStatistics(
OrcProto.BucketStatistics.newBuilder().addCount(0)).setStringStatistics(
OrcProto.StringStatistics.newBuilder().setMaximum("zzz"));
}
// Ctor for memory estimation and tests
private OrcFileMetadata(Object fileKey) {
this.fileKey = fileKey;
stripes = new ArrayList<StripeInformation>();
versionList = new ArrayList<Integer>();
fileStats = new ArrayList<>();
stripeStats = new ArrayList<>();
types = new ArrayList<>();
writerVersionNum = metadataSize = compressionBufferSize = rowIndexStride = 0;
contentLength = numberOfRows = 0;
estimatedMemUsage = 0;
isOriginalFormat = false;
compressionKind = CompressionKind.NONE;
}
public OrcFileMetadata(Object fileKey, Reader reader) {
this.fileKey = fileKey;
this.stripeStats = reader.getOrcProtoStripeStatistics();
this.compressionKind = reader.getCompressionKind();
this.compressionBufferSize = reader.getCompressionSize();
this.stripes = reader.getStripes();
this.isOriginalFormat = OrcInputFormat.isOriginal(reader);
this.writerVersionNum = reader.getWriterVersion().getId();
this.versionList = reader.getVersionList();
this.metadataSize = reader.getMetadataSize();
this.types = reader.getTypes();
this.rowIndexStride = reader.getRowIndexStride();
this.contentLength = reader.getContentLength();
this.numberOfRows = reader.getNumberOfRows();
this.fileStats = reader.getOrcProtoFileStatistics();
this.estimatedMemUsage = SIZE_ESTIMATOR.estimate(this, SIZE_ESTIMATORS);
}
// LlapCacheableBuffer
@Override
public void notifyEvicted(EvictionDispatcher evictionDispatcher) {
evictionDispatcher.notifyEvicted(this);
}
@Override
protected boolean invalidate() {
return true; // relies on GC, so it can always be evicted now.
}
@Override
public long getMemoryUsage() {
return estimatedMemUsage;
}
@Override
protected boolean isLocked() {
return false;
}
// FileMetadata
@Override
public List<OrcProto.Type> getTypes() {
return types;
}
@Override
public boolean isOriginalFormat() {
return isOriginalFormat;
}
@Override
public List<StripeInformation> getStripes() {
return stripes;
}
@Override
public CompressionKind getCompressionKind() {
return compressionKind;
}
@Override
public int getCompressionBufferSize() {
return compressionBufferSize;
}
@Override
public int getRowIndexStride() {
return rowIndexStride;
}
@Override
public int getColumnCount() {
return types.size();
}
@Override
public int getFlattenedColumnCount() {
return types.get(0).getSubtypesCount();
}
@Override
public Object getFileKey() {
return fileKey;
}
@Override
public List<Integer> getVersionList() {
return versionList;
}
@Override
public int getMetadataSize() {
return metadataSize;
}
@Override
public int getWriterVersionNum() {
return writerVersionNum;
}
@Override
public List<OrcProto.StripeStatistics> getStripeStats() {
return stripeStats;
}
@Override
public long getContentLength() {
return contentLength;
}
@Override
public long getNumberOfRows() {
return numberOfRows;
}
@Override
public List<OrcProto.ColumnStatistics> getFileStats() {
return fileStats;
}
@Override
public int getStripeCount() {
return stripes.size();
}
public TypeDescription getSchema() {
return OrcUtils.convertTypeFromProtobuf(this.types, 0);
}
}