/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.llap.cache; import static org.junit.Assert.*; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.HashMap; import java.util.List; import java.util.ArrayList; import java.util.LinkedHashSet; import org.apache.hadoop.hive.common.io.DiskRangeList; import org.apache.orc.DataReader; import org.apache.orc.OrcFile; import org.apache.orc.TypeDescription; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator; import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator; import org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata; import org.apache.hadoop.hive.llap.io.metadata.OrcStripeMetadata; import org.apache.orc.impl.OrcIndex; import org.apache.orc.StripeInformation; import org.apache.hadoop.hive.ql.io.orc.encoded.OrcBatchKey; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.orc.OrcProto; import org.junit.Test; import org.mockito.Mockito; import com.google.protobuf.CodedOutputStream; public class TestIncrementalObjectSizeEstimator { private static final Logger LOG = LoggerFactory.getLogger(TestIncrementalObjectSizeEstimator.class); private static class DummyMetadataReader implements DataReader { public boolean doStreamStep = false; public boolean isEmpty; @Override public void open() throws IOException { } @Override public OrcIndex readRowIndex(StripeInformation stripe, TypeDescription fileSchema, OrcProto.StripeFooter footer, boolean ignoreNonUtf8BloomFilter, boolean[] included, OrcProto.RowIndex[] indexes, boolean[] sargColumns, OrcFile.WriterVersion version, OrcProto.Stream.Kind[] bloomFilterKinds, OrcProto.BloomFilterIndex[] bloomFilterIndices ) throws IOException { if (isEmpty) { return new OrcIndex(new OrcProto.RowIndex[] { }, bloomFilterKinds, new OrcProto.BloomFilterIndex[] { }); } OrcProto.ColumnStatistics cs = OrcProto.ColumnStatistics.newBuilder() .setBucketStatistics(OrcProto.BucketStatistics.newBuilder().addCount(0)) .setStringStatistics(OrcProto.StringStatistics.newBuilder().setMaximum("zzz").setMinimum("aaa")) .setBinaryStatistics(OrcProto.BinaryStatistics.newBuilder().setSum(5)) .setDateStatistics(OrcProto.DateStatistics.newBuilder().setMinimum(4545).setMaximum(6656)) .setDecimalStatistics(OrcProto.DecimalStatistics.newBuilder().setMaximum("zzz").setMinimum("aaa")) .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder().setMinimum(0.5).setMaximum(1.5)) .setIntStatistics(OrcProto.IntegerStatistics.newBuilder().setMaximum(10).setMinimum(5)) .setTimestampStatistics(OrcProto.TimestampStatistics.newBuilder().setMaximum(10)).build(); OrcProto.RowIndex ri = OrcProto.RowIndex.newBuilder() .addEntry(OrcProto.RowIndexEntry.newBuilder().addPositions(1)) .addEntry(OrcProto.RowIndexEntry.newBuilder().addPositions(0).addPositions(2).setStatistics(cs)) .build(); OrcProto.RowIndex ri2 = OrcProto.RowIndex.newBuilder() .addEntry(OrcProto.RowIndexEntry.newBuilder().addPositions(3)) .build(); OrcProto.BloomFilterIndex bfi = OrcProto.BloomFilterIndex.newBuilder().addBloomFilter( OrcProto.BloomFilter.newBuilder().addBitset(0).addBitset(1)).build(); if (doStreamStep) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); CodedOutputStream cos = CodedOutputStream.newInstance(baos); ri.writeTo(cos); cos.flush(); ri = OrcProto.RowIndex.newBuilder().mergeFrom(baos.toByteArray()).build(); baos = new ByteArrayOutputStream(); cos = CodedOutputStream.newInstance(baos); ri2.writeTo(cos); cos.flush(); ri2 = OrcProto.RowIndex.newBuilder().mergeFrom(baos.toByteArray()).build(); baos = new ByteArrayOutputStream(); cos = CodedOutputStream.newInstance(baos); bfi.writeTo(cos); cos.flush(); bfi = OrcProto.BloomFilterIndex.newBuilder().mergeFrom(baos.toByteArray()).build(); } return new OrcIndex( new OrcProto.RowIndex[] { ri, ri2 }, bloomFilterKinds, new OrcProto.BloomFilterIndex[] { bfi }); } @Override public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException { OrcProto.StripeFooter.Builder fb = OrcProto.StripeFooter.newBuilder(); if (!isEmpty) { fb.addStreams(OrcProto.Stream.newBuilder().setColumn(0).setLength(20).setKind(OrcProto.Stream.Kind.LENGTH)) .addStreams(OrcProto.Stream.newBuilder().setColumn(0).setLength(40).setKind(OrcProto.Stream.Kind.DATA)) .addColumns(OrcProto.ColumnEncoding.newBuilder().setDictionarySize(10).setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2)); } OrcProto.StripeFooter footer = fb.build(); if (doStreamStep) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); CodedOutputStream cos = CodedOutputStream.newInstance(baos); footer.writeTo(cos); cos.flush(); footer = OrcProto.StripeFooter.newBuilder().mergeFrom(baos.toByteArray()).build(); } return footer; } @Override public DiskRangeList readFileData(DiskRangeList range, long baseOffset, boolean doForceDirect) throws IOException { return null; } @Override public boolean isTrackingDiskRanges() { return false; } @Override public void releaseBuffer(ByteBuffer toRelease) { } @Override public DataReader clone() { return null; } @Override public void close() throws IOException { } } @Test public void testMetadata() throws IOException { // Mostly tests that it doesn't crash. OrcStripeMetadata osm = OrcStripeMetadata.createDummy(0); HashMap<Class<?>, ObjectEstimator> map = IncrementalObjectSizeEstimator.createEstimators(osm); IncrementalObjectSizeEstimator.addEstimator("com.google.protobuf.LiteralByteString", map); ObjectEstimator root = map.get(OrcStripeMetadata.class); LOG.info("Estimated " + root.estimate(osm, map) + " for a dummy OSM"); OrcBatchKey stripeKey = null; DummyMetadataReader mr = new DummyMetadataReader(); mr.doStreamStep = false; mr.isEmpty = true; StripeInformation si = Mockito.mock(StripeInformation.class); Mockito.when(si.getNumberOfRows()).thenReturn(0L); osm = new OrcStripeMetadata(stripeKey, mr, si, null, null, null, null); LOG.info("Estimated " + root.estimate(osm, map) + " for an empty OSM"); mr.doStreamStep = true; osm = new OrcStripeMetadata(stripeKey, mr, si, null, null, null, null); LOG.info("Estimated " + root.estimate(osm, map) + " for an empty OSM after serde"); mr.isEmpty = false; stripeKey = new OrcBatchKey(0, 0, 0); osm = new OrcStripeMetadata(stripeKey, mr, si, null, null, null, null); LOG.info("Estimated " + root.estimate(osm, map) + " for a test OSM"); osm.resetRowIndex(); LOG.info("Estimated " + root.estimate(osm, map) + " for a test OSM w/o row index"); mr.doStreamStep = true; osm = new OrcStripeMetadata(stripeKey, mr, si, null, null, null, null); LOG.info("Estimated " + root.estimate(osm, map) + " for a test OSM after serde"); osm.resetRowIndex(); LOG.info("Estimated " + root.estimate(osm, map) + " for a test OSM w/o row index after serde"); OrcFileMetadata ofm = OrcFileMetadata.createDummy(0); map = IncrementalObjectSizeEstimator.createEstimators(ofm); IncrementalObjectSizeEstimator.addEstimator("com.google.protobuf.LiteralByteString", map); root = map.get(OrcFileMetadata.class); LOG.info("Estimated " + root.estimate(ofm, map) + " for a dummy OFM"); } private static class Struct { Integer i; int j = 0; LinkedHashSet<Object> list2; List<Object> list; } private static class Struct2 { Struct2 next; Struct2 prev; Struct2 top; } @Test public void testSimpleTypes() { JavaDataModel memModel = JavaDataModel.get(); int intSize = runEstimate(new Integer(0), memModel, null); runEstimate(new String(""), memModel, "empty string"); runEstimate(new String("foobarzzzzzzzzzzzzzz"), memModel, null); List<Object> list = new ArrayList<Object>(0); runEstimate(list, memModel, "empty ArrayList"); list.add(new String("zzz")); runEstimate(list, memModel, "ArrayList - one string"); list.add(new Integer(5)); list.add(new Integer(6)); int arrayListSize = runEstimate(list, memModel, "ArrayList - 3 elements"); LinkedHashSet<Object> list2 = new LinkedHashSet<Object>(0); runEstimate(list2, memModel, "empty LinkedHashSet"); list2.add(new String("zzzz")); runEstimate(list2, memModel, "LinkedHashSet - one string"); list2.add(new Integer(7)); list2.add(new Integer(4)); int lhsSize = runEstimate(list2, memModel, "LinkedHashSet - 3 elements"); Struct struct = new Struct(); int structSize = runEstimate(struct, memModel, "Struct - empty"); struct.i = 10; int structSize2 = runEstimate(struct, memModel, "Struct - one reference"); assertEquals(intSize + structSize, structSize2); struct.list = list; int structSize3 = runEstimate(struct, memModel, "Struct - with ArrayList"); assertEquals(arrayListSize + structSize2, structSize3); struct.list2 = list2; int structSize4 = runEstimate(struct, memModel, "Struct - with LinkedHashSet"); assertEquals(lhsSize + structSize3, structSize4); Struct2 struct2 = new Struct2(); int recSize1 = runEstimate(struct2, memModel, "recursive struct - empty"); struct2.next = new Struct2(); struct2.top = new Struct2(); int recSize2 = runEstimate(struct2, memModel, "recursive struct - no ring"); assertEquals(recSize1 * 3, recSize2); struct2.next.prev = struct2; int recSize3 = runEstimate(struct2, memModel, "recursive struct - ring added"); assertEquals(recSize2, recSize3); } private int runEstimate(Object obj, JavaDataModel memModel, String desc) { HashMap<Class<?>, ObjectEstimator> map = IncrementalObjectSizeEstimator.createEstimators(obj); ObjectEstimator root = map.get(obj.getClass()); int estimate = root.estimate(obj, map); LOG.info("Estimated " + estimate + " for " + (desc == null ? obj.getClass().getName() : desc)); return estimate; } }