/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hive.metastore.hbase;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Decimal;
import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Table;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Round-trip tests for column statistics that carry bit vectors.
 *
 * <p>Each test writes one {@link ColumnStatisticsObj} (long, double, string or decimal) through
 * {@link HBaseStore}, reads it back, and checks that the descriptor, the numeric fields and the
 * bit-vector string all match what was written. The first four tests use table-level statistics,
 * the last four use partition-level statistics. The store is built by {@code MockUtils.init} on
 * top of a Mockito-mocked {@link HTableInterface}.
 */
public class TestHBaseStoreBitVector {
  private static final Logger LOG = LoggerFactory.getLogger(TestHBaseStoreBitVector.class);

  // Shared, never-populated parameter map handed to every mock Table and Partition.
  static Map<String, String> emptyParameters = new HashMap<>();

  // Table with NUM_PART_KEYS partitioning keys and NUM_PARTITIONS values per key
  static final int NUM_PART_KEYS = 1;
  static final int NUM_PARTITIONS = 5;
  static final String DB = "db";
  static final String TBL = "tbl";
  static final String COL = "col";
  static final String PART_KEY_PREFIX = "part";
  static final String PART_VAL_PREFIX = "val";
  static final String PART_KV_SEPARATOR = "=";
  static final List<String> PART_KEYS = new ArrayList<>();
  static final List<String> PART_VALS = new ArrayList<>();

  // Initialize mock partitions: keys part1..partN and values val1..valM
  static {
    for (int i = 1; i <= NUM_PART_KEYS; i++) {
      PART_KEYS.add(PART_KEY_PREFIX + i);
    }
    for (int i = 1; i <= NUM_PARTITIONS; i++) {
      PART_VALS.add(PART_VAL_PREFIX + i);
    }
  }

  // Captured once at class load so the value written and the value expected are identical.
  static final long DEFAULT_TIME = System.currentTimeMillis();
  static final String PART_KEY = "part";
  static final String LONG_COL = "longCol";
  static final String LONG_TYPE = "long";
  static final String INT_TYPE = "int";
  static final String INT_VAL = "1234";
  static final String DOUBLE_COL = "doubleCol";
  static final String DOUBLE_TYPE = "double";
  static final String DOUBLE_VAL = "3.1415";
  static final String STRING_COL = "stringCol";
  static final String STRING_TYPE = "string";
  static final String STRING_VAL = "stringval";
  static final String DECIMAL_COL = "decimalCol";
  static final String DECIMAL_TYPE = "decimal(5,3)";
  static final String DECIMAL_VAL = "12.123";

  // Pre-built statistics objects, NUM_PARTITIONS per column type, filled in by beforeTest().
  static List<ColumnStatisticsObj> longColStatsObjs = new ArrayList<>(NUM_PARTITIONS);
  static List<ColumnStatisticsObj> doubleColStatsObjs = new ArrayList<>(NUM_PARTITIONS);
  static List<ColumnStatisticsObj> stringColStatsObjs = new ArrayList<>(NUM_PARTITIONS);
  static List<ColumnStatisticsObj> decimalColStatsObjs = new ArrayList<>(NUM_PARTITIONS);

  @Rule public ExpectedException thrown = ExpectedException.none();
  @Mock HTableInterface htable;
  SortedMap<String, Cell> rows = new TreeMap<>();
  HBaseStore store;

  /** Builds the shared mock statistics once for the whole class. */
  @BeforeClass
  public static void beforeTest() {
    // All data initializations
    populateMockStats();
  }

  /**
   * Adds NUM_PARTITIONS statistics objects of each column type to the static lists.
   * The tests in this class only ever use the first element of each list.
   */
  private static void populateMockStats() {
    for (int i = 0; i < NUM_PARTITIONS; i++) {
      longColStatsObjs.add(mockLongStats(i));
      doubleColStatsObjs.add(mockDoubleStats(i));
      stringColStatsObjs.add(mockStringStats(i));
      decimalColStatsObjs.add(mockDecimalStats(i));
    }
  }

  /**
   * Builds long-column statistics for {@link #LONG_COL}.
   *
   * @param i offset mixed into the numeric fields so each generated object differs
   * @return a statistics object whose data is set to long stats, including bit vectors
   */
  private static ColumnStatisticsObj mockLongStats(int i) {
    long high = 120938479124L + 100 * i;
    long low = -12341243213412124L - 50 * i;
    long nulls = 23 + i;
    long dVs = 213L + 10 * i;
    String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{1, 2, 3, 4, 5, 6, 7, 8}";

    LongColumnStatsData longData = new LongColumnStatsData();
    longData.setHighValue(high);
    longData.setLowValue(low);
    longData.setNumNulls(nulls);
    longData.setNumDVs(dVs);
    longData.setBitVectors(bitVectors);

    ColumnStatisticsData data = new ColumnStatisticsData();
    data.setLongStats(longData);

    ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
    colStatsObj.setColName(LONG_COL);
    colStatsObj.setColType(LONG_TYPE);
    colStatsObj.setStatsData(data);
    return colStatsObj;
  }

  /**
   * Builds double-column statistics for {@link #DOUBLE_COL}.
   *
   * @param i offset mixed into the numeric fields so each generated object differs
   * @return a statistics object whose data is set to double stats, including bit vectors
   */
  private static ColumnStatisticsObj mockDoubleStats(int i) {
    double high = 123423.23423 + 100 * i;
    double low = 0.00001234233 - 50 * i;
    long nulls = 92 + i;
    long dVs = 1234123421L + 10 * i;
    String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 2, 3, 4, 5, 6, 7, 8}";

    DoubleColumnStatsData doubleData = new DoubleColumnStatsData();
    doubleData.setHighValue(high);
    doubleData.setLowValue(low);
    doubleData.setNumNulls(nulls);
    doubleData.setNumDVs(dVs);
    doubleData.setBitVectors(bitVectors);

    ColumnStatisticsData data = new ColumnStatisticsData();
    data.setDoubleStats(doubleData);

    ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
    colStatsObj.setColName(DOUBLE_COL);
    colStatsObj.setColType(DOUBLE_TYPE);
    colStatsObj.setStatsData(data);
    return colStatsObj;
  }

  /**
   * Builds string-column statistics for {@link #STRING_COL}.
   *
   * @param i offset mixed into the numeric fields so each generated object differs
   * @return a statistics object whose data is set to string stats, including bit vectors
   */
  private static ColumnStatisticsObj mockStringStats(int i) {
    long maxLen = 1234 + 10 * i;
    double avgLen = 32.3 + i;
    long nulls = 987 + 10 * i;
    long dVs = 906 + i;
    String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 3, 4, 5, 6, 7, 8}";

    StringColumnStatsData stringData = new StringColumnStatsData();
    stringData.setMaxColLen(maxLen);
    stringData.setAvgColLen(avgLen);
    stringData.setNumNulls(nulls);
    stringData.setNumDVs(dVs);
    stringData.setBitVectors(bitVectors);

    ColumnStatisticsData data = new ColumnStatisticsData();
    data.setStringStats(stringData);

    ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
    colStatsObj.setColName(STRING_COL);
    colStatsObj.setColType(STRING_TYPE);
    colStatsObj.setStatsData(data);
    return colStatsObj;
  }

  /**
   * Builds decimal-column statistics for {@link #DECIMAL_COL}.
   *
   * @param i offset mixed into the numeric fields so each generated object differs
   * @return a statistics object whose data is set to decimal stats, including bit vectors
   */
  private static ColumnStatisticsObj mockDecimalStats(int i) {
    Decimal high = mockDecimal(3876 + 100 * i);
    Decimal low = mockDecimal(38 + i);
    long nulls = 13 + i;
    long dVs = 923947293L + 100 * i;
    String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 2, 4, 5, 6, 7, 8}";

    DecimalColumnStatsData decimalData = new DecimalColumnStatsData();
    decimalData.setHighValue(high);
    decimalData.setLowValue(low);
    decimalData.setNumNulls(nulls);
    decimalData.setNumDVs(dVs);
    decimalData.setBitVectors(bitVectors);

    ColumnStatisticsData data = new ColumnStatisticsData();
    data.setDecimalStats(decimalData);

    ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
    colStatsObj.setColName(DECIMAL_COL);
    colStatsObj.setColType(DECIMAL_TYPE);
    colStatsObj.setStatsData(data);
    return colStatsObj;
  }

  /**
   * Builds a scale-3 {@link Decimal} whose unscaled bytes are the decimal digits of
   * {@code unscaled}.
   *
   * <p>The digits are encoded with an explicit charset so the stored bytes do not depend on the
   * JVM's platform default.
   */
  private static Decimal mockDecimal(int unscaled) {
    Decimal decimal = new Decimal();
    decimal.setScale((short) 3);
    decimal.setUnscaled(String.valueOf(unscaled).getBytes(StandardCharsets.UTF_8));
    return decimal;
  }

  @AfterClass
  public static void afterTest() {
  }

  /**
   * Creates a fresh store for every test on top of the mocked HBase table.
   *
   * @throws IOException declared by the original setup; thrown by whatever the setup calls raise
   */
  @Before
  public void init() throws IOException {
    MockitoAnnotations.initMocks(this);
    HiveConf conf = new HiveConf();
    // NOTE(review): presumably turns off HBaseReadWrite's caches so reads hit the mock - confirm.
    conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true);
    store = MockUtils.init(conf, htable, rows);
  }

  // ---------------------------------------------------------------------------------------------
  // Table-level statistics
  // ---------------------------------------------------------------------------------------------

  @Test
  public void longTableStatistics() throws Exception {
    createMockTable(LONG_COL, LONG_TYPE);
    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
    ColumnStatisticsObj obj = longColStatsObjs.get(0);
    LongColumnStatsData longData = obj.getStatsData().getLongStats();

    ColumnStatistics statsFromDB = writeAndReadTableStats(desc, obj);

    assertTableLevelDesc(desc, statsFromDB);
    ColumnStatisticsData dataFromDB =
        getOnlyStatsData(statsFromDB, ColumnStatisticsData._Fields.LONG_STATS);
    assertLongStatsEqual(longData, dataFromDB.getLongStats());
  }

  @Test
  public void doubleTableStatistics() throws Exception {
    createMockTable(DOUBLE_COL, DOUBLE_TYPE);
    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
    ColumnStatisticsObj obj = doubleColStatsObjs.get(0);
    DoubleColumnStatsData doubleData = obj.getStatsData().getDoubleStats();

    ColumnStatistics statsFromDB = writeAndReadTableStats(desc, obj);

    assertTableLevelDesc(desc, statsFromDB);
    ColumnStatisticsData dataFromDB =
        getOnlyStatsData(statsFromDB, ColumnStatisticsData._Fields.DOUBLE_STATS);
    assertDoubleStatsEqual(doubleData, dataFromDB.getDoubleStats());
  }

  @Test
  public void stringTableStatistics() throws Exception {
    createMockTable(STRING_COL, STRING_TYPE);
    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
    ColumnStatisticsObj obj = stringColStatsObjs.get(0);
    StringColumnStatsData stringData = obj.getStatsData().getStringStats();

    ColumnStatistics statsFromDB = writeAndReadTableStats(desc, obj);

    assertTableLevelDesc(desc, statsFromDB);
    ColumnStatisticsData dataFromDB =
        getOnlyStatsData(statsFromDB, ColumnStatisticsData._Fields.STRING_STATS);
    assertStringStatsEqual(stringData, dataFromDB.getStringStats());
  }

  @Test
  public void decimalTableStatistics() throws Exception {
    createMockTable(DECIMAL_COL, DECIMAL_TYPE);
    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
    ColumnStatisticsObj obj = decimalColStatsObjs.get(0);
    DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats();

    ColumnStatistics statsFromDB = writeAndReadTableStats(desc, obj);

    assertTableLevelDesc(desc, statsFromDB);
    ColumnStatisticsData dataFromDB =
        getOnlyStatsData(statsFromDB, ColumnStatisticsData._Fields.DECIMAL_STATS);
    assertDecimalStatsEqual(decimalData, dataFromDB.getDecimalStats());
  }

  // ---------------------------------------------------------------------------------------------
  // Partition-level statistics. Each test first creates the table and one partition, then stores
  // stats for partition {PART_KEY, <value>} and reads them back by partition name.
  // ---------------------------------------------------------------------------------------------

  @Test
  public void longPartitionStatistics() throws Exception {
    createMockTableAndPartition(INT_TYPE, INT_VAL);
    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, INT_VAL);
    ColumnStatisticsObj obj = longColStatsObjs.get(0);
    LongColumnStatsData longData = obj.getStatsData().getLongStats();

    List<ColumnStatistics> statsFromDB = writeAndReadPartitionStats(desc, obj, INT_VAL);

    ColumnStatistics partStats = assertSinglePartitionLevelStats(desc, statsFromDB);
    ColumnStatisticsData dataFromDB =
        getOnlyStatsData(partStats, ColumnStatisticsData._Fields.LONG_STATS);
    assertLongStatsEqual(longData, dataFromDB.getLongStats());
  }

  @Test
  public void doublePartitionStatistics() throws Exception {
    createMockTableAndPartition(DOUBLE_TYPE, DOUBLE_VAL);
    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, DOUBLE_VAL);
    ColumnStatisticsObj obj = doubleColStatsObjs.get(0);
    DoubleColumnStatsData doubleData = obj.getStatsData().getDoubleStats();

    List<ColumnStatistics> statsFromDB = writeAndReadPartitionStats(desc, obj, DOUBLE_VAL);

    ColumnStatistics partStats = assertSinglePartitionLevelStats(desc, statsFromDB);
    ColumnStatisticsData dataFromDB =
        getOnlyStatsData(partStats, ColumnStatisticsData._Fields.DOUBLE_STATS);
    assertDoubleStatsEqual(doubleData, dataFromDB.getDoubleStats());
  }

  @Test
  public void stringPartitionStatistics() throws Exception {
    createMockTableAndPartition(STRING_TYPE, STRING_VAL);
    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, STRING_VAL);
    ColumnStatisticsObj obj = stringColStatsObjs.get(0);
    StringColumnStatsData stringData = obj.getStatsData().getStringStats();

    List<ColumnStatistics> statsFromDB = writeAndReadPartitionStats(desc, obj, STRING_VAL);

    ColumnStatistics partStats = assertSinglePartitionLevelStats(desc, statsFromDB);
    ColumnStatisticsData dataFromDB =
        getOnlyStatsData(partStats, ColumnStatisticsData._Fields.STRING_STATS);
    assertStringStatsEqual(stringData, dataFromDB.getStringStats());
  }

  @Test
  public void decimalPartitionStatistics() throws Exception {
    createMockTableAndPartition(DECIMAL_TYPE, DECIMAL_VAL);
    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, DECIMAL_VAL);
    ColumnStatisticsObj obj = decimalColStatsObjs.get(0);
    DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats();

    List<ColumnStatistics> statsFromDB = writeAndReadPartitionStats(desc, obj, DECIMAL_VAL);

    ColumnStatistics partStats = assertSinglePartitionLevelStats(desc, statsFromDB);
    ColumnStatisticsData dataFromDB =
        getOnlyStatsData(partStats, ColumnStatisticsData._Fields.DECIMAL_STATS);
    assertDecimalStatsEqual(decimalData, dataFromDB.getDecimalStats());
  }

  // ---------------------------------------------------------------------------------------------
  // Shared write/read and assertion helpers
  // ---------------------------------------------------------------------------------------------

  /**
   * Stores {@code obj} as table-level statistics under {@code desc} and reads the statistics for
   * that column back from the store.
   */
  private ColumnStatistics writeAndReadTableStats(ColumnStatisticsDesc desc,
      ColumnStatisticsObj obj) throws Exception {
    ColumnStatistics stats = new ColumnStatistics();
    stats.setStatsDesc(desc);
    stats.addToStatsObj(obj);
    store.updateTableColumnStatistics(stats);
    return store.getTableColumnStatistics(DB, TBL, Arrays.asList(obj.getColName()));
  }

  /**
   * Stores {@code obj} as statistics for the partition with value {@code partVal}, then reads the
   * statistics back by the partition name carried in {@code desc}.
   */
  private List<ColumnStatistics> writeAndReadPartitionStats(ColumnStatisticsDesc desc,
      ColumnStatisticsObj obj, String partVal) throws Exception {
    ColumnStatistics stats = new ColumnStatistics();
    stats.setStatsDesc(desc);
    stats.addToStatsObj(obj);

    // Mutable lists, as in the original tests, in case the store modifies its arguments.
    List<String> partVals = new ArrayList<>();
    partVals.add(partVal);
    store.updatePartitionColumnStatistics(stats, partVals);

    List<String> partNames = new ArrayList<>();
    partNames.add(desc.getPartName());
    List<String> colNames = new ArrayList<>();
    colNames.add(obj.getColName());
    return store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
  }

  /** Checks that the descriptor read back describes table-level stats for DB.TBL. */
  private static void assertTableLevelDesc(ColumnStatisticsDesc expected,
      ColumnStatistics statsFromDB) {
    ColumnStatisticsDesc actual = statsFromDB.getStatsDesc();
    Assert.assertEquals(expected.getLastAnalyzed(), actual.getLastAnalyzed());
    Assert.assertEquals(DB, actual.getDbName());
    Assert.assertEquals(TBL, actual.getTableName());
    Assert.assertTrue(actual.isIsTblLevel());
  }

  /**
   * Checks that exactly one partition's statistics came back and that its descriptor describes
   * partition-level stats for DB.TBL.
   *
   * @return the single {@link ColumnStatistics} entry, for further inspection
   */
  private static ColumnStatistics assertSinglePartitionLevelStats(ColumnStatisticsDesc expected,
      List<ColumnStatistics> statsFromDB) {
    Assert.assertEquals(1, statsFromDB.size());
    ColumnStatistics partStats = statsFromDB.get(0);
    ColumnStatisticsDesc actual = partStats.getStatsDesc();
    Assert.assertEquals(expected.getLastAnalyzed(), actual.getLastAnalyzed());
    Assert.assertEquals(DB, actual.getDbName());
    Assert.assertEquals(TBL, actual.getTableName());
    Assert.assertFalse(actual.isIsTblLevel());
    return partStats;
  }

  /**
   * Checks that {@code stats} holds exactly one statistics object whose data has
   * {@code expectedField} set.
   *
   * @return the data of that single statistics object
   */
  private static ColumnStatisticsData getOnlyStatsData(ColumnStatistics stats,
      ColumnStatisticsData._Fields expectedField) {
    Assert.assertEquals(1, stats.getStatsObjSize());
    ColumnStatisticsData dataFromDB = stats.getStatsObj().get(0).getStatsData();
    Assert.assertEquals(expectedField, dataFromDB.getSetField());
    return dataFromDB;
  }

  /** Field-by-field comparison of long statistics, bit vectors included. */
  private static void assertLongStatsEqual(LongColumnStatsData expected,
      LongColumnStatsData actual) {
    Assert.assertEquals(expected.getHighValue(), actual.getHighValue());
    Assert.assertEquals(expected.getLowValue(), actual.getLowValue());
    Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
    Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
    Assert.assertEquals(expected.getBitVectors(), actual.getBitVectors());
  }

  /** Field-by-field comparison of double statistics; high/low are compared within 0.01. */
  private static void assertDoubleStatsEqual(DoubleColumnStatsData expected,
      DoubleColumnStatsData actual) {
    Assert.assertEquals(expected.getHighValue(), actual.getHighValue(), 0.01);
    Assert.assertEquals(expected.getLowValue(), actual.getLowValue(), 0.01);
    Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
    Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
    Assert.assertEquals(expected.getBitVectors(), actual.getBitVectors());
  }

  /** Field-by-field comparison of string statistics; average length is compared within 0.01. */
  private static void assertStringStatsEqual(StringColumnStatsData expected,
      StringColumnStatsData actual) {
    Assert.assertEquals(expected.getMaxColLen(), actual.getMaxColLen());
    Assert.assertEquals(expected.getAvgColLen(), actual.getAvgColLen(), 0.01);
    Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
    Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
    Assert.assertEquals(expected.getBitVectors(), actual.getBitVectors());
  }

  /** Field-by-field comparison of decimal statistics, bit vectors included. */
  private static void assertDecimalStatsEqual(DecimalColumnStatsData expected,
      DecimalColumnStatsData actual) {
    Assert.assertEquals(expected.getHighValue(), actual.getHighValue());
    Assert.assertEquals(expected.getLowValue(), actual.getLowValue());
    Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
    Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
    Assert.assertEquals(expected.getBitVectors(), actual.getBitVectors());
  }

  // ---------------------------------------------------------------------------------------------
  // Mock table / partition / descriptor builders
  // ---------------------------------------------------------------------------------------------

  /**
   * Creates table DB.TBL in the store with a single column of the given name and type.
   *
   * @return the table object that was passed to the store
   */
  private Table createMockTable(String name, String type) throws Exception {
    List<FieldSchema> cols = new ArrayList<>();
    cols.add(new FieldSchema(name, type, ""));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    Map<String, String> params = new HashMap<>();
    params.put("key", "value");
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17,
        serde, new ArrayList<String>(), new ArrayList<Order>(), params);
    int currentTime = (int) (System.currentTimeMillis() / 1000);
    // NOTE(review): cols is passed both inside sd and directly to Table (presumably as the
    // partition keys) - confirm against the Table constructor.
    Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols,
        emptyParameters, null, null, null);
    store.createTable(table);
    return table;
  }

  /**
   * Creates table DB.TBL in the store with a single column "col1" of type {@code partType}, then
   * adds one partition whose only value is {@code partVal}.
   *
   * @return the table object that was passed to the store
   */
  private Table createMockTableAndPartition(String partType, String partVal) throws Exception {
    List<FieldSchema> cols = new ArrayList<>();
    cols.add(new FieldSchema("col1", partType, ""));
    List<String> vals = new ArrayList<>();
    vals.add(partVal);
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    Map<String, String> params = new HashMap<>();
    params.put("key", "value");
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17,
        serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params);
    int currentTime = (int) (System.currentTimeMillis() / 1000);
    Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols,
        emptyParameters, null, null, null);
    store.createTable(table);
    Partition part = new Partition(vals, DB, TBL, currentTime, currentTime, sd, emptyParameters);
    store.addPartition(part);
    return table;
  }

  /**
   * Returns a dummy table level ColumnStatisticsDesc with default values
   */
  private ColumnStatisticsDesc getMockTblColStatsDesc() {
    ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
    desc.setLastAnalyzed(DEFAULT_TIME);
    desc.setDbName(DB);
    desc.setTableName(TBL);
    desc.setIsTblLevel(true);
    return desc;
  }

  /**
   * Returns a dummy partition level ColumnStatisticsDesc whose partition name is
   * {@code partKey=partVal}
   */
  private ColumnStatisticsDesc getMockPartColStatsDesc(String partKey, String partVal) {
    ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
    desc.setLastAnalyzed(DEFAULT_TIME);
    desc.setDbName(DB);
    desc.setTableName(TBL);
    // e.g. part=1234
    desc.setPartName(partKey + PART_KV_SEPARATOR + partVal);
    desc.setIsTblLevel(false);
    return desc;
  }
}