/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.metastore;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import junit.framework.TestCase;

import org.apache.hadoop.hive.cli.CliSessionState;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.events.AddPartitionEvent;
import org.apache.hadoop.hive.metastore.events.AlterTableEvent;
import org.apache.hadoop.hive.metastore.events.CreateDatabaseEvent;
import org.apache.hadoop.hive.metastore.events.CreateTableEvent;
import org.apache.hadoop.hive.metastore.events.DropDatabaseEvent;
import org.apache.hadoop.hive.metastore.events.DropPartitionEvent;
import org.apache.hadoop.hive.metastore.events.DropTableEvent;
import org.apache.hadoop.hive.metastore.events.ListenerEvent;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.shims.ShimLoader;

/**
 * TestHiveMetaStoreStatsMerge.
 * Tests the column statistics merge path behind the setPartitionColumnStatistics
 * calls in {@link org.apache.hadoop.hive.metastore.HiveMetaStore}.
 */
public class TestHiveMetaStoreStatsMerge extends TestCase {

  private HiveConf hiveConf;
  private HiveMetaStoreClient msc;
  private final Database db = new Database();
  private Table table = new Table();

  private static final String dbName = "hive3253";
  private static final String tblName = "tmptbl";

  @Override
  protected void setUp() throws Exception {
    super.setUp();

    System.setProperty("hive.metastore.event.listeners", DummyListener.class.getName());

    // Start a standalone metastore on a free port and point the client at it.
    int port = MetaStoreUtils.findFreePort();
    MetaStoreUtils.startMetaStore(port, ShimLoader.getHadoopThriftAuthBridge());

    hiveConf = new HiveConf(this.getClass());
    hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + port);
    hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
    hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    SessionState.start(new CliSessionState(hiveConf));
    msc = new HiveMetaStoreClient(hiveConf);

    msc.dropDatabase(dbName, true, true);

    db.setName(dbName);

    // Build a table descriptor with a single string column "a".
    Map<String, String> tableParams = new HashMap<String, String>();
    tableParams.put("a", "string");
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("a", "string", ""));

    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(cols);
    sd.setCompressed(false);
    sd.setParameters(tableParams);
    sd.setSerdeInfo(new SerDeInfo());
    sd.getSerdeInfo().setName(tblName);
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
    sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
    sd.setInputFormat(HiveInputFormat.class.getName());
    sd.setOutputFormat(HiveOutputFormat.class.getName());

    table.setDbName(dbName);
    table.setTableName(tblName);
    table.setParameters(tableParams);
    table.setSd(sd);

    DummyListener.notifyList.clear();
  }

  @Override
  protected void tearDown() throws Exception {
    super.tearDown();
  }

  public void testStatsMerge() throws Exception {
    int listSize = 0;

    List<ListenerEvent> notifyList = DummyListener.notifyList;
    assertEquals(listSize, notifyList.size());

    msc.createDatabase(db);
    listSize++;
    assertEquals(listSize, notifyList.size());
    CreateDatabaseEvent dbEvent = (CreateDatabaseEvent)(notifyList.get(listSize - 1));
    assertTrue(dbEvent.getStatus());

    msc.createTable(table);
    listSize++;
    assertEquals(listSize, notifyList.size());
    CreateTableEvent tblEvent = (CreateTableEvent)(notifyList.get(listSize - 1));
    assertTrue(tblEvent.getStatus());

    // Refresh with the metastore's copy of the table.
    table = msc.getTable(dbName, tblName);

    // First round of statistics for column "a".
    ColumnStatistics cs = new ColumnStatistics();
    ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tblName);
    cs.setStatsDesc(desc);
    ColumnStatisticsObj obj = new ColumnStatisticsObj();
    obj.setColName("a");
    obj.setColType("string");
    ColumnStatisticsData data = new ColumnStatisticsData();
    StringColumnStatsData scsd = new StringColumnStatsData();
    scsd.setAvgColLen(10);
    scsd.setMaxColLen(20);
    scsd.setNumNulls(30);
    scsd.setNumDVs(123);
    scsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}");
    data.setStringStats(scsd);
    obj.setStatsData(data);
    cs.addToStatsObj(obj);

    List<ColumnStatistics> colStats = new ArrayList<>();
    colStats.add(cs);

    // Write the first round with needMerge unset, so the stats are stored as-is.
    SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
    msc.setPartitionColumnStatistics(request);
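
    // Read back what was just written; with needMerge unset the stored stats
    // should match the request verbatim, so numDVs below is expected to be 123.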
List<String> colNames = new ArrayList<>(); colNames.add("a"); StringColumnStatsData getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0) .getStatsData().getStringStats(); assertEquals(getScsd.getNumDVs(), 123); cs = new ColumnStatistics(); scsd = new StringColumnStatsData(); scsd.setAvgColLen(20); scsd.setMaxColLen(5); scsd.setNumNulls(70); scsd.setNumDVs(456); scsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}"); data.setStringStats(scsd); obj.setStatsData(data); cs.addToStatsObj(obj); request = new SetPartitionsStatsRequest(colStats); request.setNeedMerge(true); msc.setPartitionColumnStatistics(request); getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0) .getStatsData().getStringStats(); assertEquals(getScsd.getAvgColLen(), 20.0); assertEquals(getScsd.getMaxColLen(), 20); assertEquals(getScsd.getNumNulls(), 100); // since metastore is ObjectStore, we use the max function to merge. assertEquals(getScsd.getNumDVs(), 456); } }