/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.txn.compactor.Cleaner;
import org.apache.hadoop.hive.ql.txn.compactor.Worker;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
/**
 * This class resides in itests to facilitate running queries using the Tez engine, since the jars
 * are fully loaded here, which is not the case if it stays in ql.
 */
public class TestAcidOnTez {
  // Scratch directory unique per test run; backslashes normalized so the path is
  // usable in Hive config values on Windows as well.
  private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") +
    File.separator + TestAcidOnTez.class.getCanonicalName()
    + "-" + System.currentTimeMillis()
  ).getPath().replaceAll("\\\\", "/");
  private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse";
  // Bucket count for test tables; set it to 1 for easier debugging.
  // final: never reassigned anywhere in this class.
  private static final int BUCKET_COUNT = 2;
  @Rule
  public TestName testName = new TestName();
  private HiveConf hiveConf;
  // Shared driver used by the single-arg runStatementOnDriver(); lifecycle is
  // managed by setUp()/tearDown().
  private Driver d;

  /**
   * Test tables used by this suite. toString() returns the table name so the
   * enum constants can be concatenated directly into SQL strings.
   * (Nested enums are implicitly static, so no modifier is needed.)
   */
  private enum Table {
    ACIDTBL("acidTbl"),
    ACIDTBLPART("acidTblPart"),
    NONACIDORCTBL("nonAcidOrcTbl"),
    NONACIDPART("nonAcidPart");

    private final String name;

    Table(String name) {
      this.name = name;
    }

    @Override
    public String toString() {
      return name;
    }
  }

  /**
   * Creates a fresh metastore/warehouse, starts a session and driver, and
   * (re)creates and seeds the test tables. Calls tearDown() first so a failed
   * previous run cannot leak state into this one.
   */
  @Before
  public void setUp() throws Exception {
    tearDown();
    hiveConf = new HiveConf(this.getClass());
    hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR);
    hiveConf.setVar(HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
    hiveConf.setVar(HiveConf.ConfVars.HIVEINPUTFORMAT, HiveInputFormat.class.getName());
    hiveConf
      .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
        "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
    TxnDbUtil.setConfValues(hiveConf);
    TxnDbUtil.prepDb();
    // Start from an empty warehouse directory.
    File f = new File(TEST_WAREHOUSE_DIR);
    if (f.exists()) {
      FileUtil.fullyDelete(f);
    }
    if (!(new File(TEST_WAREHOUSE_DIR).mkdirs())) {
      throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR);
    }
    SessionState.start(new SessionState(hiveConf));
    d = new Driver(hiveConf);
    dropTables();
    runStatementOnDriver("create table " + Table.ACIDTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc " + getTblProperties());
    runStatementOnDriver("create table " + Table.ACIDTBLPART + "(a int, b int) partitioned by (p string) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc " + getTblProperties());
    runStatementOnDriver("create table " + Table.NONACIDORCTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc ");
    runStatementOnDriver("create table " + Table.NONACIDPART + "(a int, b int) partitioned by (p string) stored as orc ");
    // Separate single-row inserts into the ACID table produce multiple deltas,
    // which gives the later compaction something real to do.
    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) values(1,2)");
    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) values(3,4)");
    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) values(5,6)");
    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) values(7,8)");
    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) values(9,10)");
    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2),(3,4),(5,6),(7,8),(9,10)");
  }

  /**
   * Table properties appended to the ACID table DDL. Kept as an overridable
   * hook so subclasses can test different types of Acid tables.
   */
  String getTblProperties() {
    return "TBLPROPERTIES ('transactional'='true')";
  }

  /** Drops every table declared in {@link Table}, ignoring absent ones. */
  private void dropTables() throws Exception {
    for(Table t : Table.values()) {
      runStatementOnDriver("drop table if exists " + t);
    }
  }

  /**
   * Drops the test tables, shuts down the shared driver, resets the txn
   * metastore DB, and removes the scratch directory. The directory cleanup is
   * in a finally block so it happens even if DB cleanup fails.
   */
  @After
  public void tearDown() throws Exception {
    try {
      if (d != null) {
        dropTables();
        d.destroy();
        d.close();
        d = null;
      }
      TxnDbUtil.cleanDb();
    } finally {
      FileUtils.deleteDirectory(new File(TEST_DATA_DIR));
    }
  }

  @Test
  public void testMergeJoinOnMR() throws Exception {
    testJoin("mr", "MergeJoin");
  }

  @Test
  public void testMapJoinOnMR() throws Exception {
    testJoin("mr", "MapJoin");
  }

  @Test
  public void testMergeJoinOnTez() throws Exception {
    testJoin("tez", "MergeJoin");
  }

  @Test
  public void testMapJoinOnTez() throws Exception {
    testJoin("tez", "MapJoin");
  }

  /**
   * Runs the join queries once against the freshly-inserted deltas, performs a
   * major compaction on the ACID table, then runs them again to verify the
   * results are unchanged after compaction.
   *
   * Ideally a test like this should be a qfile test. However, the explain
   * output from qfile is always slightly different depending on where the test
   * is run, specifically due to file size estimation.
   *
   * @param engine   "tez" or "mr"
   * @param joinType "MergeJoin" or "MapJoin"
   */
  private void testJoin(String engine, String joinType) throws Exception {
    HiveConf confForTez = new HiveConf(hiveConf); // make a clone of existing hive conf
    HiveConf confForMR = new HiveConf(hiveConf); // make a clone of existing hive conf
    if (engine.equals("tez")) {
      setupTez(confForTez); // one-time setup to make query able to run with Tez
    }
    if (joinType.equals("MapJoin")) {
      setupMapJoin(confForTez);
      setupMapJoin(confForMR);
    }
    runQueries(engine, joinType, confForTez, confForMR);
    // Perform compaction. Join result after compaction should still be the same
    runStatementOnDriver("alter table "+ Table.ACIDTBL + " compact 'MAJOR'");
    TestTxnCommands2.runWorker(hiveConf);
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    TestTxnCommands2.runCleaner(hiveConf);
    runQueries(engine, joinType, confForTez, confForMR);
  }

  /**
   * For each join query: checks via EXPLAIN that the requested join operator
   * was actually planned on the requested engine, then runs the query and
   * verifies the result.
   */
  private void runQueries(String engine, String joinType, HiveConf confForTez, HiveConf confForMR) throws Exception {
    List<String> queries = new ArrayList<String>();
    queries.add("select count(*) from " + Table.ACIDTBL + " t1 join " + Table.ACIDTBL + " t2 on t1.a=t2.a");
    queries.add("select count(*) from " + Table.ACIDTBL + " t1 join " + Table.NONACIDORCTBL + " t2 on t1.a=t2.a");
    // more queries can be added here in the future to test acid joins
    List<String> explain; // stores Explain output
    // Both tables hold values 1,3,5,7,9 in column a, so each join yields 5 rows.
    int[][] expected = {{5}};
    List<String> rs = null;
    for (String query : queries) {
      if (engine.equals("tez")) {
        explain = runStatementOnDriver("explain " + query, confForTez);
        if (joinType.equals("MergeJoin")) {
          TestTxnCommands2.assertExplainHasString("Merge Join Operator", explain, "Didn't find " + joinType);
        } else { // MapJoin
          TestTxnCommands2.assertExplainHasString("Map Join Operator", explain, "Didn't find " + joinType);
        }
        rs = runStatementOnDriver(query, confForTez);
      } else { // mr
        explain = runStatementOnDriver("explain " + query, confForMR);
        if (joinType.equals("MergeJoin")) {
          // On MR the operator is labeled just "Join Operator" (leading space
          // avoids matching "Map Join Operator").
          TestTxnCommands2.assertExplainHasString(" Join Operator", explain, "Didn't find " + joinType);
        } else { // MapJoin
          TestTxnCommands2.assertExplainHasString("Map Join Operator", explain, "Didn't find " + joinType);
        }
        rs = runStatementOnDriver(query, confForMR);
      }
      Assert.assertEquals("Join result incorrect", TestTxnCommands2.stringifyValues(expected), rs);
    }
  }

  /** Configures the given conf to run queries with Tez in local mode. */
  private void setupTez(HiveConf conf) {
    conf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez");
    conf.setVar(HiveConf.ConfVars.HIVE_USER_INSTALL_DIR, TEST_DATA_DIR);
    conf.setBoolean("tez.local.mode", true);
    conf.set("fs.defaultFS", "file:///");
    conf.setBoolean("tez.runtime.optimize.local.fetch", true);
    conf.set("tez.staging-dir", TEST_DATA_DIR);
    conf.setBoolean("tez.ignore.lib.uris", true);
  }

  /** Forces map-join conversion so the planner picks Map Join Operator. */
  private void setupMapJoin(HiveConf conf) {
    conf.setBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN, true);
    conf.setBoolVar(HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASK, true);
    conf.setLongVar(HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD, 10000);
  }

  /**
   * Runs a statement on the shared driver (created in setUp with the default
   * hiveConf) and returns the result rows as strings.
   *
   * @throws RuntimeException if the statement does not complete successfully
   */
  private List<String> runStatementOnDriver(String stmt) throws Exception {
    CommandProcessorResponse cpr = d.run(stmt);
    if(cpr.getResponseCode() != 0) {
      throw new RuntimeException(stmt + " failed: " + cpr);
    }
    List<String> rs = new ArrayList<String>();
    d.getResults(rs);
    return rs;
  }

  /**
   * Runs a statement with a customized hive conf on a throwaway driver and
   * returns the result rows as strings. The driver is torn down in a finally
   * block (previously it was leaked on every call).
   *
   * @throws RuntimeException if the statement does not complete successfully
   */
  private List<String> runStatementOnDriver(String stmt, HiveConf conf)
    throws Exception {
    Driver driver = new Driver(conf);
    try {
      CommandProcessorResponse cpr = driver.run(stmt);
      if(cpr.getResponseCode() != 0) {
        throw new RuntimeException(stmt + " failed: " + cpr);
      }
      List<String> rs = new ArrayList<String>();
      driver.getResults(rs);
      return rs;
    } finally {
      // Same shutdown sequence tearDown() uses for the shared driver.
      driver.destroy();
      driver.close();
    }
  }
}