/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hive.hcatalog.streaming; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintStream; import java.net.URI; import java.net.URISyntaxException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RawLocalFileSystem; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.cli.CliSessionState; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse; import org.apache.hadoop.hive.metastore.api.LockState; import org.apache.hadoop.hive.metastore.api.LockType; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.ShowLocksRequest; import org.apache.hadoop.hive.metastore.api.ShowLocksResponse; import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement; import org.apache.hadoop.hive.metastore.api.TxnAbortedException; import org.apache.hadoop.hive.metastore.api.TxnInfo; import org.apache.hadoop.hive.metastore.api.TxnState; import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcStruct; import org.apache.hadoop.hive.ql.io.orc.Reader; import org.apache.hadoop.hive.ql.io.orc.RecordReader; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.txn.AcidHouseKeeperService; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.security.UserGroupInformation; import org.apache.orc.impl.OrcAcidUtils; import org.apache.orc.tools.FileDump; import org.apache.thrift.TException; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class TestStreaming { private static final Logger LOG = LoggerFactory.getLogger(TestStreaming.class); public static class RawFileSystem extends RawLocalFileSystem { private static final URI NAME; static { try { NAME = new URI("raw:///"); } catch (URISyntaxException se) { throw new IllegalArgumentException("bad uri", se); } } @Override public URI getUri() { return NAME; } @Override public FileStatus getFileStatus(Path path) throws IOException { File file = pathToFile(path); if (!file.exists()) { throw new FileNotFoundException("Can't find " + path); } // get close enough short mod = 0; if (file.canRead()) { mod |= 0444; } if (file.canWrite()) { mod |= 0200; } if (file.canExecute()) { mod |= 0111; } return new FileStatus(file.length(), file.isDirectory(), 1, 1024, file.lastModified(), file.lastModified(), FsPermission.createImmutable(mod), "owen", "users", path); } } private static final String COL1 = "id"; private static final String COL2 = "msg"; private final HiveConf conf; private Driver driver; private final IMetaStoreClient msClient; final String metaStoreURI = null; // partitioned table private final static String dbName = "testing"; private final static String tblName = "alerts"; private final static String[] fieldNames = new String[]{COL1,COL2}; List<String> partitionVals; private static Path partLoc; private static Path partLoc2; // unpartitioned table private final static String dbName2 = "testing2"; private final static String tblName2 = "alerts"; private final static String[] fieldNames2 = new String[]{COL1,COL2}; // for bucket join testing private final static String dbName3 = "testing3"; private final static String tblName3 = "dimensionTable"; private final static String dbName4 = "testing4"; private final static String tblName4 = "factTable"; List<String> partitionVals2; private final String PART1_CONTINENT = "Asia"; private final String PART1_COUNTRY = "India"; @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); public TestStreaming() throws Exception { partitionVals = new ArrayList<String>(2); partitionVals.add(PART1_CONTINENT); partitionVals.add(PART1_COUNTRY); partitionVals2 = new ArrayList<String>(1); partitionVals2.add(PART1_COUNTRY); conf = new HiveConf(this.getClass()); conf.set("fs.raw.impl", RawFileSystem.class.getName()); conf.set("hive.enforce.bucketing", "true"); conf .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); TxnDbUtil.setConfValues(conf); if (metaStoreURI!=null) { conf.setVar(HiveConf.ConfVars.METASTOREURIS, metaStoreURI); } conf.setBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI, true); conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true); dbFolder.create(); //1) Start from a clean slate (metastore) TxnDbUtil.cleanDb(); TxnDbUtil.prepDb(); //2) obtain metastore clients msClient = new HiveMetaStoreClient(conf); } @Before public void setup() throws Exception { SessionState.start(new CliSessionState(conf)); driver = new Driver(conf); driver.setMaxRows(200002);//make sure Driver returns all results // drop and recreate the necessary databases and tables dropDB(msClient, dbName); String[] colNames = new String[] {COL1, COL2}; String[] colTypes = new String[] {serdeConstants.INT_TYPE_NAME, serdeConstants.STRING_TYPE_NAME}; String[] bucketCols = new String[] {COL1}; String loc1 = dbFolder.newFolder(dbName + ".db").toString(); String[] partNames = new String[]{"Continent", "Country"}; partLoc = createDbAndTable(driver, dbName, tblName, partitionVals, colNames, colTypes, bucketCols, partNames, loc1, 1); dropDB(msClient, dbName2); String loc2 = dbFolder.newFolder(dbName2 + ".db").toString(); partLoc2 = createDbAndTable(driver, dbName2, tblName2, null, colNames, colTypes, bucketCols, null, loc2, 2); String loc3 = dbFolder.newFolder("testing5.db").toString(); createStoreSales("testing5", loc3); runDDL(driver, "drop table testBucketing3.streamedtable"); runDDL(driver, "drop table testBucketing3.finaltable"); runDDL(driver, "drop table testBucketing3.nobucket"); } @After public void cleanup() throws Exception { msClient.close(); driver.close(); } private static List<FieldSchema> getPartitionKeys() { List<FieldSchema> fields = new ArrayList<FieldSchema>(); // Defining partition names in unsorted order fields.add(new FieldSchema("continent", serdeConstants.STRING_TYPE_NAME, "")); fields.add(new FieldSchema("country", serdeConstants.STRING_TYPE_NAME, "")); return fields; } private void createStoreSales(String dbName, String loc) throws Exception { String dbUri = "raw://" + new Path(loc).toUri().toString(); String tableLoc = dbUri + Path.SEPARATOR + "store_sales"; boolean success = runDDL(driver, "create database IF NOT EXISTS " + dbName + " location '" + dbUri + "'"); Assert.assertTrue(success); success = runDDL(driver, "use " + dbName); Assert.assertTrue(success); success = runDDL(driver, "drop table if exists store_sales"); Assert.assertTrue(success); success = runDDL(driver, "create table store_sales\n" + "(\n" + " ss_sold_date_sk int,\n" + " ss_sold_time_sk int,\n" + " ss_item_sk int,\n" + " ss_customer_sk int,\n" + " ss_cdemo_sk int,\n" + " ss_hdemo_sk int,\n" + " ss_addr_sk int,\n" + " ss_store_sk int,\n" + " ss_promo_sk int,\n" + " ss_ticket_number int,\n" + " ss_quantity int,\n" + " ss_wholesale_cost decimal(7,2),\n" + " ss_list_price decimal(7,2),\n" + " ss_sales_price decimal(7,2),\n" + " ss_ext_discount_amt decimal(7,2),\n" + " ss_ext_sales_price decimal(7,2),\n" + " ss_ext_wholesale_cost decimal(7,2),\n" + " ss_ext_list_price decimal(7,2),\n" + " ss_ext_tax decimal(7,2),\n" + " ss_coupon_amt decimal(7,2),\n" + " ss_net_paid decimal(7,2),\n" + " ss_net_paid_inc_tax decimal(7,2),\n" + " ss_net_profit decimal(7,2)\n" + ")\n" + " partitioned by (dt string)\n" + "clustered by (ss_store_sk, ss_promo_sk)\n" + "INTO 4 BUCKETS stored as orc " + " location '" + tableLoc + "'" + " TBLPROPERTIES ('orc.compress'='NONE', 'transactional'='true')"); Assert.assertTrue(success); success = runDDL(driver, "alter table store_sales add partition(dt='2015')"); Assert.assertTrue(success); } /** * make sure it works with table where bucket col is not 1st col * @throws Exception */ @Test public void testBucketingWhereBucketColIsNotFirstCol() throws Exception { List<String> partitionVals = new ArrayList<String>(); partitionVals.add("2015"); HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testing5", "store_sales", partitionVals); StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); DelimitedInputWriter writer = new DelimitedInputWriter(new String[] {"ss_sold_date_sk","ss_sold_time_sk", "ss_item_sk", "ss_customer_sk", "ss_cdemo_sk", "ss_hdemo_sk", "ss_addr_sk", "ss_store_sk", "ss_promo_sk", "ss_ticket_number", "ss_quantity", "ss_wholesale_cost", "ss_list_price", "ss_sales_price", "ss_ext_discount_amt", "ss_ext_sales_price", "ss_ext_wholesale_cost", "ss_ext_list_price", "ss_ext_tax", "ss_coupon_amt", "ss_net_paid", "ss_net_paid_inc_tax", "ss_net_profit"},",", endPt, connection); TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer); txnBatch.beginNextTransaction(); StringBuilder row = new StringBuilder(); for(int i = 0; i < 10; i++) { for(int ints = 0; ints < 11; ints++) { row.append(ints).append(','); } for(int decs = 0; decs < 12; decs++) { row.append(i + 0.1).append(','); } row.setLength(row.length() - 1); txnBatch.write(row.toString().getBytes()); } txnBatch.commit(); txnBatch.close(); connection.close(); ArrayList<String> res = queryTable(driver, "select row__id.bucketid, * from testing5.store_sales"); for (String re : res) { System.out.println(re); } } // stream data into streaming table with N buckets, then copy the data into another bucketed table // check if bucketing in both was done in the same way @Test public void testStreamBucketingMatchesRegularBucketing() throws Exception { int bucketCount = 100; String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString(); String tableLoc = "'" + dbUri + Path.SEPARATOR + "streamedtable" + "'"; String tableLoc2 = "'" + dbUri + Path.SEPARATOR + "finaltable" + "'"; String tableLoc3 = "'" + dbUri + Path.SEPARATOR + "nobucket" + "'"; runDDL(driver, "create database testBucketing3"); runDDL(driver, "use testBucketing3"); runDDL(driver, "create table streamedtable ( key1 string,key2 int,data string ) clustered by ( key1,key2 ) into " + bucketCount + " buckets stored as orc location " + tableLoc + " TBLPROPERTIES ('transactional'='true')") ; // In 'nobucket' table we capture bucketid from streamedtable to workaround a hive bug that prevents joins two identically bucketed tables runDDL(driver, "create table nobucket ( bucketid int, key1 string,key2 int,data string ) location " + tableLoc3) ; runDDL(driver, "create table finaltable ( bucketid int, key1 string,key2 int,data string ) clustered by ( key1,key2 ) into " + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='true')"); String[] records = new String[] { "PSFAHYLZVC,29,EPNMA", "PPPRKWAYAU,96,VUTEE", "MIAOFERCHI,3,WBDSI", "CEGQAZOWVN,0,WCUZL", "XWAKMNSVQF,28,YJVHU", "XBWTSAJWME,2,KDQFO", "FUVLQTAXAY,5,LDSDG", "QTQMDJMGJH,6,QBOMA", "EFLOTLWJWN,71,GHWPS", "PEQNAOJHCM,82,CAAFI", "MOEKQLGZCP,41,RUACR", "QZXMCOPTID,37,LFLWE", "EYALVWICRD,13,JEZLC", "VYWLZAYTXX,16,DMVZX", "OSALYSQIXR,47,HNZVE", "JGKVHKCEGQ,25,KSCJB", "WQFMMYDHET,12,DTRWA", "AJOVAYZKZQ,15,YBKFO", "YAQONWCUAU,31,QJNHZ", "DJBXUEUOEB,35,IYCBL" }; HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "streamedtable", null); String[] colNames1 = new String[] { "key1", "key2", "data" }; DelimitedInputWriter wr = new DelimitedInputWriter(colNames1,",", endPt); StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); TransactionBatch txnBatch = connection.fetchTransactionBatch(2, wr); txnBatch.beginNextTransaction(); for (String record : records) { txnBatch.write(record.toString().getBytes()); } txnBatch.commit(); txnBatch.close(); connection.close(); ArrayList<String> res1 = queryTable(driver, "select row__id.bucketid, * from streamedtable order by key2"); for (String re : res1) { System.out.println(re); } driver.run("insert into nobucket select row__id.bucketid,* from streamedtable"); runDDL(driver, " insert into finaltable select * from nobucket"); ArrayList<String> res2 = queryTable(driver, "select row__id.bucketid,* from finaltable where row__id.bucketid<>bucketid"); for (String s : res2) { LOG.error(s); } Assert.assertTrue(res2.isEmpty()); } @Test public void testTableValidation() throws Exception { int bucketCount = 100; String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString(); String tbl1 = "validation1"; String tbl2 = "validation2"; String tableLoc = "'" + dbUri + Path.SEPARATOR + tbl1 + "'"; String tableLoc2 = "'" + dbUri + Path.SEPARATOR + tbl2 + "'"; runDDL(driver, "create database testBucketing3"); runDDL(driver, "use testBucketing3"); runDDL(driver, "create table " + tbl1 + " ( key1 string, data string ) clustered by ( key1 ) into " + bucketCount + " buckets stored as orc location " + tableLoc) ; runDDL(driver, "create table " + tbl2 + " ( key1 string, data string ) clustered by ( key1 ) into " + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='false')") ; try { HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "validation1", null); endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); Assert.assertTrue("InvalidTable exception was not thrown", false); } catch (InvalidTable e) { // expecting this exception } try { HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "validation2", null); endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); Assert.assertTrue("InvalidTable exception was not thrown", false); } catch (InvalidTable e) { // expecting this exception } } private void checkDataWritten(Path partitionPath, long minTxn, long maxTxn, int buckets, int numExpectedFiles, String... records) throws Exception { ValidTxnList txns = msClient.getValidTxns(); AcidUtils.Directory dir = AcidUtils.getAcidState(partitionPath, conf, txns); Assert.assertEquals(0, dir.getObsolete().size()); Assert.assertEquals(0, dir.getOriginalFiles().size()); List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories(); System.out.println("Files found: "); for (AcidUtils.ParsedDelta pd : current) System.out.println(pd.getPath().toString()); Assert.assertEquals(numExpectedFiles, current.size()); // find the absolute minimum transaction long min = Long.MAX_VALUE; long max = Long.MIN_VALUE; for (AcidUtils.ParsedDelta pd : current) { if (pd.getMaxTransaction() > max) max = pd.getMaxTransaction(); if (pd.getMinTransaction() < min) min = pd.getMinTransaction(); } Assert.assertEquals(minTxn, min); Assert.assertEquals(maxTxn, max); InputFormat inf = new OrcInputFormat(); JobConf job = new JobConf(); job.set("mapred.input.dir", partitionPath.toString()); job.set("bucket_count", Integer.toString(buckets)); job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg"); job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string"); job.set(ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, "true"); job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString()); InputSplit[] splits = inf.getSplits(job, buckets); Assert.assertEquals(buckets, splits.length); org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr = inf.getRecordReader(splits[0], job, Reporter.NULL); NullWritable key = rr.createKey(); OrcStruct value = rr.createValue(); for (String record : records) { Assert.assertEquals(true, rr.next(key, value)); Assert.assertEquals(record, value.toString()); } Assert.assertEquals(false, rr.next(key, value)); } private void checkNothingWritten(Path partitionPath) throws Exception { ValidTxnList txns = msClient.getValidTxns(); AcidUtils.Directory dir = AcidUtils.getAcidState(partitionPath, conf, txns); Assert.assertEquals(0, dir.getObsolete().size()); Assert.assertEquals(0, dir.getOriginalFiles().size()); List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories(); Assert.assertEquals(0, current.size()); } @Test public void testEndpointConnection() throws Exception { // For partitioned table, partitionVals are specified HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals); StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); //shouldn't throw connection.close(); // For unpartitioned table, partitionVals are not specified endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null); endPt.newConnection(false, "UT_" + Thread.currentThread().getName()).close(); // should not throw // For partitioned table, partitionVals are not specified try { endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, null); connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName()); Assert.assertTrue("ConnectionError was not thrown", false); connection.close(); } catch (ConnectionError e) { // expecting this exception String errMsg = "doesn't specify any partitions for partitioned table"; Assert.assertTrue(e.toString().endsWith(errMsg)); } // For unpartitioned table, partition values are specified try { endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, partitionVals); connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); Assert.assertTrue("ConnectionError was not thrown", false); connection.close(); } catch (ConnectionError e) { // expecting this exception String errMsg = "specifies partitions for unpartitioned table"; Assert.assertTrue(e.toString().endsWith(errMsg)); } } @Test public void testAddPartition() throws Exception { List<String> newPartVals = new ArrayList<String>(2); newPartVals.add(PART1_CONTINENT); newPartVals.add("Nepal"); HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName , newPartVals); // Ensure partition is absent try { msClient.getPartition(endPt.database, endPt.table, endPt.partitionVals); Assert.assertTrue("Partition already exists", false); } catch (NoSuchObjectException e) { // expect this exception } // Create partition Assert.assertNotNull(endPt.newConnection(true, "UT_" + Thread.currentThread().getName())); // Ensure partition is present Partition p = msClient.getPartition(endPt.database, endPt.table, endPt.partitionVals); Assert.assertNotNull("Did not find added partition", p); } @Test public void testTransactionBatchEmptyCommit() throws Exception { // 1) to partitioned table HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals); StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, connection); TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); txnBatch.commit(); Assert.assertEquals(TransactionBatch.TxnState.COMMITTED , txnBatch.getCurrentTransactionState()); txnBatch.close(); connection.close(); // 2) To unpartitioned table endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null); writer = new DelimitedInputWriter(fieldNames2,",", endPt); connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); txnBatch.commit(); Assert.assertEquals(TransactionBatch.TxnState.COMMITTED , txnBatch.getCurrentTransactionState()); txnBatch.close(); connection.close(); } /** * check that transactions that have not heartbeated and timedout get properly aborted * @throws Exception */ @Test public void testTimeOutReaper() throws Exception { HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null); DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames2,",", endPt); StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); TransactionBatch txnBatch = connection.fetchTransactionBatch(5, writer); txnBatch.beginNextTransaction(); conf.setTimeVar(HiveConf.ConfVars.HIVE_TIMEDOUT_TXN_REAPER_START, 0, TimeUnit.SECONDS); //ensure txn timesout conf.setTimeVar(HiveConf.ConfVars.HIVE_TXN_TIMEOUT, 1, TimeUnit.MILLISECONDS); AcidHouseKeeperService houseKeeperService = new AcidHouseKeeperService(); houseKeeperService.start(conf); while(houseKeeperService.getIsAliveCounter() <= Integer.MIN_VALUE) { Thread.sleep(100);//make sure it has run at least once } houseKeeperService.stop(); try { //should fail because the TransactionBatch timed out txnBatch.commit(); } catch(TransactionError e) { Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException); } txnBatch.close(); txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); txnBatch.commit(); txnBatch.beginNextTransaction(); int lastCount = houseKeeperService.getIsAliveCounter(); houseKeeperService.start(conf); while(houseKeeperService.getIsAliveCounter() <= lastCount) { Thread.sleep(100);//make sure it has run at least once } houseKeeperService.stop(); try { //should fail because the TransactionBatch timed out txnBatch.commit(); } catch(TransactionError e) { Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException); } txnBatch.close(); connection.close(); } @Test public void testHeartbeat() throws Exception { HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null); StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames2,",", endPt, connection); TransactionBatch txnBatch = connection.fetchTransactionBatch(5, writer); txnBatch.beginNextTransaction(); //todo: this should ideally check Transaction heartbeat as well, but heartbeat //timestamp is not reported yet //GetOpenTxnsInfoResponse txnresp = msClient.showTxns(); ShowLocksRequest request = new ShowLocksRequest(); request.setDbname(dbName2); request.setTablename(tblName2); ShowLocksResponse response = msClient.showLocks(request); Assert.assertEquals("Wrong nubmer of locks: " + response, 1, response.getLocks().size()); ShowLocksResponseElement lock = response.getLocks().get(0); long acquiredAt = lock.getAcquiredat(); long heartbeatAt = lock.getLastheartbeat(); txnBatch.heartbeat(); response = msClient.showLocks(request); Assert.assertEquals("Wrong number of locks2: " + response, 1, response.getLocks().size()); lock = response.getLocks().get(0); Assert.assertEquals("Acquired timestamp didn't match", acquiredAt, lock.getAcquiredat()); Assert.assertTrue("Expected new heartbeat (" + lock.getLastheartbeat() + ") == old heartbeat(" + heartbeatAt +")", lock.getLastheartbeat() == heartbeatAt); txnBatch.close(); int txnBatchSize = 200; txnBatch = connection.fetchTransactionBatch(txnBatchSize, writer); for(int i = 0; i < txnBatchSize; i++) { txnBatch.beginNextTransaction(); if(i % 47 == 0) { txnBatch.heartbeat(); } if(i % 10 == 0) { txnBatch.abort(); } else { txnBatch.commit(); } if(i % 37 == 0) { txnBatch.heartbeat(); } } } @Test public void testTransactionBatchEmptyAbort() throws Exception { // 1) to partitioned table HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals); StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName()); DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, connection); TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); txnBatch.abort(); Assert.assertEquals(TransactionBatch.TxnState.ABORTED , txnBatch.getCurrentTransactionState()); txnBatch.close(); connection.close(); // 2) to unpartitioned table endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null); writer = new DelimitedInputWriter(fieldNames,",", endPt); connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName()); txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); txnBatch.abort(); Assert.assertEquals(TransactionBatch.TxnState.ABORTED , txnBatch.getCurrentTransactionState()); txnBatch.close(); connection.close(); } @Test public void testTransactionBatchCommit_Delimited() throws Exception { testTransactionBatchCommit_Delimited(null); } @Test public void testTransactionBatchCommit_DelimitedUGI() throws Exception { testTransactionBatchCommit_Delimited(Utils.getUGI()); } private void testTransactionBatchCommit_Delimited(UserGroupInformation ugi) throws Exception { HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals); StreamingConnection connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName()); DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, conf, connection); // 1st Txn TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); Assert.assertEquals(TransactionBatch.TxnState.OPEN , txnBatch.getCurrentTransactionState()); txnBatch.write("1,Hello streaming".getBytes()); txnBatch.commit(); checkDataWritten(partLoc, 15, 24, 1, 1, "{1, Hello streaming}"); Assert.assertEquals(TransactionBatch.TxnState.COMMITTED , txnBatch.getCurrentTransactionState()); // 2nd Txn txnBatch.beginNextTransaction(); Assert.assertEquals(TransactionBatch.TxnState.OPEN , txnBatch.getCurrentTransactionState()); txnBatch.write("2,Welcome to streaming".getBytes()); // data should not be visible checkDataWritten(partLoc, 15, 24, 1, 1, "{1, Hello streaming}"); txnBatch.commit(); checkDataWritten(partLoc, 15, 24, 1, 1, "{1, Hello streaming}", "{2, Welcome to streaming}"); txnBatch.close(); Assert.assertEquals(TransactionBatch.TxnState.INACTIVE , txnBatch.getCurrentTransactionState()); connection.close(); // To Unpartitioned table endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null); connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName()); writer = new DelimitedInputWriter(fieldNames,",", endPt, conf, connection); // 1st Txn txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); Assert.assertEquals(TransactionBatch.TxnState.OPEN , txnBatch.getCurrentTransactionState()); txnBatch.write("1,Hello streaming".getBytes()); txnBatch.commit(); Assert.assertEquals(TransactionBatch.TxnState.COMMITTED , txnBatch.getCurrentTransactionState()); connection.close(); } @Test public void testTransactionBatchCommit_Regex() throws Exception { testTransactionBatchCommit_Regex(null); } @Test public void testTransactionBatchCommit_RegexUGI() throws Exception { testTransactionBatchCommit_Regex(Utils.getUGI()); } private void testTransactionBatchCommit_Regex(UserGroupInformation ugi) throws Exception { HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals); StreamingConnection connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName()); String regex = "([^,]*),(.*)"; StrictRegexWriter writer = new StrictRegexWriter(regex, endPt, conf, connection); // 1st Txn TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); Assert.assertEquals(TransactionBatch.TxnState.OPEN , txnBatch.getCurrentTransactionState()); txnBatch.write("1,Hello streaming".getBytes()); txnBatch.commit(); checkDataWritten(partLoc, 15, 24, 1, 1, "{1, Hello streaming}"); Assert.assertEquals(TransactionBatch.TxnState.COMMITTED , txnBatch.getCurrentTransactionState()); // 2nd Txn txnBatch.beginNextTransaction(); Assert.assertEquals(TransactionBatch.TxnState.OPEN , txnBatch.getCurrentTransactionState()); txnBatch.write("2,Welcome to streaming".getBytes()); // data should not be visible checkDataWritten(partLoc, 15, 24, 1, 1, "{1, Hello streaming}"); txnBatch.commit(); checkDataWritten(partLoc, 15, 24, 1, 1, "{1, Hello streaming}", "{2, Welcome to streaming}"); txnBatch.close(); Assert.assertEquals(TransactionBatch.TxnState.INACTIVE , txnBatch.getCurrentTransactionState()); connection.close(); // To Unpartitioned table endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null); connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName()); regex = "([^:]*):(.*)"; writer = new StrictRegexWriter(regex, endPt, conf, connection); // 1st Txn txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); Assert.assertEquals(TransactionBatch.TxnState.OPEN , txnBatch.getCurrentTransactionState()); txnBatch.write("1:Hello streaming".getBytes()); txnBatch.commit(); Assert.assertEquals(TransactionBatch.TxnState.COMMITTED , txnBatch.getCurrentTransactionState()); connection.close(); } @Test public void testTransactionBatchCommit_Json() throws Exception { HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals); StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName()); StrictJsonWriter writer = new StrictJsonWriter(endPt, connection); // 1st Txn TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); Assert.assertEquals(TransactionBatch.TxnState.OPEN , txnBatch.getCurrentTransactionState()); String rec1 = "{\"id\" : 1, \"msg\": \"Hello streaming\"}"; txnBatch.write(rec1.getBytes()); txnBatch.commit(); checkDataWritten(partLoc, 15, 24, 1, 1, "{1, Hello streaming}"); Assert.assertEquals(TransactionBatch.TxnState.COMMITTED , txnBatch.getCurrentTransactionState()); txnBatch.close(); Assert.assertEquals(TransactionBatch.TxnState.INACTIVE , txnBatch.getCurrentTransactionState()); connection.close(); List<String> rs = queryTable(driver, "select * from " + dbName + "." + tblName); Assert.assertEquals(1, rs.size()); } @Test public void testRemainingTransactions() throws Exception { HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals); DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt); StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName()); // 1) test with txn.Commit() TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer); int batch=0; int initialCount = txnBatch.remainingTransactions(); while (txnBatch.remainingTransactions()>0) { txnBatch.beginNextTransaction(); Assert.assertEquals(--initialCount, txnBatch.remainingTransactions()); for (int rec=0; rec<2; ++rec) { Assert.assertEquals(TransactionBatch.TxnState.OPEN , txnBatch.getCurrentTransactionState()); txnBatch.write((batch * rec + ",Hello streaming").getBytes()); } txnBatch.commit(); Assert.assertEquals(TransactionBatch.TxnState.COMMITTED , txnBatch.getCurrentTransactionState()); ++batch; } Assert.assertEquals(0, txnBatch.remainingTransactions()); txnBatch.close(); Assert.assertEquals(TransactionBatch.TxnState.INACTIVE , txnBatch.getCurrentTransactionState()); // 2) test with txn.Abort() txnBatch = connection.fetchTransactionBatch(10, writer); batch=0; initialCount = txnBatch.remainingTransactions(); while (txnBatch.remainingTransactions()>0) { txnBatch.beginNextTransaction(); Assert.assertEquals(--initialCount,txnBatch.remainingTransactions()); for (int rec=0; rec<2; ++rec) { Assert.assertEquals(TransactionBatch.TxnState.OPEN , txnBatch.getCurrentTransactionState()); txnBatch.write((batch * rec + ",Hello streaming").getBytes()); } txnBatch.abort(); Assert.assertEquals(TransactionBatch.TxnState.ABORTED , txnBatch.getCurrentTransactionState()); ++batch; } Assert.assertEquals(0, txnBatch.remainingTransactions()); txnBatch.close(); Assert.assertEquals(TransactionBatch.TxnState.INACTIVE , txnBatch.getCurrentTransactionState()); connection.close(); } @Test public void testTransactionBatchAbort() throws Exception { HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals); StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, connection); TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); txnBatch.write("1,Hello streaming".getBytes()); txnBatch.write("2,Welcome to streaming".getBytes()); txnBatch.abort(); checkNothingWritten(partLoc); Assert.assertEquals(TransactionBatch.TxnState.ABORTED , txnBatch.getCurrentTransactionState()); txnBatch.close(); connection.close(); checkNothingWritten(partLoc); } @Test public void testTransactionBatchAbortAndCommit() throws Exception { String agentInfo = "UT_" + Thread.currentThread().getName(); HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals); StreamingConnection connection = endPt.newConnection(false, agentInfo); DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, connection); TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); txnBatch.write("1,Hello streaming".getBytes()); txnBatch.write("2,Welcome to streaming".getBytes()); ShowLocksResponse resp = msClient.showLocks(new ShowLocksRequest()); Assert.assertEquals("LockCount", 1, resp.getLocksSize()); Assert.assertEquals("LockType", LockType.SHARED_READ, resp.getLocks().get(0).getType()); Assert.assertEquals("LockState", LockState.ACQUIRED, resp.getLocks().get(0).getState()); Assert.assertEquals("AgentInfo", agentInfo, resp.getLocks().get(0).getAgentInfo()); txnBatch.abort(); checkNothingWritten(partLoc); Assert.assertEquals(TransactionBatch.TxnState.ABORTED , txnBatch.getCurrentTransactionState()); txnBatch.beginNextTransaction(); txnBatch.write("1,Hello streaming".getBytes()); txnBatch.write("2,Welcome to streaming".getBytes()); txnBatch.commit(); checkDataWritten(partLoc, 14, 23, 1, 1, "{1, Hello streaming}", "{2, Welcome to streaming}"); txnBatch.close(); connection.close(); } @Test public void testMultipleTransactionBatchCommits() throws Exception { HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals); DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt); StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName()); TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); txnBatch.write("1,Hello streaming".getBytes()); txnBatch.commit(); checkDataWritten(partLoc, 15, 24, 1, 1, "{1, Hello streaming}"); txnBatch.beginNextTransaction(); txnBatch.write("2,Welcome to streaming".getBytes()); txnBatch.commit(); checkDataWritten(partLoc, 15, 24, 1, 1, "{1, Hello streaming}", "{2, Welcome to streaming}"); txnBatch.close(); // 2nd Txn Batch txnBatch = connection.fetchTransactionBatch(10, writer); txnBatch.beginNextTransaction(); txnBatch.write("3,Hello streaming - once again".getBytes()); txnBatch.commit(); checkDataWritten(partLoc, 15, 34, 1, 2, "{1, Hello streaming}", "{2, Welcome to streaming}", "{3, Hello streaming - once again}"); txnBatch.beginNextTransaction(); txnBatch.write("4,Welcome to streaming - once again".getBytes()); txnBatch.commit(); checkDataWritten(partLoc, 15, 34, 1, 2, "{1, Hello streaming}", "{2, Welcome to streaming}", "{3, Hello streaming - once again}", "{4, Welcome to streaming - once again}"); Assert.assertEquals(TransactionBatch.TxnState.COMMITTED , txnBatch.getCurrentTransactionState()); txnBatch.close(); connection.close(); } @Test public void testInterleavedTransactionBatchCommits() throws Exception { HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals); DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt); StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); // Acquire 1st Txn Batch TransactionBatch txnBatch1 = connection.fetchTransactionBatch(10, writer); txnBatch1.beginNextTransaction(); // Acquire 2nd Txn Batch DelimitedInputWriter writer2 = new DelimitedInputWriter(fieldNames, ",", endPt); TransactionBatch txnBatch2 = connection.fetchTransactionBatch(10, writer2); txnBatch2.beginNextTransaction(); // Interleaved writes to both batches txnBatch1.write("1,Hello streaming".getBytes()); txnBatch2.write("3,Hello streaming - once again".getBytes()); checkNothingWritten(partLoc); txnBatch2.commit(); checkDataWritten(partLoc, 24, 33, 1, 1, "{3, Hello streaming - once again}"); txnBatch1.commit(); checkDataWritten(partLoc, 14, 33, 1, 2, "{1, Hello streaming}", "{3, Hello streaming - once again}"); txnBatch1.beginNextTransaction(); txnBatch1.write("2,Welcome to streaming".getBytes()); txnBatch2.beginNextTransaction(); txnBatch2.write("4,Welcome to streaming - once again".getBytes()); checkDataWritten(partLoc, 14, 33, 1, 2, "{1, Hello streaming}", "{3, Hello streaming - once again}"); txnBatch1.commit(); checkDataWritten(partLoc, 14, 33, 1, 2, "{1, Hello streaming}", "{2, Welcome to streaming}", "{3, Hello streaming - once again}"); txnBatch2.commit(); checkDataWritten(partLoc, 14, 33, 1, 2, "{1, Hello streaming}", "{2, Welcome to streaming}", "{3, Hello streaming - once again}", "{4, Welcome to streaming - once again}"); Assert.assertEquals(TransactionBatch.TxnState.COMMITTED , txnBatch1.getCurrentTransactionState()); Assert.assertEquals(TransactionBatch.TxnState.COMMITTED , txnBatch2.getCurrentTransactionState()); txnBatch1.close(); txnBatch2.close(); connection.close(); } private static class WriterThd extends Thread { private final StreamingConnection conn; private final DelimitedInputWriter writer; private final String data; private Throwable error; WriterThd(HiveEndPoint ep, String data) throws Exception { super("Writer_" + data); writer = new DelimitedInputWriter(fieldNames, ",", ep); conn = ep.newConnection(false, "UT_" + Thread.currentThread().getName()); this.data = data; setUncaughtExceptionHandler(new UncaughtExceptionHandler() { @Override public void uncaughtException(Thread thread, Throwable throwable) { error = throwable; LOG.error("Thread " + thread.getName() + " died: " + throwable.getMessage(), throwable); } }); } @Override public void run() { TransactionBatch txnBatch = null; try { txnBatch = conn.fetchTransactionBatch(10, writer); while (txnBatch.remainingTransactions() > 0) { txnBatch.beginNextTransaction(); txnBatch.write(data.getBytes()); txnBatch.write(data.getBytes()); txnBatch.commit(); } // while } catch (Exception e) { throw new RuntimeException(e); } finally { if (txnBatch != null) { try { txnBatch.close(); } catch (Exception e) { LOG.error("txnBatch.close() failed: " + e.getMessage(), e); conn.close(); } } try { conn.close(); } catch (Exception e) { LOG.error("conn.close() failed: " + e.getMessage(), e); } } } } @Test public void testConcurrentTransactionBatchCommits() throws Exception { final HiveEndPoint ep = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals); List<WriterThd> writers = new ArrayList<WriterThd>(3); writers.add(new WriterThd(ep, "1,Matrix")); writers.add(new WriterThd(ep, "2,Gandhi")); writers.add(new WriterThd(ep, "3,Silence")); for(WriterThd w : writers) { w.start(); } for(WriterThd w : writers) { w.join(); } for(WriterThd w : writers) { if(w.error != null) { Assert.assertFalse("Writer thread" + w.getName() + " died: " + w.error.getMessage() + " See log file for stack trace", true); } } } private ArrayList<SampleRec> dumpBucket(Path orcFile) throws IOException { org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.getLocal(new Configuration()); Reader reader = OrcFile.createReader(orcFile, OrcFile.readerOptions(conf).filesystem(fs)); RecordReader rows = reader.rows(); StructObjectInspector inspector = (StructObjectInspector) reader .getObjectInspector(); System.out.format("Found Bucket File : %s \n", orcFile.getName()); ArrayList<SampleRec> result = new ArrayList<SampleRec>(); while (rows.hasNext()) { Object row = rows.next(null); SampleRec rec = (SampleRec) deserializeDeltaFileRow(row, inspector)[5]; result.add(rec); } return result; } // Assumes stored data schema = [acid fields],string,int,string // return array of 6 fields, where the last field has the actual data private static Object[] deserializeDeltaFileRow(Object row, StructObjectInspector inspector) { List<? extends StructField> fields = inspector.getAllStructFieldRefs(); WritableIntObjectInspector f0ins = (WritableIntObjectInspector) fields.get(0).getFieldObjectInspector(); WritableLongObjectInspector f1ins = (WritableLongObjectInspector) fields.get(1).getFieldObjectInspector(); WritableIntObjectInspector f2ins = (WritableIntObjectInspector) fields.get(2).getFieldObjectInspector(); WritableLongObjectInspector f3ins = (WritableLongObjectInspector) fields.get(3).getFieldObjectInspector(); WritableLongObjectInspector f4ins = (WritableLongObjectInspector) fields.get(4).getFieldObjectInspector(); StructObjectInspector f5ins = (StructObjectInspector) fields.get(5).getFieldObjectInspector(); int f0 = f0ins.get(inspector.getStructFieldData(row, fields.get(0))); long f1 = f1ins.get(inspector.getStructFieldData(row, fields.get(1))); int f2 = f2ins.get(inspector.getStructFieldData(row, fields.get(2))); long f3 = f3ins.get(inspector.getStructFieldData(row, fields.get(3))); long f4 = f4ins.get(inspector.getStructFieldData(row, fields.get(4))); SampleRec f5 = deserializeInner(inspector.getStructFieldData(row, fields.get(5)), f5ins); return new Object[] {f0, f1, f2, f3, f4, f5}; } // Assumes row schema => string,int,string private static SampleRec deserializeInner(Object row, StructObjectInspector inspector) { List<? extends StructField> fields = inspector.getAllStructFieldRefs(); WritableStringObjectInspector f0ins = (WritableStringObjectInspector) fields.get(0).getFieldObjectInspector(); WritableIntObjectInspector f1ins = (WritableIntObjectInspector) fields.get(1).getFieldObjectInspector(); WritableStringObjectInspector f2ins = (WritableStringObjectInspector) fields.get(2).getFieldObjectInspector(); String f0 = f0ins.getPrimitiveJavaObject(inspector.getStructFieldData(row, fields.get(0))); int f1 = f1ins.get(inspector.getStructFieldData(row, fields.get(1))); String f2 = f2ins.getPrimitiveJavaObject(inspector.getStructFieldData(row, fields.get(2))); return new SampleRec(f0, f1, f2); } @Test public void testBucketing() throws Exception { String agentInfo = "UT_" + Thread.currentThread().getName(); dropDB(msClient, dbName3); dropDB(msClient, dbName4); // 1) Create two bucketed tables String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db"; dbLocation = dbLocation.replaceAll("\\\\","/"); // for windows paths String[] colNames = "key1,key2,data".split(","); String[] colTypes = "string,int,string".split(","); String[] bucketNames = "key1,key2".split(","); int bucketCount = 4; createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames , null, dbLocation, bucketCount); String dbLocation2 = dbFolder.newFolder(dbName4).getCanonicalPath() + ".db"; dbLocation2 = dbLocation2.replaceAll("\\\\","/"); // for windows paths String[] colNames2 = "key3,key4,data2".split(","); String[] colTypes2 = "string,int,string".split(","); String[] bucketNames2 = "key3,key4".split(","); createDbAndTable(driver, dbName4, tblName4, null, colNames2, colTypes2, bucketNames2 , null, dbLocation2, bucketCount); // 2) Insert data into both tables HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName3, tblName3, null); StreamingConnection connection = endPt.newConnection(false, agentInfo); DelimitedInputWriter writer = new DelimitedInputWriter(colNames,",", endPt, connection); TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer); txnBatch.beginNextTransaction(); txnBatch.write("name0,1,Hello streaming".getBytes()); txnBatch.write("name2,2,Welcome to streaming".getBytes()); txnBatch.write("name4,2,more Streaming unlimited".getBytes()); txnBatch.write("name5,2,even more Streaming unlimited".getBytes()); txnBatch.commit(); HiveEndPoint endPt2 = new HiveEndPoint(metaStoreURI, dbName4, tblName4, null); StreamingConnection connection2 = endPt2.newConnection(false, agentInfo); DelimitedInputWriter writer2 = new DelimitedInputWriter(colNames2,",", endPt2, connection); TransactionBatch txnBatch2 = connection2.fetchTransactionBatch(2, writer2); txnBatch2.beginNextTransaction(); txnBatch2.write("name5,2,fact3".getBytes()); // bucket 0 txnBatch2.write("name8,2,fact3".getBytes()); // bucket 1 txnBatch2.write("name0,1,fact1".getBytes()); // bucket 2 txnBatch2.commit(); // 3 Check data distribution in buckets HashMap<Integer, ArrayList<SampleRec>> actual1 = dumpAllBuckets(dbLocation, tblName3); HashMap<Integer, ArrayList<SampleRec>> actual2 = dumpAllBuckets(dbLocation2, tblName4); System.err.println("\n Table 1"); System.err.println(actual1); System.err.println("\n Table 2"); System.err.println(actual2); // assert bucket listing is as expected Assert.assertEquals("number of buckets does not match expectation", actual1.values().size(), 3); Assert.assertEquals("records in bucket does not match expectation", actual1.get(0).size(), 2); Assert.assertEquals("records in bucket does not match expectation", actual1.get(1).size(), 1); Assert.assertTrue("bucket 2 shouldn't have been created", actual1.get(2) == null); Assert.assertEquals("records in bucket does not match expectation", actual1.get(3).size(), 1); } private void runCmdOnDriver(String cmd) throws QueryFailedException { boolean t = runDDL(driver, cmd); Assert.assertTrue(cmd + " failed", t); } @Test public void testFileDump() throws Exception { String agentInfo = "UT_" + Thread.currentThread().getName(); dropDB(msClient, dbName3); dropDB(msClient, dbName4); // 1) Create two bucketed tables String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db"; dbLocation = dbLocation.replaceAll("\\\\","/"); // for windows paths String[] colNames = "key1,key2,data".split(","); String[] colTypes = "string,int,string".split(","); String[] bucketNames = "key1,key2".split(","); int bucketCount = 4; createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames , null, dbLocation, bucketCount); String dbLocation2 = dbFolder.newFolder(dbName4).getCanonicalPath() + ".db"; dbLocation2 = dbLocation2.replaceAll("\\\\","/"); // for windows paths String[] colNames2 = "key3,key4,data2".split(","); String[] colTypes2 = "string,int,string".split(","); String[] bucketNames2 = "key3,key4".split(","); createDbAndTable(driver, dbName4, tblName4, null, colNames2, colTypes2, bucketNames2 , null, dbLocation2, bucketCount); // 2) Insert data into both tables HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName3, tblName3, null); StreamingConnection connection = endPt.newConnection(false, agentInfo); DelimitedInputWriter writer = new DelimitedInputWriter(colNames,",", endPt, connection); TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer); txnBatch.beginNextTransaction(); txnBatch.write("name0,1,Hello streaming".getBytes()); txnBatch.write("name2,2,Welcome to streaming".getBytes()); txnBatch.write("name4,2,more Streaming unlimited".getBytes()); txnBatch.write("name5,2,even more Streaming unlimited".getBytes()); txnBatch.commit(); PrintStream origErr = System.err; ByteArrayOutputStream myErr = new ByteArrayOutputStream(); // replace stderr and run command System.setErr(new PrintStream(myErr)); FileDump.main(new String[]{dbLocation}); System.err.flush(); System.setErr(origErr); String errDump = new String(myErr.toByteArray()); Assert.assertEquals(false, errDump.contains("file(s) are corrupted")); // since this test runs on local file system which does not have an API to tell if files or // open or not, we are testing for negative case even though the bucket files are still open // for writes (transaction batch not closed yet) Assert.assertEquals(false, errDump.contains("is still open for writes.")); HiveEndPoint endPt2 = new HiveEndPoint(metaStoreURI, dbName4, tblName4, null); DelimitedInputWriter writer2 = new DelimitedInputWriter(colNames2,",", endPt2); StreamingConnection connection2 = endPt2.newConnection(false, agentInfo); TransactionBatch txnBatch2 = connection2.fetchTransactionBatch(2, writer2); txnBatch2.beginNextTransaction(); txnBatch2.write("name5,2,fact3".getBytes()); // bucket 0 txnBatch2.write("name8,2,fact3".getBytes()); // bucket 1 txnBatch2.write("name0,1,fact1".getBytes()); // bucket 2 // no data for bucket 3 -- expect 0 length bucket file txnBatch2.commit(); origErr = System.err; myErr = new ByteArrayOutputStream(); // replace stderr and run command System.setErr(new PrintStream(myErr)); FileDump.main(new String[]{dbLocation}); System.out.flush(); System.err.flush(); System.setErr(origErr); errDump = new String(myErr.toByteArray()); Assert.assertEquals(false, errDump.contains("Exception")); Assert.assertEquals(false, errDump.contains("file(s) are corrupted")); Assert.assertEquals(false, errDump.contains("is still open for writes.")); } @Test public void testFileDumpCorruptDataFiles() throws Exception { dropDB(msClient, dbName3); // 1) Create two bucketed tables String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db"; dbLocation = dbLocation.replaceAll("\\\\","/"); // for windows paths String[] colNames = "key1,key2,data".split(","); String[] colTypes = "string,int,string".split(","); String[] bucketNames = "key1,key2".split(","); int bucketCount = 4; createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames , null, dbLocation, bucketCount); // 2) Insert data into both tables HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName3, tblName3, null); StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); DelimitedInputWriter writer = new DelimitedInputWriter(colNames,",", endPt, connection); // we need side file for this test, so we create 2 txn batch and test with only one TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer); txnBatch.beginNextTransaction(); txnBatch.write("name0,1,Hello streaming".getBytes()); txnBatch.write("name2,2,Welcome to streaming".getBytes()); txnBatch.write("name4,2,more Streaming unlimited".getBytes()); txnBatch.write("name5,2,even more Streaming unlimited".getBytes()); txnBatch.commit(); // intentionally corrupt some files Path path = new Path(dbLocation); Collection<String> files = FileDump.getAllFilesInPath(path, conf); int readableFooter = -1; for (String file : files) { if (file.contains("bucket_00000")) { // empty out the file corruptDataFile(file, conf, Integer.MIN_VALUE); } else if (file.contains("bucket_00001")) { corruptDataFile(file, conf, -1); } else if (file.contains("bucket_00002")) { Assert.assertFalse("bucket 2 shouldn't have been created", true); } else if (file.contains("bucket_00003")) { corruptDataFile(file, conf, 100); } } PrintStream origErr = System.err; ByteArrayOutputStream myErr = new ByteArrayOutputStream(); // replace stderr and run command System.setErr(new PrintStream(myErr)); FileDump.main(new String[]{dbLocation}); System.err.flush(); System.setErr(origErr); String errDump = new String(myErr.toByteArray()); Assert.assertEquals(false, errDump.contains("Exception")); Assert.assertEquals(true, errDump.contains("3 file(s) are corrupted")); Assert.assertEquals(false, errDump.contains("is still open for writes.")); origErr = System.err; myErr = new ByteArrayOutputStream(); // replace stderr and run command System.setErr(new PrintStream(myErr)); FileDump.main(new String[]{dbLocation, "--recover", "--skip-dump"}); System.err.flush(); System.setErr(origErr); errDump = new String(myErr.toByteArray()); Assert.assertEquals(true, errDump.contains("bucket_00000 recovered successfully!")); Assert.assertEquals(true, errDump.contains("No readable footers found. Creating empty orc file.")); Assert.assertEquals(true, errDump.contains("bucket_00001 recovered successfully!")); Assert.assertEquals(true, errDump.contains("bucket_00003 recovered successfully!")); Assert.assertEquals(false, errDump.contains("Exception")); Assert.assertEquals(false, errDump.contains("is still open for writes.")); // test after recovery origErr = System.err; myErr = new ByteArrayOutputStream(); // replace stdout and run command System.setErr(new PrintStream(myErr)); FileDump.main(new String[]{dbLocation}); System.err.flush(); System.setErr(origErr); errDump = new String(myErr.toByteArray()); Assert.assertEquals(false, errDump.contains("Exception")); Assert.assertEquals(false, errDump.contains("file(s) are corrupted")); Assert.assertEquals(false, errDump.contains("is still open for writes.")); // after recovery there shouldn't be any *_flush_length files files = FileDump.getAllFilesInPath(path, conf); for (String file : files) { Assert.assertEquals(false, file.contains("_flush_length")); } txnBatch.close(); } private void corruptDataFile(final String file, final Configuration conf, final int addRemoveBytes) throws Exception { Path bPath = new Path(file); Path cPath = new Path(bPath.getParent(), bPath.getName() + ".corrupt"); FileSystem fs = bPath.getFileSystem(conf); FileStatus fileStatus = fs.getFileStatus(bPath); int len = addRemoveBytes == Integer.MIN_VALUE ? 0 : (int) fileStatus.getLen() + addRemoveBytes; byte[] buffer = new byte[len]; FSDataInputStream fdis = fs.open(bPath); fdis.readFully(0, buffer, 0, (int) Math.min(fileStatus.getLen(), buffer.length)); fdis.close(); FSDataOutputStream fdos = fs.create(cPath, true); fdos.write(buffer, 0, buffer.length); fdos.close(); fs.delete(bPath, false); fs.rename(cPath, bPath); } @Test public void testFileDumpCorruptSideFiles() throws Exception { dropDB(msClient, dbName3); // 1) Create two bucketed tables String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db"; dbLocation = dbLocation.replaceAll("\\\\","/"); // for windows paths String[] colNames = "key1,key2,data".split(","); String[] colTypes = "string,int,string".split(","); String[] bucketNames = "key1,key2".split(","); int bucketCount = 4; createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames , null, dbLocation, bucketCount); // 2) Insert data into both tables HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName3, tblName3, null); StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); DelimitedInputWriter writer = new DelimitedInputWriter(colNames,",", endPt, connection); TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer); txnBatch.beginNextTransaction(); txnBatch.write("name0,1,Hello streaming".getBytes()); txnBatch.write("name2,2,Welcome to streaming".getBytes()); txnBatch.write("name4,2,more Streaming unlimited".getBytes()); txnBatch.write("name5,2,even more Streaming unlimited".getBytes()); txnBatch.write("name6,3,aHello streaming".getBytes()); txnBatch.commit(); Map<String,List<Long>> offsetMap = new HashMap<String,List<Long>>(); recordOffsets(conf, dbLocation, offsetMap); txnBatch.beginNextTransaction(); txnBatch.write("name01,11,-Hello streaming".getBytes()); txnBatch.write("name21,21,-Welcome to streaming".getBytes()); txnBatch.write("name41,21,-more Streaming unlimited".getBytes()); txnBatch.write("name51,21,-even more Streaming unlimited".getBytes()); txnBatch.write("name02,12,--Hello streaming".getBytes()); txnBatch.write("name22,22,--Welcome to streaming".getBytes()); txnBatch.write("name42,22,--more Streaming unlimited".getBytes()); txnBatch.write("name52,22,--even more Streaming unlimited".getBytes()); txnBatch.write("name7,4,aWelcome to streaming".getBytes()); txnBatch.write("name8,5,amore Streaming unlimited".getBytes()); txnBatch.write("name9,6,aeven more Streaming unlimited".getBytes()); txnBatch.write("name10,7,bHello streaming".getBytes()); txnBatch.write("name11,8,bWelcome to streaming".getBytes()); txnBatch.write("name12,9,bmore Streaming unlimited".getBytes()); txnBatch.write("name13,10,beven more Streaming unlimited".getBytes()); txnBatch.commit(); recordOffsets(conf, dbLocation, offsetMap); // intentionally corrupt some files Path path = new Path(dbLocation); Collection<String> files = FileDump.getAllFilesInPath(path, conf); for (String file : files) { if (file.contains("bucket_00000")) { corruptSideFile(file, conf, offsetMap, "bucket_00000", -1); // corrupt last entry } else if (file.contains("bucket_00001")) { corruptSideFile(file, conf, offsetMap, "bucket_00001", 0); // empty out side file } else if (file.contains("bucket_00002")) { corruptSideFile(file, conf, offsetMap, "bucket_00002", 3); // total 3 entries (2 valid + 1 fake) } else if (file.contains("bucket_00003")) { corruptSideFile(file, conf, offsetMap, "bucket_00003", 10); // total 10 entries (2 valid + 8 fake) } } PrintStream origErr = System.err; ByteArrayOutputStream myErr = new ByteArrayOutputStream(); // replace stderr and run command System.setErr(new PrintStream(myErr)); FileDump.main(new String[]{dbLocation}); System.err.flush(); System.setErr(origErr); String errDump = new String(myErr.toByteArray()); Assert.assertEquals(true, errDump.contains("bucket_00000_flush_length [length: 11")); Assert.assertEquals(true, errDump.contains("bucket_00001_flush_length [length: 0")); Assert.assertEquals(true, errDump.contains("bucket_00002_flush_length [length: 24")); Assert.assertEquals(true, errDump.contains("bucket_00003_flush_length [length: 80")); Assert.assertEquals(false, errDump.contains("Exception")); Assert.assertEquals(true, errDump.contains("4 file(s) are corrupted")); Assert.assertEquals(false, errDump.contains("is still open for writes.")); origErr = System.err; myErr = new ByteArrayOutputStream(); // replace stderr and run command System.setErr(new PrintStream(myErr)); FileDump.main(new String[]{dbLocation, "--recover", "--skip-dump"}); System.err.flush(); System.setErr(origErr); errDump = new String(myErr.toByteArray()); Assert.assertEquals(true, errDump.contains("bucket_00000 recovered successfully!")); Assert.assertEquals(true, errDump.contains("bucket_00001 recovered successfully!")); Assert.assertEquals(true, errDump.contains("bucket_00002 recovered successfully!")); Assert.assertEquals(true, errDump.contains("bucket_00003 recovered successfully!")); List<Long> offsets = offsetMap.get("bucket_00000"); Assert.assertEquals(true, errDump.contains("Readable footerOffsets: " + offsets.toString())); offsets = offsetMap.get("bucket_00001"); Assert.assertEquals(true, errDump.contains("Readable footerOffsets: " + offsets.toString())); offsets = offsetMap.get("bucket_00002"); Assert.assertEquals(true, errDump.contains("Readable footerOffsets: " + offsets.toString())); offsets = offsetMap.get("bucket_00003"); Assert.assertEquals(true, errDump.contains("Readable footerOffsets: " + offsets.toString())); Assert.assertEquals(false, errDump.contains("Exception")); Assert.assertEquals(false, errDump.contains("is still open for writes.")); // test after recovery origErr = System.err; myErr = new ByteArrayOutputStream(); // replace stdout and run command System.setErr(new PrintStream(myErr)); FileDump.main(new String[]{dbLocation}); System.err.flush(); System.setErr(origErr); errDump = new String(myErr.toByteArray()); Assert.assertEquals(false, errDump.contains("Exception")); Assert.assertEquals(false, errDump.contains("file(s) are corrupted")); Assert.assertEquals(false, errDump.contains("is still open for writes.")); // after recovery there shouldn't be any *_flush_length files files = FileDump.getAllFilesInPath(path, conf); for (String file : files) { Assert.assertEquals(false, file.contains("_flush_length")); } txnBatch.close(); } private void corruptSideFile(final String file, final HiveConf conf, final Map<String, List<Long>> offsetMap, final String key, final int numEntries) throws IOException { Path dataPath = new Path(file); Path sideFilePath = OrcAcidUtils.getSideFile(dataPath); Path cPath = new Path(sideFilePath.getParent(), sideFilePath.getName() + ".corrupt"); FileSystem fs = sideFilePath.getFileSystem(conf); List<Long> offsets = offsetMap.get(key); long lastOffset = offsets.get(offsets.size() - 1); FSDataOutputStream fdos = fs.create(cPath, true); // corrupt last entry if (numEntries < 0) { byte[] lastOffsetBytes = longToBytes(lastOffset); for (int i = 0; i < offsets.size() - 1; i++) { fdos.writeLong(offsets.get(i)); } fdos.write(lastOffsetBytes, 0, 3); } else if (numEntries > 0) { int firstRun = Math.min(offsets.size(), numEntries); // add original entries for (int i=0; i < firstRun; i++) { fdos.writeLong(offsets.get(i)); } // add fake entries int remaining = numEntries - firstRun; for (int i = 0; i < remaining; i++) { fdos.writeLong(lastOffset + ((i + 1) * 100)); } } fdos.close(); fs.delete(sideFilePath, false); fs.rename(cPath, sideFilePath); } private byte[] longToBytes(long x) { ByteBuffer buffer = ByteBuffer.allocate(8); buffer.putLong(x); return buffer.array(); } private void recordOffsets(final HiveConf conf, final String dbLocation, final Map<String, List<Long>> offsetMap) throws IOException { Path path = new Path(dbLocation); Collection<String> files = FileDump.getAllFilesInPath(path, conf); for (String file: files) { Path bPath = new Path(file); FileSystem fs = bPath.getFileSystem(conf); FileStatus fileStatus = fs.getFileStatus(bPath); long len = fileStatus.getLen(); if (file.contains("bucket_00000")) { if (offsetMap.containsKey("bucket_00000")) { List<Long> offsets = offsetMap.get("bucket_00000"); offsets.add(len); offsetMap.put("bucket_00000", offsets); } else { List<Long> offsets = new ArrayList<Long>(); offsets.add(len); offsetMap.put("bucket_00000", offsets); } } else if (file.contains("bucket_00001")) { if (offsetMap.containsKey("bucket_00001")) { List<Long> offsets = offsetMap.get("bucket_00001"); offsets.add(len); offsetMap.put("bucket_00001", offsets); } else { List<Long> offsets = new ArrayList<Long>(); offsets.add(len); offsetMap.put("bucket_00001", offsets); } } else if (file.contains("bucket_00002")) { if (offsetMap.containsKey("bucket_00002")) { List<Long> offsets = offsetMap.get("bucket_00002"); offsets.add(len); offsetMap.put("bucket_00002", offsets); } else { List<Long> offsets = new ArrayList<Long>(); offsets.add(len); offsetMap.put("bucket_00002", offsets); } } else if (file.contains("bucket_00003")) { if (offsetMap.containsKey("bucket_00003")) { List<Long> offsets = offsetMap.get("bucket_00003"); offsets.add(len); offsetMap.put("bucket_00003", offsets); } else { List<Long> offsets = new ArrayList<Long>(); offsets.add(len); offsetMap.put("bucket_00003", offsets); } } } } @Test public void testErrorHandling() throws Exception { String agentInfo = "UT_" + Thread.currentThread().getName(); runCmdOnDriver("create database testErrors"); runCmdOnDriver("use testErrors"); runCmdOnDriver("create table T(a int, b int) clustered by (b) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')"); HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testErrors", "T", null); StreamingConnection connection = endPt.newConnection(false, agentInfo); DelimitedInputWriter innerWriter = new DelimitedInputWriter("a,b".split(","),",", endPt, connection); FaultyWriter writer = new FaultyWriter(innerWriter); TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer); txnBatch.close(); txnBatch.heartbeat();//this is no-op on closed batch txnBatch.abort();//ditto GetOpenTxnsInfoResponse r = msClient.showTxns(); Assert.assertEquals("HWM didn't match", 17, r.getTxn_high_water_mark()); List<TxnInfo> ti = r.getOpen_txns(); Assert.assertEquals("wrong status ti(0)", TxnState.ABORTED, ti.get(0).getState()); Assert.assertEquals("wrong status ti(1)", TxnState.ABORTED, ti.get(1).getState()); Exception expectedEx = null; try { txnBatch.beginNextTransaction(); } catch(IllegalStateException ex) { expectedEx = ex; } Assert.assertTrue("beginNextTransaction() should have failed", expectedEx != null && expectedEx.getMessage().contains("has been closed()")); expectedEx = null; try { txnBatch.write("name0,1,Hello streaming".getBytes()); } catch(IllegalStateException ex) { expectedEx = ex; } Assert.assertTrue("write() should have failed", expectedEx != null && expectedEx.getMessage().contains("has been closed()")); expectedEx = null; try { txnBatch.commit(); } catch(IllegalStateException ex) { expectedEx = ex; } Assert.assertTrue("commit() should have failed", expectedEx != null && expectedEx.getMessage().contains("has been closed()")); txnBatch = connection.fetchTransactionBatch(2, writer); txnBatch.beginNextTransaction(); txnBatch.write("name2,2,Welcome to streaming".getBytes()); txnBatch.write("name4,2,more Streaming unlimited".getBytes()); txnBatch.write("name5,2,even more Streaming unlimited".getBytes()); txnBatch.commit(); expectedEx = null; txnBatch.beginNextTransaction(); writer.enableErrors(); try { txnBatch.write("name6,2,Doh!".getBytes()); } catch(StreamingIOFailure ex) { expectedEx = ex; txnBatch.getCurrentTransactionState(); txnBatch.getCurrentTxnId();//test it doesn't throw ArrayIndexOutOfBounds... } Assert.assertTrue("Wrong exception: " + (expectedEx != null ? expectedEx.getMessage() : "?"), expectedEx != null && expectedEx.getMessage().contains("Simulated fault occurred")); expectedEx = null; try { txnBatch.commit(); } catch(IllegalStateException ex) { expectedEx = ex; } Assert.assertTrue("commit() should have failed", expectedEx != null && expectedEx.getMessage().contains("has been closed()")); r = msClient.showTxns(); Assert.assertEquals("HWM didn't match", 19, r.getTxn_high_water_mark()); ti = r.getOpen_txns(); Assert.assertEquals("wrong status ti(0)", TxnState.ABORTED, ti.get(0).getState()); Assert.assertEquals("wrong status ti(1)", TxnState.ABORTED, ti.get(1).getState()); //txnid 3 was committed and thus not open Assert.assertEquals("wrong status ti(2)", TxnState.ABORTED, ti.get(2).getState()); writer.disableErrors(); txnBatch = connection.fetchTransactionBatch(2, writer); txnBatch.beginNextTransaction(); txnBatch.write("name2,2,Welcome to streaming".getBytes()); writer.enableErrors(); expectedEx = null; try { txnBatch.commit(); } catch(StreamingIOFailure ex) { expectedEx = ex; } Assert.assertTrue("Wrong exception: " + (expectedEx != null ? expectedEx.getMessage() : "?"), expectedEx != null && expectedEx.getMessage().contains("Simulated fault occurred")); r = msClient.showTxns(); Assert.assertEquals("HWM didn't match", 21, r.getTxn_high_water_mark()); ti = r.getOpen_txns(); Assert.assertEquals("wrong status ti(3)", TxnState.ABORTED, ti.get(3).getState()); Assert.assertEquals("wrong status ti(4)", TxnState.ABORTED, ti.get(4).getState()); txnBatch.abort(); } // assumes un partitioned table // returns a map<bucketNum, list<record> > private HashMap<Integer, ArrayList<SampleRec>> dumpAllBuckets(String dbLocation, String tableName) throws IOException { HashMap<Integer, ArrayList<SampleRec>> result = new HashMap<Integer, ArrayList<SampleRec>>(); for (File deltaDir : new File(dbLocation + "/" + tableName).listFiles()) { if(!deltaDir.getName().startsWith("delta")) continue; File[] bucketFiles = deltaDir.listFiles(); for (File bucketFile : bucketFiles) { if(bucketFile.toString().endsWith("length")) continue; Integer bucketNum = getBucketNumber(bucketFile); ArrayList<SampleRec> recs = dumpBucket(new Path(bucketFile.toString())); result.put(bucketNum, recs); } } return result; } //assumes bucket_NNNNN format of file name private Integer getBucketNumber(File bucketFile) { String fname = bucketFile.getName(); int start = fname.indexOf('_'); String number = fname.substring(start+1, fname.length()); return Integer.parseInt(number); } // delete db and all tables in it public static void dropDB(IMetaStoreClient client, String databaseName) { try { for (String table : client.listTableNamesByFilter(databaseName, "", (short)-1)) { client.dropTable(databaseName, table, true, true); } client.dropDatabase(databaseName); } catch (TException e) { } } ///////// -------- UTILS ------- ///////// // returns Path of the partition created (if any) else Path of table public static Path createDbAndTable(Driver driver, String databaseName, String tableName, List<String> partVals, String[] colNames, String[] colTypes, String[] bucketCols, String[] partNames, String dbLocation, int bucketCount) throws Exception { String dbUri = "raw://" + new Path(dbLocation).toUri().toString(); String tableLoc = dbUri + Path.SEPARATOR + tableName; runDDL(driver, "create database IF NOT EXISTS " + databaseName + " location '" + dbUri + "'"); runDDL(driver, "use " + databaseName); String crtTbl = "create table " + tableName + " ( " + getTableColumnsStr(colNames,colTypes) + " )" + getPartitionStmtStr(partNames) + " clustered by ( " + join(bucketCols, ",") + " )" + " into " + bucketCount + " buckets " + " stored as orc " + " location '" + tableLoc + "'" + " TBLPROPERTIES ('transactional'='true') "; runDDL(driver, crtTbl); if(partNames!=null && partNames.length!=0) { return addPartition(driver, tableName, partVals, partNames); } return new Path(tableLoc); } private static Path addPartition(Driver driver, String tableName, List<String> partVals, String[] partNames) throws QueryFailedException, CommandNeedRetryException, IOException { String partSpec = getPartsSpec(partNames, partVals); String addPart = "alter table " + tableName + " add partition ( " + partSpec + " )"; runDDL(driver, addPart); return getPartitionPath(driver, tableName, partSpec); } private static Path getPartitionPath(Driver driver, String tableName, String partSpec) throws CommandNeedRetryException, IOException { ArrayList<String> res = queryTable(driver, "describe extended " + tableName + " PARTITION (" + partSpec + ")"); String partInfo = res.get(res.size() - 1); int start = partInfo.indexOf("location:") + "location:".length(); int end = partInfo.indexOf(",",start); return new Path( partInfo.substring(start,end) ); } private static String getTableColumnsStr(String[] colNames, String[] colTypes) { StringBuffer sb = new StringBuffer(); for (int i=0; i < colNames.length; ++i) { sb.append(colNames[i] + " " + colTypes[i]); if (i<colNames.length-1) { sb.append(","); } } return sb.toString(); } // converts partNames into "partName1 string, partName2 string" private static String getTablePartsStr(String[] partNames) { if (partNames==null || partNames.length==0) { return ""; } StringBuffer sb = new StringBuffer(); for (int i=0; i < partNames.length; ++i) { sb.append(partNames[i] + " string"); if (i < partNames.length-1) { sb.append(","); } } return sb.toString(); } // converts partNames,partVals into "partName1=val1, partName2=val2" private static String getPartsSpec(String[] partNames, List<String> partVals) { StringBuffer sb = new StringBuffer(); for (int i=0; i < partVals.size(); ++i) { sb.append(partNames[i] + " = '" + partVals.get(i) + "'"); if(i < partVals.size()-1) { sb.append(","); } } return sb.toString(); } private static String join(String[] values, String delimiter) { if(values==null) return null; StringBuffer strbuf = new StringBuffer(); boolean first = true; for (Object value : values) { if (!first) { strbuf.append(delimiter); } else { first = false; } strbuf.append(value.toString()); } return strbuf.toString(); } private static String getPartitionStmtStr(String[] partNames) { if ( partNames == null || partNames.length == 0) { return ""; } return " partitioned by (" + getTablePartsStr(partNames) + " )"; } private static boolean runDDL(Driver driver, String sql) throws QueryFailedException { LOG.debug(sql); System.out.println(sql); int retryCount = 1; // # of times to retry if first attempt fails for (int attempt=0; attempt <= retryCount; ++attempt) { try { //LOG.debug("Running Hive Query: "+ sql); CommandProcessorResponse cpr = driver.run(sql); if(cpr.getResponseCode() == 0) { return true; } LOG.error("Statement: " + sql + " failed: " + cpr); } catch (CommandNeedRetryException e) { if (attempt == retryCount) { throw new QueryFailedException(sql, e); } continue; } } // for return false; } public static ArrayList<String> queryTable(Driver driver, String query) throws CommandNeedRetryException, IOException { driver.run(query); ArrayList<String> res = new ArrayList<String>(); driver.getResults(res); if(res.isEmpty()) System.err.println(driver.getErrorMsg()); return res; } private static class SampleRec { public String field1; public int field2; public String field3; public SampleRec(String field1, int field2, String field3) { this.field1 = field1; this.field2 = field2; this.field3 = field3; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SampleRec that = (SampleRec) o; if (field2 != that.field2) return false; if (field1 != null ? !field1.equals(that.field1) : that.field1 != null) return false; return !(field3 != null ? !field3.equals(that.field3) : that.field3 != null); } @Override public int hashCode() { int result = field1 != null ? field1.hashCode() : 0; result = 31 * result + field2; result = 31 * result + (field3 != null ? field3.hashCode() : 0); return result; } @Override public String toString() { return " { " + "'" + field1 + '\'' + "," + field2 + ",'" + field3 + '\'' + " }"; } } /** * This is test-only wrapper around the real RecordWriter. * It can simulate faults from lower levels to test error handling logic. */ private static final class FaultyWriter implements RecordWriter { private final RecordWriter delegate; private boolean shouldThrow = false; private FaultyWriter(RecordWriter delegate) { assert delegate != null; this.delegate = delegate; } @Override public void write(long transactionId, byte[] record) throws StreamingException { delegate.write(transactionId, record); produceFault(); } @Override public void flush() throws StreamingException { delegate.flush(); produceFault(); } @Override public void clear() throws StreamingException { delegate.clear(); } @Override public void newBatch(Long minTxnId, Long maxTxnID) throws StreamingException { delegate.newBatch(minTxnId, maxTxnID); } @Override public void closeBatch() throws StreamingException { delegate.closeBatch(); } /** * allows testing of "unexpected" errors * @throws StreamingIOFailure */ private void produceFault() throws StreamingIOFailure { if(shouldThrow) { throw new StreamingIOFailure("Simulated fault occurred"); } } void enableErrors() { shouldThrow = true; } void disableErrors() { shouldThrow = false; } } }