/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.explore.service;

import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.api.dataset.lib.FileSetProperties;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.dataset.lib.PartitionDetail;
import co.cask.cdap.api.dataset.lib.PartitionKey;
import co.cask.cdap.api.dataset.lib.TimePartitionedFileSet;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.explore.utils.ExploreTableNaming;
import co.cask.cdap.proto.ColumnDesc;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.QueryHandle;
import co.cask.cdap.proto.QueryResult;
import co.cask.cdap.proto.QueryStatus;
import co.cask.cdap.proto.TableInfo;
import co.cask.cdap.test.SlowTests;
import co.cask.tephra.Transaction;
import co.cask.tephra.TransactionAware;
import com.google.common.collect.Lists;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TemporaryFolder;

import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
 * Tests upgrading of Explore (Hive) tables created by older versions of CDAP.
 */
@Category(SlowTests.class)
public class ExploreUpgradeTest extends BaseHiveExploreServiceTest {

  @ClassRule
  public static TemporaryFolder tmpFolder = new TemporaryFolder();

  private static final ExploreTableNaming NAMING = new ExploreTableNaming();

  @BeforeClass
  public static void start() throws Exception {
    initialize(CConfiguration.create(), tmpFolder, true);
  }

  @Test
  public void testUpgrade() throws Exception {
    // add some old style tables to the default database
    String dummyPath = tmpFolder.newFolder().getAbsolutePath();

    // create a stream and some datasets that will be upgraded. Need to create the actual instances
    // so that upgrade can find them, but we will manually create the Hive tables for them in the old style.
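    // Outline of the test:
    //   1. create real stream and dataset instances so that upgrade can find them
    //   2. drop the new-style Hive tables created for them and re-create the tables in the old style
    //   3. run the Explore upgrade
    //   4. verify the upgraded tables exist, carry the CDAP version property, and retain partitions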
    // add a stream
    Id.Stream streamId = Id.Stream.from(Id.Namespace.DEFAULT, "purchases");
    createStream(streamId);
    try {
      // add a key-value table for record scannables
      Id.DatasetInstance kvID = Id.DatasetInstance.from(Id.Namespace.DEFAULT, "kvtable");
      datasetFramework.addInstance(KeyValueTable.class.getName(), kvID, DatasetProperties.EMPTY);

      // add a time partitioned fileset
      Id.DatasetInstance filesetID = Id.DatasetInstance.from(Id.Namespace.DEFAULT, "myfiles");
      Schema schema = Schema.recordOf("rec",
                                      Schema.Field.of("body", Schema.of(Schema.Type.STRING)),
                                      Schema.Field.of("ts", Schema.of(Schema.Type.LONG)));
      datasetFramework.addInstance(TimePartitionedFileSet.class.getName(), filesetID, FileSetProperties.builder()
        .setBasePath("my/path")
        .setEnableExploreOnCreate(true)
        .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
        .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
        .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
        .setTableProperty("avro.schema.literal", schema.toString())
        .build());

      // add a partition to the time partitioned fileset, within a transaction
      TimePartitionedFileSet tpfs = datasetFramework.getDataset(filesetID, Collections.<String, String>emptyMap(), null);
      Assert.assertNotNull(tpfs);
      Transaction tx1 = transactionManager.startShort(100);
      TransactionAware txTpfs = (TransactionAware) tpfs;
      txTpfs.startTx(tx1);
      tpfs.addPartition(0L, "epoch");
      Set<PartitionDetail> partitionDetails = tpfs.getPartitions(null);
      txTpfs.commitTx();
      transactionManager.canCommit(tx1, txTpfs.getTxChanges());
      transactionManager.commit(tx1);
      txTpfs.postTxCommit();

      // remove existing tables; they will be replaced with manually created old-style tables
      waitForCompletion(Lists.newArrayList(
        exploreTableManager.disableStream(NAMING.getTableName(streamId), streamId),
        exploreTableManager.disableDataset(kvID, datasetFramework.getDatasetSpec(kvID)),
        exploreTableManager.disableDataset(filesetID, datasetFramework.getDatasetSpec(filesetID))));

      String createOldStream = "CREATE EXTERNAL TABLE IF NOT EXISTS cdap_stream_purchases " +
        "(ts bigint, headers map<string, string>, body string) COMMENT 'CDAP Stream' " +
        "STORED BY 'co.cask.cdap.hive.stream.StreamStorageHandler' " +
        "WITH SERDEPROPERTIES ('explore.stream.name'='purchases') " +
        "LOCATION '" + dummyPath + "' ";

      String createOldRecordScannable = "CREATE EXTERNAL TABLE IF NOT EXISTS cdap_kvtable " +
        "(key binary, value binary) COMMENT 'CDAP Dataset' " +
        "STORED BY 'co.cask.cdap.hive.datasets.DatasetStorageHandler' " +
        "WITH SERDEPROPERTIES ('explore.dataset.name'='cdap.user.kvtable')";

      String createOldFileset = "CREATE EXTERNAL TABLE IF NOT EXISTS cdap_myfiles " +
        "(body string, ts bigint) " +
        "PARTITIONED BY (year int, month int, day int, hour int, minute int) " +
        "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' " +
        "STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' " +
        "OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' " +
        "LOCATION '" + dummyPath + "' " +
        "TBLPROPERTIES ('cdap.name'='cdap.user.myfiles', 'avro.schema.literal'='" + schema.toString() + "')";

      String createNonCDAP = "CREATE TABLE some_table (x int, y string)";
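      // after upgrade, the old-style tables above should be replaced by new-style ones
      // (cdap_stream_purchases -> stream_purchases, cdap_kvtable -> dataset_kvtable,
      // cdap_myfiles -> dataset_myfiles), while the non-CDAP table is left alone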
      // order matters, at least in the unit test environment...
      // if you create a table from a dataset that uses DatasetStorageHandler,
      // the next tables will call initialize on DatasetStorageHandler...
      // TODO: find out why the above is true
      waitForCompletion(Lists.newArrayList(
        exploreService.execute(Id.Namespace.DEFAULT, createNonCDAP),
        exploreService.execute(Id.Namespace.DEFAULT, createOldFileset),
        exploreService.execute(Id.Namespace.DEFAULT, createOldRecordScannable),
        exploreService.execute(Id.Namespace.DEFAULT, createOldStream)
      ));

      exploreService.upgrade();

      // check that the new tables have the cdap version property, which means they were upgraded
      TableInfo tableInfo = exploreService.getTableInfo("default", "dataset_myfiles");
      Assert.assertTrue(tableInfo.getParameters().containsKey(Constants.Explore.CDAP_VERSION));
      tableInfo = exploreService.getTableInfo("default", "dataset_kvtable");
      Assert.assertTrue(tableInfo.getParameters().containsKey(Constants.Explore.CDAP_VERSION));
      tableInfo = exploreService.getTableInfo("default", "stream_purchases");
      Assert.assertTrue(tableInfo.getParameters().containsKey(Constants.Explore.CDAP_VERSION));
      tableInfo = exploreService.getTableInfo("default", "some_table");
      Assert.assertFalse(tableInfo.getParameters().containsKey(Constants.Explore.CDAP_VERSION));

      // check that the partition added to the tpfs dataset shows up in the upgraded table
      Iterator<PartitionDetail> partitionIter = partitionDetails.iterator();
      String expected = stringify(partitionIter.next().getPartitionKey());
      runCommand(Id.Namespace.DEFAULT, "show partitions dataset_myfiles", true,
                 Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")),
                 Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(expected)))
      );

      // check that the old tables were dropped: only the three upgraded tables and some_table remain
      Assert.assertEquals(4, exploreService.getTables("default").size());
    } finally {
      dropStream(streamId);
    }
  }

  private String stringify(PartitionKey partitionKey) {
    return String.format("year=%s/month=%s/day=%s/hour=%s/minute=%s",
                         partitionKey.getField("year"), partitionKey.getField("month"),
                         partitionKey.getField("day"), partitionKey.getField("hour"),
                         partitionKey.getField("minute"));
  }

  private void waitForCompletion(List<QueryHandle> handles) throws Exception {
    for (QueryHandle handle : handles) {
      QueryStatus status = exploreService.getStatus(handle);
      while (!status.getStatus().isDone()) {
        TimeUnit.SECONDS.sleep(1);
        status = exploreService.getStatus(handle);
      }
    }
  }
}