/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.spi.hive;
import com.google.common.collect.Sets;
import java.util.Set;
import org.apache.avro.generic.GenericData;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetNotFoundException;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.TestHelpers;
import org.kitesdk.data.spi.DatasetRepositories;
import org.kitesdk.data.spi.DatasetRepository;
/**
 * Exercises two Hive-backed repositories side by side: the one opened from
 * {@code repo:hive} (field {@code managed}) and the one opened from
 * {@code repo:hive:target/test-repo} (field {@code external}). Each test
 * checks that a dataset created through one repository can be loaded,
 * updated, deleted, or listed through the other.
 */
public class TestManagedExternalHandling {

  // Dataset URIs used by the fixture setup
  private static final String MANAGED_DATASET_URI =
      "dataset:hive?dataset=managed";
  private static final String EXTERNAL_DATASET_URI =
      "dataset:hive:target/test-repo/ns/external";

  // Schema literal shared by the fixture descriptor and the updated descriptors
  private static final String STRING_SCHEMA = "\"string\"";

  private static final DatasetDescriptor STRING_DESCRIPTOR =
      new DatasetDescriptor.Builder()
          .schemaLiteral(STRING_SCHEMA)
          .build();

  private DatasetRepository managed;
  private DatasetRepository external;

  /**
   * Removes anything left over from earlier runs, then creates the
   * {@code managed} dataset by URI and the {@code ns.external} dataset through
   * the external repository.
   */
  @Before
  public void setupRepositories() {
    // ensure the datasets do not already exist
    Datasets.delete(MANAGED_DATASET_URI);
    Datasets.delete(EXTERNAL_DATASET_URI);

    // ensure no other metadata is left in the metastore
    cleanHive();

    // create datasets
    this.managed = DatasetRepositories.repositoryFor("repo:hive");
    Datasets.create(MANAGED_DATASET_URI, STRING_DESCRIPTOR);

    this.external = DatasetRepositories.repositoryFor("repo:hive:target/test-repo");
    external.create("ns", "external", STRING_DESCRIPTOR);
  }

  /**
   * Drops every table in every database known to the metastore and then drops
   * each database other than {@code default}.
   */
  @After
  public void cleanHive() {
    MetaStoreUtil hive = MetaStoreUtil.get(new Configuration());
    for (String database : hive.getAllDatabases()) {
      for (String table : hive.getAllTables(database)) {
        hive.dropTable(database, table);
      }
      // the default database itself is kept; only its tables are removed
      if ("default".equals(database)) {
        continue;
      }
      hive.dropDatabase(database, true);
    }
  }

  /**
   * Loads, updates, and deletes the {@code ns.external} dataset through the
   * managed repository, comparing each step with what the external repository
   * reports.
   */
  @Test
  public void testManagedWithExternal() {
    HiveAbstractMetadataProvider hive =
        new HiveManagedMetadataProvider(new HiveConf());
    Assert.assertTrue(hive.isExternal("ns", "external"));

    // load through the managed repository, compare with the external view
    Dataset<GenericData.Record> loaded = managed.load("ns", "external");
    Assert.assertNotNull("Should open external dataset with managed", loaded);
    DatasetDescriptor seenByExternal =
        external.load("ns", "external").getDescriptor();
    Assert.assertEquals("Should match external dataset",
        seenByExternal, loaded.getDescriptor());

    // update through the managed repository
    DatasetDescriptor changed = withCacheSize(loaded.getDescriptor());
    Dataset<GenericData.Record> updated =
        managed.update("ns", "external", changed);
    Assert.assertNotNull("Should update external dataset with managed",
        updated);
    DatasetDescriptor afterUpdate =
        external.load("ns", "external").getDescriptor();
    Assert.assertEquals("Should see changes in external dataset",
        afterUpdate, updated.getDescriptor());

    // delete through the managed repository
    boolean deleted = managed.delete("ns", "external");
    Assert.assertTrue("Should delete external tables with managed", deleted);
    assertNotFound("Should delete external table correctly",
        external, "ns", "external");
  }

  /**
   * Loads, updates, and deletes the {@code default.managed} dataset through
   * the external repository, comparing each step with what the managed
   * repository reports.
   */
  @Test
  public void testExternalWithManaged() {
    HiveAbstractMetadataProvider hive =
        new HiveManagedMetadataProvider(new HiveConf());
    Assert.assertTrue(hive.isManaged("default", "managed"));

    // load through the external repository, compare with the managed view
    Dataset<GenericData.Record> loaded = external.load("default", "managed");
    Assert.assertNotNull("Should open managed dataset with external", loaded);
    DatasetDescriptor seenByManaged =
        managed.load("default", "managed").getDescriptor();
    Assert.assertEquals("Should match managed dataset",
        seenByManaged, loaded.getDescriptor());

    // update through the external repository
    DatasetDescriptor changed = withCacheSize(loaded.getDescriptor());
    Dataset<GenericData.Record> updated =
        external.update("default", "managed", changed);
    Assert.assertNotNull("Should update managed dataset with external",
        updated);
    DatasetDescriptor afterUpdate =
        managed.load("default", "managed").getDescriptor();
    Assert.assertEquals("Should see changes in managed dataset",
        afterUpdate, updated.getDescriptor());

    // delete through the external repository
    boolean deleted = external.delete("default", "managed");
    Assert.assertTrue("Should delete managed tables with external", deleted);
    assertNotFound("Should delete managed table correctly",
        managed, "default", "managed");
  }

  /**
   * Adds tables that are meant to be unreadable directly to the metastore and
   * checks that both repositories list only the two fixture datasets and
   * their namespaces.
   */
  @Test
  public void testRepositoryList() throws Exception {
    MetaStoreUtil hive = MetaStoreUtil.get(new Configuration());

    // drop the unreadable tables first so they can be created below
    hive.dropTable("default", "bad_type");
    hive.dropTable("bad", "bad_serde");
    hive.dropTable("bad", "bad_schema");

    // unreadable because of its table type
    Table viewTable = HiveUtils.createEmptyTable("default", "bad_type");
    viewTable.setTableType(TableType.VIRTUAL_VIEW.toString());
    hive.createTable(viewTable);

    // readable type, but a SerDe class from com.example
    Table serdeTable = HiveUtils.createEmptyTable("bad", "bad_serde");
    serdeTable.setTableType(TableType.MANAGED_TABLE.toString());
    serdeTable.getSd().getSerdeInfo()
        .setSerializationLib("com.example.ExampleHiveSerDe");
    hive.createTable(serdeTable);

    // add a bad schema if decimal is supported (not supported by Kite)
    if (HiveSchemaConverter.decimalClass != null) {
      Table schemaTable = HiveUtils.createEmptyTable("bad", "bad_schema");
      schemaTable.setTableType(TableType.MANAGED_TABLE.toString()); // readable type
      schemaTable.getSd().getSerdeInfo()
          .setSerializationLib("org.apache.hadoop.hive.serde2.avro.AvroSerDe");
      schemaTable.getSd()
          .setInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat");
      schemaTable.getSd()
          .setOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat");
      schemaTable.getSd().getCols()
          .add(new FieldSchema("invalid", "decimal(1,2)", null));
      hive.createTable(schemaTable);
    }

    // note that unreadable tables are not in the lists
    Set<String> namespaces = Sets.newHashSet("default", "ns");
    Assert.assertEquals(
        "Managed should list namespaces with external and managed tables",
        namespaces, Sets.newHashSet(managed.namespaces()));
    Assert.assertEquals(
        "External should list namespaces with external and managed tables",
        namespaces, Sets.newHashSet(external.namespaces()));

    Set<String> inDefault = Sets.newHashSet("managed");
    Assert.assertEquals("Managed should list external and managed tables",
        inDefault, Sets.newHashSet(managed.datasets("default")));
    Assert.assertEquals("External should list external and managed tables",
        inDefault, Sets.newHashSet(external.datasets("default")));

    Set<String> inNs = Sets.newHashSet("external");
    Assert.assertEquals("Managed should list external and managed tables",
        inNs, Sets.newHashSet(managed.datasets("ns")));
    Assert.assertEquals("External should list external and managed tables",
        inNs, Sets.newHashSet(external.datasets("ns")));
  }

  /**
   * Builds a copy of {@code base} with the {@code kite.writer.cache-size}
   * property set to {@code 34} and the schema literal set again to the same
   * string schema used by the fixture descriptor.
   */
  private static DatasetDescriptor withCacheSize(DatasetDescriptor base) {
    return new DatasetDescriptor.Builder(base)
        .property("kite.writer.cache-size", "34")
        .schemaLiteral(STRING_SCHEMA)
        .build();
  }

  /**
   * Asserts that loading {@code namespace.name} from {@code repo} throws
   * {@link DatasetNotFoundException}.
   */
  private static void assertNotFound(String message,
                                     final DatasetRepository repo,
                                     final String namespace,
                                     final String name) {
    TestHelpers.assertThrows(message,
        DatasetNotFoundException.class, new Runnable() {
          @Override
          public void run() {
            repo.load(namespace, name);
          }
        });
  }
}