/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.facebook.presto.GroupByHashPageIndexerFactory;
import com.facebook.presto.hive.AbstractTestHiveClient.HiveTransaction;
import com.facebook.presto.hive.AbstractTestHiveClient.Transaction;
import com.facebook.presto.hive.authentication.NoHdfsAuthentication;
import com.facebook.presto.hive.metastore.BridgingHiveMetastore;
import com.facebook.presto.hive.metastore.CachingHiveMetastore;
import com.facebook.presto.hive.metastore.Database;
import com.facebook.presto.hive.metastore.ExtendedHiveMetastore;
import com.facebook.presto.hive.metastore.PrincipalPrivileges;
import com.facebook.presto.hive.metastore.Table;
import com.facebook.presto.hive.metastore.ThriftHiveMetastore;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ColumnMetadata;
import com.facebook.presto.spi.ConnectorOutputTableHandle;
import com.facebook.presto.spi.ConnectorPageSink;
import com.facebook.presto.spi.ConnectorPageSource;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorSplit;
import com.facebook.presto.spi.ConnectorSplitSource;
import com.facebook.presto.spi.ConnectorTableHandle;
import com.facebook.presto.spi.ConnectorTableLayoutResult;
import com.facebook.presto.spi.ConnectorTableMetadata;
import com.facebook.presto.spi.Constraint;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.TableNotFoundException;
import com.facebook.presto.spi.connector.ConnectorMetadata;
import com.facebook.presto.spi.connector.ConnectorPageSinkProvider;
import com.facebook.presto.spi.connector.ConnectorPageSourceProvider;
import com.facebook.presto.spi.connector.ConnectorSplitManager;
import com.facebook.presto.spi.predicate.TupleDomain;
import com.facebook.presto.sql.gen.JoinCompiler;
import com.facebook.presto.testing.MaterializedResult;
import com.facebook.presto.testing.MaterializedRow;
import com.facebook.presto.testing.TestingConnectorSession;
import com.facebook.presto.testing.TestingNodeManager;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.net.HostAndPort;
import io.airlift.concurrent.BoundedExecutor;
import io.airlift.json.JsonCodec;
import io.airlift.slice.Slice;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.ExecutorService;

import static com.facebook.presto.hadoop.HadoopFileStatus.isDirectory;
import static com.facebook.presto.hive.AbstractTestHiveClient.createTableProperties;
import static com.facebook.presto.hive.AbstractTestHiveClient.filterNonHiddenColumnHandles;
import static com.facebook.presto.hive.AbstractTestHiveClient.filterNonHiddenColumnMetadata;
import static com.facebook.presto.hive.AbstractTestHiveClient.getAllSplits;
import static com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER;
import static com.facebook.presto.hive.HiveTestUtils.getDefaultHiveDataStreamFactories;
import static com.facebook.presto.hive.HiveTestUtils.getDefaultHiveFileWriterFactories;
import static com.facebook.presto.hive.HiveTestUtils.getDefaultHiveRecordCursorProvider;
import static com.facebook.presto.hive.HiveTestUtils.getTypes;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.testing.MaterializedResult.materializeSourceDataStream;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.Iterables.getOnlyElement;
import static com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService;
import static io.airlift.concurrent.MoreFutures.getFutureValue;
import static io.airlift.concurrent.Threads.daemonThreadsNamed;
import static io.airlift.testing.Assertions.assertEqualsIgnoreOrder;
import static java.lang.String.format;
import static java.util.Locale.ENGLISH;
import static java.util.concurrent.Executors.newCachedThreadPool;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;

@Test(groups = "hive-s3")
public abstract class AbstractTestHiveClientS3
{
    protected String writableBucket;

    protected String database;
    protected SchemaTableName tableS3;
    protected SchemaTableName temporaryCreateTable;

    protected HdfsEnvironment hdfsEnvironment;
    protected LocationService locationService;
    protected TestingHiveMetastore metastoreClient;
    protected HiveMetadataFactory metadataFactory;
    protected HiveTransactionManager transactionManager;
    protected ConnectorSplitManager splitManager;
    protected ConnectorPageSinkProvider pageSinkProvider;
    protected ConnectorPageSourceProvider pageSourceProvider;

    private ExecutorService executor;

    @BeforeClass
    public void setUp()
            throws Exception
    {
        executor = newCachedThreadPool(daemonThreadsNamed("hive-%s"));
    }

    @AfterClass
    public void tearDown()
            throws Exception
    {
        if (executor != null) {
            executor.shutdownNow();
            executor = null;
        }
    }

    protected void setupHive(String databaseName)
    {
        database = databaseName;
        tableS3 = new SchemaTableName(database, "presto_test_s3");

        String random = UUID.randomUUID().toString().toLowerCase(ENGLISH).replace("-", "");
        temporaryCreateTable = new SchemaTableName(database, "tmp_presto_test_create_s3_" + random);
    }

    protected void setup(String host, int port, String databaseName, String awsAccessKey, String awsSecretKey, String writableBucket)
    {
        this.writableBucket = writableBucket;

        setupHive(databaseName);

        HiveS3Config s3Config = new HiveS3Config()
                .setS3AwsAccessKey(awsAccessKey)
                .setS3AwsSecretKey(awsSecretKey);

        HiveClientConfig hiveClientConfig = new HiveClientConfig();
        String proxy = System.getProperty("hive.metastore.thrift.client.socks-proxy");
        if (proxy != null) {
            hiveClientConfig.setMetastoreSocksProxy(HostAndPort.fromString(proxy));
        }

        HiveConnectorId connectorId = new HiveConnectorId("hive-test");
        HiveCluster hiveCluster = new TestingHiveCluster(hiveClientConfig, host, port);
        ExecutorService executor = newCachedThreadPool(daemonThreadsNamed("hive-s3-%s"));
        HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationUpdater(hiveClientConfig, s3Config));
        HivePartitionManager hivePartitionManager = new HivePartitionManager(connectorId, TYPE_MANAGER, hiveClientConfig);
        hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hiveClientConfig, new NoHdfsAuthentication());
        metastoreClient = new TestingHiveMetastore(
                new BridgingHiveMetastore(new ThriftHiveMetastore(hiveCluster)),
                executor,
                hiveClientConfig,
                writableBucket,
                hdfsEnvironment);
        locationService = new HiveLocationService(hdfsEnvironment);
        JsonCodec<PartitionUpdate> partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);
        metadataFactory = new HiveMetadataFactory(
                connectorId,
                hiveClientConfig,
                metastoreClient,
                hdfsEnvironment,
                hivePartitionManager,
                newDirectExecutorService(),
                TYPE_MANAGER,
                locationService,
                new TableParameterCodec(),
                partitionUpdateCodec,
                new HiveTypeTranslator(),
                new NodeVersion("test_version"));
        transactionManager = new HiveTransactionManager();
        splitManager = new HiveSplitManager(
                connectorId,
                transactionHandle -> ((HiveMetadata) transactionManager.get(transactionHandle)).getMetastore(),
                new NamenodeStats(),
                hdfsEnvironment,
                new HadoopDirectoryLister(),
                new BoundedExecutor(executor, hiveClientConfig.getMaxSplitIteratorThreads()),
                new HiveCoercionPolicy(TYPE_MANAGER),
                hiveClientConfig.getMaxOutstandingSplits(),
                hiveClientConfig.getMinPartitionBatchSize(),
                hiveClientConfig.getMaxPartitionBatchSize(),
                hiveClientConfig.getMaxInitialSplits(),
                hiveClientConfig.getRecursiveDirWalkerEnabled());
        pageSinkProvider = new HivePageSinkProvider(
                getDefaultHiveFileWriterFactories(hiveClientConfig),
                hdfsEnvironment,
                metastoreClient,
                new GroupByHashPageIndexerFactory(new JoinCompiler()),
                TYPE_MANAGER,
                new HiveClientConfig(),
                locationService,
                partitionUpdateCodec,
                new TestingNodeManager("fake-environment"),
                new HiveEventClient(),
                new HiveSessionProperties(hiveClientConfig));
        pageSourceProvider = new HivePageSourceProvider(hiveClientConfig, hdfsEnvironment, getDefaultHiveRecordCursorProvider(hiveClientConfig), getDefaultHiveDataStreamFactories(hiveClientConfig), TYPE_MANAGER);
    }

    protected ConnectorSession newSession()
    {
        return new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig()).getSessionProperties());
    }

    protected Transaction newTransaction()
    {
        return new HiveTransaction(transactionManager, metadataFactory.create());
    }

    @Test
    public void testGetRecordsS3()
            throws Exception
    {
        try (Transaction transaction = newTransaction()) {
            ConnectorMetadata metadata = transaction.getMetadata();
            ConnectorSession session = newSession();

            ConnectorTableHandle table = getTableHandle(metadata, tableS3);
            List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, table).values());
            Map<String, Integer> columnIndex = indexColumns(columnHandles);

            List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(session, table, new Constraint<>(TupleDomain.all(), bindings -> true), Optional.empty());
            HiveTableLayoutHandle layoutHandle = (HiveTableLayoutHandle) getOnlyElement(tableLayoutResults).getTableLayout().getHandle();
            assertEquals(layoutHandle.getPartitions().get().size(), 1);
            ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, layoutHandle);

            long sum = 0;
            for (ConnectorSplit split : getAllSplits(splitSource)) {
                try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, columnHandles)) {
                    MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));

                    for (MaterializedRow row : result) {
                        sum += (Long) row.getField(columnIndex.get("t_bigint"));
                    }
                }
            }
            assertEquals(sum, 78300);
        }
    }

    @Test
    public void testGetFileStatus()
            throws Exception
    {
        Path basePath = new Path("s3://presto-test-hive/");
        Path tablePath = new Path(basePath, "presto_test_s3");
        Path filePath = new Path(tablePath, "test1.csv");

        FileSystem fs = hdfsEnvironment.getFileSystem("user", basePath);
        assertTrue(isDirectory(fs.getFileStatus(basePath)));
        assertTrue(isDirectory(fs.getFileStatus(tablePath)));
        assertFalse(isDirectory(fs.getFileStatus(filePath)));
        assertFalse(fs.exists(new Path(basePath, "foo")));
    }

    @Test
    public void testRename()
            throws Exception
    {
        Path basePath = new Path(format("s3://%s/rename/%s/", writableBucket, UUID.randomUUID()));
        FileSystem fs = hdfsEnvironment.getFileSystem("user", basePath);
        assertFalse(fs.exists(basePath));

        // create file foo.txt
        Path path = new Path(basePath, "foo.txt");
        assertTrue(fs.createNewFile(path));
        assertTrue(fs.exists(path));

        // rename foo.txt to bar.txt
        Path newPath = new Path(basePath, "bar.txt");
        assertFalse(fs.exists(newPath));
        assertTrue(fs.rename(path, newPath));
        assertFalse(fs.exists(path));
        assertTrue(fs.exists(newPath));

        // create file foo.txt and rename to existing bar.txt
        assertTrue(fs.createNewFile(path));
        assertFalse(fs.rename(path, newPath));
        assertTrue(fs.exists(path));

        // rename foo.txt to foo.txt
        assertTrue(fs.rename(path, path));
        assertTrue(fs.exists(path));

        // delete foo.txt
        assertTrue(fs.delete(path, false));
        assertFalse(fs.exists(path));

        // create directory source with file
        Path source = new Path(basePath, "source");
        assertTrue(fs.createNewFile(new Path(source, "test.txt")));

        // rename source to non-existing target
        Path target = new Path(basePath, "target");
        assertFalse(fs.exists(target));
        assertTrue(fs.rename(source, target));
        assertFalse(fs.exists(source));
        assertTrue(fs.exists(target));

        // create directory source with file
        assertTrue(fs.createNewFile(new Path(source, "test.txt")));

        // rename source to existing target
        assertTrue(fs.rename(source, target));
        assertFalse(fs.exists(source));
        target = new Path(target, "source");
        assertTrue(fs.exists(target));
        assertTrue(fs.exists(new Path(target, "test.txt")));

        // delete target
        target = new Path(basePath, "target");
        assertTrue(fs.exists(target));
        assertTrue(fs.delete(target, true));
        assertFalse(fs.exists(target));

        // cleanup
        fs.delete(basePath, true);
    }

    @Test
    public void testTableCreation()
            throws Exception
    {
        for (HiveStorageFormat storageFormat : HiveStorageFormat.values()) {
            try {
                doCreateTable(temporaryCreateTable, storageFormat);
            }
            finally {
                dropTable(temporaryCreateTable);
            }
        }
    }

    private void doCreateTable(SchemaTableName tableName, HiveStorageFormat storageFormat)
            throws Exception
    {
        List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder()
                .add(new ColumnMetadata("id", BIGINT))
                .build();

        MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT)
                .row(1L)
                .row(3L)
                .row(2L)
                .build();

        try (Transaction transaction = newTransaction()) {
            ConnectorMetadata metadata = transaction.getMetadata();
            ConnectorSession session = newSession();

            // begin creating the table
            ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat));
            ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty());

            // write the records
            ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
            sink.appendPage(data.toPage());
            Collection<Slice> fragments = getFutureValue(sink.finish());
            // commit the table
            metadata.finishCreateTable(session, outputHandle, fragments);

            transaction.commit();

            // Hack to work around the metastore not being configured for S3.
            // The metastore tries to validate the location when creating the
            // table, which fails without explicit configuration for S3.
            // We work around that by using a dummy location when creating the
            // table and updating it here to the correct S3 location.
            metastoreClient.updateTableLocation(
                    database,
                    tableName.getTableName(),
                    locationService.writePathRoot(((HiveOutputTableHandle) outputHandle).getLocationHandle()).get().toString());
        }

        try (Transaction transaction = newTransaction()) {
            ConnectorMetadata metadata = transaction.getMetadata();
            ConnectorSession session = newSession();

            // load the new table
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());

            // verify the metadata
            ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
            assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), columns);

            // verify the data
            List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(session, tableHandle, new Constraint<>(TupleDomain.all(), bindings -> true), Optional.empty());
            HiveTableLayoutHandle layoutHandle = (HiveTableLayoutHandle) getOnlyElement(tableLayoutResults).getTableLayout().getHandle();
            assertEquals(layoutHandle.getPartitions().get().size(), 1);
            ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, layoutHandle);
            ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));

            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, columnHandles)) {
                MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
                assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
            }
        }
    }

    private void dropTable(SchemaTableName table)
    {
        try (Transaction transaction = newTransaction()) {
            transaction.getMetastore(table.getSchemaName()).dropTable(newSession(), table.getSchemaName(), table.getTableName());
            transaction.commit();
        }
        catch (RuntimeException e) {
            // this usually occurs because the table was not created
        }
    }

    private ConnectorTableHandle getTableHandle(ConnectorMetadata metadata, SchemaTableName tableName)
    {
        ConnectorTableHandle handle = metadata.getTableHandle(newSession(), tableName);
        checkArgument(handle != null, "table not found: %s", tableName);
        return handle;
    }

    private static ImmutableMap<String, Integer> indexColumns(List<ColumnHandle> columnHandles)
    {
        ImmutableMap.Builder<String, Integer> index = ImmutableMap.builder();
        int i = 0;
        for (ColumnHandle columnHandle : columnHandles) {
            HiveColumnHandle hiveColumnHandle = (HiveColumnHandle) columnHandle;
            index.put(hiveColumnHandle.getName(), i);
            i++;
        }
        return index.build();
    }

    private static class TestingHiveMetastore
            extends CachingHiveMetastore
    {
        private final String writableBucket;
        private final HdfsEnvironment hdfsEnvironment;

        public TestingHiveMetastore(ExtendedHiveMetastore delegate, ExecutorService executor, HiveClientConfig hiveClientConfig, String writableBucket, HdfsEnvironment hdfsEnvironment)
        {
            super(delegate, executor, hiveClientConfig);
            this.writableBucket = writableBucket;
            this.hdfsEnvironment = hdfsEnvironment;
        }
        @Override
        public Optional<Database> getDatabase(String databaseName)
        {
            return super.getDatabase(databaseName)
                    .map(database -> Database.builder(database)
                            .setLocation(Optional.of("s3://" + writableBucket + "/"))
                            .build());
        }

        @Override
        public void createTable(Table table, PrincipalPrivileges privileges)
        {
            // hack to work around the metastore not being configured for S3
            Table.Builder tableBuilder = Table.builder(table);
            tableBuilder.getStorageBuilder().setLocation("/");
            super.createTable(tableBuilder.build(), privileges);
        }

        @Override
        public void dropTable(String databaseName, String tableName, boolean deleteData)
        {
            try {
                Optional<Table> table = getTable(databaseName, tableName);
                if (!table.isPresent()) {
                    throw new TableNotFoundException(new SchemaTableName(databaseName, tableName));
                }

                // hack to work around the metastore not being configured for S3
                List<String> locations = listAllDataPaths(databaseName, tableName);

                Table.Builder tableBuilder = Table.builder(table.get());
                tableBuilder.getStorageBuilder().setLocation("/");

                // drop table
                replaceTable(databaseName, tableName, tableBuilder.build(), new PrincipalPrivileges(ImmutableMultimap.of(), ImmutableMultimap.of()));
                delegate.dropTable(databaseName, tableName, false);

                // drop data
                if (deleteData) {
                    for (String location : locations) {
                        Path path = new Path(location);
                        hdfsEnvironment.getFileSystem("user", path).delete(path, true);
                    }
                }
            }
            catch (Exception e) {
                throw Throwables.propagate(e);
            }
            finally {
                invalidateTable(databaseName, tableName);
            }
        }

        public void updateTableLocation(String databaseName, String tableName, String location)
        {
            Optional<Table> table = getTable(databaseName, tableName);
            if (!table.isPresent()) {
                throw new TableNotFoundException(new SchemaTableName(databaseName, tableName));
            }

            Table.Builder tableBuilder = Table.builder(table.get());
            tableBuilder.getStorageBuilder().setLocation(location);

            // NOTE: this clears the permissions
            replaceTable(databaseName, tableName, tableBuilder.build(), new PrincipalPrivileges(ImmutableMultimap.of(), ImmutableMultimap.of()));
        }

        private List<String> listAllDataPaths(String schemaName, String tableName)
        {
            ImmutableList.Builder<String> locations = ImmutableList.builder();
            Table table = getTable(schemaName, tableName).get();
            if (table.getStorage().getLocation() != null) {
                // For a partitioned table, there should be nothing directly under this directory,
                // but including this location makes the directory content assertion more
                // extensive, which is desirable.
                locations.add(table.getStorage().getLocation());
            }

            Optional<List<String>> partitionNames = getPartitionNames(schemaName, tableName);
            if (partitionNames.isPresent()) {
                getPartitionsByNames(schemaName, tableName, partitionNames.get()).values().stream()
                        .map(Optional::get)
                        .map(partition -> partition.getStorage().getLocation())
                        .filter(location -> !location.startsWith(table.getStorage().getLocation()))
                        .forEach(locations::add);
            }

            return locations.build();
        }
    }
}