/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.facebook.presto.Session;
import com.facebook.presto.hive.authentication.NoHdfsAuthentication;
import com.facebook.presto.hive.metastore.Database;
import com.facebook.presto.hive.metastore.PrincipalType;
import com.facebook.presto.hive.metastore.file.FileHiveMetastore;
import com.facebook.presto.metadata.QualifiedObjectName;
import com.facebook.presto.testing.QueryRunner;
import com.facebook.presto.tests.DistributedQueryRunner;
import com.facebook.presto.tpch.TpchPlugin;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.airlift.log.Logger;
import io.airlift.log.Logging;
import io.airlift.tpch.TpchTable;
import org.intellij.lang.annotations.Language;
import org.joda.time.DateTimeZone;

import java.io.File;
import java.util.Map;

import static com.facebook.presto.testing.TestingSession.testSessionBuilder;
import static com.facebook.presto.tests.QueryAssertions.copyTpchTables;
import static com.facebook.presto.tpch.TpchMetadata.TINY_SCHEMA_NAME;
import static io.airlift.units.Duration.nanosSince;
import static java.lang.String.format;
import static java.util.Locale.ENGLISH;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.testng.Assert.assertEquals;

public final class HiveQueryRunner
{
    private static final Logger log = Logger.get(HiveQueryRunner.class);

    private HiveQueryRunner()
    {
    }

    public static final String HIVE_CATALOG = "hive";
    public static final String HIVE_BUCKETED_CATALOG = "hive_bucketed";
    public static final String TPCH_SCHEMA = "tpch";
    private static final String TPCH_BUCKETED_SCHEMA = "tpch_bucketed";
    private static final DateTimeZone TIME_ZONE = DateTimeZone.forID("Asia/Kathmandu");

    public static DistributedQueryRunner createQueryRunner(TpchTable<?>... tables)
            throws Exception
    {
        return createQueryRunner(ImmutableList.copyOf(tables));
    }

    public static DistributedQueryRunner createQueryRunner(Iterable<TpchTable<?>> tables)
            throws Exception
    {
        return createQueryRunner(tables, ImmutableMap.of());
    }

    public static DistributedQueryRunner createQueryRunner(Iterable<TpchTable<?>> tables, Map<String, String> extraProperties)
            throws Exception
    {
        return createQueryRunner(tables, extraProperties, "sql-standard", ImmutableMap.of());
    }

    public static DistributedQueryRunner createQueryRunner(Iterable<TpchTable<?>> tables, Map<String, String> extraProperties, String security, Map<String, String> extraHiveProperties)
            throws Exception
    {
        assertEquals(DateTimeZone.getDefault(), TIME_ZONE, "Timezone not configured correctly. Add -Duser.timezone=Asia/Kathmandu to your JVM arguments");

        DistributedQueryRunner queryRunner = new DistributedQueryRunner(createSession(), 4, extraProperties);

        try {
            queryRunner.installPlugin(new TpchPlugin());
            queryRunner.createCatalog("tpch", "tpch");

            File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data").toFile();

            HiveClientConfig hiveClientConfig = new HiveClientConfig();
            HiveS3Config s3Config = new HiveS3Config();
            HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationUpdater(hiveClientConfig, s3Config));
            HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hiveClientConfig, new NoHdfsAuthentication());

            // file-based metastore rooted in the query runner's data directory
            FileHiveMetastore metastore = new FileHiveMetastore(hdfsEnvironment, baseDir.toURI().toString(), "test");
            metastore.createDatabase(createDatabaseMetastoreObject(TPCH_SCHEMA));
            metastore.createDatabase(createDatabaseMetastoreObject(TPCH_BUCKETED_SCHEMA));

            queryRunner.installPlugin(new HivePlugin(HIVE_CATALOG, metastore));

            Map<String, String> hiveProperties = ImmutableMap.<String, String>builder()
                    .putAll(extraHiveProperties)
                    .put("hive.metastore.uri", "thrift://localhost:8080")
                    .put("hive.time-zone", TIME_ZONE.getID())
                    .put("hive.security", security)
                    .build();
            Map<String, String> hiveBucketedProperties = ImmutableMap.<String, String>builder()
                    .putAll(hiveProperties)
                    .put("hive.max-initial-split-size", "10kB") // so that each bucket has multiple splits
                    .put("hive.max-split-size", "10kB") // so that each bucket has multiple splits
                    .put("hive.storage-format", "TEXTFILE") // so that there's no minimum split size for the file
                    .put("hive.compression-codec", "NONE") // so that the file is splittable
                    .build();
            queryRunner.createCatalog(HIVE_CATALOG, HIVE_CATALOG, hiveProperties);
            queryRunner.createCatalog(HIVE_BUCKETED_CATALOG, HIVE_CATALOG, hiveBucketedProperties);

            copyTpchTables(queryRunner, "tpch", TINY_SCHEMA_NAME, createSession(), tables);
            copyTpchTablesBucketed(queryRunner, "tpch", TINY_SCHEMA_NAME, createBucketedSession(), tables);

            return queryRunner;
        }
        catch (Exception e) {
            queryRunner.close();
            throw e;
        }
    }

    private static Database createDatabaseMetastoreObject(String name)
    {
        return Database.builder()
                .setDatabaseName(name)
                .setOwnerName("public")
                .setOwnerType(PrincipalType.ROLE)
                .build();
    }

    public static Session createSession()
    {
        return testSessionBuilder()
                .setCatalog(HIVE_CATALOG)
                .setSchema(TPCH_SCHEMA)
                .build();
    }

    public static Session createBucketedSession()
    {
        return testSessionBuilder()
                .setCatalog(HIVE_BUCKETED_CATALOG)
                .setSchema(TPCH_BUCKETED_SCHEMA)
                .build();
    }

    public static void copyTpchTablesBucketed(
            QueryRunner queryRunner,
            String sourceCatalog,
            String sourceSchema,
            Session session,
            Iterable<TpchTable<?>> tables)
            throws Exception
    {
        log.info("Loading data from %s.%s...", sourceCatalog, sourceSchema);
        long startTime = System.nanoTime();
        for (TpchTable<?> table : tables) {
            copyTableBucketed(queryRunner, new QualifiedObjectName(sourceCatalog, sourceSchema, table.getTableName().toLowerCase(ENGLISH)), session);
        }
        log.info("Loading from %s.%s complete in %s", sourceCatalog, sourceSchema, nanosSince(startTime).toString(SECONDS));
    }

    private static void copyTableBucketed(QueryRunner queryRunner, QualifiedObjectName table, Session session)
    {
        long start = System.nanoTime();
        log.info("Running import for %s", table.getObjectName());
        @Language("SQL") String sql;
        switch (table.getObjectName()) {
            // the small dimension tables are copied without bucketing
            case "part":
            case "partsupp":
            case "supplier":
            case "nation":
            case "region":
                sql = format("CREATE TABLE %s AS SELECT * FROM %s", table.getObjectName(), table);
                break;
            case "lineitem":
                sql = format("CREATE TABLE %s WITH (bucketed_by=array['orderkey'], bucket_count=11) AS SELECT * FROM %s", table.getObjectName(), table);
                break;
            case "customer":
                sql = format("CREATE TABLE %s WITH (bucketed_by=array['custkey'], bucket_count=11) AS SELECT * FROM %s", table.getObjectName(), table);
                break;
            case "orders":
                sql = format("CREATE TABLE %s WITH (bucketed_by=array['custkey'], bucket_count=11) AS SELECT * FROM %s", table.getObjectName(), table);
                break;
            default:
                throw new UnsupportedOperationException();
        }
        long rows = (Long) queryRunner.execute(session, sql).getMaterializedRows().get(0).getField(0);
        log.info("Imported %s rows for %s in %s", rows, table.getObjectName(), nanosSince(start).convertToMostSuccinctTimeUnit());
    }

    public static void main(String[] args)
            throws Exception
    {
        // You need to add "--user user" to your CLI for your queries to work
        Logging.initialize();
        DistributedQueryRunner queryRunner = createQueryRunner(TpchTable.getTables(), ImmutableMap.of("http-server.http.port", "8080"));
        Thread.sleep(10);
        Logger log = Logger.get(DistributedQueryRunner.class);
        log.info("======== SERVER STARTED ========");
        log.info("\n====\n%s\n====", queryRunner.getCoordinator().getBaseUrl());
    }
}