/*
* Copyright © 2014-2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.explore.guice;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.runtime.RuntimeModule;
import co.cask.cdap.data2.datafabric.dataset.RemoteDatasetFramework;
import co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory;
import co.cask.cdap.explore.executor.ExploreExecutorHttpHandler;
import co.cask.cdap.explore.executor.ExploreExecutorService;
import co.cask.cdap.explore.executor.ExploreMetadataHttpHandler;
import co.cask.cdap.explore.executor.ExploreStatusHandler;
import co.cask.cdap.explore.executor.NamespacedExploreMetadataHttpHandler;
import co.cask.cdap.explore.executor.NamespacedQueryExecutorHttpHandler;
import co.cask.cdap.explore.executor.QueryExecutorHttpHandler;
import co.cask.cdap.explore.service.ExploreService;
import co.cask.cdap.explore.service.ExploreServiceUtils;
import co.cask.cdap.explore.service.hive.BaseHiveExploreService;
import co.cask.cdap.explore.service.hive.Hive14ExploreService;
import co.cask.cdap.format.RecordFormats;
import co.cask.cdap.gateway.handlers.CommonHandlers;
import co.cask.cdap.hive.datasets.DatasetStorageHandler;
import co.cask.http.HttpHandler;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import com.google.inject.Exposed;
import com.google.inject.Inject;
import com.google.inject.Injector;
import com.google.inject.Module;
import com.google.inject.PrivateModule;
import com.google.inject.Provider;
import com.google.inject.Provides;
import com.google.inject.Scopes;
import com.google.inject.Singleton;
import com.google.inject.multibindings.Multibinder;
import com.google.inject.name.Named;
import com.google.inject.name.Names;
import com.google.inject.util.Modules;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.twill.api.ClassAcceptor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
/**
* Guice runtime module for the explore functionality.
*/
public class ExploreRuntimeModule extends RuntimeModule {
private static final Logger LOG = LoggerFactory.getLogger(ExploreRuntimeModule.class);
/**
 * Returns the explore bindings for in-memory mode: the HTTP executor module combined with the
 * local explore module created with its in-memory flag set to {@code true}.
 *
 * <p>As a side effect, Java assertions are disabled for the Hive packages on this class's class loader.
 */
@Override
public Module getInMemoryModules() {
  // Some assertions in Hive's StandardStructObjectInspector do not hold when outer joins are run.
  // Hive is treated as a black box that does the right thing; only our own (and our users') code
  // is meant to be exercised, so Hive assertions are switched off.
  ClassLoader classLoader = getClass().getClassLoader();
  classLoader.setPackageAssertionStatus("org.apache.hadoop.hive", false);
  classLoader.setPackageAssertionStatus("org.apache.hive", false);

  Module executorModule = new ExploreExecutorModule();
  Module localModule = new ExploreLocalModule(true);
  return Modules.combine(executorModule, localModule);
}
/**
 * Returns the explore bindings for standalone mode: the HTTP executor module combined with the
 * local explore module created with its in-memory flag set to {@code false}.
 */
@Override
public Module getStandaloneModules() {
  Module executorModule = new ExploreExecutorModule();
  Module localModule = new ExploreLocalModule(false);
  return Modules.combine(executorModule, localModule);
}
/**
 * Returns the explore bindings for distributed mode: the HTTP executor module combined with the
 * distributed explore module.
 */
@Override
public Module getDistributedModules() {
  Module executorModule = new ExploreExecutorModule();
  Module distributedModule = new ExploreDistributedModule();
  return Modules.combine(executorModule, distributedModule);
}
/**
 * Private module that registers the explore HTTP handlers under the
 * {@link Constants.Service#EXPLORE_HTTP_USER_SERVICE} name and exposes the
 * {@link ExploreExecutorService} as a singleton.
 */
private static final class ExploreExecutorModule extends PrivateModule {

  @Override
  protected void configure() {
    Named exploreServiceName = Names.named(Constants.Service.EXPLORE_HTTP_USER_SERVICE);
    Multibinder<HttpHandler> handlerBinder =
      Multibinder.newSetBinder(binder(), HttpHandler.class, exploreServiceName);

    // Handlers are registered in exactly this order, followed by the common handlers.
    List<Class<? extends HttpHandler>> handlerClasses = ImmutableList.<Class<? extends HttpHandler>>of(
      NamespacedQueryExecutorHttpHandler.class,
      QueryExecutorHttpHandler.class,
      NamespacedExploreMetadataHttpHandler.class,
      ExploreMetadataHttpHandler.class,
      ExploreExecutorHttpHandler.class,
      ExploreStatusHandler.class);
    for (Class<? extends HttpHandler> handlerClass : handlerClasses) {
      handlerBinder.addBinding().to(handlerClass);
    }
    CommonHandlers.add(handlerBinder);

    // Only the executor service is visible outside of this private module.
    bind(ExploreExecutorService.class).in(Scopes.SINGLETON);
    expose(ExploreExecutorService.class);
  }
}
/**
 * Private module providing the {@link ExploreService} for the in-memory and standalone runtimes.
 *
 * <p>The service implementation is bound to {@link Hive14ExploreService}; the exposed
 * {@link ExploreService} is obtained through {@link ExploreServiceProvider}, which sets the Hive
 * related JVM system properties before returning the implementation.
 */
private static final class ExploreLocalModule extends PrivateModule {
  private final boolean isInMemory;

  /**
   * @param isInMemory {@code true} for in-memory mode, in which the Hive warehouse and metastore
   *                   database live in a per-JVM sub-directory; {@code false} for standalone mode
   */
  public ExploreLocalModule(boolean isInMemory) {
    this.isInMemory = isInMemory;
  }

  @Override
  protected void configure() {
    // Current version of hive used in standalone is Hive 14
    bind(ExploreService.class).annotatedWith(Names.named("explore.service.impl")).to(Hive14ExploreService.class);
    bind(ExploreService.class).toProvider(ExploreServiceProvider.class).in(Scopes.SINGLETON);
    expose(ExploreService.class);

    bind(boolean.class).annotatedWith(Names.named("explore.inmemory")).toInstance(isInMemory);

    bind(File.class).annotatedWith(Names.named(Constants.Explore.PREVIEWS_DIR_NAME))
      .toProvider(PreviewsDirProvider.class);
  }

  /**
   * Provides the directory used for previews: {@code previewsDir} under the configured
   * {@link Constants.Explore#LOCAL_DATA_DIR}. The directory is created if it does not exist.
   */
  private static final class PreviewsDirProvider implements Provider<File> {
    private final CConfiguration cConf;

    @Inject
    public PreviewsDirProvider(CConfiguration cConf) {
      this.cConf = cConf;
    }

    @Override
    public File get() {
      String localDirStr = cConf.get(Constants.Explore.LOCAL_DATA_DIR);
      File previewsDir = new File(localDirStr, "previewsDir");
      // mkdirs() returns false both when creation fails and when the directory already exists,
      // so only report a problem if the directory is really missing afterwards. The directory is
      // still returned, as before, so callers see no change in behavior.
      if (!previewsDir.mkdirs() && !previewsDir.isDirectory()) {
        LOG.warn("Could not create previews directory {}", previewsDir.getAbsolutePath());
      }
      return previewsDir;
    }
  }

  /**
   * Provider that configures Hive for local mode through JVM system properties and then returns
   * the injected {@link ExploreService} implementation.
   */
  @Singleton
  private static final class ExploreServiceProvider implements Provider<ExploreService> {
    // Fixed when this class is initialized; used to isolate the Hive directories in in-memory mode.
    private static final long SEED = System.currentTimeMillis();

    private final CConfiguration cConf;
    private final Configuration hConf;
    private final ExploreService exploreService;
    private final boolean isInMemory;

    @Inject
    public ExploreServiceProvider(CConfiguration cConf, Configuration hConf,
                                  @Named("explore.service.impl") ExploreService exploreService,
                                  @Named("explore.inmemory") boolean isInMemory) {
      this.exploreService = exploreService;
      this.cConf = cConf;
      this.hConf = hConf;
      this.isInMemory = isInMemory;
    }

    /**
     * Sets the Hive scratch dir, hadoop tmp dir, metastore warehouse and connection URL, derby log
     * location, local-mode execution and security-related system properties, then returns the
     * injected service.
     *
     * @return the {@link ExploreService} bound under the {@code explore.service.impl} name
     */
    @Override
    public ExploreService get() {
      String localDataDir = cConf.get(Constants.Explore.LOCAL_DATA_DIR);
      File hiveDataDir = new File(localDataDir);

      // The properties set using setProperty will be included to any new HiveConf object created,
      // at the condition that the configuration is known by Hive, and so is one of the HiveConf.ConfVars
      // variables.
      System.setProperty(HiveConf.ConfVars.SCRATCHDIR.toString(),
                         new File(hiveDataDir, cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsolutePath());

      // Reset hadoop tmp dir because Hive does not pick it up from hConf
      System.setProperty("hadoop.tmp.dir", hConf.get("hadoop.tmp.dir"));

      File warehouseDir = new File(localDataDir, "warehouse");
      File databaseDir = new File(localDataDir, "database");
      if (isInMemory) {
        // This seed is required to make all tests pass when launched together, and when several of them
        // start a hive metastore / hive server.
        String seedDirName = Long.toString(SEED);
        warehouseDir = new File(warehouseDir, seedDirName);
        databaseDir = new File(databaseDir, seedDirName);
      }

      LOG.debug("Setting {} to {}",
                HiveConf.ConfVars.METASTOREWAREHOUSE.toString(), warehouseDir.getAbsoluteFile());
      System.setProperty(HiveConf.ConfVars.METASTOREWAREHOUSE.toString(), warehouseDir.getAbsolutePath());

      // Set derby log location
      System.setProperty("derby.stream.error.file", localDataDir + File.separator + "derby.log");

      String connectUrl = String.format("jdbc:derby:;databaseName=%s;create=true", databaseDir.getAbsoluteFile());
      LOG.debug("Setting {} to {}", HiveConf.ConfVars.METASTORECONNECTURLKEY.toString(), connectUrl);
      System.setProperty(HiveConf.ConfVars.METASTORECONNECTURLKEY.toString(), connectUrl);

      // Some more local mode settings
      System.setProperty(HiveConf.ConfVars.LOCALMODEAUTO.toString(), "true");
      System.setProperty(HiveConf.ConfVars.SUBMITVIACHILD.toString(), "false");
      System.setProperty(MRConfig.FRAMEWORK_NAME, "local");

      // Disable security
      // Also need to disable security by making HiveAuthFactory.loginFromKeytab a no-op, since Hive >=0.14
      // ignores the HIVE_SERVER2_AUTHENTICATION property and instead uses UserGroupInformation.isSecurityEnabled()
      // (rewrite to HiveAuthFactory.loginFromKeytab bytecode is done in ExploreServiceUtils.traceDependencies)
      System.setProperty(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION.toString(), "NONE");
      System.setProperty(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS.toString(), "false");
      System.setProperty(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL.toString(), "false");

      return exploreService;
    }
  }
}
/**
 * Private module providing the {@link ExploreService} in distributed mode.
 *
 * <p>During configuration it prepares the classpath related system properties (see
 * {@code setupClasspath}), points Hive's local scratch dir to an absolute location and binds a
 * freshly created temporary directory as the previews directory.
 */
private static final class ExploreDistributedModule extends PrivateModule {
  private static final Logger LOG = LoggerFactory.getLogger(ExploreDistributedModule.class);

  @Override
  protected void configure() {
    try {
      CConfiguration cConf = CConfiguration.create();
      File tmpDir = new File(new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR)),
                             cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile();
      // mkdirs() returns false both when creation fails and when the directory already exists,
      // so only report a problem if the directory is really missing afterwards. Execution
      // continues as before; the warning makes a later failure easier to diagnose.
      if (!tmpDir.mkdirs() && !tmpDir.isDirectory()) {
        LOG.warn("Could not create temporary directory {}", tmpDir);
      }

      setupClasspath(tmpDir);

      // Set local tmp dir to an absolute location in the twill runnable otherwise Hive complains
      String localScratchPath = System.getProperty("java.io.tmpdir") + File.separator +
        "hive-" + System.getProperty("user.name");
      System.setProperty(HiveConf.ConfVars.LOCALSCRATCHDIR.toString(),
                         new File(localScratchPath).getAbsolutePath());
      LOG.info("Setting {} to {}", HiveConf.ConfVars.LOCALSCRATCHDIR.toString(),
               System.getProperty(HiveConf.ConfVars.LOCALSCRATCHDIR.toString()));

      File previewDir = Files.createTempDir();
      LOG.info("Storing preview files in {}", previewDir.getAbsolutePath());
      bind(File.class).annotatedWith(Names.named(Constants.Explore.PREVIEWS_DIR_NAME)).toInstance(previewDir);
    } catch (Throwable e) {
      // Unchecked exceptions and errors are rethrown as-is; checked ones get wrapped.
      throw Throwables.propagate(e);
    }
  }

  /**
   * Provides the singleton {@link ExploreService}, instantiating the implementation class
   * selected by {@link ExploreServiceUtils#getHiveService()} through the given injector.
   *
   * @param injector injector used to create the service instance
   * @return the explore service instance
   */
  @Provides
  @Singleton
  @Exposed
  public ExploreService providesExploreService(Injector injector) {
    // Figure out which HiveExploreService class to load
    Class<? extends ExploreService> hiveExploreServiceCl = ExploreServiceUtils.getHiveService();
    LOG.info("Using Explore service class {}", hiveExploreServiceCl.getName());
    return injector.getInstance(hiveExploreServiceCl);
  }
}
/**
 * Traces the jars needed by the explore service and publishes them through system properties.
 *
 * <p>The dependency list (HBase jars first) is written to
 * {@link BaseHiveExploreService#SPARK_YARN_DIST_FILES} as plain absolute paths and to
 * {@link HiveConf.ConfVars#HIVEAUXJARS} as {@code file://} prefixed paths. The same list plus
 * hive-exec.jar is handed to a {@link LocalMapreduceClasspathSetter}, which sets up the classpath script.
 *
 * @param tmpDir directory passed to the dependency tracer and to the classpath setter
 * @throws IOException declared by this method; presumably raised while tracing dependencies or
 *                     writing the classpath script
 */
private static void setupClasspath(File tmpDir) throws IOException {
  // Here we find the transitive dependencies and remove all paths that come from the boot class path -
  // those paths are not needed because the new JVM will have them in its boot class path.
  // It could even be wrong to keep them because in the target container, the boot class path may be different
  // (for example, if Hadoop uses a different Java version than CDAP).
  final Set<String> bootClassPaths = ExploreServiceUtils.getBoostrapClasses();

  ClassAcceptor dependencyFilter = new ClassAcceptor() {
    /* Rejects any class located in the bootstrap class paths, any Kryo class, and anything whose
     * class path location contains "hive".
     * Kryo must not be shipped to the Explore container: Spark introduced Kryo 2.21, which would
     * normally be shipped, yet Hive requires Kryo 2.22 and gets it from the Hive jars
     * (hive-exec.jar to be precise).
     * Hive jars are excluded as well since hive dependencies are found in job.jar.
     */
    @Override
    public boolean accept(String className, URL classUrl, URL classPathUrl) {
      String classPathFile = classPathUrl.getFile();
      boolean excluded = bootClassPaths.contains(classPathFile)
        || className.startsWith("com.esotericsoftware.kryo")
        || classPathFile.contains("hive");
      return !excluded;
    }
  };

  String hBaseTableUtilClassName = HBaseTableUtilFactory.getHBaseTableUtilClass().getName();
  Set<File> hBaseJars =
    ExploreServiceUtils.traceDependencies(null, dependencyFilter, tmpDir, hBaseTableUtilClassName);

  // Note the order of dependency jars is important so that HBase jars come first in the classpath order
  // LinkedHashSet maintains insertion order while removing duplicate entries.
  Set<File> classpathJars = new LinkedHashSet<>(hBaseJars);
  Set<File> remainingJars = ExploreServiceUtils.traceDependencies(null, dependencyFilter, tmpDir,
                                                                  RemoteDatasetFramework.class.getName(),
                                                                  DatasetStorageHandler.class.getName(),
                                                                  RecordFormats.class.getName());
  classpathJars.addAll(remainingJars);

  // Note: the class path entries need to be prefixed with "file://" for the jars to work when
  // Hive starts local map-reduce job.
  ImmutableList.Builder<String> jarUriBuilder = ImmutableList.builder();
  for (File jar : classpathJars) {
    jarUriBuilder.add("file://" + jar.getAbsolutePath());
  }
  List<String> jarUris = jarUriBuilder.build();

  // These dependency files need to be copied over to spark container
  Function<File, String> toAbsolutePath = new Function<File, String>() {
    @Override
    public String apply(File input) {
      return input.getAbsolutePath();
    }
  };
  String sparkDistFiles = Joiner.on(',').join(Iterables.transform(classpathJars, toAbsolutePath));
  System.setProperty(BaseHiveExploreService.SPARK_YARN_DIST_FILES, sparkDistFiles);
  LOG.debug("Setting {} to {}", BaseHiveExploreService.SPARK_YARN_DIST_FILES,
            System.getProperty(BaseHiveExploreService.SPARK_YARN_DIST_FILES));

  // These dependency files need to be copied over to hive job container
  String auxJarsKey = HiveConf.ConfVars.HIVEAUXJARS.toString();
  System.setProperty(auxJarsKey, Joiner.on(',').join(jarUris));
  LOG.debug("Setting {} to {}", HiveConf.ConfVars.HIVEAUXJARS.toString(),
            System.getProperty(HiveConf.ConfVars.HIVEAUXJARS.toString()));

  // add hive-exec.jar to the HADOOP_CLASSPATH, which is used by the local mapreduce job launched by hive ,
  // we need to add this, otherwise when hive runs a MapRedLocalTask it cannot find
  // "org.apache.hadoop.hive.serde2.SerDe" class in its classpath.
  List<String> hadoopClasspathEntries = Lists.newArrayList(jarUris);
  String hiveExecJar = new JobConf(org.apache.hadoop.hive.ql.exec.Task.class).getJar();
  Preconditions.checkNotNull(hiveExecJar, "Couldn't locate hive-exec.jar to be included in HADOOP_CLASSPATH " +
    "for MapReduce jobs launched by Hive");
  hadoopClasspathEntries.add(hiveExecJar);
  LOG.debug("Added hive-exec.jar {} to HADOOP_CLASSPATH to be included for MapReduce jobs launched by Hive",
            hiveExecJar);

  //TODO: Setup HADOOP_CLASSPATH hack, more info on why this is needed, see CDAP-9
  LocalMapreduceClasspathSetter classpathSetter =
    new LocalMapreduceClasspathSetter(new HiveConf(), tmpDir.getAbsolutePath(), hadoopClasspathEntries);
  for (File hBaseJar : hBaseJars) {
    classpathSetter.accept(hBaseJar.getAbsolutePath());
  }
  classpathSetter.setupClasspathScript();
}
}