/*
* Copyright © 2014-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data.runtime.main;
import co.cask.cdap.api.metrics.MetricsCollectionService;
import co.cask.cdap.app.guice.AppFabricServiceRuntimeModule;
import co.cask.cdap.app.guice.AuthorizationModule;
import co.cask.cdap.app.guice.ProgramRunnerRuntimeModule;
import co.cask.cdap.app.guice.ServiceStoreModules;
import co.cask.cdap.app.store.ServiceStore;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.guice.ConfigModule;
import co.cask.cdap.common.guice.DiscoveryRuntimeModule;
import co.cask.cdap.common.guice.IOModule;
import co.cask.cdap.common.guice.KafkaClientModule;
import co.cask.cdap.common.guice.LocationRuntimeModule;
import co.cask.cdap.common.guice.TwillModule;
import co.cask.cdap.common.guice.ZKClientModule;
import co.cask.cdap.common.io.URLConnections;
import co.cask.cdap.common.kerberos.SecurityUtil;
import co.cask.cdap.common.namespace.guice.NamespaceClientRuntimeModule;
import co.cask.cdap.common.runtime.DaemonMain;
import co.cask.cdap.common.service.RetryOnStartFailureService;
import co.cask.cdap.common.service.RetryStrategies;
import co.cask.cdap.common.twill.HadoopClassExcluder;
import co.cask.cdap.common.utils.DirUtils;
import co.cask.cdap.data.runtime.DataFabricModules;
import co.cask.cdap.data.runtime.DataSetServiceModules;
import co.cask.cdap.data.runtime.DataSetsModules;
import co.cask.cdap.data.stream.StreamAdminModules;
import co.cask.cdap.data.view.ViewAdminModules;
import co.cask.cdap.data2.audit.AuditModule;
import co.cask.cdap.data2.datafabric.dataset.service.DatasetService;
import co.cask.cdap.data2.util.hbase.ConfigurationTable;
import co.cask.cdap.data2.util.hbase.HBaseTableUtil;
import co.cask.cdap.explore.client.ExploreClient;
import co.cask.cdap.explore.guice.ExploreClientModule;
import co.cask.cdap.explore.service.ExploreServiceUtils;
import co.cask.cdap.hive.ExploreUtils;
import co.cask.cdap.internal.app.services.AppFabricServer;
import co.cask.cdap.logging.appender.LogAppenderInitializer;
import co.cask.cdap.logging.guice.LoggingModules;
import co.cask.cdap.metrics.guice.MetricsClientRuntimeModule;
import co.cask.cdap.notifications.feeds.guice.NotificationFeedServiceRuntimeModule;
import co.cask.cdap.notifications.guice.NotificationServiceRuntimeModule;
import co.cask.cdap.proto.Id;
import co.cask.cdap.security.TokenSecureStoreUpdater;
import co.cask.cdap.store.guice.NamespaceStoreModule;
import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Supplier;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.Service;
import com.google.inject.Guice;
import com.google.inject.Injector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.twill.api.ElectionHandler;
import org.apache.twill.api.TwillApplication;
import org.apache.twill.api.TwillController;
import org.apache.twill.api.TwillPreparer;
import org.apache.twill.api.TwillRunnerService;
import org.apache.twill.api.logging.LogEntry;
import org.apache.twill.api.logging.LogHandler;
import org.apache.twill.api.logging.PrinterLogHandler;
import org.apache.twill.common.Cancellable;
import org.apache.twill.common.Threads;
import org.apache.twill.internal.ServiceListenerAdapter;
import org.apache.twill.internal.zookeeper.LeaderElection;
import org.apache.twill.kafka.client.KafkaClientService;
import org.apache.twill.zookeeper.ZKClientService;
import org.apache.twill.zookeeper.ZKOperations;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.Writer;
import java.net.InetAddress;
import java.net.URL;
import java.net.UnknownHostException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import javax.annotation.Nullable;
/**
* Driver class for starting all master services.
* AppFabricHttpService
* TwillRunnables: MetricsProcessor, MetricsHttp, LogSaver, TransactionService, StreamHandler.
*/
public class MasterServiceMain extends DaemonMain {
  private static final Logger LOG = LoggerFactory.getLogger(MasterServiceMain.class);

  // Upper bound on the exponential back-off delay between master twill app restarts.
  private static final long MAX_BACKOFF_TIME_MS = TimeUnit.MILLISECONDS.convert(10, TimeUnit.MINUTES);
  // A twill app run lasting at least this long is treated as successful: on termination it is
  // restarted immediately and the failure counter resets (see monitorTwillApplication).
  private static final long SUCCESSFUL_RUN_DURATON_MS = TimeUnit.MILLISECONDS.convert(20, TimeUnit.MINUTES);
  // Maximum time to try looking up the existing twill application
  private static final long LOOKUP_ATTEMPT_TIMEOUT_MS = 2000;

  private final CConfiguration cConf;
  private final Configuration hConf;
  // Injector with the modules shared by leader and follower; leader-only modules are
  // added through a child injector.
  private final Injector baseInjector;
  private final ZKClientService zkClient;
  private final KafkaClientService kafkaClient;
  private final MetricsCollectionService metricsCollectionService;
  private final ServiceStore serviceStore;
  private final LeaderElection leaderElection;
  private final TokenSecureStoreUpdater secureStoreUpdater;
  // Set by stop(); lets the leader-election listener distinguish an intentional shutdown
  // from a startup failure.
  private volatile boolean stopped;
  /**
   * Entry point: logs startup and delegates to the inherited {@code doMain} lifecycle driver.
   */
  public static void main(final String[] args) throws Exception {
    LOG.info("Starting {}", MasterServiceMain.class.getSimpleName());
    new MasterServiceMain().doMain(args);
  }
  /**
   * Creates the master service: performs Kerberos login, builds the base Guice injector,
   * obtains the core services from it, and sets up (but does not start) leader election.
   */
  public MasterServiceMain() {
    CConfiguration cConf = CConfiguration.create();
    cConf.set(Constants.Dataset.Manager.ADDRESS, getLocalHost().getCanonicalHostName());

    // Note: login has to happen before any objects that need Kerberos credentials are instantiated.
    login(cConf);

    Configuration hConf = HBaseConfiguration.create();
    Injector injector = createBaseInjector(cConf, hConf);
    this.baseInjector = injector;
    // Re-read the configurations through the injector so all components share the same instances.
    this.cConf = injector.getInstance(CConfiguration.class);
    this.hConf = injector.getInstance(Configuration.class);
    this.zkClient = injector.getInstance(ZKClientService.class);
    this.kafkaClient = injector.getInstance(KafkaClientService.class);
    this.metricsCollectionService = injector.getInstance(MetricsCollectionService.class);
    this.serviceStore = injector.getInstance(ServiceStore.class);
    this.secureStoreUpdater = baseInjector.getInstance(TokenSecureStoreUpdater.class);
    this.leaderElection = createLeaderElection();
    // leader election will normally stay running. Will only stop if there was some issue starting up.
    this.leaderElection.addListener(new ServiceListenerAdapter() {
      @Override
      public void terminated(Service.State from) {
        // Termination before stop() was called means startup failed; exit the process.
        if (!stopped) {
          LOG.error("CDAP Master failed to start");
          System.exit(1);
        }
      }

      @Override
      public void failed(Service.State from, Throwable failure) {
        if (!stopped) {
          LOG.error("CDAP Master failed to start");
          System.exit(1);
        }
      }
    }, MoreExecutors.sameThreadExecutor());
  }
  @Override
  public void init(String[] args) {
    // Clear leftover temp files from previous runs and verify Explore prerequisites
    // before any service is started.
    cleanupTempDir();
    checkExploreRequirements();
  }
  /**
   * Starts the locally-run services. Leader election is started last, so the leader callback
   * only fires once the shared services (ZK, Kafka, metrics, service store) are up.
   */
  @Override
  public void start() {
    try {
      // Workaround for release of file descriptors opened by URLClassLoader - https://issues.cask.co/browse/CDAP-2841
      URLConnections.setDefaultUseCaches(false);
    } catch (IOException e) {
      LOG.error("Could not disable caching of URLJarFiles. This may lead to 'too many open files` exception.", e);
    }

    createSystemHBaseNamespace();
    updateConfigurationTable();

    LogAppenderInitializer logAppenderInitializer = baseInjector.getInstance(LogAppenderInitializer.class);
    logAppenderInitializer.initialize();

    zkClient.startAndWait();
    // Tries to create the ZK root node (which can be namespaced through the zk connection string)
    Futures.getUnchecked(ZKOperations.ignoreError(zkClient.create("/", null, CreateMode.PERSISTENT),
                                                  KeeperException.NodeExistsException.class, null));

    kafkaClient.startAndWait();
    metricsCollectionService.startAndWait();
    serviceStore.startAndWait();
    leaderElection.startAndWait();
  }
  /**
   * Stops the locally-run services in the reverse order of {@link #start()}.
   */
  @Override
  public void stop() {
    LOG.info("Stopping {}", Constants.Service.MASTER_SERVICES);
    // Mark the shutdown as intentional so the leader-election listener (and the follower
    // callback) treat termination as expected rather than a startup failure.
    stopped = true;

    // if leader election failed to start, its listener will stop the master.
    // In that case, we don't want to try stopping it again, as it will log confusing exceptions
    if (leaderElection.isRunning()) {
      stopQuietly(leaderElection);
    }

    stopQuietly(serviceStore);
    stopQuietly(metricsCollectionService);
    stopQuietly(kafkaClient);
    stopQuietly(zkClient);

    if (cConf.getBoolean(Constants.Explore.EXPLORE_ENABLED)) {
      Closeables.closeQuietly(baseInjector.getInstance(ExploreClient.class));
    }
  }
  @Override
  public void destroy() {
    // No-op: all shutdown work is performed in stop().
  }
  /**
   * Stops a guava {@link Service}. No exception will be thrown even if stopping fails;
   * a warning is logged instead.
   */
  private void stopQuietly(Service service) {
    try {
      service.stopAndWait();
    } catch (Exception e) {
      LOG.warn("Exception when stopping service {}", service, e);
    }
  }
  /**
   * Stops a {@link TwillRunnerService}. No exception will be thrown even if stopping fails;
   * a warning is logged instead.
   */
  private void stopQuietly(TwillRunnerService service) {
    try {
      service.stop();
    } catch (Exception e) {
      LOG.warn("Exception when stopping service {}", service, e);
    }
  }
  /**
   * Returns the {@link InetAddress} of the local host.
   *
   * @throws RuntimeException if the local host address cannot be resolved
   */
  private InetAddress getLocalHost() {
    try {
      return InetAddress.getLocalHost();
    } catch (UnknownHostException e) {
      LOG.error("Error obtaining localhost address", e);
      throw Throwables.propagate(e);
    }
  }
  /**
   * Returns a map from system service name to a map from property to configuration key.
   * Each inner map has two entries: {@code "default"} maps to the cConf key holding the
   * default instance count, and {@code "max"} to the key holding the maximum instance count
   * (consumed by {@link #getSystemServiceInstances()}).
   */
  private Map<String, Map<String, String>> getConfigKeys() {
    Map<String, Map<String, String>> configKeys = Maps.newHashMap();

    configKeys.put(Constants.Service.LOGSAVER,
                   ImmutableMap.of("default", Constants.LogSaver.NUM_INSTANCES,
                                   "max", Constants.LogSaver.MAX_INSTANCES));
    configKeys.put(Constants.Service.TRANSACTION,
                   ImmutableMap.of("default", Constants.Transaction.Container.NUM_INSTANCES,
                                   "max", Constants.Transaction.Container.MAX_INSTANCES));
    configKeys.put(Constants.Service.METRICS_PROCESSOR,
                   ImmutableMap.of("default", Constants.MetricsProcessor.NUM_INSTANCES,
                                   "max", Constants.MetricsProcessor.MAX_INSTANCES));
    configKeys.put(Constants.Service.METRICS,
                   ImmutableMap.of("default", Constants.Metrics.NUM_INSTANCES,
                                   "max", Constants.Metrics.MAX_INSTANCES));
    configKeys.put(Constants.Service.STREAMS,
                   ImmutableMap.of("default", Constants.Stream.CONTAINER_INSTANCES,
                                   "max", Constants.Stream.MAX_INSTANCES));
    configKeys.put(Constants.Service.DATASET_EXECUTOR,
                   ImmutableMap.of("default", Constants.Dataset.Executor.CONTAINER_INSTANCES,
                                   "max", Constants.Dataset.Executor.MAX_INSTANCES));
    configKeys.put(Constants.Service.EXPLORE_HTTP_USER_SERVICE,
                   ImmutableMap.of("default", Constants.Explore.CONTAINER_INSTANCES,
                                   "max", Constants.Explore.MAX_INSTANCES));
    return configKeys;
  }
private Map<String, Integer> getSystemServiceInstances() {
Map<String, Integer> instanceCountMap = new HashMap<>();
for (Map.Entry<String, Map<String, String>> entry : getConfigKeys().entrySet()) {
String service = entry.getKey();
Map<String, String> configKeys = entry.getValue();
try {
int maxCount = cConf.getInt(configKeys.get("max"));
Integer savedCount = serviceStore.getServiceInstance(service);
if (savedCount == null || savedCount == 0) {
savedCount = Math.min(maxCount, cConf.getInt(configKeys.get("default")));
} else {
// If the max value is smaller than the saved instance count, update the store to the max value.
if (savedCount > maxCount) {
savedCount = maxCount;
}
}
serviceStore.setServiceInstance(service, savedCount);
instanceCountMap.put(service, savedCount);
LOG.info("Setting instance count of {} Service to {}", service, savedCount);
} catch (Exception e) {
LOG.error("Couldn't retrieve instance count {}: {}", service, e.getMessage(), e);
}
}
return instanceCountMap;
}
  /**
   * Creates the Guice injector with the modules needed regardless of leadership status.
   * Leader-only modules (Twill, app-fabric, program runner) are added via a child injector
   * in {@link #createLeaderElection()}.
   */
  private Injector createBaseInjector(CConfiguration cConf, Configuration hConf) {
    return Guice.createInjector(
      new ConfigModule(cConf, hConf),
      new ZKClientModule(),
      new LocationRuntimeModule().getDistributedModules(),
      new LoggingModules().getDistributedModules(),
      new IOModule(),
      new KafkaClientModule(),
      new DiscoveryRuntimeModule().getDistributedModules(),
      new DataSetServiceModules().getDistributedModules(),
      new DataFabricModules().getDistributedModules(),
      new DataSetsModules().getDistributedModules(),
      new MetricsClientRuntimeModule().getDistributedModules(),
      new ServiceStoreModules().getDistributedModules(),
      new ExploreClientModule(),
      new NotificationFeedServiceRuntimeModule().getDistributedModules(),
      new NotificationServiceRuntimeModule().getDistributedModules(),
      new ViewAdminModules().getDistributedModules(),
      new StreamAdminModules().getDistributedModules(),
      new NamespaceClientRuntimeModule().getDistributedModules(),
      new NamespaceStoreModule().getDistributedModules(),
      new AuditModule().getDistributedModules(),
      new AuthorizationModule()
    );
  }
/**
* Creates an unstarted {@link LeaderElection} for the master service.
*/
private LeaderElection createLeaderElection() {
String electionPath = "/election/" + Constants.Service.MASTER_SERVICES;
return new LeaderElection(zkClient, electionPath, new ElectionHandler() {
private final AtomicReference<TwillController> controller = new AtomicReference<>();
private final List<Service> services = new ArrayList<>();
private Cancellable secureStoreUpdateCancellable;
// Executor for re-running master twill app if it gets terminated.
private ScheduledExecutorService executor;
private TwillRunnerService twillRunner;
@Override
public void leader() {
LOG.info("Became leader for master services");
final Injector injector = baseInjector.createChildInjector(
new TwillModule(),
new AppFabricServiceRuntimeModule().getDistributedModules(),
new ProgramRunnerRuntimeModule().getDistributedModules()
);
twillRunner = injector.getInstance(TwillRunnerService.class);
twillRunner.start();
// Schedule secure store update.
if (User.isHBaseSecurityEnabled(hConf) || UserGroupInformation.isSecurityEnabled()) {
secureStoreUpdateCancellable = twillRunner.scheduleSecureStoreUpdate(secureStoreUpdater, 30000L,
secureStoreUpdater.getUpdateInterval(),
TimeUnit.MILLISECONDS);
}
// Create app-fabric and dataset services
services.add(new RetryOnStartFailureService(new Supplier<Service>() {
@Override
public Service get() {
return injector.getInstance(DatasetService.class);
}
}, RetryStrategies.exponentialDelay(200, 5000, TimeUnit.MILLISECONDS)));
services.add(injector.getInstance(AppFabricServer.class));
executor = Executors.newSingleThreadScheduledExecutor(Threads.createDaemonThreadFactory("master-runner"));
// Start monitoring twill application
monitorTwillApplication(executor, 0, controller, twillRunner);
// Start app-fabric and dataset services
for (Service service : services) {
LOG.info("Starting service in master: {}", service);
try {
service.startAndWait();
} catch (Throwable t) {
// shut down the executor and stop the twill app,
// then throw an exception to cause the leader election service to stop
// leaderelection's listener will then shutdown the master
stop(true);
throw new RuntimeException(String.format("Unable to start service %s: %s", service, t.getMessage()));
}
}
LOG.info("CDAP Master started successfully.");
}
@Override
public void follower() {
LOG.info("Became follower for master services");
stop(stopped);
}
private void stop(boolean shouldTerminateApp) {
// Shutdown the retry executor so that no re-run of the twill app will be attempted
if (executor != null) {
executor.shutdownNow();
}
// Stop secure store update
if (secureStoreUpdateCancellable != null) {
secureStoreUpdateCancellable.cancel();
}
// If the master process has been explcitly stopped, stop the twill application as well.
if (shouldTerminateApp) {
LOG.info("Stopping master twill application");
TwillController twillController = controller.get();
if (twillController != null) {
Futures.getUnchecked(twillController.terminate());
}
}
// Stop local services last since DatasetService is running locally
// and remote services need it to preserve states.
for (Service service : Lists.reverse(services)) {
// service may not be running if there was an error in startup
if (service.isRunning()) {
LOG.info("Stopping service in master: {}", service);
stopQuietly(service);
}
}
services.clear();
if (twillRunner != null) {
stopQuietly(twillRunner);
}
}
});
}
/**
* Cleanup the cdap system temp directory.
*/
private void cleanupTempDir() {
File tmpDir = new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR),
cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile();
if (!tmpDir.isDirectory()) {
return;
}
try {
DirUtils.deleteDirectoryContents(tmpDir, true);
} catch (IOException e) {
// It's ok not able to cleanup temp directory.
LOG.debug("Failed to cleanup temp directory {}", tmpDir, e);
}
}
/**
* Check that if Explore is enabled, the correct jars are present on master node,
* and that the distribution of Hive is supported.
*/
private void checkExploreRequirements() {
if (cConf.getBoolean(Constants.Explore.EXPLORE_ENABLED)) {
// This check will throw an exception if Hive is not present or if it's distribution is unsupported
ExploreServiceUtils.checkHiveSupport();
}
}
  /**
   * Performs Kerberos login if security is enabled.
   *
   * @throws RuntimeException if login fails
   */
  private void login(CConfiguration cConf) {
    try {
      SecurityUtil.loginForMasterService(cConf);
    } catch (Exception e) {
      LOG.error("Failed to login as CDAP user", e);
      throw Throwables.propagate(e);
    }
  }
  /**
   * Creates the HBase namespace for the CDAP system namespace, if it does not already exist.
   *
   * @throws RuntimeException if the namespace cannot be created
   */
  private void createSystemHBaseNamespace() {
    HBaseTableUtil tableUtil = baseInjector.getInstance(HBaseTableUtil.class);
    try (HBaseAdmin admin = new HBaseAdmin(hConf)) {
      tableUtil.createNamespaceIfNotExists(admin, Id.Namespace.SYSTEM);
    } catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }
  /**
   * Writes the CConfiguration to the HBase configuration table.
   * The transaction coprocessors (0.94 and 0.96 versions of {@code DefaultTransactionProcessor}) need access
   * to CConfiguration values in order to load transaction snapshots for data cleanup.
   *
   * @throws RuntimeException if writing the configuration table fails
   */
  private void updateConfigurationTable() {
    try {
      new ConfigurationTable(hConf).write(ConfigurationTable.Type.DEFAULT, cConf);
    } catch (IOException ioe) {
      throw Throwables.propagate(ioe);
    }
  }
  /**
   * Monitors the twill application for master services running through Twill.
   *
   * @param executor executor for re-running the application if it gets terminated
   * @param failures number of consecutive failures in starting the application; drives the back-off delay
   * @param serviceController the reference to be updated with the active {@link TwillController}
   * @param twillRunner the twill runner service used to look up or launch the application
   */
  private void monitorTwillApplication(final ScheduledExecutorService executor, final int failures,
                                       final AtomicReference<TwillController> serviceController,
                                       final TwillRunnerService twillRunner) {
    // The executor is shut down when leadership is lost; nothing left to monitor then.
    if (executor.isShutdown()) {
      return;
    }

    // Determines if the application is running. If not, starts a new one.
    final long startTime;
    TwillController controller = getCurrentTwillController(twillRunner);
    if (controller != null) {
      // Adopting an already-running app: startTime == 0 means any later termination is
      // treated as following a long (successful) run, so it restarts immediately.
      startTime = 0L;
    } else {
      try {
        controller = startTwillApplication(twillRunner);
      } catch (Exception e) {
        LOG.error("Failed to start master twill application", e);
        throw e;
      }
      startTime = System.currentTimeMillis();
    }

    // Monitor the application
    serviceController.set(controller);
    controller.onTerminated(new Runnable() {
      @Override
      public void run() {
        if (executor.isShutdown()) {
          return;
        }
        LOG.warn("{} was terminated; restarting with back-off", Constants.Service.MASTER_SERVICES);
        backoffRun();
      }

      private void backoffRun() {
        if (System.currentTimeMillis() - startTime > SUCCESSFUL_RUN_DURATON_MS) {
          // Run was long enough to be considered successful: restart immediately
          // and reset the failure counter to 0.
          executor.execute(new Runnable() {
            @Override
            public void run() {
              monitorTwillApplication(executor, 0, serviceController, twillRunner);
            }
          });
          return;
        }

        // Exponential back-off (500ms * 2^(failures+1)), capped at MAX_BACKOFF_TIME_MS.
        long nextRunTime = Math.min(500 * (long) Math.pow(2, failures + 1), MAX_BACKOFF_TIME_MS);
        executor.schedule(new Runnable() {
          @Override
          public void run() {
            monitorTwillApplication(executor, failures + 1, serviceController, twillRunner);
          }
        }, nextRunTime, TimeUnit.MILLISECONDS);
      }
    }, Threads.SAME_THREAD_EXECUTOR);
  }
/**
* Returns the {@link TwillController} for the current master service or {@code null} if none is running.
*/
@Nullable
private TwillController getCurrentTwillController(TwillRunnerService twillRunner) {
int count = 100;
long sleepMs = LOOKUP_ATTEMPT_TIMEOUT_MS / count;
// Try to lookup the existing twill application
for (int i = 0; i < count; i++) {
TwillController result = null;
for (TwillController controller : twillRunner.lookup(Constants.Service.MASTER_SERVICES)) {
if (result != null) {
LOG.warn("Stopping one extra instance of {}", Constants.Service.MASTER_SERVICES);
try {
controller.terminate();
controller.awaitTerminated();
} catch (ExecutionException e) {
LOG.warn("Exception while Stopping one extra instance of {} - {}", Constants.Service.MASTER_SERVICES, e);
}
} else {
result = controller;
}
}
if (result != null) {
return result;
}
try {
TimeUnit.MILLISECONDS.sleep(sleepMs);
} catch (InterruptedException e) {
break;
}
}
return null;
}
  /**
   * Starts the {@link TwillApplication} for the master services.
   * Serializes the configurations into a per-run temp directory, prepares the application
   * (log handling, scheduler queue, secure store, classpaths, Explore resources) and launches it.
   *
   * @param twillRunner the runner used to prepare and launch the application
   * @return The {@link TwillController} for the application.
   * @throws RuntimeException if the run directory or configuration files cannot be created
   */
  private TwillController startTwillApplication(TwillRunnerService twillRunner) {
    try {
      // Create a temp dir for the run to hold temporary files created to run the application
      Path tempPath = Files.createDirectories(new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR),
                                                       cConf.get(Constants.AppFabric.TEMP_DIR)).toPath());
      final Path runDir = Files.createTempDirectory(tempPath, "master");
      try {
        // Serialize the configurations and the container logback so they can be localized
        // to the launched containers.
        Path cConfFile = saveCConf(cConf, runDir.resolve("cConf.xml"));
        Path hConfFile = saveHConf(hConf, runDir.resolve("hConf.xml"));
        Path logbackFile = saveLogbackConf(runDir.resolve("logback.xml"));

        TwillPreparer preparer = twillRunner.prepare(new MasterTwillApplication(cConf, cConfFile.toFile(),
                                                                               hConfFile.toFile(),
                                                                               getSystemServiceInstances()));

        if (cConf.getBoolean(Constants.COLLECT_CONTAINER_LOGS)) {
          // Route container log entries into the local logback appenders when possible.
          if (LOG instanceof ch.qos.logback.classic.Logger) {
            preparer.addLogHandler(new LogHandler() {
              @Override
              public void onLog(LogEntry entry) {
                ch.qos.logback.classic.Logger logger = (ch.qos.logback.classic.Logger) LOG;
                logger.callAppenders(new TwillLogEntryAdapter(entry));
              }
            });
          } else {
            LOG.warn("Unsupported logger binding ({}) for container log collection. Falling back to System.out.",
                     LOG.getClass().getName());
            preparer.addLogHandler(new PrinterLogHandler(new PrintWriter(System.out)));
          }
        } else {
          // Log collection disabled: turn off Twill's Kafka log publishing in the containers.
          preparer.addJVMOptions("-Dtwill.disable.kafka=true");
        }

        // Add logback xml
        if (Files.exists(logbackFile)) {
          preparer.withResources().withResources(logbackFile.toUri());
        }

        // Add yarn queue name if defined
        String queueName = cConf.get(Constants.Service.SCHEDULER_QUEUE);
        if (queueName != null) {
          LOG.info("Setting scheduler queue to {} for master services", queueName);
          preparer.setSchedulerQueue(queueName);
        }

        // Add HBase dependencies
        preparer.withDependencies(baseInjector.getInstance(HBaseTableUtil.class).getClass());

        // Add secure tokens
        if (User.isHBaseSecurityEnabled(hConf) || UserGroupInformation.isSecurityEnabled()) {
          // TokenSecureStoreUpdater.update() ignores parameters
          preparer.addSecureStore(secureStoreUpdater.update(null, null));
        }

        // add hadoop classpath to application classpath and exclude hadoop classes from bundle jar.
        String yarnAppClassPath = hConf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
                                            Joiner.on(",").join(YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH));
        preparer.withApplicationClassPaths(Splitter.on(",").trimResults().split(yarnAppClassPath))
          .withBundlerClassAcceptor(new HadoopClassExcluder());

        // Add explore dependencies
        if (cConf.getBoolean(Constants.Explore.EXPLORE_ENABLED)) {
          prepareExploreContainer(preparer);
        }

        // Add a listener to delete temp files when application started/terminated.
        TwillController controller = preparer.start();
        Runnable cleanup = new Runnable() {
          @Override
          public void run() {
            try {
              File dir = runDir.toFile();
              if (dir.isDirectory()) {
                DirUtils.deleteDirectoryContents(dir);
              }
            } catch (IOException e) {
              LOG.warn("Failed to cleanup directory {}", runDir, e);
            }
          }
        };
        controller.onRunning(cleanup, Threads.SAME_THREAD_EXECUTOR);
        controller.onTerminated(cleanup, Threads.SAME_THREAD_EXECUTOR);
        return controller;
      } catch (Exception e) {
        // Launch failed: clean up the run directory while keeping the original exception primary.
        try {
          DirUtils.deleteDirectoryContents(runDir.toFile());
        } catch (IOException ex) {
          LOG.warn("Failed to cleanup directory {}", runDir, ex);
          e.addSuppressed(ex);
        }
        throw e;
      }
    } catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }
  /**
   * Prepare the specs of the twill application for the Explore twill runnable.
   * Add jars needed by the Explore module in the classpath of the containers, and
   * add conf files (hive_site.xml, etc) as resources available for the Explore twill
   * runnable.
   *
   * @return the given {@link TwillPreparer} with the Explore classpaths and resources added
   * @throws RuntimeException if the Explore dependencies cannot be traced, or if the
   *         {@code Constants.Explore.EXPLORE_CONF_FILES} system property is not set
   */
  private TwillPreparer prepareExploreContainer(TwillPreparer preparer) {
    try {
      // Put jars needed by Hive in the containers classpath. Those jars are localized in the Explore
      // container by MasterTwillApplication, so they are available for ExploreServiceTwillRunnable
      File tempDir = DirUtils.createTempDir(new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR),
                                                     cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile());
      Set<File> jars = ExploreServiceUtils.traceExploreDependencies(tempDir);
      for (File jarFile : jars) {
        LOG.trace("Adding jar file to classpath: {}", jarFile.getName());
        preparer = preparer.withClassPaths(jarFile.getName());
      }
    } catch (IOException e) {
      throw new RuntimeException("Unable to trace Explore dependencies", e);
    }

    // EXPLORE_CONF_FILES will be defined in startup scripts if Hive is installed.
    String hiveConfFiles = System.getProperty(Constants.Explore.EXPLORE_CONF_FILES);
    LOG.debug("Hive conf files = {}", hiveConfFiles);
    if (hiveConfFiles == null) {
      throw new RuntimeException("System property " + Constants.Explore.EXPLORE_CONF_FILES + " is not set");
    }

    // Add all the conf files needed by hive as resources available to containers
    File tempDir = DirUtils.createTempDir(new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR),
                                                   cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile());
    Iterable<File> hiveConfFilesFiles = ExploreUtils.getClassPathJarsFiles(hiveConfFiles);
    Set<String> addedFiles = Sets.newHashSet();
    for (File file : hiveConfFilesFiles) {
      // Only ship XML config files; logback.xml is skipped (presumably to avoid clashing with the
      // container's own logback.xml — see saveLogbackConf usage).
      if (file.getName().matches(".*\\.xml") && !file.getName().equals("logback.xml")) {
        if (addedFiles.add(file.getName())) {
          LOG.debug("Adding config file: {}", file.getAbsolutePath());
          preparer = preparer.withResources(ExploreServiceUtils.updateConfFileForExplore(file, tempDir).toURI());
        } else {
          LOG.warn("Ignoring duplicate config file: {}", file.getAbsolutePath());
        }
      }
    }

    return preparer;
  }
  /**
   * Serializes the {@link CConfiguration} as UTF-8 XML to the given file.
   *
   * @return the given file path
   */
  private Path saveCConf(CConfiguration conf, Path file) throws IOException {
    try (Writer writer = Files.newBufferedWriter(file, Charsets.UTF_8)) {
      conf.writeXml(writer);
    }
    return file;
  }
  /**
   * Serializes the Hadoop {@link Configuration} as UTF-8 XML to the given file.
   *
   * @return the given file path
   */
  private Path saveHConf(Configuration conf, Path file) throws IOException {
    try (Writer writer = Files.newBufferedWriter(file, Charsets.UTF_8)) {
      conf.writeXml(writer);
    }
    return file;
  }
private Path saveLogbackConf(Path file) throws IOException {
// Default to system logback if the container logback is not found.
URL logbackResource = getClass().getResource("/logback-container.xml");
if (logbackResource == null) {
logbackResource = getClass().getResource("/logback.xml");
}
if (logbackResource != null) {
try (InputStream input = logbackResource.openStream()) {
Files.copy(input, file);
}
} else {
LOG.warn("Cannot find logback.xml.");
}
return file;
}
}