/*
* Copyright © 2014-2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.internal.app.runtime.distributed;
import co.cask.cdap.app.program.Program;
import co.cask.cdap.app.runtime.Arguments;
import co.cask.cdap.app.runtime.ProgramController;
import co.cask.cdap.app.runtime.ProgramOptions;
import co.cask.cdap.app.runtime.ProgramRunner;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.lang.ClassLoaders;
import co.cask.cdap.common.lang.CombineClassLoader;
import co.cask.cdap.common.lang.jar.BundleJarUtil;
import co.cask.cdap.common.twill.AbortOnTimeoutEventHandler;
import co.cask.cdap.common.twill.HadoopClassExcluder;
import co.cask.cdap.common.utils.DirUtils;
import co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory;
import co.cask.cdap.internal.app.runtime.BasicArguments;
import co.cask.cdap.internal.app.runtime.ProgramOptionConstants;
import co.cask.cdap.internal.app.runtime.SimpleProgramOptions;
import co.cask.cdap.internal.app.runtime.codec.ArgumentsCodec;
import co.cask.cdap.internal.app.runtime.codec.ProgramOptionsCodec;
import co.cask.cdap.security.TokenSecureStoreUpdater;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.io.Files;
import com.google.common.io.Resources;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.twill.api.EventHandler;
import org.apache.twill.api.TwillApplication;
import org.apache.twill.api.TwillController;
import org.apache.twill.api.TwillPreparer;
import org.apache.twill.api.TwillRunner;
import org.apache.twill.api.logging.LogEntry;
import org.apache.twill.api.logging.LogHandler;
import org.apache.twill.api.logging.PrinterLogHandler;
import org.apache.twill.common.Threads;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.annotation.Nullable;
/**
* Defines the base framework for starting {@link Program} in the cluster.
*/
public abstract class AbstractDistributedProgramRunner implements ProgramRunner {
private static final Logger LOG = LoggerFactory.getLogger(AbstractDistributedProgramRunner.class);
private static final Gson GSON = new GsonBuilder()
.registerTypeAdapter(Arguments.class, new ArgumentsCodec())
.registerTypeAdapter(ProgramOptions.class, new ProgramOptionsCodec())
.create();
private final TwillRunner twillRunner;
protected final YarnConfiguration hConf;
protected final CConfiguration cConf;
protected final EventHandler eventHandler;
private final TokenSecureStoreUpdater secureStoreUpdater;
/**
* An interface for launching TwillApplication. Used by sub-classes only.
*/
protected abstract class ApplicationLauncher {
/**
* Starts the given application through Twill.
*
* @param twillApplication the application to start
*
* @return the {@link TwillController} for the application.
*/
public TwillController launch(TwillApplication twillApplication) {
return launch(twillApplication, Collections.<String>emptyList(), Collections.<Class<?>>emptyList());
}
/**
* Starts the given application through Twill with extra classpaths appended to the end of the classpath of
* the runnables inside the applications.
*
* @param twillApplication the application to start
* @param extraClassPaths to append
*
* @return the {@link TwillController} for the application.
* @see TwillPreparer#withClassPaths(Iterable)
*/
public TwillController launch(TwillApplication twillApplication, String...extraClassPaths) {
return launch(twillApplication, Arrays.asList(extraClassPaths), Collections.<Class<?>>emptyList());
}
/**
* Starts the given application through Twill with extra classpaths appended to the end of the classpath of
* the runnables inside the applications.
*
* @param twillApplication the application to start
* @param extraClassPaths to append
*
* @return the {@link TwillController} for the application.
* @see TwillPreparer#withClassPaths(Iterable)
*/
public abstract TwillController launch(TwillApplication twillApplication, Iterable<String> extraClassPaths,
Iterable<? extends Class<?>> extraDependencies);
}
protected AbstractDistributedProgramRunner(TwillRunner twillRunner, YarnConfiguration hConf, CConfiguration cConf,
TokenSecureStoreUpdater tokenSecureStoreUpdater) {
this.twillRunner = twillRunner;
this.hConf = hConf;
this.cConf = cConf;
this.eventHandler = createEventHandler(cConf);
this.secureStoreUpdater = tokenSecureStoreUpdater;
}
protected EventHandler createEventHandler(CConfiguration cConf) {
return new AbortOnTimeoutEventHandler(cConf.getLong(Constants.CFG_TWILL_NO_CONTAINER_TIMEOUT, Long.MAX_VALUE));
}
@Override
public final ProgramController run(final Program program, final ProgramOptions oldOptions) {
final String schedulerQueueName = oldOptions.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE);
final File tempDir = DirUtils.createTempDir(new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR),
cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile());
try {
if (schedulerQueueName != null && !schedulerQueueName.isEmpty()) {
hConf.set(JobContext.QUEUE_NAME, schedulerQueueName);
LOG.info("Setting scheduler queue to {}", schedulerQueueName);
}
Map<String, LocalizeResource> localizeResources = new HashMap<>();
final ProgramOptions options = addArtifactPluginFiles(oldOptions, localizeResources,
DirUtils.createTempDir(tempDir));
// Copy config files and program jar to local temp, and ask Twill to localize it to container.
// What Twill does is to save those files in HDFS and keep using them during the lifetime of application.
// Twill will manage the cleanup of those files in HDFS.
localizeResources.put("hConf.xml",
new LocalizeResource(saveHConf(hConf, File.createTempFile("hConf", ".xml", tempDir))));
localizeResources.put("cConf.xml",
new LocalizeResource(saveCConf(cConf, File.createTempFile("cConf", ".xml", tempDir))));
final URI logbackURI = getLogBackURI(program, tempDir);
final String programOptions = GSON.toJson(options);
// Obtains and add the HBase delegation token as well (if in non-secure mode, it's a no-op)
// Twill would also ignore it if it is not running in secure mode.
// The HDFS token should already obtained by Twill.
return launch(program, options, localizeResources, tempDir, new ApplicationLauncher() {
@Override
public TwillController launch(TwillApplication twillApplication, Iterable<String> extraClassPaths,
Iterable<? extends Class<?>> extraDependencies) {
TwillPreparer twillPreparer = twillRunner.prepare(twillApplication);
// TODO: CDAP-5506. It's a bit hacky to set a Spark environment here. However, we always launch
// Spark using YARN and it is needed for both Workflow and Spark runner. We need to set it
// because inside Spark code, it will set and unset the SPARK_YARN_MODE system properties, causing
// fork in distributed mode not working. Setting it in the environment, which Spark uses for defaults,
// so it can't be unset by Spark
twillPreparer.withEnv(Collections.singletonMap("SPARK_YARN_MODE", "true"));
if (options.isDebug()) {
LOG.info("Starting {} with debugging enabled, programOptions: {}, and logback: {}",
program.getId(), programOptions, logbackURI);
twillPreparer.enableDebugging();
}
// Add scheduler queue name if defined
if (schedulerQueueName != null && !schedulerQueueName.isEmpty()) {
LOG.info("Setting scheduler queue for app {} as {}", program.getId(), schedulerQueueName);
twillPreparer.setSchedulerQueue(schedulerQueueName);
}
if (logbackURI != null) {
twillPreparer.withResources(logbackURI);
}
String logLevelConf = cConf.get(Constants.COLLECT_APP_CONTAINER_LOG_LEVEL).toUpperCase();
if ("OFF".equals(logLevelConf)) {
twillPreparer.addJVMOptions("-Dtwill.disable.kafka=true");
} else {
LogEntry.Level logLevel = LogEntry.Level.ERROR;
if ("ALL".equals(logLevelConf)) {
logLevel = LogEntry.Level.TRACE;
} else {
try {
logLevel = LogEntry.Level.valueOf(logLevelConf.toUpperCase());
} catch (Exception e) {
LOG.warn("Invalid application container log level {}. Defaulting to ERROR.", logLevelConf);
}
}
twillPreparer.addLogHandler(new ApplicationLogHandler(new PrinterLogHandler(new PrintWriter(System.out)),
logLevel));
}
String yarnAppClassPath = hConf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
Joiner.on(",").join(YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH));
// Add secure tokens
if (User.isHBaseSecurityEnabled(hConf) || UserGroupInformation.isSecurityEnabled()) {
// TokenSecureStoreUpdater.update() ignores parameters
twillPreparer.addSecureStore(secureStoreUpdater.update(null, null));
}
Iterable<Class<?>> dependencies = Iterables.concat(
Collections.singletonList(HBaseTableUtilFactory.getHBaseTableUtilClass()), extraDependencies
);
twillPreparer
.withDependencies(dependencies)
.withClassPaths(Iterables.concat(extraClassPaths, Splitter.on(',').trimResults()
.split(hConf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH, ""))))
.withApplicationClassPaths(Splitter.on(",").trimResults().split(yarnAppClassPath))
.withBundlerClassAcceptor(new HadoopClassExcluder() {
@Override
public boolean accept(String className, URL classUrl, URL classPathUrl) {
// Exclude both hadoop and spark classes.
return super.accept(className, classUrl, classPathUrl) && !className.startsWith("org.apache.spark.");
}
})
.withApplicationArguments(
String.format("--%s", RunnableOptions.JAR), program.getJarLocation().getName(),
String.format("--%s", RunnableOptions.PROGRAM_OPTIONS), programOptions
);
TwillController twillController;
// Change the context classloader to the combine classloader of this ProgramRunner and
// all the classloaders of the dependencies classes so that Twill can trace classes.
ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(new CombineClassLoader(
AbstractDistributedProgramRunner.this.getClass().getClassLoader(),
Iterables.transform(dependencies, new Function<Class<?>, ClassLoader>() {
@Override
public ClassLoader apply(Class<?> input) {
return input.getClassLoader();
}
})));
try {
twillController = twillPreparer.start();
} finally {
ClassLoaders.setContextClassLoader(oldClassLoader);
}
return addCleanupListener(twillController, program, tempDir);
}
});
} catch (Exception e) {
deleteDirectory(tempDir);
throw Throwables.propagate(e);
}
}
private ProgramOptions addArtifactPluginFiles(ProgramOptions options, Map<String, LocalizeResource> localizeResources,
File tempDir) throws IOException {
Arguments systemArgs = options.getArguments();
if (!systemArgs.hasOption(ProgramOptionConstants.PLUGIN_DIR)) {
return options;
}
File localDir = new File(systemArgs.getOption(ProgramOptionConstants.PLUGIN_DIR));
File archiveFile = new File(tempDir, "artifacts.jar");
BundleJarUtil.createJar(localDir, archiveFile);
// Localize plugins to two files, one expanded into a directory, one not.
localizeResources.put("artifacts", new LocalizeResource(archiveFile, true));
localizeResources.put("artifacts_archive.jar", new LocalizeResource(archiveFile, false));
Map<String, String> newSystemArgs = Maps.newHashMap(systemArgs.asMap());
newSystemArgs.put(ProgramOptionConstants.PLUGIN_DIR, "artifacts");
newSystemArgs.put(ProgramOptionConstants.PLUGIN_ARCHIVE, "artifacts_archive.jar");
return new SimpleProgramOptions(options.getName(), new BasicArguments(newSystemArgs),
options.getUserArguments(), options.isDebug());
}
/**
* Returns a {@link URI} for the logback.xml file to be localized to container and available in the container
* classpath.
*/
@Nullable
private URI getLogBackURI(Program program, File tempDir) throws IOException, URISyntaxException {
URL logbackURL = program.getClassLoader().getResource("logback.xml");
if (logbackURL != null) {
return logbackURL.toURI();
}
URL resource = getClass().getClassLoader().getResource("logback-container.xml");
if (resource == null) {
return null;
}
// Copy the template
File logbackFile = new File(tempDir, "logback.xml");
Files.copy(Resources.newInputStreamSupplier(resource), logbackFile);
return logbackFile.toURI();
}
/**
* Sub-class overrides this method to launch the twill application.
*
* @param program the program to launch
* @param options the options for the program
* @param localizeResources a mutable map for adding extra resources to localize
* @param tempDir a temporary directory for this launch. Sub-classes can use it to create resources for localization
* which require cleanup after launching completed
* @param launcher an {@link ApplicationLauncher} to actually launching the program
*/
protected abstract ProgramController launch(Program program, ProgramOptions options,
Map<String, LocalizeResource> localizeResources,
File tempDir,
ApplicationLauncher launcher);
private File saveHConf(Configuration conf, File file) throws IOException {
try (Writer writer = Files.newWriter(file, Charsets.UTF_8)) {
conf.writeXml(writer);
}
return file;
}
private File saveCConf(CConfiguration conf, File file) throws IOException {
// Unsettting the runtime extension directory as the necessary extension jars should be shipped to the container
// by the distributed ProgramRunner.
CConfiguration copied = CConfiguration.copy(conf);
copied.unset(Constants.AppFabric.RUNTIME_EXT_DIR);
try (Writer writer = Files.newWriter(file, Charsets.UTF_8)) {
copied.writeXml(writer);
}
return file;
}
/**
* Deletes the given directory recursively. Only log if there is {@link IOException}.
*/
private void deleteDirectory(File directory) {
try {
DirUtils.deleteDirectoryContents(directory);
} catch (IOException e) {
LOG.warn("Failed to delete directory {}", directory, e);
}
}
/**
* Adds a listener to the given TwillController to delete local temp files when the program has started/terminated.
* The local temp files could be removed once the program is started, since Twill would keep the files in
* HDFS and no long needs the local temp files once program is started.
*
* @return The same TwillController instance.
*/
private TwillController addCleanupListener(TwillController controller,
final Program program, final File tempDir) {
final AtomicBoolean deleted = new AtomicBoolean(false);
Runnable cleanup = new Runnable() {
public void run() {
if (!deleted.compareAndSet(false, true)) {
return;
}
LOG.debug("Cleanup tmp files for {}: {}", program.getId(), tempDir);
deleteDirectory(tempDir);
}};
controller.onRunning(cleanup, Threads.SAME_THREAD_EXECUTOR);
controller.onTerminated(cleanup, Threads.SAME_THREAD_EXECUTOR);
return controller;
}
private static final class ApplicationLogHandler implements LogHandler {
private final LogHandler delegate;
private final LogEntry.Level logLevel;
private ApplicationLogHandler(LogHandler delegate, LogEntry.Level logLevel) {
this.delegate = delegate;
this.logLevel = logLevel;
}
@Override
public void onLog(LogEntry logEntry) {
if (logEntry.getLogLevel().ordinal() <= logLevel.ordinal()) {
delegate.onLog(logEntry);
}
}
}
}