/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.app.runtime.spark;
import co.cask.cdap.api.Admin;
import co.cask.cdap.api.RuntimeContext;
import co.cask.cdap.api.app.ApplicationSpecification;
import co.cask.cdap.api.metrics.Metrics;
import co.cask.cdap.api.metrics.MetricsCollectionService;
import co.cask.cdap.api.metrics.MetricsContext;
import co.cask.cdap.api.plugin.PluginContext;
import co.cask.cdap.api.plugin.PluginProperties;
import co.cask.cdap.api.spark.SparkSpecification;
import co.cask.cdap.app.metrics.ProgramUserMetrics;
import co.cask.cdap.app.program.Program;
import co.cask.cdap.app.services.AbstractServiceDiscoverer;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.logging.LoggingContext;
import co.cask.cdap.data.dataset.SystemDatasetInstantiator;
import co.cask.cdap.data2.dataset2.DatasetFramework;
import co.cask.cdap.data2.dataset2.DynamicDatasetCache;
import co.cask.cdap.data2.dataset2.MultiThreadDatasetCache;
import co.cask.cdap.data2.transaction.stream.StreamAdmin;
import co.cask.cdap.internal.app.program.ProgramTypeMetricTag;
import co.cask.cdap.internal.app.runtime.DefaultAdmin;
import co.cask.cdap.internal.app.runtime.DefaultPluginContext;
import co.cask.cdap.internal.app.runtime.ProgramRunners;
import co.cask.cdap.internal.app.runtime.plugin.PluginInstantiator;
import co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo;
import co.cask.cdap.logging.context.SparkLoggingContext;
import co.cask.cdap.logging.context.WorkflowProgramLoggingContext;
import co.cask.cdap.proto.ProgramType;
import co.cask.cdap.proto.id.Ids;
import co.cask.cdap.proto.id.ProgramId;
import co.cask.tephra.TransactionSystemClient;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.twill.api.RunId;
import org.apache.twill.discovery.DiscoveryServiceClient;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;
/**
* Context to be used at Spark runtime to provide common functionality that are needed at both the driver and
* the executors.
*/
public final class SparkRuntimeContext extends AbstractServiceDiscoverer
  implements RuntimeContext, Metrics, PluginContext, Closeable {

  private final Configuration hConf;
  private final Program program;
  private final RunId runId;
  private final Map<String, String> runtimeArguments;
  private final long logicalStartTime;
  private final TransactionSystemClient txClient;
  private final MultiThreadDatasetCache datasetCache;
  private final DiscoveryServiceClient discoveryServiceClient;
  private final MetricsContext metricsContext;
  private final Metrics userMetrics;
  private final StreamAdmin streamAdmin;
  private final WorkflowProgramInfo workflowProgramInfo;
  private final PluginInstantiator pluginInstantiator;
  private final PluginContext pluginContext;
  private final Admin admin;
  private final LoggingContext loggingContext;

  /**
   * Creates the runtime context for one Spark program run.
   *
   * @param hConf the Hadoop {@link Configuration} for the execution
   * @param program the program being executed
   * @param runId the run id of this execution
   * @param runtimeArguments runtime arguments supplied by the caller; a normalized, immutable
   *                         copy is kept internally (the caller's map is never mutated)
   * @param txClient the transaction system client for dataset operations
   * @param datasetFramework the framework used to instantiate datasets
   * @param discoveryServiceClient client for service discovery
   * @param metricsCollectionService service used to emit program metrics
   * @param streamAdmin admin for stream operations
   * @param workflowProgramInfo information about the enclosing workflow,
   *                            or {@code null} if not launched from a workflow
   * @param pluginInstantiator instantiator for plugins, or {@code null} if no plugin is used
   */
  SparkRuntimeContext(Configuration hConf, Program program, RunId runId, Map<String, String> runtimeArguments,
                      TransactionSystemClient txClient,
                      DatasetFramework datasetFramework,
                      DiscoveryServiceClient discoveryServiceClient,
                      MetricsCollectionService metricsCollectionService,
                      StreamAdmin streamAdmin,
                      @Nullable WorkflowProgramInfo workflowProgramInfo,
                      @Nullable PluginInstantiator pluginInstantiator) {
    super(program.getId().toEntityId());
    this.hConf = hConf;
    this.program = program;
    this.runId = runId;

    // Normalize the runtime arguments exactly once: updateLogicalStartTime may resolve/insert
    // the logical-start-time entry, so everything below must consume the normalized copy.
    Map<String, String> args = new HashMap<>(runtimeArguments);
    this.logicalStartTime = ProgramRunners.updateLogicalStartTime(args);
    this.runtimeArguments = Collections.unmodifiableMap(args);

    this.txClient = txClient;
    ProgramId programId = program.getId().toEntityId();
    this.metricsContext = createMetricsContext(metricsCollectionService, programId, runId, workflowProgramInfo);
    // Pass the normalized, immutable runtime arguments (not the caller-supplied map) so the
    // dataset cache sees the logical-start-time entry and cannot observe later external mutation.
    this.datasetCache = new MultiThreadDatasetCache(
      new SystemDatasetInstantiator(datasetFramework, program.getClassLoader(),
                                    Collections.singleton(programId.toId())),
      txClient, programId.getNamespaceId(), this.runtimeArguments, metricsContext, null);
    this.discoveryServiceClient = discoveryServiceClient;
    this.userMetrics = new ProgramUserMetrics(metricsContext);
    this.streamAdmin = streamAdmin;
    this.workflowProgramInfo = workflowProgramInfo;
    this.pluginInstantiator = pluginInstantiator;
    this.pluginContext = new DefaultPluginContext(pluginInstantiator, programId,
                                                 program.getApplicationSpecification().getPlugins());
    this.admin = new DefaultAdmin(datasetFramework, programId.getNamespaceId());
    this.loggingContext = createLoggingContext(programId, runId, workflowProgramInfo);
  }

  /**
   * Creates the {@link LoggingContext} for this run. When launched from a workflow, log entries
   * are attributed to the workflow program; otherwise a plain Spark logging context is used.
   */
  private LoggingContext createLoggingContext(ProgramId programId, RunId runId,
                                              @Nullable WorkflowProgramInfo workflowProgramInfo) {
    if (workflowProgramInfo == null) {
      return new SparkLoggingContext(programId.getNamespace(), programId.getApplication(), programId.getProgram(),
                                     runId.getId());
    }

    ProgramId workflowProgramId = Ids.namespace(programId.getNamespace()).app(programId.getApplication())
      .workflow(workflowProgramInfo.getName());
    return new WorkflowProgramLoggingContext(workflowProgramId.getNamespace(), workflowProgramId.getApplication(),
                                             workflowProgramId.getProgram(), workflowProgramInfo.getRunId().getId(),
                                             ProgramType.SPARK, programId.getProgram());
  }

  @Override
  public ApplicationSpecification getApplicationSpecification() {
    return program.getApplicationSpecification();
  }

  @Override
  public Map<String, String> getRuntimeArguments() {
    // Immutable view; includes the normalized logical-start-time entry.
    return runtimeArguments;
  }

  @Override
  public String getNamespace() {
    return program.getNamespaceId();
  }

  @Override
  public RunId getRunId() {
    return runId;
  }

  @Override
  public Admin getAdmin() {
    return admin;
  }

  @Override
  public void count(String metricName, int delta) {
    userMetrics.count(metricName, delta);
  }

  @Override
  public void gauge(String metricName, long value) {
    userMetrics.gauge(metricName, value);
  }

  @Override
  public PluginProperties getPluginProperties(String pluginId) {
    return pluginContext.getPluginProperties(pluginId);
  }

  @Override
  public <T> Class<T> loadPluginClass(String pluginId) {
    return pluginContext.loadPluginClass(pluginId);
  }

  @Override
  public <T> T newPluginInstance(String pluginId) throws InstantiationException {
    return pluginContext.newPluginInstance(pluginId);
  }

  @Override
  protected DiscoveryServiceClient getDiscoveryServiceClient() {
    return discoveryServiceClient;
  }

  /**
   * Returns the {@link SparkSpecification} of the spark program of this context.
   */
  public SparkSpecification getSparkSpecification() {
    SparkSpecification spec = getApplicationSpecification().getSpark().get(getProgram().getName());
    // Spec shouldn't be null, otherwise the spark program won't even get started
    Preconditions.checkState(spec != null, "SparkSpecification not found for %s", getProgram().getId());
    return spec;
  }

  /**
   * Returns the {@link Program} of this context.
   */
  public Program getProgram() {
    return program;
  }

  /**
   * Returns the {@link WorkflowProgramInfo} if the spark program is running inside a workflow.
   */
  @Nullable
  public WorkflowProgramInfo getWorkflowInfo() {
    return workflowProgramInfo;
  }

  /**
   * Returns the logical start time of this run, in milliseconds.
   */
  long getLogicalStartTime() {
    return logicalStartTime;
  }

  /**
   * Returns the {@link TransactionSystemClient} for this execution.
   */
  TransactionSystemClient getTransactionSystemClient() {
    return txClient;
  }

  /**
   * Returns the {@link Configuration} used for the execution.
   */
  Configuration getConfiguration() {
    return hConf;
  }

  /**
   * Returns the {@link DynamicDatasetCache} to be used throughout the execution.
   */
  DynamicDatasetCache getDatasetCache() {
    return datasetCache;
  }

  /**
   * Returns the {@link PluginInstantiator} if plugin is used, or {@code null} otherwise.
   */
  @Nullable
  PluginInstantiator getPluginInstantiator() {
    return pluginInstantiator;
  }

  /**
   * Returns the {@link LoggingContext} representing the program.
   */
  LoggingContext getLoggingContext() {
    return loggingContext;
  }

  /**
   * Returns the {@link MetricsContext} for the program. It can be used to emit either user or system metrics.
   */
  MetricsContext getMetricsContext() {
    return metricsContext;
  }

  /**
   * Returns the {@link StreamAdmin} used for this execution.
   */
  StreamAdmin getStreamAdmin() {
    return streamAdmin;
  }

  /**
   * Creates a {@link MetricsContext} tagged for the Spark execution. Workflow tags are added
   * when the program runs inside a workflow, so metrics can be attributed to the workflow node.
   */
  private static MetricsContext createMetricsContext(MetricsCollectionService service,
                                                     ProgramId programId, RunId runId,
                                                     @Nullable WorkflowProgramInfo workflowProgramInfo) {
    Map<String, String> tags = new HashMap<>();
    tags.put(Constants.Metrics.Tag.NAMESPACE, programId.getNamespace());
    tags.put(Constants.Metrics.Tag.APP, programId.getApplication());
    tags.put(ProgramTypeMetricTag.getTagName(ProgramType.SPARK), programId.getProgram());
    tags.put(Constants.Metrics.Tag.RUN_ID, runId.getId());

    // TODO: use proper spark instance id. For now we have to emit something
    // for the test framework's waitFor metric to work.
    tags.put(Constants.Metrics.Tag.INSTANCE_ID, "0");

    if (workflowProgramInfo != null) {
      // If running inside Workflow, add the Workflow metrics tags as well
      tags.put(Constants.Metrics.Tag.WORKFLOW, workflowProgramInfo.getName());
      tags.put(Constants.Metrics.Tag.WORKFLOW_RUN_ID, workflowProgramInfo.getRunId().getId());
      tags.put(Constants.Metrics.Tag.NODE, workflowProgramInfo.getNodeId());
    }

    return service.getContext(tags);
  }

  @Override
  public void close() throws IOException {
    // Releases all datasets held by the cache; the context is unusable afterwards.
    datasetCache.close();
  }

  @Override
  public String toString() {
    return Objects.toStringHelper(SparkRuntimeContext.class)
      .add("id", getProgram().getId())
      .add("runId", getRunId())
      .toString();
  }
}