/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.api.environment;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.flink.annotation.Public;
import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.client.program.ClusterClient;
import org.apache.flink.client.program.JobWithJars;
import org.apache.flink.client.program.ProgramInvocationException;
import org.apache.flink.client.program.StandaloneClusterClient;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.graph.StreamGraph;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A {@link StreamExecutionEnvironment} for executing on a cluster.
 *
 * <p>The environment is bound at construction time to a JobManager address (host and port),
 * a client configuration, a list of JAR files and a list of global classpath URLs. Each call
 * to {@link #execute(String)} builds the stream graph and submits it through a newly created
 * {@link StandaloneClusterClient}, which is shut down again once the submission returns.
 */
@Public
public class RemoteStreamEnvironment extends StreamExecutionEnvironment {

    private static final Logger LOG = LoggerFactory.getLogger(RemoteStreamEnvironment.class);

    /** The largest valid port number (65535). */
    private static final int MAX_PORT = 0xffff;

    /** The hostname of the JobManager. */
    private final String host;

    /** The port of the JobManager main actor system. */
    private final int port;

    /** The configuration used to parametrize the client that connects to the remote cluster. */
    private final Configuration clientConfiguration;

    /** The jar files that need to be attached to each job. */
    private final List<URL> jarFiles;

    /** The classpaths that need to be attached to each job. */
    private final List<URL> globalClasspaths;

    /**
     * Creates a new RemoteStreamEnvironment that points to the master
     * (JobManager) described by the given host name and port.
     *
     * @param host
     *            The host name or address of the master (JobManager), where the
     *            program should be executed.
     * @param port
     *            The port of the master (JobManager), where the program should
     *            be executed. Must be in the range 1 to 65535 (inclusive).
     * @param jarFiles
     *            The JAR files with code that needs to be shipped to the
     *            cluster. If the program uses user-defined functions,
     *            user-defined input formats, or any libraries, those must be
     *            provided in the JAR files.
     */
    public RemoteStreamEnvironment(String host, int port, String... jarFiles) {
        this(host, port, null, jarFiles);
    }

    /**
     * Creates a new RemoteStreamEnvironment that points to the master
     * (JobManager) described by the given host name and port.
     *
     * @param host
     *            The host name or address of the master (JobManager), where the
     *            program should be executed.
     * @param port
     *            The port of the master (JobManager), where the program should
     *            be executed. Must be in the range 1 to 65535 (inclusive).
     * @param clientConfiguration
     *            The configuration used to parametrize the client that connects to the
     *            remote cluster. If {@code null}, an empty configuration is used.
     * @param jarFiles
     *            The JAR files with code that needs to be shipped to the
     *            cluster. If the program uses user-defined functions,
     *            user-defined input formats, or any libraries, those must be
     *            provided in the JAR files.
     */
    public RemoteStreamEnvironment(String host, int port, Configuration clientConfiguration, String... jarFiles) {
        this(host, port, clientConfiguration, jarFiles, null);
    }

    /**
     * Creates a new RemoteStreamEnvironment that points to the master
     * (JobManager) described by the given host name and port.
     *
     * @param host
     *            The host name or address of the master (JobManager), where the
     *            program should be executed.
     * @param port
     *            The port of the master (JobManager), where the program should
     *            be executed. Must be in the range 1 to 65535 (inclusive).
     * @param clientConfiguration
     *            The configuration used to parametrize the client that connects to the
     *            remote cluster. If {@code null}, an empty configuration is used.
     * @param jarFiles
     *            The JAR files with code that needs to be shipped to the
     *            cluster. If the program uses user-defined functions,
     *            user-defined input formats, or any libraries, those must be
     *            provided in the JAR files. A {@code null} array is treated as
     *            "no JAR files".
     * @param globalClasspaths
     *            The paths of directories and JAR files that are added to each user code
     *            classloader on all nodes in the cluster. Note that the paths must specify a
     *            protocol (e.g. file://) and be accessible on all nodes (e.g. by means of a NFS share).
     *            The protocol must be supported by the {@link java.net.URLClassLoader}.
     *            A {@code null} array is treated as "no global classpaths".
     * @throws InvalidProgramException
     *             if explicit environments are not allowed in the current context
     * @throws NullPointerException
     *             if {@code host} is {@code null}
     * @throws IllegalArgumentException
     *             if {@code port} is out of range or a JAR file path cannot be turned into a URL
     * @throws RuntimeException
     *             if checking one of the JAR files fails with an {@link IOException}
     */
    public RemoteStreamEnvironment(String host, int port, Configuration clientConfiguration, String[] jarFiles, URL[] globalClasspaths) {
        if (!ExecutionEnvironment.areExplicitEnvironmentsAllowed()) {
            throw new InvalidProgramException(
                    "The RemoteEnvironment cannot be used when submitting a program through a client, " +
                            "or running in a TestEnvironment context.");
        }
        if (host == null) {
            throw new NullPointerException("Host must not be null.");
        }
        // 65535 (0xffff) is itself a valid port, so only values above it are rejected.
        if (port < 1 || port > MAX_PORT) {
            throw new IllegalArgumentException("Port out of range");
        }

        this.host = host;
        this.port = port;
        this.clientConfiguration = clientConfiguration == null ? new Configuration() : clientConfiguration;

        // A null array means "nothing to ship" instead of failing with a bare NullPointerException.
        final String[] jarPaths = jarFiles == null ? new String[0] : jarFiles;
        this.jarFiles = new ArrayList<>(jarPaths.length);
        for (String jarFile : jarPaths) {
            try {
                URL jarFileUrl = new File(jarFile).getAbsoluteFile().toURI().toURL();
                // Validate first so that only checked URLs are ever stored.
                JobWithJars.checkJarFile(jarFileUrl);
                this.jarFiles.add(jarFileUrl);
            } catch (MalformedURLException e) {
                throw new IllegalArgumentException("JAR file path is invalid '" + jarFile + "'", e);
            } catch (IOException e) {
                throw new RuntimeException("Problem with jar file " + jarFile, e);
            }
        }

        if (globalClasspaths == null) {
            this.globalClasspaths = Collections.emptyList();
        } else {
            // Copy the array so that later changes by the caller do not leak into this environment.
            this.globalClasspaths = new ArrayList<URL>(Arrays.asList(globalClasspaths));
        }
    }

    /**
     * Builds the stream graph, assigns it the given job name, clears the recorded
     * transformations and submits the graph together with this environment's JAR files.
     *
     * @param jobName
     *            The name to set on the generated stream graph
     * @return The result of the job execution
     * @throws ProgramInvocationException
     *             if the remote submission or execution fails
     */
    @Override
    public JobExecutionResult execute(String jobName) throws ProgramInvocationException {
        StreamGraph streamGraph = getStreamGraph();
        streamGraph.setJobName(jobName);
        transformations.clear();
        return executeRemotely(streamGraph, jarFiles);
    }

    /**
     * Executes the remote job.
     *
     * <p>A new {@link StandaloneClusterClient} is created for the call, configured with a copy
     * of the client configuration plus this environment's host and port, and shut down in a
     * {@code finally} block after the run.
     *
     * @param streamGraph
     *            Stream Graph to execute
     * @param jarFiles
     *            List of jar file URLs to ship to the cluster
     * @return The result of the job execution
     * @throws ProgramInvocationException
     *             if the client cannot be created or the program execution fails
     */
    protected JobExecutionResult executeRemotely(StreamGraph streamGraph, List<URL> jarFiles) throws ProgramInvocationException {
        // Parameterized logging already skips message formatting when INFO is disabled.
        LOG.info("Running remotely at {}:{}", host, port);

        ClassLoader usercodeClassLoader = JobWithJars.buildUserCodeClassLoader(jarFiles, globalClasspaths,
                getClass().getClassLoader());

        // Work on a copy so the host/port overrides never modify the user-supplied configuration.
        Configuration configuration = new Configuration();
        configuration.addAll(this.clientConfiguration);
        configuration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, host);
        configuration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, port);

        ClusterClient client;
        try {
            client = new StandaloneClusterClient(configuration);
            client.setPrintStatusDuringExecution(getConfig().isSysoutLoggingEnabled());
        } catch (Exception e) {
            throw new ProgramInvocationException("Cannot establish connection to JobManager: " + e.getMessage(), e);
        }

        try {
            return client.run(streamGraph, jarFiles, globalClasspaths, usercodeClassLoader).getJobExecutionResult();
        } catch (ProgramInvocationException e) {
            // Already the declared exception type; pass it through without wrapping.
            throw e;
        } catch (Exception e) {
            String term = e.getMessage() == null ? "." : (": " + e.getMessage());
            throw new ProgramInvocationException("The program execution failed" + term, e);
        } finally {
            try {
                client.shutdown();
            } catch (Exception e) {
                // A failed shutdown is only logged so it cannot mask the outcome of the run.
                LOG.warn("Could not properly shut down the cluster client.", e);
            }
        }
    }

    /**
     * Returns a short description containing the host, the port and the parallelism
     * (printed as "default" when {@code getParallelism()} returns -1).
     */
    @Override
    public String toString() {
        return "Remote Environment (" + this.host + ":" + this.port + " - parallelism = "
                + (getParallelism() == -1 ? "default" : getParallelism()) + ")";
    }

    /**
     * Gets the hostname of the master (JobManager), where the
     * program will be executed.
     *
     * @return The hostname of the master
     */
    public String getHost() {
        return host;
    }

    /**
     * Gets the port of the master (JobManager), where the
     * program will be executed.
     *
     * @return The port of the master
     */
    public int getPort() {
        return port;
    }

    /**
     * Gets the configuration used to parametrize the client that connects to the
     * remote cluster. The returned object is the instance held by this environment,
     * not a copy.
     *
     * @return The client configuration (never {@code null})
     */
    public Configuration getClientConfiguration() {
        return clientConfiguration;
    }
}