/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.nephele.client; import java.io.IOException; import java.io.PrintStream; import java.net.InetSocketAddress; import java.util.Iterator; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import eu.stratosphere.api.common.JobExecutionResult; import eu.stratosphere.api.common.accumulators.AccumulatorHelper; import eu.stratosphere.configuration.ConfigConstants; import eu.stratosphere.configuration.Configuration; import eu.stratosphere.nephele.event.job.AbstractEvent; import eu.stratosphere.nephele.event.job.JobEvent; import eu.stratosphere.nephele.ipc.RPC; import eu.stratosphere.nephele.jobgraph.JobGraph; import eu.stratosphere.nephele.jobgraph.JobStatus; import eu.stratosphere.nephele.net.NetUtils; import eu.stratosphere.nephele.protocols.AccumulatorProtocol; import eu.stratosphere.nephele.protocols.JobManagementProtocol; import eu.stratosphere.nephele.services.accumulators.AccumulatorEvent; import eu.stratosphere.nephele.types.IntegerRecord; import eu.stratosphere.util.StringUtils; /** * The job client is able to submit, control, and abort jobs. * <p> * This class is thread-safe. */ public class JobClient { /** * The logging object used for debugging. */ private static final Log LOG = LogFactory.getLog(JobClient.class); /** * The job management server stub. */ private final JobManagementProtocol jobSubmitClient; /** * The accumulator protocol stub to request accumulators from JobManager */ private AccumulatorProtocol accumulatorProtocolProxy; /** * The job graph assigned with this job client. */ private final JobGraph jobGraph; /** * The configuration assigned with this job client. */ private final Configuration configuration; /** * The shutdown hook which is executed if the user interrupts the job the job execution. */ private final JobCleanUp jobCleanUp; /** * The sequence number of the last processed event received from the job manager. */ private long lastProcessedEventSequenceNumber = -1; private PrintStream console; /** * Inner class used to perform clean up tasks when the * job client is terminated. */ public static class JobCleanUp extends Thread { /** * Stores a reference to the {@link JobClient} object this clean up object has been created for. */ private final JobClient jobClient; /** * Constructs a new clean up object which is used to perform clean up tasks * when the job client is terminated. * * @param jobClient * the job client this clean up object belongs to */ public JobCleanUp(final JobClient jobClient) { this.jobClient = jobClient; } @Override public void run() { // Close the RPC object this.jobClient.close(); } } /** * Constructs a new job client object and instantiates a local * RPC proxy for the {@link JobSubmissionProtocol}. * * @param jobGraph * the job graph to run * @throws IOException * thrown on error while initializing the RPC connection to the job manager */ public JobClient(final JobGraph jobGraph) throws IOException { this(jobGraph, new Configuration()); } /** * Constructs a new job client object and instantiates a local * RPC proxy for the {@link JobSubmissionProtocol}. * * @param jobGraph * the job graph to run * @param configuration * configuration object which can include special configuration settings for the job client * @throws IOException * thrown on error while initializing the RPC connection to the job manager */ public JobClient(final JobGraph jobGraph, final Configuration configuration) throws IOException { final String address = configuration.getString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, null); final int port = configuration.getInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT); final InetSocketAddress inetaddr = new InetSocketAddress(address, port); this.jobSubmitClient = RPC.getProxy(JobManagementProtocol.class, inetaddr, NetUtils.getSocketFactory()); this.accumulatorProtocolProxy = RPC.getProxy(AccumulatorProtocol.class, inetaddr, NetUtils.getSocketFactory()); this.jobGraph = jobGraph; this.configuration = configuration; this.jobCleanUp = new JobCleanUp(this); } /** * Constructs a new job client object and instantiates a local * RPC proxy for the {@link JobSubmissionProtocol}. * * @param jobGraph * the job graph to run * @param configuration * configuration object which can include special configuration settings for the job client * @param jobManagerAddress * IP/Port of the jobmanager (not taken from provided configuration object). * @throws IOException * thrown on error while initializing the RPC connection to the job manager */ public JobClient(final JobGraph jobGraph, final Configuration configuration, final InetSocketAddress jobManagerAddress) throws IOException { this.jobSubmitClient = RPC.getProxy(JobManagementProtocol.class, jobManagerAddress, NetUtils.getSocketFactory()); this.jobGraph = jobGraph; this.configuration = configuration; this.jobCleanUp = new JobCleanUp(this); } /** * Closes the <code>JobClient</code> by destroying the RPC stub object. */ public void close() { synchronized (this.jobSubmitClient) { RPC.stopProxy(this.jobSubmitClient); } synchronized (this.accumulatorProtocolProxy) { RPC.stopProxy(this.accumulatorProtocolProxy); } } /** * Returns the {@link Configuration} object which can include special configuration settings for the job client. * * @return the {@link Configuration} object which can include special configuration settings for the job client */ public Configuration getConfiguration() { return this.configuration; } /** * Submits the job assigned to this job client to the job manager. * * @return a <code>JobSubmissionResult</code> object encapsulating the results of the job submission * @throws IOException * thrown in case of submission errors while transmitting the data to the job manager */ public JobSubmissionResult submitJob() throws IOException { synchronized (this.jobSubmitClient) { return this.jobSubmitClient.submitJob(this.jobGraph); } } /** * Cancels the job assigned to this job client. * * @return a <code>JobCancelResult</code> object encapsulating the result of the job cancel request * @throws IOException * thrown if an error occurred while transmitting the request to the job manager */ public JobCancelResult cancelJob() throws IOException { synchronized (this.jobSubmitClient) { return this.jobSubmitClient.cancelJob(this.jobGraph.getJobID()); } } /** * Retrieves the current status of the job assigned to this job client. * * @return a <code>JobProgressResult</code> object including the current job progress * @throws IOException * thrown if an error occurred while transmitting the request */ public JobProgressResult getJobProgress() throws IOException { synchronized (this.jobSubmitClient) { return this.jobSubmitClient.getJobProgress(this.jobGraph.getJobID()); } } /** * Submits the job assigned to this job client to the job manager and queries the job manager * about the progress of the job until it is either finished or aborted. * * @return the duration of the job execution in milliseconds * @throws IOException * thrown if an error occurred while transmitting the request * @throws JobExecutionException * thrown if the job has been aborted either by the user or as a result of an error */ public JobExecutionResult submitJobAndWait() throws IOException, JobExecutionException { synchronized (this.jobSubmitClient) { final JobSubmissionResult submissionResult = this.jobSubmitClient.submitJob(this.jobGraph); if (submissionResult.getReturnCode() == AbstractJobResult.ReturnCode.ERROR) { LOG.error("ERROR: " + submissionResult.getDescription()); throw new JobExecutionException(submissionResult.getDescription(), false); } // Make sure the job is properly terminated when the user shut's down the client Runtime.getRuntime().addShutdownHook(this.jobCleanUp); } long sleep = 0; try { final IntegerRecord interval = this.jobSubmitClient.getRecommendedPollingInterval(); sleep = interval.getValue() * 1000; } catch (IOException ioe) { Runtime.getRuntime().removeShutdownHook(this.jobCleanUp); // Rethrow error throw ioe; } try { Thread.sleep(sleep / 2); } catch (InterruptedException e) { Runtime.getRuntime().removeShutdownHook(this.jobCleanUp); logErrorAndRethrow(StringUtils.stringifyException(e)); } long startTimestamp = -1; while (true) { if (Thread.interrupted()) { logErrorAndRethrow("Job client has been interrupted"); } JobProgressResult jobProgressResult = null; try { jobProgressResult = getJobProgress(); } catch (IOException ioe) { Runtime.getRuntime().removeShutdownHook(this.jobCleanUp); // Rethrow error throw ioe; } if (jobProgressResult == null) { logErrorAndRethrow("Returned job progress is unexpectedly null!"); } if (jobProgressResult.getReturnCode() == AbstractJobResult.ReturnCode.ERROR) { logErrorAndRethrow("Could not retrieve job progress: " + jobProgressResult.getDescription()); } final Iterator<AbstractEvent> it = jobProgressResult.getEvents(); while (it.hasNext()) { final AbstractEvent event = it.next(); // Did we already process that event? if (this.lastProcessedEventSequenceNumber >= event.getSequenceNumber()) { continue; } LOG.info(event.toString()); if (this.console != null) { this.console.println(event.toString()); } this.lastProcessedEventSequenceNumber = event.getSequenceNumber(); // Check if we can exit the loop if (event instanceof JobEvent) { final JobEvent jobEvent = (JobEvent) event; final JobStatus jobStatus = jobEvent.getCurrentJobStatus(); if (jobStatus == JobStatus.SCHEDULED) { startTimestamp = jobEvent.getTimestamp(); } if (jobStatus == JobStatus.FINISHED) { Runtime.getRuntime().removeShutdownHook(this.jobCleanUp); final long jobDuration = jobEvent.getTimestamp() - startTimestamp; // Request accumulators Map<String, Object> accumulators = null; try { accumulators = AccumulatorHelper.toResultMap(getAccumulators().getAccumulators()); } catch (IOException ioe) { Runtime.getRuntime().removeShutdownHook(this.jobCleanUp); throw ioe; // Rethrow error } return new JobExecutionResult(jobDuration, accumulators); } else if (jobStatus == JobStatus.CANCELED || jobStatus == JobStatus.FAILED) { Runtime.getRuntime().removeShutdownHook(this.jobCleanUp); LOG.info(jobEvent.getOptionalMessage()); if (jobStatus == JobStatus.CANCELED) { throw new JobExecutionException(jobEvent.getOptionalMessage(), true); } else { throw new JobExecutionException(jobEvent.getOptionalMessage(), false); } } } } try { Thread.sleep(sleep); } catch (InterruptedException e) { logErrorAndRethrow(StringUtils.stringifyException(e)); } } } /** * Returns the recommended interval in seconds in which a client * is supposed to poll for progress information. * * @return the interval in seconds * @throws IOException * thrown if an error occurred while transmitting the request */ public int getRecommendedPollingInterval() throws IOException { synchronized (this.jobSubmitClient) { return this.jobSubmitClient.getRecommendedPollingInterval().getValue(); } } /** * Writes the given error message to the log and throws it in an {@link IOException}. * * @param errorMessage * the error message to write to the log * @throws IOException * thrown after the error message is written to the log */ private void logErrorAndRethrow(final String errorMessage) throws IOException { LOG.error(errorMessage); throw new IOException(errorMessage); } public void setConsoleStreamForReporting(PrintStream stream) { this.console = stream; } private AccumulatorEvent getAccumulators() throws IOException { synchronized (this.jobSubmitClient) { return this.accumulatorProtocolProxy.getAccumulatorResults(this.jobGraph.getJobID()); } } }