/*
* Copyright [2013-2015] PayPal Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ml.shifu.guagua.worker;
import java.lang.reflect.Method;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import ml.shifu.guagua.GuaguaConstants;
import ml.shifu.guagua.GuaguaRuntimeException;
import ml.shifu.guagua.coordinator.zk.GuaguaZooKeeper.Filter;
import ml.shifu.guagua.io.Bytable;
import ml.shifu.guagua.io.BytableWrapper;
import ml.shifu.guagua.io.HaltBytable;
import ml.shifu.guagua.io.NettyBytableDecoder;
import ml.shifu.guagua.io.NettyBytableEncoder;
import ml.shifu.guagua.util.NetworkUtils;
import ml.shifu.guagua.util.NumberFormatUtils;
import ml.shifu.guagua.util.ReflectionUtils;
import org.apache.zookeeper.KeeperException;
import org.jboss.netty.bootstrap.ClientBootstrap;
import org.jboss.netty.channel.Channel;
import org.jboss.netty.channel.ChannelEvent;
import org.jboss.netty.channel.ChannelFuture;
import org.jboss.netty.channel.ChannelHandlerContext;
import org.jboss.netty.channel.ChannelPipeline;
import org.jboss.netty.channel.ChannelPipelineFactory;
import org.jboss.netty.channel.ChannelStateEvent;
import org.jboss.netty.channel.Channels;
import org.jboss.netty.channel.ExceptionEvent;
import org.jboss.netty.channel.MessageEvent;
import org.jboss.netty.channel.SimpleChannelUpstreamHandler;
import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Worker coordinator coordinates master with Netty client channel.
*
* <p>
* Worker results and iteration info are not stored in znodes, unlike in SyncWorkerCoordinator. For a big task with many
* workers, this can decrease pressure on zookeeper. By leveraging Netty, fast worker and master coordination is expected.
*
* <p>
* Master results are still stored into zookeeper for fail-over. There is only one master per job, so this shouldn't be
* a burden to zookeeper.
*/
public class NettyWorkerCoordinator<MASTER_RESULT extends Bytable, WORKER_RESULT extends Bytable> extends
AbstractWorkerCoordinator<MASTER_RESULT, WORKER_RESULT> {
/**
* Logger used by this coordinator and by its inner handler and command classes.
*/
private static final Logger LOG = LoggerFactory.getLogger(NettyWorkerCoordinator.class);
/**
* Fallback value, in milliseconds, passed when reading {@link #GUAGUA_WORKER_GETRESULT_TIMEOUT} in
* {@link #postIteration(WorkerContext)}.
*/
private static final long GUAGUA_DEFAULT_WORKER_GETRESULT_TIMEOUT = 60 * 1000L;
/**
* Property key for how long {@link #postIteration(WorkerContext)} waits for the current master result before it
* retries the whole send-and-wait cycle.
*/
private static final String GUAGUA_WORKER_GETRESULT_TIMEOUT = "guagua.worker.getresult.timeout";
/**
* Master server address with format {@code name:port}. It is read from the master initialization znode.
*/
private String masterServerAddress;
/**
* Netty client instance to communicate with master.
*/
private ClientBootstrap messageClient;
/**
* Client channel used to connect to master server.
*/
private Channel clientChannel;
/**
* If server is shutdown or the client channel is disconnected. Set to true by the channel handler on a disconnect
* event and reset to false at the end of the reconnect logic in {@link #preIteration(WorkerContext)}.
*/
private AtomicBoolean isServerShutdownOrClientDisconnect = new AtomicBoolean(false);
/**
* If get master result time out. Set this to a field to make it updated in inner classes.
*/
private boolean isTimeoutToGetCurrentMasterResult = false;
/**
* If master znode is cleaned, we may get exception on calling
* {@link #setMasterResult(WorkerContext, String, String)}.
*/
private boolean isMasterZnodeCleaned = false;
/**
* If get master server address time out. Set this to a field to make it updated in inner classes.
*/
private boolean isTimeoutToGetMasterServerAddress = false;
/**
 * Prepares the worker before any iteration is run.
 *
 * <ul>
 * <li>1. Set up the zookeeper connection and other properties from the context;</li>
 * <li>2. run the fail-over check so that the worker resumes from the last failure point;</li>
 * <li>3. block until the master initialization znode exists, then read the master server address from it;</li>
 * <li>4. open the Netty connection to the master server;</li>
 * <li>5. for a fail-over task (not starting at the init iteration), reload the master result of the current
 * iteration.</li>
 * </ul>
 *
 * @param context
 *            the worker context providing props, app id and iteration state
 */
@Override
public void preApplication(final WorkerContext<MASTER_RESULT, WORKER_RESULT> context) {
    // Zookeeper and the remaining coordinator properties come from the context props.
    initialize(context.getProps());

    // Locate the last successful iteration in case this is a restarted task.
    new FailOverCoordinatorCommand(context).execute();

    // The master publishes its server address in the init-step znode; wait for it and read it.
    new BasicCoordinatorCommand() {
        @Override
        public void doExecute() throws KeeperException, InterruptedException {
            final String masterInitNode = getCurrentMasterNode(context.getAppId(),
                    GuaguaConstants.GUAGUA_INIT_STEP).toString();

            // Poll until the master znode is visible, i.e. the master is ready to start iterations.
            new RetryCoordinatorCommand(isFixedTime(), getSleepTime()) {
                @Override
                public boolean retryExecution() throws KeeperException, InterruptedException {
                    boolean masterReady;
                    try {
                        masterReady = (getZooKeeper().exists(masterInitNode, false) != null);
                    } catch (KeeperException.NoNodeException e) {
                        // Only log on a fraction of the attempts to avoid flooding the log.
                        if(System.nanoTime() % 10 == 0) {
                            LOG.warn("No such node:{}", masterInitNode);
                        }
                        masterReady = false;
                    }
                    return masterReady;
                }
            }.execute();

            byte[] addressBytes = getBytesFromZNode(masterInitNode, null);
            NettyWorkerCoordinator.this.masterServerAddress = new String(addressBytes, Charset.forName("UTF-8"));
        }
    }.execute();

    // Open the message channel to the master server.
    connectMasterServer();

    // Nothing more to do when starting from the init iteration.
    if(context.isInitIteration()) {
        return;
    }

    // Fail-over task: recover the master result of the iteration we restart from.
    new BasicCoordinatorCommand() {
        @Override
        public void doExecute() throws KeeperException, InterruptedException {
            final String applicationId = context.getAppId();
            final int iteration = context.getCurrentIteration();
            final String masterNode = getCurrentMasterNode(applicationId, iteration).toString();
            final String masterSplitNode = getCurrentMasterSplitNode(applicationId, iteration).toString();
            setMasterResult(context, masterNode, masterSplitNode);
        }
    }.execute();
}
/**
 * Connect master server for message communication.
 *
 * <p>
 * The target is taken from {@link #masterServerAddress}, which must be in {@code name:port} form. This method is
 * called once from {@link #preApplication(WorkerContext)} and again from {@link #preIteration(WorkerContext)} on
 * every reconnect, so any client left over from a previous call is closed and its external resources (the two
 * single-thread executors) are released before a new client is created.
 *
 * <p>
 * A failed connection attempt is logged as an error; the (unconnected) channel is still stored so the control flow
 * of callers is unchanged.
 *
 * @throws GuaguaRuntimeException
 *             if the master server address is missing or not in {@code name:port} form
 */
private void connectMasterServer() {
    // Parse the address first so a malformed value fails before any Netty resource is allocated.
    String[] namePortGroup = this.masterServerAddress == null ? new String[0] : this.masterServerAddress.split(":");
    if(namePortGroup.length < 2) {
        throw new GuaguaRuntimeException("Invalid master server address '" + this.masterServerAddress
                + "', expected format <name:port>.");
    }
    String masterServerName = namePortGroup[0];
    int masterServerPort = NumberFormatUtils.getInt(namePortGroup[1]);

    // Release the previous client, if any; otherwise each reconnect leaks a channel and two executor threads.
    if(this.clientChannel != null) {
        this.clientChannel.close().awaitUninterruptibly();
        this.clientChannel = null;
    }
    if(this.messageClient != null) {
        this.messageClient.releaseExternalResources();
        this.messageClient = null;
    }

    this.messageClient = new ClientBootstrap(new NioClientSocketChannelFactory(Executors.newSingleThreadExecutor(),
            Executors.newSingleThreadExecutor()));

    // Set up the pipeline factory.
    this.messageClient.setPipelineFactory(new ChannelPipelineFactory() {
        public ChannelPipeline getPipeline() throws Exception {
            return Channels.pipeline(new NettyBytableEncoder(), new NettyBytableDecoder(), new ClientHandler());
        }
    });

    // Start the connection attempt and wait for its outcome.
    ChannelFuture future = this.messageClient.connect(new InetSocketAddress(masterServerName, masterServerPort))
            .awaitUninterruptibly();
    this.clientChannel = future.getChannel();

    // Only report success when the attempt really succeeded.
    if(future.isSuccess()) {
        LOG.info("Connect to {}:{}", masterServerName, masterServerPort);
    } else {
        LOG.error("Failed to connect to " + masterServerName + ":" + masterServerPort, future.getCause());
    }
}
/**
* ClientHandeler used to update progress to RPC server (AppMaster).
*/
private class ClientHandler extends SimpleChannelUpstreamHandler {
@Override
public void handleUpstream(ChannelHandlerContext ctx, ChannelEvent e) throws Exception {
super.handleUpstream(ctx, e);
}
@Override
public void channelConnected(ChannelHandlerContext ctx, ChannelStateEvent e) {
// Send the first message if this handler is a client-side handler.
LOG.info("Channel connected:{}", e.getValue());
}
@Override
public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) {
LOG.info("Receive status:{}", e.getMessage());
}
@Override
public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) {
LOG.error("error in client handler", e.getCause());
e.getChannel().close();
Throwable cause = e.getCause();
if(cause != null && cause instanceof GuaguaRuntimeException) {
throw (GuaguaRuntimeException) cause;
} else {
throw new GuaguaRuntimeException(e.getCause());
}
}
@Override
public void channelDisconnected(ChannelHandlerContext ctx, ChannelStateEvent e) {
// channel is disconnected, master server is down or client connection failed.
LOG.info("Master server is down or channel client is disconnected with event {}", e);
NettyWorkerCoordinator.this.isServerShutdownOrClientDisconnect.compareAndSet(false, true);
}
}
/**
* Prepare for the next iteration; if the connection to the master was lost, re-establish it first.
*
* <p>
* When the disconnect flag is set, this method:
* <ul>
* <li>1. polls the master initialization znode until the published server address differs from the known one, or
* until {@code guagua.master.server.restart.timeout} (fallback 60s) elapses; after a timeout it stops waiting only if
* the previously known server is reported alive, otherwise it starts waiting again;</li>
* <li>2. reconnects to the master server;</li>
* <li>3. runs the fail-over command and, unless at the init iteration, reloads the master result of the resulting
* current iteration;</li>
* <li>4. advances the current iteration by one and clears the disconnect flag.</li>
* </ul>
*
* @param context
*            the worker context providing props, app id, container id and iteration state
*/
@Override
public void preIteration(final WorkerContext<MASTER_RESULT, WORKER_RESULT> context) {
if(isServerShutdownOrClientDisconnect.get()) {
// timeout in milliseconds (compared against elapsed millis below)
final long masterServerRestartTimout = NumberFormatUtils.getLong(
context.getProps().getProperty("guagua.master.server.restart.timeout"), 60 * 1000L);
// get new server address if master server is down or confirm previous server is alive.
while(true) {
// flag is written by the inner retry command, so reset it for each round
this.isTimeoutToGetMasterServerAddress = false;
// Wait for master init and get master server address.
new BasicCoordinatorCommand() {
@Override
public void doExecute() throws KeeperException, InterruptedException {
String appId = context.getAppId();
final String appMasterNode = getCurrentMasterNode(appId, GuaguaConstants.GUAGUA_INIT_STEP)
.toString();
final long start = System.nanoTime();
// wait for master restart.
new RetryCoordinatorCommand(isFixedTime(), getSleepTime()) {
String newServerAddress = null;
@Override
public boolean retryExecution() throws KeeperException, InterruptedException {
try {
// give up this round once the restart timeout has elapsed; the outer loop decides what to do next
if(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start) >= masterServerRestartTimout) {
NettyWorkerCoordinator.this.isTimeoutToGetMasterServerAddress = true;
return true;
}
this.newServerAddress = new String(getBytesFromZNode(appMasterNode, null),
Charset.forName("UTF-8"));
// a different address in the znode means a new master server has been published
boolean isServerChanged = !this.newServerAddress
.equals(NettyWorkerCoordinator.this.masterServerAddress);
if(isServerChanged) {
NettyWorkerCoordinator.this.masterServerAddress = this.newServerAddress;
}
return isServerChanged;
} catch (KeeperException.NoNodeException e) {
// to avoid log flood
if(System.nanoTime() % 10 == 0) {
LOG.warn("No such node:{}", appMasterNode);
}
return false;
}
}
}.execute();
}
}.execute();
if(NettyWorkerCoordinator.this.isTimeoutToGetMasterServerAddress) {
// address did not change within the timeout: check whether the known server is alive
String[] namePortGroup = this.masterServerAddress.split(":");
String masterServerName = namePortGroup[0];
int masterServerPort = NumberFormatUtils.getInt(namePortGroup[1]);
try {
if(NetworkUtils.isServerAlive(InetAddress.getByName(masterServerName), masterServerPort)) {
// known server is alive, reconnect to it
break;
} else {
// known server is not alive, wait again for a new address
continue;
}
} catch (UnknownHostException e) {
throw new GuaguaRuntimeException(e);
}
} else {
// a new server address has been stored in masterServerAddress
break;
}
}
// connect to new master server.
connectMasterServer();
// if server is shutdown, master is down, fail over from last master step
new FailOverCoordinatorCommand(context).execute();
// reset master result to current iteration if master is down.
if(!context.isInitIteration()) {
new BasicCoordinatorCommand() {
@Override
public void doExecute() throws KeeperException, InterruptedException {
String appId = context.getAppId();
int lastIteration = context.getCurrentIteration();
final String appMasterNode = getCurrentMasterNode(appId, lastIteration).toString();
final String appMasterSplitNode = getCurrentMasterSplitNode(appId, lastIteration).toString();
setMasterResult(context, appMasterNode, appMasterSplitNode);
}
}.execute();
}
// current iteration is last master successful iteration + 1
context.setCurrentIteration(context.getCurrentIteration() + 1);
// reset server shut down to false;
isServerShutdownOrClientDisconnect.compareAndSet(true, false);
}
LOG.info("Start itertion {} with container id {} and app id {}.", context.getCurrentIteration(),
context.getContainerId(), context.getAppId());
}
/**
* Send worker results to master; wait for current master stop; get current master result.
*
* <p>
* The whole exchange runs inside a loop. Each round first reads the latest master iteration from zookeeper:
* <ul>
* <li>If the current iteration is exactly the latest master iteration + 1 and does not exceed the total iteration
* count, the worker result is written to the client channel and the method waits until the master znode of the
* current iteration exists, the channel is reported disconnected, or the {@code guagua.worker.getresult.timeout}
* timeout elapses. On success the master result is loaded into the context. On timeout, or if the master znode was
* removed before it could be read, the round is retried.</li>
* <li>Otherwise the current iteration is switched to the latest master iteration and, unless that is the init
* iteration, the master result of that iteration is loaded; the round is retried if the znode was removed in the
* meantime.</li>
* </ul>
*
* @param context
*            the worker context providing the worker result, props and iteration state
*/
@Override
public void postIteration(final WorkerContext<MASTER_RESULT, WORKER_RESULT> context) {
// timeout in milliseconds (compared against elapsed millis below)
final long timeOutThreshold = NumberFormatUtils.getLong(
context.getProps().getProperty(GUAGUA_WORKER_GETRESULT_TIMEOUT),
GUAGUA_DEFAULT_WORKER_GETRESULT_TIMEOUT);
while(true) {
// both flags are written by the inner commands, so reset them for each round
this.isTimeoutToGetCurrentMasterResult = false;
this.isMasterZnodeCleaned = false;
// check current iteration from zookeeper latest
int latestIteraton = getLatestMasterIteration(context);
if(context.getCurrentIteration() == latestIteraton + 1
&& context.getCurrentIteration() <= context.getTotalIteration()) {
new BasicCoordinatorCommand() {
@Override
public void doExecute() throws KeeperException, InterruptedException {
String appId = context.getAppId();
int currentIteration = context.getCurrentIteration();
final String appMasterNode = getCurrentMasterNode(appId, currentIteration).toString();
// send message
// TODO do we need to send several times.
BytableWrapper workerMessage = new BytableWrapper();
workerMessage.setBytes(NettyWorkerCoordinator.this.getWorkerSerializer().objectToBytes(
context.getWorkerResult()));
workerMessage.setCurrentIteration(context.getCurrentIteration());
workerMessage.setContainerId(context.getContainerId());
workerMessage.setStopMessage(false);
LOG.debug("Message:{}", workerMessage);
// the returned write future is not inspected; a lost message is only noticed through the timeout below
NettyWorkerCoordinator.this.clientChannel.write(workerMessage);
final long start = System.nanoTime();
// wait for master computation stop
new RetryCoordinatorCommand(isFixedTime(), getSleepTime()) {
@Override
public boolean retryExecution() throws KeeperException, InterruptedException {
try {
// stop waiting once the timeout has elapsed; the outer loop then retries the round
if(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start) >= timeOutThreshold) {
NettyWorkerCoordinator.this.isTimeoutToGetCurrentMasterResult = true;
return true;
}
// done when the master znode of this iteration exists, or when the channel was disconnected
return getZooKeeper().exists(appMasterNode, false) != null
|| NettyWorkerCoordinator.this.isServerShutdownOrClientDisconnect.get();
} catch (KeeperException.NoNodeException e) {
// to avoid log flood
if(System.nanoTime() % 10 == 0) {
LOG.warn("No such node:{}", appMasterNode);
}
return false;
}
}
}.execute();
if(!NettyWorkerCoordinator.this.isTimeoutToGetCurrentMasterResult) {
LOG.info("Application {} container {} iteration {} waiting ends with {}ms execution time.",
context.getAppId(), context.getContainerId(), context.getCurrentIteration(),
TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start));
// set master result for next iteration.
// skipped when the wait ended because of a disconnect
if(!NettyWorkerCoordinator.this.isServerShutdownOrClientDisconnect.get()) {
String appMasterSplitNode = getCurrentMasterSplitNode(appId, currentIteration)
.toString();
try {
setMasterResult(context, appMasterNode, appMasterSplitNode);
} catch (KeeperException.NoNodeException e) {
// this exception may happen after checking znode existing, cleaned by master znode
NettyWorkerCoordinator.this.isMasterZnodeCleaned = true;
LOG.warn("No such node:{}", appMasterNode);
}
// NOTE(review): this is logged even when the NoNodeException branch above was taken
LOG.info("Master computation is done.");
}
}
}
}.execute();
if(NettyWorkerCoordinator.this.isTimeoutToGetCurrentMasterResult
|| NettyWorkerCoordinator.this.isMasterZnodeCleaned) {
// if time out to get master result, continue and retry the whole postIteration logic (while(true)).
continue;
} else {
// break while loop
break;
}
} else {
// current iteration is last master successful iteration
LOG.info("Application {} container {}, current iteration is switched to {}.", context.getAppId(),
context.getContainerId(), latestIteraton);
context.setCurrentIteration(latestIteraton);
// set master result
if(!context.isInitIteration()) {
new BasicCoordinatorCommand() {
@Override
public void doExecute() throws KeeperException, InterruptedException {
String appId = context.getAppId();
int lastIteration = context.getCurrentIteration();
final String appMasterNode = getCurrentMasterNode(appId, lastIteration).toString();
final String appMasterSplitNode = getCurrentMasterSplitNode(appId, lastIteration)
.toString();
try {
setMasterResult(context, appMasterNode, appMasterSplitNode);
} catch (KeeperException.NoNodeException e) {
// this exception may happen after checking znode existing, cleaned by master znode
NettyWorkerCoordinator.this.isMasterZnodeCleaned = true;
LOG.warn("No such node:{}", appMasterNode);
}
}
}.execute();
}
// break while loop or if master znode is already cleaned
if(NettyWorkerCoordinator.this.isMasterZnodeCleaned) {
// znode vanished before it could be read: retry the round with a fresh latest iteration
continue;
} else {
break;
}
}
}
}
/**
 * Send stop message to master and then clean resources.
 *
 * <p>
 * The stop message is only sent when the application finished normally, i.e. the current iteration is one past the
 * total iteration count or the last master result is a {@link HaltBytable} reporting halt. If an iteration failed,
 * this method is still called, but no stop message is sent.
 *
 * <p>
 * Cleanup always runs and every step is guarded: the client channel and the Netty client may be {@code null} (for
 * example when {@link #preApplication(WorkerContext)} failed before connecting), and a failure in one cleanup step
 * must not prevent the remaining ones, in particular the final {@code close()} call, from running.
 *
 * @param context
 *            the worker context providing iteration state, container id and the last master result
 */
@Override
public void postApplication(final WorkerContext<MASTER_RESULT, WORKER_RESULT> context) {
    new BasicCoordinatorCommand() {
        @Override
        public void doExecute() throws Exception, InterruptedException {
            try {
                final Channel channel = NettyWorkerCoordinator.this.clientChannel;
                // send stop message to server
                MASTER_RESULT masterResult = context.getLastMasterResult();
                boolean isFinished = (context.getCurrentIteration() == context.getTotalIteration() + 1)
                        || ((masterResult instanceof HaltBytable) && ((HaltBytable) masterResult).isHalt());
                // only send stop message if it is last iteration or isHalt is true, if exception in iteration,
                // guagua will stop here to call postApplication
                if(isFinished && channel != null) {
                    BytableWrapper stopMessage = new BytableWrapper();
                    stopMessage.setCurrentIteration(context.getCurrentIteration());
                    stopMessage.setContainerId(context.getContainerId());
                    stopMessage.setStopMessage(true);
                    ChannelFuture future = channel.write(stopMessage);
                    future.await(30, TimeUnit.SECONDS);
                    // wait 2s to send stop message out.
                    Thread.sleep(2 * 1000L);
                }
            } finally {
                try {
                    final Channel channel = NettyWorkerCoordinator.this.clientChannel;
                    if(channel != null) {
                        channel.close();
                    }
                    final ClientBootstrap client = NettyWorkerCoordinator.this.messageClient;
                    if(client != null) {
                        try {
                            // 'shutdown' is looked up reflectively and only invoked when the method is found.
                            Method shutDownMethod = ReflectionUtils.getMethod(client.getClass(), "shutdown");
                            if(shutDownMethod != null) {
                                shutDownMethod.invoke(client, (Object[]) null);
                            }
                        } finally {
                            // Release the Netty executors even if the reflective shutdown failed.
                            client.releaseExternalResources();
                        }
                    }
                } finally {
                    // Always run the coordinator's own close(), whatever happened above.
                    close();
                }
            }
        }
    }.execute();
}
/**
 * Reads the children of the master base znode and returns the largest iteration number among them.
 *
 * <p>
 * Children whose names are not integers are filtered out. A missing base znode is logged and treated like an empty
 * child list.
 *
 * @param context
 *            the worker context providing the app id
 * @return the highest iteration number found under the master base znode
 * @throws GuaguaRuntimeException
 *             if no iteration can be determined (no children, missing node, interruption) or if any other
 *             exception is raised while reading or parsing the children
 */
private int getLatestMasterIteration(final WorkerContext<MASTER_RESULT, WORKER_RESULT> context) {
    try {
        final String baseNode = getMasterBaseNode(context.getAppId()).toString();
        List<String> iterationNames = null;
        try {
            // Returning true drops the child; only names that parse as integers are kept.
            final Filter nonNumericFilter = new Filter() {
                @Override
                public boolean filter(String path) {
                    try {
                        Integer.parseInt(path);
                    } catch (Exception e) {
                        return true;
                    }
                    return false;
                }
            };
            iterationNames = getZooKeeper().getChildrenExt(baseNode, false, false, false, nonNumericFilter);
        } catch (KeeperException.NoNodeException e) {
            LOG.warn("No such node:{}", baseNode);
        }

        if(iterationNames != null && !iterationNames.isEmpty()) {
            // Order by numeric value rather than lexicographically, so the last element is the latest iteration.
            final Comparator<String> byIterationNumber = new Comparator<String>() {
                @Override
                public int compare(String o1, String o2) {
                    int left = Integer.parseInt(o1);
                    int right = Integer.parseInt(o2);
                    return left < right ? -1 : (left == right ? 0 : 1);
                }
            };
            Collections.sort(iterationNames, byIterationNumber);
            LOG.debug("DEBUG: master children:{}", iterationNames);
            final String latest = iterationNames.get(iterationNames.size() - 1);
            try {
                return Integer.parseInt(latest);
            } catch (NumberFormatException e) {
                throw new GuaguaRuntimeException(e);
            }
        }
    } catch (InterruptedException e) {
        // transfer interrupt state to caller thread.
        Thread.currentThread().interrupt();
    } catch (Exception e) {
        throw new GuaguaRuntimeException(e);
    }
    throw new GuaguaRuntimeException("Cannot get valid latest master iteration.");
}
}