/*
* Copyright [2013-2014] PayPal Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ml.shifu.guagua.master;
import java.util.List;
import java.util.concurrent.TimeUnit;
import ml.shifu.guagua.GuaguaConstants;
import ml.shifu.guagua.io.Bytable;
import ml.shifu.guagua.util.NumberFormatUtils;
import ml.shifu.guagua.util.ProgressLock;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.Watcher.Event.KeeperState;
import org.apache.zookeeper.ZooDefs.Ids;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* {@link AsyncMasterCoordinator} is used as a barrier for each iteration.
*
* <p>
* For each iteration, {@link AsyncMasterCoordinator} will wait until all workers are done.
*
* <p>
* To start a new iteration, {@link AsyncMasterCoordinator} will write a znode for each iteration like
* '/_guagua/job_201312041304_189025/master/{currentIteration}' with the {@link MasterComputable} result as its data.
* This is like a signal to notify workers.
*
* <p>
* Workers wait on the current master znode; once a worker gets that znode, it starts another iteration.
*
* @param <MASTER_RESULT>
* master result for computation in each iteration.
* @param <WORKER_RESULT>
* worker result for computation in each iteration.
*/
public class AsyncMasterCoordinator<MASTER_RESULT extends Bytable, WORKER_RESULT extends Bytable> extends
        AbstractMasterCoordinator<MASTER_RESULT, WORKER_RESULT> {

    private static final Logger LOG = LoggerFactory.getLogger(AsyncMasterCoordinator.class);

    /**
     * Current iteration. Written through {@link #setCurrentIteration(int)} and read inside
     * {@link #process(WatchedEvent)}; declared {@code volatile} so that a watcher callback running on a different
     * thread than the writer observes the latest value.
     */
    private volatile int currentIteration;

    /**
     * Current app id. Written through {@link #setAppId(String)} and read inside {@link #process(WatchedEvent)};
     * {@code volatile} for the same visibility reason as {@link #currentIteration}.
     */
    private volatile String appId;

    /**
     * Lock is used to check register info from all workers.
     */
    protected ProgressLock workerInitLock = new ProgressLock();

    /**
     * Lock is used to check iteration info from all workers.
     */
    protected ProgressLock workerIterationLock = new ProgressLock();

    /**
     * Handles ZooKeeper watch events.
     *
     * <p>
     * A null-path event of type {@link EventType#None} is a connection state notification: on
     * {@link KeeperState#SyncConnected} the connection latch is counted down, any other state is only logged.
     *
     * <p>
     * A {@link EventType#NodeChildrenChanged} event on the worker base node of the current iteration signals the
     * waiting master: {@link #workerInitLock} when the current iteration is 0, {@link #workerIterationLock}
     * otherwise. All other events are ignored.
     *
     * @param event
     *            the event delivered by ZooKeeper.
     */
    @Override
    public void process(WatchedEvent event) {
        LOG.debug("DEBUG: process: Got a new event, path = {}, type = {}, state = {}", event.getPath(),
                event.getType(), event.getState());

        if((event.getPath() == null) && (event.getType() == EventType.None)) {
            if(event.getState() == KeeperState.SyncConnected) {
                LOG.info("process: Asynchronous connection complete.");
                super.getZkConnLatch().countDown();
            } else {
                LOG.warn("process: Got unknown null path event {}", event);
            }
            return;
        }

        // Check lock signal condition.
        String appWorkerBaseNode = getWorkerBaseNode(getAppId(), getCurrentIteration()).toString();
        // Compare from the known non-null side so an event with a null path but a non-None type is ignored
        // instead of raising a NullPointerException inside the watcher callback.
        if(appWorkerBaseNode.equals(event.getPath()) && (event.getType() == EventType.NodeChildrenChanged)) {
            if(getCurrentIteration() == 0) {
                this.workerInitLock.signal();
            } else {
                this.workerIterationLock.signal();
            }
        }
    }

    /**
     * @return the iteration this coordinator is currently waiting on.
     */
    public int getCurrentIteration() {
        return currentIteration;
    }

    /**
     * @param currentIteration
     *            the iteration whose worker base node should be matched in {@link #process(WatchedEvent)}.
     */
    public void setCurrentIteration(int currentIteration) {
        this.currentIteration = currentIteration;
    }

    /**
     * @return the application id used to build znode paths.
     */
    public String getAppId() {
        return appId;
    }

    /**
     * @param appId
     *            the application id used to build znode paths.
     */
    public void setAppId(String appId) {
        this.appId = appId;
    }

    /**
     * Initializes the coordinator, optionally runs master election, runs fail-over detection and, when the context
     * is still at the init step, blocks until the workers have registered and then creates the bootstrap znodes
     * (worker base node for the next iteration, master base node, master node of the current iteration).
     *
     * @param context
     *            master context providing properties, app id, worker count and current iteration.
     */
    @Override
    public void preApplication(final MasterContext<MASTER_RESULT, WORKER_RESULT> context) {
        initialize(context.getProps());
        this.setAppId(context.getAppId());

        // Master election which is used here to use the same zookeeper instance.
        if(NumberFormatUtils.getInt(context.getProps().getProperty(GuaguaConstants.GUAGUA_MASTER_NUMBER),
                GuaguaConstants.DEFAULT_MASTER_NUMBER) > 1) {
            new MasterElectionCommand(context.getAppId()).execute();
        }

        // Check last successful iteration
        new FailOverCommand(context).execute();

        if(context.getCurrentIteration() != GuaguaConstants.GUAGUA_INIT_STEP) {
            // if not init step, return, because of no need initialize twice for fail-over task
            return;
        }

        new BasicCoordinatorCommand() {
            @Override
            public void doExecute() throws KeeperException, InterruptedException {
                final String appWorkersNode = getWorkerBaseNode(context.getAppId(), context.getCurrentIteration())
                        .toString();

                new RetryCoordinatorCommand(isFixedTime(), getSleepTime()) {
                    @Override
                    public boolean retryExecution() throws KeeperException, InterruptedException {
                        try {
                            // first check with the second argument false (to avoid re-watching)
                            List<String> children = getZooKeeper().getChildrenExt(appWorkersNode, false, false, false);
                            int size = children == null ? 0 : children.size();
                            if(isTerminated(size, context.getWorkers(), context.getMinWorkersRatio(),
                                    context.getMinWorkersTimeOut())) {
                                return true;
                            }

                            // second check with the second argument true, right before blocking on the lock
                            children = getZooKeeper().getChildrenExt(appWorkersNode, true, false, false);
                            size = children == null ? 0 : children.size();
                            if(isTerminated(size, context.getWorkers(), context.getMinWorkersRatio(),
                                    context.getMinWorkersTimeOut())) {
                                return true;
                            }

                            // to avoid log flood
                            if(System.nanoTime() % 20 == 0) {
                                LOG.info("workers already initialized: {}, still {} workers are not synced.", size,
                                        (context.getWorkers() - size));
                            }

                            // block until process(...) signals a children change, then re-arm the lock
                            AsyncMasterCoordinator.this.workerInitLock.waitForever();
                            AsyncMasterCoordinator.this.workerInitLock.reset();
                        } catch (KeeperException.NoNodeException e) {
                            // to avoid log flood
                            if(System.nanoTime() % 10 == 0) {
                                LOG.warn("No such node:{}", appWorkersNode);
                            }
                        }
                        return false;
                    }
                }.execute();

                LOG.info("All workers are initiliazed successfully.");

                // Each znode is created independently: an already existing node must not prevent the creation of
                // the following ones, otherwise the master init znode could be missing after a partial init.

                // create worker znode 1: '/_guagua/<jobId>/workers/1' to avoid re-create znode from workers
                AsyncMasterCoordinator.this.createPersistentZnodeIfAbsent(getWorkerBaseNode(context.getAppId(),
                        context.getCurrentIteration() + 1).toString());
                // create master init znode
                AsyncMasterCoordinator.this.createPersistentZnodeIfAbsent(getMasterBaseNode(context.getAppId())
                        .toString());
                AsyncMasterCoordinator.this.createPersistentZnodeIfAbsent(getCurrentMasterNode(context.getAppId(),
                        context.getCurrentIteration()).toString());
            }
        }.execute();
    }

    /**
     * Creates a persistent znode with no data and an open ACL; an already existing node is logged and tolerated.
     *
     * @param znode
     *            absolute path of the znode to create.
     * @throws KeeperException
     *             for any ZooKeeper failure other than the node already existing.
     * @throws InterruptedException
     *             if the calling thread is interrupted while talking to ZooKeeper.
     */
    private void createPersistentZnodeIfAbsent(String znode) throws KeeperException, InterruptedException {
        try {
            getZooKeeper().createExt(znode, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT, false);
        } catch (KeeperException.NodeExistsException e) {
            LOG.warn("Node exists: {}", znode);
        }
    }

    /**
     * Records the current iteration, blocks until the workers of that iteration are done (as decided by
     * {@code isTerminated}), logs the waiting time and then loads the worker results into the context.
     *
     * @param context
     *            master context providing app id, container id, worker count and current iteration.
     */
    @Override
    public void preIteration(final MasterContext<MASTER_RESULT, WORKER_RESULT> context) {
        // must be set before waiting: process(...) uses it to match the watched worker base node
        this.setCurrentIteration(context.getCurrentIteration());

        new BasicCoordinatorCommand() {
            @Override
            public void doExecute() throws KeeperException, InterruptedException {
                // wait All Workers Done
                final int currentIteration = context.getCurrentIteration();
                final int workers = context.getWorkers();
                final String appCurrentWorkersNode = getWorkerBaseNode(context.getAppId(), currentIteration).toString();

                long start = System.nanoTime();

                // wait to get all workers results.
                new RetryCoordinatorCommand(isFixedTime(), getSleepTime()) {
                    @Override
                    public boolean retryExecution() throws KeeperException, InterruptedException {
                        try {
                            List<String> workerChildren = getZooKeeper().getChildrenExt(appCurrentWorkersNode, false,
                                    false, false);
                            int size = workerChildren == null ? 0 : workerChildren.size();
                            if(isTerminated(size, context.getWorkers(), context.getMinWorkersRatio(),
                                    context.getMinWorkersTimeOut())) {
                                return true;
                            }

                            // second check with the second argument true, right before blocking on the lock
                            workerChildren = getZooKeeper().getChildrenExt(appCurrentWorkersNode, true, false, false);
                            size = workerChildren == null ? 0 : workerChildren.size();
                            if(isTerminated(size, context.getWorkers(), context.getMinWorkersRatio(),
                                    context.getMinWorkersTimeOut())) {
                                return true;
                            }

                            // to avoid log flood
                            if(System.nanoTime() % 20 == 0) {
                                LOG.info("iteration {}, workers compelted: {}, still {} workers are not synced.",
                                        currentIteration, size, (workers - size));
                            }

                            // block until process(...) signals a children change, then re-arm the lock
                            AsyncMasterCoordinator.this.workerIterationLock.waitForever();
                            AsyncMasterCoordinator.this.workerIterationLock.reset();
                        } catch (KeeperException.NoNodeException e) {
                            // to avoid log flood
                            if(System.nanoTime() % 10 == 0) {
                                LOG.warn("No such node:{}", appCurrentWorkersNode);
                            }
                        }
                        return false;
                    }
                }.execute();

                LOG.info("Application {} container {} iteration {} waiting ends with {}ms execution time.",
                        context.getAppId(), context.getContainerId(), context.getCurrentIteration(),
                        TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start));

                // wait until worker results are set from zookeeper znodes.
                setWorkerResults(context, appCurrentWorkersNode, context.getAppId(), currentIteration);
            }
        }.execute();
    }
}