/*
* Copyright [2013-2014] PayPal Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ml.shifu.guagua.master;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import ml.shifu.guagua.BasicCoordinator;
import ml.shifu.guagua.GuaguaConstants;
import ml.shifu.guagua.GuaguaRuntimeException;
import ml.shifu.guagua.coordinator.zk.GuaguaZooKeeper.Filter;
import ml.shifu.guagua.io.Bytable;
import ml.shifu.guagua.io.HaltBytable;
import ml.shifu.guagua.util.NumberFormatUtils;
import ml.shifu.guagua.util.StringUtils;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs.Ids;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* {@link AbstractMasterCoordinator} has some common implementation for both async and sync worker coordinator.
*
* <p>
* Common functions include: znodes cleaning up, fail-over support and others.
*
* @param <MASTER_RESULT>
* master result for computation in each iteration.
* @param <WORKER_RESULT>
* worker result for computation in each iteration.
*/
public abstract class AbstractMasterCoordinator<MASTER_RESULT extends Bytable, WORKER_RESULT extends Bytable> extends
BasicCoordinator<MASTER_RESULT, WORKER_RESULT> implements MasterInterceptor<MASTER_RESULT, WORKER_RESULT> {
private static final Logger LOG = LoggerFactory.getLogger(AbstractMasterCoordinator.class);
private String myBid;
@Override
public void postIteration(final MasterContext<MASTER_RESULT, WORKER_RESULT> context) {
new BasicCoordinatorCommand() {
@Override
public void doExecute() throws KeeperException, InterruptedException {
// update master halt status.
// commented this line: since 0.5.0 only master will be allowed to halt whole guagua app, no matter what
// halt status in worker result, it will be ingnored.
updateMasterHaltStatus(context);
// create worker znode in next iteration: '/_guagua/<jobId>/workers/2' to avoid re-create znode from
// workers
String workerBaseNode = null;
try {
workerBaseNode = getWorkerBaseNode(context.getAppId(), context.getCurrentIteration() + 1)
.toString();
getZooKeeper().createExt(workerBaseNode, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT, false);
} catch (KeeperException.NodeExistsException e) {
LOG.warn("Node exists: {}", workerBaseNode);
}
// create master znode
boolean isSplit = false;
String appCurrentMasterNode = getCurrentMasterNode(context.getAppId(), context.getCurrentIteration())
.toString();
String appCurrentMasterSplitNode = getCurrentMasterSplitNode(context.getAppId(),
context.getCurrentIteration()).toString();
try {
byte[] bytes = getMasterSerializer().objectToBytes(context.getMasterResult());
isSplit = setBytesToZNode(appCurrentMasterNode, appCurrentMasterSplitNode, bytes,
CreateMode.PERSISTENT);
} catch (KeeperException.NodeExistsException e) {
LOG.warn("Has such node:", e);
}
// remove -2 znode, no need, -1 is needed for fail-over.
if(context.getCurrentIteration() >= 2) {
String znode = getMasterNode(context.getAppId(), context.getCurrentIteration() - 2).toString();
try {
getZooKeeper().deleteExt(znode, -1, false);
if(isSplit) {
znode = getCurrentMasterSplitNode(context.getAppId(), context.getCurrentIteration() - 2)
.toString();
getZooKeeper().deleteExt(znode, -1, true);
}
} catch (KeeperException.NoNodeException e) {
if(System.nanoTime() % 20 == 0) {
LOG.warn("No such node:{}", znode);
}
}
}
LOG.info("master results write to znode.");
}
}.execute();
}
@Override
public void postApplication(final MasterContext<MASTER_RESULT, WORKER_RESULT> context) {
new BasicCoordinatorCommand() {
@Override
public void doExecute() throws KeeperException, InterruptedException {
try {
// if clean up zk znodes cost two much running time, one can set zk cleanup flag. But to make sure
// clean the znodes manually after application.
String zkCleanUpEnabled = StringUtils.get(
context.getProps().getProperty(GuaguaConstants.GUAGUA_ZK_CLEANUP_ENABLE),
GuaguaConstants.GUAGUA_ZK_DEFAULT_CLEANUP_VALUE);
String appId = context.getAppId();
boolean isLastMaster = true;
if(NumberFormatUtils.getInt(context.getProps().getProperty(GuaguaConstants.GUAGUA_MASTER_NUMBER),
GuaguaConstants.DEFAULT_MASTER_NUMBER) > 1) {
String masterElectionPath = getBaseMasterElectionNode(appId).toString();
List<String> masterElectionNodes = getZooKeeper().getChildrenExt(masterElectionPath, false,
true, true);
isLastMaster = isLastMaster(masterElectionNodes);
}
if(isLastMaster && Boolean.TRUE.toString().equalsIgnoreCase(zkCleanUpEnabled)) {
final int currentIteration = context.getCurrentIteration();
final int workers = context.getWorkers();
final String endWorkersNode = getWorkerBaseNode(appId, currentIteration).toString();
new RetryCoordinatorCommand(isFixedTime(), getSleepTime()) {
@Override
public boolean retryExecution() throws KeeperException, InterruptedException {
try {
List<String> workerChildern = getZooKeeper().getChildrenExt(endWorkersNode, false,
false, true);
int workersEndCompleted = workerChildern.size();
// to avoid log flood
if(System.nanoTime() % 10 == 0) {
LOG.info("iteration {}, workers ended: {}, still {} workers are not synced.",
currentIteration, workersEndCompleted, (workers - workersEndCompleted));
}
return workers == workersEndCompleted;
} catch (KeeperException.NoNodeException e) {
// to avoid log flood
if(System.nanoTime() % 10 == 0) {
LOG.warn("No such node:{}", endWorkersNode);
}
return false;
}
}
}.execute();
// delete app znode
String appNode = getAppNode(appId).toString();
try {
getZooKeeper().deleteExt(appNode, -1, true);
} catch (KeeperException.NoNodeException e) {
if(System.nanoTime() % 20 == 0) {
LOG.warn("No such node:{}", appNode);
}
}
}
} finally {
close();
}
}
private boolean isLastMaster(List<String> masterElectionNodes) {
return masterElectionNodes == null || masterElectionNodes.size() == 0
|| masterElectionNodes.get(masterElectionNodes.size() - 1).equals(getMyBid());
}
}.execute();
}
/**
* {@link FailOverCommand} is used to read last iteration before task failed.
*
* <p>
* To read last iteration, just read all iterations from master znodes and get the maximal one.
*
* <p>
* Master znodes should be set as persistent type.
*/
protected class FailOverCommand extends BasicCoordinatorCommand {
private final MasterContext<MASTER_RESULT, WORKER_RESULT> context;
public FailOverCommand(MasterContext<MASTER_RESULT, WORKER_RESULT> context) {
this.context = context;
}
@Override
public void doExecute() throws KeeperException, InterruptedException {
String masterBaseNode = getMasterBaseNode(context.getAppId()).toString();
List<String> masterIterations = null;
try {
masterIterations = getZooKeeper().getChildrenExt(masterBaseNode, false, false, false, new Filter() {
@Override
public boolean filter(String path) {
try {
Integer.parseInt(path);
return false;
} catch (Exception e) {
return true;
}
}
});
} catch (KeeperException.NoNodeException e) {
LOG.warn("No such node:{}", masterBaseNode);
}
if(masterIterations != null && masterIterations.size() > 0) {
Collections.sort(masterIterations, new Comparator<String>() {
@Override
public int compare(String o1, String o2) {
return Integer.valueOf(o1).compareTo(Integer.valueOf(o2));
}
});
LOG.info("DEBUG: master children:{}", masterIterations);
try {
int restartedIteration = Integer.valueOf(masterIterations.get(masterIterations.size() - 1));
this.context.setCurrentIteration(restartedIteration);
LOG.info("Container {} restarted at: {} step.", context.getContainerId(), restartedIteration);
} catch (NumberFormatException e) {
this.context.setCurrentIteration(GuaguaConstants.GUAGUA_INIT_STEP);
}
}
}
}
/**
* Set worker results from znodes.
*/
protected void setWorkerResults(final MasterContext<MASTER_RESULT, WORKER_RESULT> context,
final String appCurrentWorkersNode, final String appId, final int iteration) throws KeeperException,
InterruptedException {
// No need to get data from init step since in that step there is no results setting.
if(context.getCurrentIteration() == GuaguaConstants.GUAGUA_INIT_STEP) {
return;
}
final List<String> workerChildern = getZooKeeper().getChildrenExt(appCurrentWorkersNode, false, false, false);
context.setWorkerResults(new Iterable<WORKER_RESULT>() {
@Override
public Iterator<WORKER_RESULT> iterator() {
return new Iterator<WORKER_RESULT>() {
private Iterator<String> itr;
private volatile AtomicBoolean isStart = new AtomicBoolean();
@Override
public boolean hasNext() {
if(this.isStart.compareAndSet(false, true)) {
this.itr = workerChildern.iterator();
}
boolean hasNext = this.itr.hasNext();
if(!hasNext) {
// to make sure it can be iterated again, it shouldn't be a good case for iterator, we will
// iterate again to check if all workers are halt.
this.itr = workerChildern.iterator();
return false;
}
return hasNext;
}
@Override
public WORKER_RESULT next() {
String worker = this.itr.next();
String appCurrentWorkerSplitNode = getCurrentWorkerSplitNode(appId, worker, iteration)
.toString();
byte[] data = null;
try {
data = getBytesFromZNode(appCurrentWorkersNode + GuaguaConstants.ZOOKEEPER_SEPARATOR
+ worker, appCurrentWorkerSplitNode);
} catch (KeeperException e) {
throw new GuaguaRuntimeException(e);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
if(data != null) {
WORKER_RESULT workerResult = getWorkerSerializer().bytesToObject(data,
context.getWorkerResultClassName());
return workerResult;
}
return null;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
});
}
/**
* Check whether GuaguaConstants.GUAGUA_WORKER_HALT_ENABLE) is enabled, if yes, check whether all workers are halted
* and update master status.
*/
protected void updateMasterHaltStatus(final MasterContext<MASTER_RESULT, WORKER_RESULT> context) {
MASTER_RESULT result = context.getMasterResult();
// a switch to make all workers have the right to terminate the application
if(Boolean.TRUE.toString().equalsIgnoreCase(
context.getProps().getProperty(GuaguaConstants.GUAGUA_WORKER_HALT_ENABLE,
GuaguaConstants.GUAGUA_WORKER_DEFAULT_HALT_ENABLE))) {
if(isAllWorkersHalt(context.getWorkerResults()) && result instanceof HaltBytable) {
((HaltBytable) result).setHalt(true);
context.setMasterResult(result);
}
}
}
protected void setMasterResult(final MasterContext<MASTER_RESULT, WORKER_RESULT> context,
final String appMasterNode, final String appMasterSplitNode) throws KeeperException, InterruptedException {
if(context.getCurrentIteration() == GuaguaConstants.GUAGUA_INIT_STEP) {
return;
}
byte[] data = getBytesFromZNode(appMasterNode, appMasterSplitNode);
if(data != null && data.length > 0) {
MASTER_RESULT lastMasterResult = getMasterSerializer().bytesToObject(data,
context.getMasterResultClassName());
context.setMasterResult(lastMasterResult);
}
}
/**
* Check whether all workers are halted.
*/
protected boolean isAllWorkersHalt(final Iterable<WORKER_RESULT> workerResults) {
// This boolean is for a bug, if no element in worker results, return true is not correct, should return false.
boolean isHasWorkerResults = false;
for(WORKER_RESULT workerResult: workerResults) {
isHasWorkerResults = true;
if(!(workerResult instanceof HaltBytable) || !((HaltBytable) workerResult).isHalt()) {
return false;
}
}
return isHasWorkerResults ? true : false;
}
/**
* Elect master from several backup masters.
*
* <p>
* Wait until it is the first bid, then it is elected as master.
*
* <p>
* Since fail-over in Hadoop map-reduce tasks is very fast. Using multiple-master is not a good choice especially
* time out is too large.
*
* <p>
* Multiple masters are used in environment in which no fail-over.
*/
protected class MasterElectionCommand extends BasicCoordinatorCommand {
private final String appId;
public MasterElectionCommand(String appId) {
this.appId = appId;
}
@Override
public void doExecute() throws KeeperException, InterruptedException {
final String masterElectionPath = getBaseMasterElectionNode(this.appId).toString();
String masterElectionNode = getMasterElectionNode(this.appId, getZooKeeper().getZooKeeper().getSessionId())
.toString();
try {
getZooKeeper().createExt(masterElectionPath, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT, true);
} catch (KeeperException.NodeExistsException e) {
LOG.warn("Node exists: {}", masterElectionPath);
}
setMyBid(getZooKeeper().createExt(masterElectionNode, null, Ids.OPEN_ACL_UNSAFE,
CreateMode.EPHEMERAL_SEQUENTIAL, true));
new RetryCoordinatorCommand(isFixedTime(), getSleepTime()) {
@Override
public boolean retryExecution() throws KeeperException, InterruptedException {
List<String> masterChildArr = getZooKeeper().getChildrenExt(masterElectionPath, false, true, true);
// to avoid log flood
if(System.nanoTime() % 20 == 0) {
LOG.info("becomeMaster: First child is '{}' and my bid is '{}'", masterChildArr.get(0),
getMyBid());
}
return masterChildArr.get(0).equals(getMyBid());
}
}.execute();
LOG.info("Become master.");
}
}
public String getMyBid() {
return myBid;
}
public void setMyBid(String myBid) {
this.myBid = myBid;
}
}