/* * Copyright [2013-2014] PayPal Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ml.shifu.guagua.master; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; import ml.shifu.guagua.GuaguaConstants; import ml.shifu.guagua.io.Bytable; import ml.shifu.guagua.util.NumberFormatUtils; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.ZooDefs.Ids; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * {@link SyncMasterCoordinator} is used to as a barrier for each iteration. * * <p> * For each iteration, {@link SyncMasterCoordinator} will wait until all workers are done. * * <p> * To start a new iteration, {@link SyncMasterCoordinator} will write a znode for each iteration like * '/_guagua/job_201312041304_189025/master/{currentIteration}' with with {@link MasterComputable} MASTER_RESULT, * WORKER_RESULT as its data. This is like a signal to notify workers. * * <p> * Workers are waiting on current master znode, if got current master znode, it will start another iteration * * @param <MASTER_RESULT> * master result for computation in each iteration. * @param <WORKER_RESULT> * worker result for computation in each iteration. */ // TODO metrics in our system // TODO guagua-hadoop for guagua-mapreduce and guagua-yarn // TODO set all time info to counter for mapreduce??? public class SyncMasterCoordinator<MASTER_RESULT extends Bytable, WORKER_RESULT extends Bytable> extends AbstractMasterCoordinator<MASTER_RESULT, WORKER_RESULT> { private static final Logger LOG = LoggerFactory.getLogger(SyncMasterCoordinator.class); @Override public void preApplication(final MasterContext<MASTER_RESULT, WORKER_RESULT> context) { // initialize zookeeper and other props initialize(context.getProps()); // Master election which is used here to use the same zookeeper instance. if(NumberFormatUtils.getInt(context.getProps().getProperty(GuaguaConstants.GUAGUA_MASTER_NUMBER), GuaguaConstants.DEFAULT_MASTER_NUMBER) > 1) { new MasterElectionCommand(context.getAppId()).execute(); } new FailOverCommand(context).execute(); if(context.getCurrentIteration() != GuaguaConstants.GUAGUA_INIT_STEP) { // if not init step, return, because of no need initialize twice for fail-over task return; } new BasicCoordinatorCommand() { @Override public void doExecute() throws KeeperException, InterruptedException { String appId = context.getAppId(); int currentIteration = context.getCurrentIteration(); final String appWorkersNode = getWorkerBaseNode(appId, currentIteration).toString(); final int workers = context.getWorkers(); new RetryCoordinatorCommand(isFixedTime(), getSleepTime()) { @Override public boolean retryExecution() throws KeeperException, InterruptedException { try { List<String> childrenExt = getZooKeeper().getChildrenExt(appWorkersNode, false, false, false); int initDoneWorkers = childrenExt.size(); // to avoid log flood if(System.nanoTime() % 20 == 0) { LOG.info("workers already initialized: {}, still {} workers are not synced.", initDoneWorkers, (context.getWorkers() - initDoneWorkers)); LOG.info("DEBUG: left workers:{}", notInList(childrenExt, context.getWorkers())); } return isTerminated(initDoneWorkers, workers, context.getMinWorkersRatio(), context.getMinWorkersTimeOut()); } catch (KeeperException.NoNodeException e) { // to avoid log flood if(System.nanoTime() % 10 == 0) { LOG.warn("No such node:{}", appWorkersNode); } return false; } } }.execute(); LOG.info("All workers are initiliazed successfully."); String znode = null; try { // create worker znode 1: '/_guagua/<jobId>/workers/1' to avoid re-create znode from workers znode = getWorkerBaseNode(context.getAppId(), context.getCurrentIteration() + 1).toString(); getZooKeeper().createExt(znode, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT, false); // create master init znode znode = getMasterBaseNode(context.getAppId()).toString(); getZooKeeper().createExt(znode, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT, false); znode = getCurrentMasterNode(context.getAppId(), context.getCurrentIteration()).toString(); getZooKeeper().createExt(znode, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT, false); } catch (KeeperException.NodeExistsException e) { LOG.warn("Node exists: {}", znode); } } }.execute(); } @Override public void preIteration(final MasterContext<MASTER_RESULT, WORKER_RESULT> context) { new BasicCoordinatorCommand() { @Override public void doExecute() throws KeeperException, InterruptedException { // wait All Workers Done final int currentIteration = context.getCurrentIteration(); final int workers = context.getWorkers(); final String appCurrentWorkersNode = getWorkerBaseNode(context.getAppId(), currentIteration).toString(); long start = System.nanoTime(); new RetryCoordinatorCommand(isFixedTime(), getSleepTime()) { @Override public boolean retryExecution() throws KeeperException, InterruptedException { try { List<String> workerChildern = getZooKeeper().getChildrenExt(appCurrentWorkersNode, false, false, false); int workersCompleted = workerChildern.size(); // to avoid log flood if(System.nanoTime() % 20 == 0) { LOG.info("iteration {}, workers compelted: {}, still {} workers are not synced.", currentIteration, workersCompleted, (workers - workersCompleted)); LOG.info("DEBUG: left workers:{}", notInList(workerChildern, context.getWorkers())); } return isTerminated(workersCompleted, workers, context.getMinWorkersRatio(), context.getMinWorkersTimeOut()); } catch (KeeperException.NoNodeException e) { // to avoid log flood if(System.nanoTime() % 10 == 0) { LOG.warn("No such node:{}", appCurrentWorkersNode); } return false; } } }.execute(); LOG.info("Application {} container {} iteration {} waiting ends with {}ms execution time.", context.getAppId(), context.getContainerId(), context.getCurrentIteration(), TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start)); setWorkerResults(context, appCurrentWorkersNode, context.getAppId(), currentIteration); } }.execute(); } /** * This is a debug method used by master to check which partition still not synced. Shouldn't be used outside. */ private static List<Integer> notInList(List<String> inputs, int num) { List<Integer> set = new ArrayList<Integer>(); for(int i = 1; i <= num; i++) { set.add(Integer.valueOf(i)); } for(String string: inputs) { Integer n = Integer.valueOf(string); set.remove(n); } Collections.sort(set); return set; } }