package org.apache.mesos.hbase.scheduler; import com.google.inject.Inject; import com.google.protobuf.ByteString; import java.io.IOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.mesos.MesosSchedulerDriver; import org.apache.mesos.Protos; import org.apache.mesos.Protos.CommandInfo; import org.apache.mesos.Protos.Credential; import org.apache.mesos.Protos.Environment; import org.apache.mesos.Protos.ExecutorID; import org.apache.mesos.Protos.ExecutorInfo; import org.apache.mesos.Protos.FrameworkID; import org.apache.mesos.Protos.FrameworkInfo; import org.apache.mesos.Protos.MasterInfo; import org.apache.mesos.Protos.Offer; import org.apache.mesos.Protos.OfferID; import org.apache.mesos.Protos.Resource; import org.apache.mesos.Protos.SlaveID; import org.apache.mesos.Protos.TaskID; import org.apache.mesos.Protos.TaskInfo; import org.apache.mesos.Protos.TaskState; import org.apache.mesos.Protos.TaskStatus; import org.apache.mesos.Protos.Value; import org.apache.mesos.SchedulerDriver; import org.apache.mesos.hbase.state.AcquisitionPhase; import org.apache.mesos.hbase.state.LiveState; import org.apache.mesos.hbase.state.PersistenceException; import org.apache.mesos.hbase.state.IPersistentStateStore; import org.apache.mesos.hbase.util.DnsResolver; import java.io.UnsupportedEncodingException; import java.net.URL; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Set; import java.util.Timer; import java.util.TimerTask; import org.apache.mesos.hbase.config.HBaseFrameworkConfig; import org.apache.mesos.hbase.util.HBaseConstants; import org.apache.mesos.hbase.util.HdfsConfFileUrlJsonFinder; import org.codehaus.jackson.map.ObjectMapper; /** * HBase Mesos Framework Scheduler class implementation. * TODO: add start of https://wiki.apache.org/hadoop/Hbase/Stargate */ public class HBaseScheduler implements org.apache.mesos.Scheduler, Runnable { // TODO (elingg) remove as much logic as possible from Scheduler to clean up code private final Log log = LogFactory.getLog(HBaseScheduler.class); private static final int SECONDS_FROM_MILLIS = 1000; private final HBaseFrameworkConfig hbaseFrameworkConfig; private final LiveState liveState; private final IPersistentStateStore persistenceStore; private final DnsResolver dnsResolver; private MasterInfo masterInfo; private ObjectMapper mapper = new ObjectMapper(); @Inject public HBaseScheduler(HBaseFrameworkConfig hbaseFrameworkConfig, LiveState liveState, IPersistentStateStore persistenceStore) { this.hbaseFrameworkConfig = hbaseFrameworkConfig; this.liveState = liveState; this.persistenceStore = persistenceStore; this.dnsResolver = new DnsResolver(this, hbaseFrameworkConfig); } @Override public void disconnected(SchedulerDriver driver) { log.info("Scheduler driver disconnected"); } @Override public void error(SchedulerDriver driver, String message) { log.error("Scheduler driver error: " + message); // Currently, it's pretty hard to disambiguate this error from other causes of framework errors. // Watch MESOS-2522 which will add a reason field for framework errors to help with this. // For now the frameworkId is removed for all messages. boolean removeFrameworkId = message.contains("re-register"); suicide(removeFrameworkId); } /** * Exits the JVM process, optionally deleting Marathon's FrameworkID * from the backing persistence store. * * If `removeFrameworkId` is set, the next Marathon process elected * leader will fail to find a stored FrameworkID and invoke `register` * instead of `reregister`. This is important because on certain kinds * of framework errors (such as exceeding the framework failover timeout), * the scheduler may never re-register with the saved FrameworkID until * the leading Mesos master process is killed. */ private void suicide(Boolean removeFrameworkId) { if (removeFrameworkId) { persistenceStore.setFrameworkId(null); System.exit(9); } } @Override public void executorLost(SchedulerDriver driver, ExecutorID executorID, SlaveID slaveID, int status) { log.info("Executor lost: executorId=" + executorID.getValue() + " slaveId=" + slaveID.getValue() + " status=" + status); } @Override public void frameworkMessage(SchedulerDriver driver, ExecutorID executorID, SlaveID slaveID, byte[] data) { log.info("Framework message: executorId=" + executorID.getValue() + " slaveId=" + slaveID.getValue() + " data='" + Arrays.toString(data) + "'"); } @Override public void offerRescinded(SchedulerDriver driver, OfferID offerId) { log.info("Offer rescinded: offerId=" + offerId.getValue()); } @Override public void registered(SchedulerDriver driver, FrameworkID frameworkId, MasterInfo masterInfo) { try { persistenceStore.setFrameworkId(frameworkId); } catch (PersistenceException e) { // these are zk exceptions... we are unable to maintain state. final String msg = "Error setting framework id in persistent state"; log.error(msg, e); throw new SchedulerException(msg, e); } this.masterInfo = masterInfo; log.info("Registered framework frameworkId=" + frameworkId.getValue()); // reconcile tasks upon registration reconcileTasks(driver); } @Override public void reregistered(SchedulerDriver driver, MasterInfo masterInfo) { this.masterInfo = masterInfo; log.info("Reregistered framework: starting task reconciliation"); // reconcile tasks upon reregistration reconcileTasks(driver); } @Override public void statusUpdate(SchedulerDriver driver, TaskStatus status) { log.info(String.format( "Received status update for taskId=%s state=%s message='%s' stagingTasks.size=%d", status.getTaskId().getValue(), status.getState().toString(), status.getMessage(), liveState.getStagingTasksSize())); if (!isStagingState(status)) { liveState.removeStagingTask(status.getTaskId()); } if (isTerminalState(status)) { liveState.removeRunningTask(status.getTaskId()); persistenceStore.removeTaskId(status.getTaskId().getValue()); // Correct the phase when a task dies after the reconcile period is over if (!liveState.getCurrentAcquisitionPhase().equals(AcquisitionPhase.RECONCILING_TASKS)) { correctCurrentPhase(); } } else if (isRunningState(status)) { liveState.updateTaskForStatus(status); log.info(String.format("Current Acquisition Phase: %s", liveState .getCurrentAcquisitionPhase().toString())); switch (liveState.getCurrentAcquisitionPhase()) { case RECONCILING_TASKS: break; case START_MASTER_NODES: if (liveState.getMasterNodeSize() == HBaseConstants.TOTAL_MASTER_NODES) { // TODO (elingg) move the reload to correctCurrentPhase and make it idempotent reloadConfigsOnAllRunningTasks(driver); correctCurrentPhase(); } break; // TODO (elingg) add a configurable number of data nodes case SLAVE_NODES: reloadConfigsOnAllRunningTasks(driver); // all nodes need fetch // HBaseConstants.REGION_SERVERS_FILENAME break; } } else { log.warn(String.format("Don't know how to handle state=%s for taskId=%s", status.getState(), status.getTaskId().getValue())); } } @Override public void resourceOffers(SchedulerDriver driver, List<Offer> offers) { log.info(String.format("Received %d offers", offers.size())); // TODO (elingg) within each phase, accept offers based on the number of nodes you need boolean acceptedOffer = false; for (Offer offer : offers) { if (acceptedOffer) { driver.declineOffer(offer.getId()); } else { switch (liveState.getCurrentAcquisitionPhase()) { case RECONCILING_TASKS: log.info("Declining offers while reconciling tasks"); driver.declineOffer(offer.getId()); break; case START_MASTER_NODES: if (tryToLaunchMasterNode(driver, offer)) { acceptedOffer = true; } else { driver.declineOffer(offer.getId()); } break; case SLAVE_NODES: if (tryToLaunchSlaveNode(driver, offer)) { acceptedOffer = true; } else { driver.declineOffer(offer.getId()); } break; } } } } @Override public void slaveLost(SchedulerDriver driver, SlaveID slaveId) { log.info("Slave lost slaveId=" + slaveId.getValue()); } @Override public void run() { FrameworkInfo.Builder frameworkInfo = FrameworkInfo.newBuilder() .setName(hbaseFrameworkConfig.getFrameworkName()) .setFailoverTimeout(hbaseFrameworkConfig.getFailoverTimeout()) .setUser(hbaseFrameworkConfig.getHbaseUser()) .setRole(hbaseFrameworkConfig.getHbaseRole()) .setCheckpoint(true); try { FrameworkID frameworkID = persistenceStore.getFrameworkId(); if (frameworkID != null) { frameworkInfo.setId(frameworkID); } } catch (PersistenceException e) { final String msg = "Error recovering framework id"; log.error(msg, e); throw new SchedulerException(msg, e); } registerFramework(this, frameworkInfo.build(), hbaseFrameworkConfig.getMesosMasterUri()); } private void registerFramework(HBaseScheduler sched, FrameworkInfo fInfo, String masterUri) { Credential cred = getCredential(); if (cred != null) { log.info("Registering with credentials."); new MesosSchedulerDriver(sched, fInfo, masterUri, cred).run(); } else { log.info("Registering without authentication"); new MesosSchedulerDriver(sched, fInfo, masterUri).run(); } } private Credential getCredential() { if (hbaseFrameworkConfig.cramCredentialsEnabled()) { try { Credential.Builder credentialBuilder = Credential.newBuilder() .setPrincipal(hbaseFrameworkConfig.getPrincipal()) .setSecret(ByteString.copyFrom(hbaseFrameworkConfig.getSecret().getBytes("UTF-8"))); return credentialBuilder.build(); } catch (UnsupportedEncodingException ex) { log.error("Failed to encode secret when creating Credential."); } } return null; } private boolean launchNode(SchedulerDriver driver, Offer offer, String nodeName, String taskType, String executorName) { // nodeName is the type of executor to launch // executorName is to distinguish different types of nodes // taskType is the type of task in mesos to launch on the node // taskName is a name chosen to identify the task in mesos and mesos-dns (if used) log.info(String.format("Launching node of type %s with task %s", nodeName, taskType)); String taskIdName = String.format("%s.%s.%d", nodeName, executorName, System.currentTimeMillis()); List<Resource> resources = getExecutorResources(); ExecutorInfo executorInfo = createExecutor(taskIdName, taskType, nodeName, executorName, resources); List<Resource> taskResources = getTaskResources(taskType); String taskName = getNextTaskName(taskType); TaskID taskId = TaskID.newBuilder() .setValue(String.format("task.%s.%s", taskType, taskIdName)) .build(); TaskInfo task = TaskInfo.newBuilder() .setExecutor(executorInfo) .setName(taskName) .setTaskId(taskId) .setSlaveId(offer.getSlaveId()) .addAllResources(taskResources) .setData(ByteString.copyFromUtf8( getCommand(taskType))) .build(); liveState.addStagingTask(task.getTaskId()); persistenceStore.addHBaseNode(taskId, offer.getHostname(), taskType, taskName); driver.launchTasks(Arrays.asList(offer.getId()), Arrays.asList(task)); return true; } private String getCommand(String taskType) { if (HBaseConstants.STARGATE_NODE_ID.equals(taskType)) return String.format("bin/hbase-mesos-%s %d", taskType, hbaseFrameworkConfig.getStargateServerPort()); else return String.format("bin/hbase-mesos-%s", taskType); } private String getNextTaskName(String taskType) { if (taskType.equals(HBaseConstants.MASTER_NODE_ID)) { Collection<String> masterNodeTaskNames = persistenceStore.getPrimaryNodeTaskNames().values(); for (int i = 1; i <= HBaseConstants.TOTAL_MASTER_NODES; i++) { if (!masterNodeTaskNames.contains(HBaseConstants.MASTER_NODE_ID + i)) { return HBaseConstants.MASTER_NODE_ID + i; } } String errorStr = "Cluster is in inconsistent state. " + "Trying to launch more masternodes, but they are all already running."; log.error(errorStr); throw new SchedulerException(errorStr); } return taskType; } private ExecutorInfo createExecutor(String taskIdName, String taskType, String nodeName, String executorName, List<Resource> resources) { int confServerPort = hbaseFrameworkConfig.getConfigServerPort(); String cmd = "export JAVA_HOME=$MESOS_DIRECTORY/" + hbaseFrameworkConfig.getJreVersion() + " && env ; cd hbase-mesos-* && " + "exec `if [ -z \"$JAVA_HOME\" ]; then echo java; " + "else echo $JAVA_HOME/bin/java; fi` " + "$HADOOP_OPTS " + "$EXECUTOR_OPTS " + "-cp \"hbase-executor-uber.jar\" org.apache.mesos.hbase.executor." + executorName; return ExecutorInfo .newBuilder() .setName(nodeName + " executor") .setExecutorId(ExecutorID.newBuilder().setValue("executor." + taskIdName).build()) .addAllResources(resources) .setCommand(CommandInfo .newBuilder() .addAllUris(Arrays.asList( CommandInfo.URI .newBuilder() .setValue(String.format("http://%s:%d/%s", hbaseFrameworkConfig.getFrameworkHostAddress(), confServerPort, HBaseConstants.HBASE_BINARY_FILE_NAME)) .build(), CommandInfo.URI .newBuilder() .setValue(String.format("http://%s:%d/%s", hbaseFrameworkConfig.getFrameworkHostAddress(), confServerPort, HBaseConstants.REGION_SERVERS_FILENAME)) .build(), CommandInfo.URI .newBuilder() .setValue(String.format("http://%s:%d/%s", hbaseFrameworkConfig.getFrameworkHostAddress(), confServerPort, HBaseConstants.HBASE_CONFIG_FILE_NAME)) .build(), CommandInfo.URI .newBuilder() .setValue(getHdfsFileUrl()) .build(), CommandInfo.URI .newBuilder() .setValue(hbaseFrameworkConfig.getJreUrl()) .build())) .setEnvironment(Environment .newBuilder() .addAllVariables(Arrays.asList(Environment.Variable.newBuilder() .setName("LD_LIBRARY_PATH") .setValue(hbaseFrameworkConfig.getLdLibraryPath()).build(), Environment.Variable.newBuilder() .setName("HBASE_OPTS") .setValue(getJvmOpts(taskType)).build(), Environment.Variable .newBuilder() .setName("HBASE_HEAPSIZE") .setValue(getHeapSizeConfig(taskType)) .build()))) .setValue(cmd).build()) .build(); } private String getJvmOpts(String taskType) { if (HBaseConstants.STARGATE_NODE_ID.equals(taskType)) return hbaseFrameworkConfig.getJvmOpts(); else if (HBaseConstants.MASTER_NODE_ID.equals(taskType)) return hbaseFrameworkConfig.getJvmOpts(); else if (HBaseConstants.SLAVE_NODE_ID.equals(taskType)) return hbaseFrameworkConfig.getJvmOpts(); else return hbaseFrameworkConfig.getJvmOpts(); } private String getHeapSizeConfig(String taskType) { int heapSize = hbaseFrameworkConfig.getHadoopHeapSize(); if (null != taskType) switch (taskType) { case HBaseConstants.STARGATE_NODE_ID: heapSize = hbaseFrameworkConfig.getStargateNodeHeapSize(); break; case HBaseConstants.MASTER_NODE_ID: heapSize = hbaseFrameworkConfig.getMasterNodeHeapSize(); break; case HBaseConstants.SLAVE_NODE_ID: heapSize = hbaseFrameworkConfig.getSlaveNodeHeapSize(); break; } return String.format("%dm", heapSize); } private List<Resource> getExecutorResources() { return Arrays.asList(Resource.newBuilder() .setName("cpus") .setType(Value.Type.SCALAR) .setScalar(Value.Scalar.newBuilder() .setValue(hbaseFrameworkConfig.getExecutorCpus()).build()) .setRole(hbaseFrameworkConfig.getHbaseRole()) .build(), Resource .newBuilder() .setName("mem") .setType(Value.Type.SCALAR) .setScalar(Value.Scalar .newBuilder() .setValue(hbaseFrameworkConfig.getExecutorHeap() * hbaseFrameworkConfig.getJvmOverhead()).build()) .setRole(hbaseFrameworkConfig.getHbaseRole()) .build()); } private List<Resource> getTaskResources(String taskName) { return Arrays.asList(Resource.newBuilder() .setName("cpus") .setType(Value.Type.SCALAR) .setScalar(Value.Scalar.newBuilder() .setValue(hbaseFrameworkConfig.getTaskCpus(taskName)).build()) .setRole(hbaseFrameworkConfig.getHbaseRole()) .build(), Resource.newBuilder() .setName("mem") .setType(Value.Type.SCALAR) .setScalar(Value.Scalar.newBuilder() .setValue(hbaseFrameworkConfig.getTaskHeapSize(taskName) * hbaseFrameworkConfig.getJvmOverhead()).build()) .setRole(hbaseFrameworkConfig.getHbaseRole()) .build()); } private boolean acceptOffer(Offer offer, String nodeType, double cpu, int memory) { if (offerNotEnoughCpu(offer, cpu)) { log.info(nodeType + " node offer does not have enough cpu.\n Required " + cpu + ". (ConfNodeCpus)"); return false; } else if (offerNotEnoughMemory(offer, memory)) { double requiredMem = (memory * hbaseFrameworkConfig.getJvmOverhead()) + (hbaseFrameworkConfig.getExecutorHeap() * hbaseFrameworkConfig.getJvmOverhead()); String memLog = "Required " + requiredMem + " mem (" + nodeType + "NodeHeapSize * jvmOverhead) + (executorHeap * jvmOverhead)"; log.info(nodeType + " node offer does not have enough memory.\n" + memLog); return false; } else { return true; } } private boolean tryToLaunchMasterNode(SchedulerDriver driver, Offer offer) { if (!acceptOffer(offer, "master", hbaseFrameworkConfig.getMasterNodeCpus(), hbaseFrameworkConfig.getMasterNodeHeapSize())) return false; boolean launch = false; List<String> deadMasterNodes = persistenceStore.getDeadMasterNodes(); if (deadMasterNodes.isEmpty()) { if (persistenceStore.getPrimaryNodes().size() == HBaseConstants.TOTAL_MASTER_NODES) { log.info(String.format("Already running %s masters", HBaseConstants.TOTAL_MASTER_NODES)); } else if (persistenceStore.masterNodeRunningOnSlave(offer.getHostname())) { log.info(String.format("Already running masternode on %s", offer.getHostname())); } else if (persistenceStore.slaveNodeRunningOnSlave(offer.getHostname())) { log.info(String.format("Cannot colocate masternode and slavenode on %s", offer.getHostname())); } else { launch = true; } } else if (deadMasterNodes.contains(offer.getHostname())) { launch = true; } if (launch) { return launchNode(driver, offer, HBaseConstants.MASTER_NODE_ID, HBaseConstants.MASTER_NODE_ID, HBaseConstants.NODE_EXECUTOR_ID); } return false; } private boolean tryToLaunchSlaveNode(SchedulerDriver driver, Offer offer) { if (!acceptOffer(offer, "slave", hbaseFrameworkConfig.getMasterNodeCpus(), hbaseFrameworkConfig.getMasterNodeHeapSize())) return false; boolean launch = false; List<String> deadDataNodes = persistenceStore.getDeadDataNodes(); // TODO (elingg) Relax this constraint to only wait for DN's when the number of DN's is small // What number of DN's should we try to recover or should we remove this constraint // entirely? if (deadDataNodes.isEmpty()) { if (persistenceStore.slaveNodeRunningOnSlave(offer.getHostname()) || persistenceStore.masterNodeRunningOnSlave(offer.getHostname())) { log.info(String.format("Already running hbase task on %s", offer.getHostname())); return tryToLaunchStargateNode(driver, offer); } else { launch = true; } } else if (deadDataNodes.contains(offer.getHostname())) { launch = true; } if (launch) { return launchNode(driver, offer, HBaseConstants.SLAVE_NODE_ID, HBaseConstants.SLAVE_NODE_ID, HBaseConstants.NODE_EXECUTOR_ID); } return false; } private boolean tryToLaunchStargateNode(SchedulerDriver driver, Offer offer) { if (!acceptOffer(offer, "stargate", hbaseFrameworkConfig.getStargateNodeCpus(), hbaseFrameworkConfig.getStargateNodeHeapSize())) return false; boolean launch = false; List<String> deadStargateNodes = persistenceStore.getDeadStargateNodes(); if (deadStargateNodes.isEmpty()) { if (persistenceStore.getStargateNodes().size() >= hbaseFrameworkConfig.getStargateNodeCount()) { log.info(String.format("Already running %s stargate nodes", hbaseFrameworkConfig.getStargateNodeCount())); } else { launch = true; } } else if (deadStargateNodes.contains(offer.getHostname())) { launch = true; } if (launch) { return launchNode(driver, offer, HBaseConstants.STARGATE_NODE_ID, HBaseConstants.STARGATE_NODE_ID, HBaseConstants.NODE_EXECUTOR_ID); } return false; } public void sendMessageTo(SchedulerDriver driver, TaskID taskId, SlaveID slaveID, String message) { log.info(String.format("Sending message '%s' to taskId=%s, slaveId=%s", message, taskId.getValue(), slaveID.getValue())); String postfix = taskId.getValue(); postfix = postfix.substring(postfix.indexOf('.') + 1, postfix.length()); postfix = postfix.substring(postfix.indexOf('.') + 1, postfix.length()); driver.sendFrameworkMessage( ExecutorID.newBuilder().setValue("executor." + postfix).build(), slaveID, message.getBytes(Charset.defaultCharset())); } private boolean isTerminalState(TaskStatus taskStatus) { return taskStatus.getState().equals(TaskState.TASK_FAILED) || taskStatus.getState().equals(TaskState.TASK_FINISHED) || taskStatus.getState().equals(TaskState.TASK_KILLED) || taskStatus.getState().equals(TaskState.TASK_LOST) || taskStatus.getState().equals(TaskState.TASK_ERROR); } private boolean isRunningState(TaskStatus taskStatus) { return taskStatus.getState().equals(TaskState.TASK_RUNNING); } private boolean isStagingState(TaskStatus taskStatus) { return taskStatus.getState().equals(TaskState.TASK_STAGING); } private void reloadConfigsOnAllRunningTasks(SchedulerDriver driver) { if (hbaseFrameworkConfig.usingNativeHadoopBinaries()) { return; } for (Protos.TaskStatus taskStatus : liveState.getRunningTasks().values()) { sendMessageTo(driver, taskStatus.getTaskId(), taskStatus.getSlaveId(), HBaseConstants.RELOAD_CONFIG); } } private void correctCurrentPhase() { if (liveState.getMasterNodeSize() < HBaseConstants.TOTAL_MASTER_NODES) { liveState.transitionTo(AcquisitionPhase.START_MASTER_NODES); } else { liveState.transitionTo(AcquisitionPhase.SLAVE_NODES); } } private boolean offerNotEnoughCpu(Offer offer, double cpus) { for (Resource offerResource : offer.getResourcesList()) { if (offerResource.getName().equals("cpus") && cpus + hbaseFrameworkConfig.getExecutorCpus() > offerResource.getScalar().getValue()) { return true; } } return false; } private boolean offerNotEnoughMemory(Offer offer, int mem) { for (Resource offerResource : offer.getResourcesList()) { if (offerResource.getName().equals("mem") && (mem * hbaseFrameworkConfig.getJvmOverhead()) + (hbaseFrameworkConfig.getExecutorHeap() * hbaseFrameworkConfig.getJvmOverhead()) > offerResource.getScalar().getValue()) { return true; } } return false; } private void reconcileTasks(SchedulerDriver driver) { // TODO (elingg) run this method repeatedly with exponential backoff in the case that it takes // time for // different slaves to reregister upon master failover. driver.reconcileTasks(Collections.<Protos.TaskStatus>emptyList()); Timer timer = new Timer(); timer.schedule(new ReconcileStateTask(), hbaseFrameworkConfig.getReconciliationTimeout() * SECONDS_FROM_MILLIS); } private String getHdfsFileUrl() { if (masterInfo == null) { log.error("Invalid scheduler state - masterInfo is null"); return getHbaseConfigServerHdfsFileUrl(); } else if (hbaseFrameworkConfig.usingMesosHdfs()) { String masterStateUrl = String.format("http://%s:%d/%s", masterInfo.getHostname(), masterInfo.getPort(), "master/state.json"); try { URL url = new URL(masterStateUrl); HdfsConfFileUrlJsonFinder finder = new HdfsConfFileUrlJsonFinder(mapper); String findedUrl = finder.findUrl(url); return findedUrl; } catch (IOException e) { log.error("", e); } } else { return getHbaseConfigServerHdfsFileUrl(); } return null; } private String getHbaseConfigServerHdfsFileUrl() { return String.format("http://%s:%d/%s", hbaseFrameworkConfig.getFrameworkHostAddress(), hbaseFrameworkConfig.getConfigServerPort(), HBaseConstants.HDFS_CONFIG_FILE_NAME); } private class ReconcileStateTask extends TimerTask { @Override public void run() { log.info("Current persistent state:"); log.info(String.format("Primary Nodes: %s, %s", persistenceStore.getPrimaryNodes(), persistenceStore.getPrimaryNodeTaskNames())); log.info(String.format("Slave Nodes: %s", persistenceStore.getRegionNodes())); Set<String> taskIds = persistenceStore.getAllTaskIds(); Set<String> runningTaskIds = liveState.getRunningTasks().keySet(); for (String taskId : taskIds) { if (taskId != null && !runningTaskIds.contains(taskId)) { log.info("Removing task id: " + taskId); persistenceStore.removeTaskId(taskId); } } correctCurrentPhase(); } } }