/**
* Copyright (C) 2015 meltmedia (christian.trimble@meltmedia.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.meltmedia.dropwizard.etcd.cluster;
import java.math.RoundingMode;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Function;
import java.util.function.Supplier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.google.common.collect.Lists;
import com.google.common.math.IntMath;
import com.meltmedia.dropwizard.etcd.cluster.ClusterAssignmentTracker.AssignmentState;
import com.meltmedia.dropwizard.etcd.cluster.ClusterStateTracker.State;
import com.meltmedia.dropwizard.etcd.cluster.ProcessorStateTracker.ProcessorState;
import com.meltmedia.dropwizard.etcd.json.EtcdDirectoryDao;
import com.meltmedia.dropwizard.etcd.json.EtcdDirectoryException;
import com.meltmedia.dropwizard.etcd.json.EtcdJson.MappedEtcdDirectory;
import com.meltmedia.dropwizard.etcd.json.RunnableWithException;
public class ClusterAssignmentService {
private static Logger logger = LoggerFactory.getLogger(ClusterAssignmentService.class);
public static class Builder {
private ScheduledExecutorService executor;
private ClusterNode thisNode;
private MappedEtcdDirectory<ClusterProcess> processDir;
private ClusterStateTracker stateTracker;
private Optional<FixedDelay> crashCleanupDelay = Optional.empty();
private MetricRegistry registry;
private Supplier<AssignmentState> assignmentState;
Function<String, String> metricName;
private ProcessorStateTracker processorState;
public Builder withExecutor(ScheduledExecutorService executor) {
this.executor = executor;
return this;
}
public Builder withProcessDir(MappedEtcdDirectory<ClusterProcess> processDir) {
this.processDir = processDir;
return this;
}
public Builder withThisNode(ClusterNode thisNode) {
this.thisNode = thisNode;
return this;
}
public Builder withClusterState(ClusterStateTracker stateTracker) {
this.stateTracker = stateTracker;
return this;
}
public Builder withProcessorState(ProcessorStateTracker processorState ) {
this.processorState = processorState;
return this;
}
public Builder withCrashCleanupDelay(FixedDelay crashCleanupDelay) {
this.crashCleanupDelay = Optional.ofNullable(crashCleanupDelay);
return this;
}
public Builder withMetricRegistry( MetricRegistry registry ) {
this.registry = registry;
return this;
}
public Builder withMetricName( Function<String, String> metricName ) {
this.metricName = metricName;
return this;
}
public Builder withAssignmentState( Supplier<AssignmentState> assignmentState ) {
this.assignmentState = assignmentState;
return this;
}
private static FixedDelay DEFAULT_DELAY = FixedDelay.builder()
.withDelay(10)
.withInitialDelay(10)
.withTimeUnit(TimeUnit.SECONDS)
.build();
public ClusterAssignmentService build() {
if( metricName == null ) {
String dirName = processDir.getName().replace('.', '_');
metricName = name->MetricRegistry.name(ClusterAssignmentService.class, dirName, name);
}
if( registry == null ) throw new IllegalStateException("metric registry is required");
if( assignmentState == null ) throw new IllegalStateException("assignment state is required");
if( processorState == null ) throw new IllegalStateException("processor state is required");
return new ClusterAssignmentService(executor, thisNode, processDir, stateTracker, processorState, assignmentState,
crashCleanupDelay.orElse(DEFAULT_DELAY),
registry, metricName);
}
}
public static Builder builder() {
return new Builder();
}
public String getId() {
return thisNode.getId();
}
public ClusterAssignmentService(ScheduledExecutorService executor, ClusterNode thisNode,
MappedEtcdDirectory<ClusterProcess> processDir, ClusterStateTracker stateTracker, ProcessorStateTracker processorState,
Supplier<AssignmentState> assignmentState, FixedDelay crashCleanupDelay, MetricRegistry registry, Function<String, String> metricName) {
this.executor = executor;
this.thisNode = thisNode;
this.processDir = processDir;
this.stateTracker = stateTracker;
this.processorState = processorState;
this.assignmentState = assignmentState;
this.processDao = processDir.newDao();
this.crashCleanupDelay = crashCleanupDelay;
this.registry = registry;
this.metricName = metricName;
}
//
// Metric Names
//
public static final String EXCEPTIONS = "exceptions";
public static final String UNASSIGNMENT_FAILURES = "unassignmentFailures";
public static final String ASSIGNMENT_FAILURES = "assignmentFailures";
public static final String CLEAN_UP_TASK = "cleanUpTask";
public static final String ASSIGNMENT_TASK = "assignmentTask";
public static final List<String> ALL_METRICS = Lists.newArrayList(EXCEPTIONS, UNASSIGNMENT_FAILURES, ASSIGNMENT_FAILURES, CLEAN_UP_TASK, ASSIGNMENT_TASK);
// extenral dependencies
ScheduledExecutorService executor;
ClusterNode thisNode;
EtcdDirectoryDao<ClusterProcess> processDao;
ClusterStateTracker stateTracker;
ProcessorStateTracker processorState;
MappedEtcdDirectory<ClusterProcess> processDir;
FixedDelay crashCleanupDelay;
MetricRegistry registry;
Function<String, String> metricName;
private Supplier<AssignmentState> assignmentState;
// used by the service.
ScheduledFuture<?> assignmentFuture;
volatile long lastAssignmentIndex = 0L;
private ScheduledFuture<?> cleanupFuture;
final CleanShutdownCondition shutdown = new CleanShutdownCondition();
private Meter assignmentTask;
private Meter cleanUpTask;
private Meter assignmentFailures;
private Meter unassignmentFailures;
private Meter exceptions;
public void start() {
logger.debug("starting assignments for {}", thisNode.getId());
assignmentTask = registry.meter(metricName.apply(ASSIGNMENT_TASK));
cleanUpTask = registry.meter(metricName.apply(CLEAN_UP_TASK));
assignmentFailures = registry.meter(metricName.apply(ASSIGNMENT_FAILURES));
unassignmentFailures = registry.meter(metricName.apply(UNASSIGNMENT_FAILURES));
exceptions = registry.meter(metricName.apply(EXCEPTIONS));
lastAssignmentIndex = 0L;
startNodeAssignmentTask();
startFailureCleanupTask();
}
public void stop() {
logger.debug("stopping assignments for {}", thisNode.getId());
shutdown.await(10, TimeUnit.MINUTES);
stopFailureCleanupTask();
stopNodeAssignmentTask();
unassignJobs();
ALL_METRICS.forEach(name->registry.remove(metricName.apply(name)));
}
public void startNodeAssignmentTask() {
logger.info("starting assignment task for {}", thisNode.getId());
assignmentFuture =
executor.scheduleWithFixedDelay(
() -> {
assignmentTask.mark();
try {
ProcessorState processorState = this.processorState.getState();
AssignmentState assignmentState = this.assignmentState.get();
State clusterState = stateTracker.getState();
long lastSeenIndex = Math.max(assignmentState.etcdIndex, clusterState.lastModifiedIndex());
if( lastAssignmentIndex > lastSeenIndex ) return;
boolean active = clusterState.hasMember(thisNode.getId()) && processorState.hasProcessor(this.getId());
int localProcesses = assignmentState.nodeProcessCount();
int unassigned = assignmentState.unassignedProcessCount();
int processorNodes = processorState.processorCount();
int totalProcesses = assignmentState.totalProcessCount();
int maxProcessCount =
!active || totalProcesses == 0 || processorNodes == 0
? 0
: IntMath.divide(assignmentState.totalProcessCount(), processorNodes, RoundingMode.CEILING);
boolean giveProcess = localProcesses > maxProcessCount && unassigned == 0;
boolean takeProcess = active && localProcesses < maxProcessCount && unassigned > 0;
boolean abandonProcess = processorNodes == 0 && localProcesses > 0;
boolean terminate = !active && localProcesses == 0;
if( terminate ) {
shutdown.signalAll();
return;
}
else if (takeProcess) {
for (String toAssign : assignmentState.unassigned) {
try {
lastAssignmentIndex = processDao.update(toAssign, p -> p.getAssignedTo() == null,
p -> p.withAssignedTo(thisNode.getId()));
return;
} catch (IndexOutOfBoundsException | EtcdDirectoryException e) {
assignmentFailures.mark();
logger.debug("could not assign process {}", e.getMessage());
}
}
} else if (giveProcess || abandonProcess) {
for (String toUnassign : assignmentState.processes.get(thisNode.getId())) {
try {
lastAssignmentIndex = processDao.update(toUnassign, p -> thisNode.getId().equals(p.getAssignedTo()),
p -> p.withAssignedTo(null));
return;
} catch (IndexOutOfBoundsException | EtcdDirectoryException e) {
unassignmentFailures.mark();
logger.warn("could not unassign process {}", e.getMessage());
}
}
}
}
catch (Exception e) {
exceptions.mark();
logger.error("exception thrown in assignment process", e);
}
}, 100L, 100L, TimeUnit.MILLISECONDS);
}
public void startFailureCleanupTask() {
logger.info("starting failure clean up task for {}", thisNode.getId());
cleanupFuture =
executor
.scheduleWithFixedDelay(
() -> {
cleanUpTask.mark();
// iterate over the process map and make sure we have an entry in the state nodes.
State state = stateTracker.getState();
if (state.isLeader(thisNode)) {
processorState.getState().getProcessors().stream()
.filter(p->!stateTracker.getState().hasMember(p.getId()))
.forEach(p->processorState.removeCrashedProcessor(p));
assignmentState.get().processes
.entrySet()
.stream()
.filter(processEntry -> !stateTracker.getState().hasMember(processEntry.getKey()))
.forEach(
(processEntry) -> {
logger.info("cleaning up assignments for node {}", processEntry.getKey());
processEntry
.getValue()
.stream()
.forEach(processId ->{
try {
processDao.update(processId, (process) -> processEntry
.getKey().equals(process.getAssignedTo()), (process) -> process
.withAssignedTo(null));
} catch( Exception e ) {
unassignmentFailures.mark();
logger.debug("could not unassign process after crash", e);
}});
});
}
}, crashCleanupDelay.getInitialDelay(), crashCleanupDelay.getDelay(), crashCleanupDelay
.getTimeUnit());
}
public void stopFailureCleanupTask() {
try {
cleanupFuture.cancel(true);
} catch (Exception e) {
exceptions.mark();
logger.warn("error thrown while stoping cleanup task");
}
cleanupFuture = null;
}
static void ignoreException(RunnableWithException<?> r) {
try {
r.run();
} catch (Exception e) {
// do nothing.
}
}
public void stopNodeAssignmentTask() {
try {
assignmentFuture.cancel(true);
} catch (Exception e) {
exceptions.mark();
logger.warn("error thrown while stoping assignment task");
}
assignmentFuture = null;
}
public void unassignJobs() {
assignmentState.get().processes
.getOrDefault(thisNode.getId(), Collections.emptySet())
.stream()
.forEach(
processKey -> {
try {
processDao.update(processKey,
process -> thisNode.getId().equals(process.getAssignedTo()),
process -> process.withAssignedTo(null));
} catch (Exception e) {
unassignmentFailures.mark();
logger.warn("could not unassign process {}", processKey);
}
});
}
public static class FixedDelay {
public static class Builder {
private long initialDelay;
private long delay;
private TimeUnit timeUnit;
public Builder withInitialDelay(long initialDelay) {
this.initialDelay = initialDelay;
return this;
}
public Builder withDelay(long delay) {
this.delay = delay;
return this;
}
public Builder withTimeUnit(TimeUnit timeUnit) {
this.timeUnit = timeUnit;
return this;
}
public FixedDelay build() {
return new FixedDelay(initialDelay, delay, timeUnit);
}
}
public static Builder builder() {
return new Builder();
}
private long initialDelay;
private long delay;
private TimeUnit timeUnit;
public FixedDelay(long initialDelay, long delay, TimeUnit timeUnit) {
this.initialDelay = initialDelay;
this.delay = delay;
this.timeUnit = timeUnit;
}
public long getInitialDelay() {
return initialDelay;
}
public long getDelay() {
return delay;
}
public TimeUnit getTimeUnit() {
return timeUnit;
}
}
public static class CleanShutdownCondition {
final Lock emptyLock = new ReentrantLock();
final Condition empty = emptyLock.newCondition();
public boolean await( long time, TimeUnit unit ) {
try {
if( emptyLock.tryLock(10, TimeUnit.SECONDS) ) {
try {
if( !empty.await(time, unit) ) {
logger.warn("forcing shutdown of processes");
return false;
}
return true;
}
finally {
emptyLock.unlock();
}
}
else {
return false;
}
} catch( InterruptedException e ) {
logger.warn("interrupted while shutting down processes");
Thread.currentThread().interrupt();
return false;
}
}
public void signalAll() {
try {
if( emptyLock.tryLock(10, TimeUnit.SECONDS) ) {
try {
empty.signalAll();
}
finally {
emptyLock.unlock();
}
}
}
catch( InterruptedException ie ) {
logger.warn("interrupted while signaling shutdown");
Thread.currentThread().interrupt();
}
}
}
}