/*
* Copyright 2015 Collective, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.collective.celos;
import java.util.*;
import com.collective.celos.database.StateDatabaseConnection;
import com.google.common.collect.Maps;
import org.apache.log4j.Logger;
import com.collective.celos.trigger.Trigger;
/**
 * Master control program.
 * <p>
 * A scheduler is bound to a {@link WorkflowConfiguration} and a sliding window
 * (in hours). On every step it walks over the configured workflows and, for each
 * workflow that is selected and not paused, submits the slots chosen by the
 * workflow's scheduling strategy to the external service and then updates the
 * state of WAITING and RUNNING slots in the state database.
 */
public class Scheduler {

    private static final Logger LOGGER = Logger.getLogger(Scheduler.class);

    private final int slidingWindowHours;
    private final WorkflowConfiguration configuration;

    /**
     * Creates a scheduler.
     *
     * @param configuration the workflows to schedule; checked with {@code Util.requireNonNull}
     * @param slidingWindowHours size of the sliding window in hours; must be positive
     * @throws IllegalArgumentException if {@code slidingWindowHours} is zero or negative
     */
    public Scheduler(WorkflowConfiguration configuration, int slidingWindowHours) {
        if (slidingWindowHours <= 0) {
            throw new IllegalArgumentException("Sliding window hours must be greater than zero.");
        }
        this.slidingWindowHours = slidingWindowHours;
        this.configuration = Util.requireNonNull(configuration);
    }

    /**
     * Returns the start of the sliding window, given the current time.
     *
     * @param current the current time
     * @return {@code current} minus the configured number of sliding window hours
     */
    ScheduledTime getSlidingWindowStartTime(ScheduledTime current) {
        return new ScheduledTime(current.getDateTime().minusHours(slidingWindowHours));
    }

    /**
     * Main method, called every minute.
     * <p>
     * Steps through all workflows.
     *
     * @param current the current time
     * @param connection connection to the state database
     * @throws Exception if the three-argument {@code step} throws
     */
    public void step(ScheduledTime current, StateDatabaseConnection connection) throws Exception {
        // An empty ID set means "schedule all workflows".
        step(current, Collections.<WorkflowID>emptySet(), connection);
    }

    /**
     * If workflowIDs is empty, schedule all workflows.
     * <p>
     * Otherwise, schedule only workflows in the set.
     * <p>
     * Paused workflows are skipped. An exception thrown while stepping a single
     * workflow is logged and does not prevent the remaining workflows from being
     * processed; an exception thrown by the pause check itself is not caught here
     * and propagates to the caller.
     *
     * @param current the current time
     * @param workflowIDs the workflows to restrict the step to, or an empty set for all
     * @param connection connection to the state database
     * @throws Exception if the pause check fails
     */
    public void step(ScheduledTime current, Set<WorkflowID> workflowIDs, StateDatabaseConnection connection) throws Exception {
        // The window start is only needed for the two log lines; compute it once.
        ScheduledTime windowStart = getSlidingWindowStartTime(current);
        LOGGER.info("Starting scheduler step: " + current + " -- " + windowStart);
        for (Workflow wf : configuration.getWorkflows()) {
            WorkflowID id = wf.getID();
            boolean shouldProcess = workflowIDs.isEmpty() || workflowIDs.contains(id);
            if (!shouldProcess) {
                LOGGER.info("Ignoring workflow: " + id);
            } else if (connection.isPaused(id)) {
                LOGGER.info("Workflow is paused: " + id);
            } else {
                try {
                    stepWorkflow(wf, current, connection);
                } catch (Exception e) {
                    // Isolate failures: one broken workflow must not stop the others.
                    LOGGER.error("Exception in workflow: " + id + ": " + e.getMessage(), e);
                }
            }
        }
        LOGGER.info("Ending scheduler step: " + current + " -- " + windowStart);
    }

    /**
     * Steps a single workflow:
     * <p>
     * - Submit any READY slots to the external service.
     * <p>
     * - Check any WAITING slots for data availability.
     * <p>
     * - Check any RUNNING slots for their current external status.
     * <p>
     * The slot states are fetched once, before submission, and the same list is
     * then used for the update pass.
     */
    private void stepWorkflow(Workflow wf, ScheduledTime current, StateDatabaseConnection connection) throws Exception {
        LOGGER.info("Processing workflow: " + wf.getID() + " at: " + current);
        ScheduledTime windowStart = getWorkflowStartTime(wf, current);
        List<SlotState> slotStates = getSlotStatesIncludingMarkedForRerun(wf, current, windowStart, current, connection);
        runExternalWorkflows(wf, slotStates, connection);
        for (SlotState slotState : slotStates) {
            updateSlotState(wf, slotState, current, connection);
        }
    }

    /**
     * Get the slot states of all slots of the workflow from within the window defined by start (inclusive) and end (exclusive),
     * as well as the slots states of all slots marked for rerun in the database.
     *
     * @param wf the workflow whose slots are requested
     * @param current the current time, passed to the database when looking up rerun times
     * @param start start of the window
     * @param end end of the window
     * @param connection connection to the state database
     * @return an unmodifiable list holding one slot state per scheduled or rerun time,
     *         in the sorted order of those times; times the database has no state for
     *         are represented by a fresh WAITING slot
     * @throws Exception if the schedule or the database lookup fails
     */
    public List<SlotState> getSlotStatesIncludingMarkedForRerun(Workflow wf, ScheduledTime current, ScheduledTime start, ScheduledTime end, StateDatabaseConnection connection) throws Exception {
        SortedSet<ScheduledTime> timesMarkedForRerun = connection.getTimesMarkedForRerun(wf.getID(), current);

        // Union of the regularly scheduled times and the rerun times, sorted and de-duplicated.
        SortedSet<ScheduledTime> times = new TreeSet<>();
        times.addAll(wf.getSchedule().getScheduledTimes(this, start, end));
        times.addAll(timesMarkedForRerun);

        // Slot states known to the database for the window and for the rerun times.
        Map<SlotID, SlotState> fetchedSlots = Maps.newHashMap();
        fetchedSlots.putAll(connection.getSlotStates(wf.getID(), start, end));
        fetchedSlots.putAll(connection.getSlotStates(wf.getID(), timesMarkedForRerun));

        return matchScheduledToFetched(wf, times, fetchedSlots);
    }

    /**
     * Get the slot states of all slots of the workflow from within the window defined by start (inclusive) and end (exclusive).
     * This is used for servlets that return the slot states within the window, and don't care about rerun slots.
     *
     * @param wf the workflow whose slots are requested
     * @param start start of the window
     * @param end end of the window
     * @param connection connection to the state database
     * @return an unmodifiable list holding one slot state per scheduled time, in sorted
     *         order; times the database has no state for are represented by a fresh
     *         WAITING slot
     * @throws Exception if the schedule or the database lookup fails
     */
    public List<SlotState> getSlotStates(Workflow wf, ScheduledTime start, ScheduledTime end, StateDatabaseConnection connection) throws Exception {
        SortedSet<ScheduledTime> times = new TreeSet<>();
        times.addAll(wf.getSchedule().getScheduledTimes(this, start, end));
        Map<SlotID, SlotState> fetchedSlots = connection.getSlotStates(wf.getID(), start, end);
        return matchScheduledToFetched(wf, times, fetchedSlots);
    }

    /**
     * Produces one slot state per scheduled time, preferring the state fetched from
     * the database and falling back to a synthesized WAITING slot.
     */
    private List<SlotState> matchScheduledToFetched(Workflow wf, SortedSet<ScheduledTime> scheduledTimes, Map<SlotID, SlotState> timeToSlots) throws Exception {
        List<SlotState> slotStates = new ArrayList<>(scheduledTimes.size());
        for (ScheduledTime t : scheduledTimes) {
            SlotID slotID = new SlotID(wf.getID(), t);
            SlotState slotState = timeToSlots.get(slotID);
            if (slotState != null) {
                slotStates.add(slotState);
            } else {
                // Database doesn't have any info on the slot yet -
                // synthesize a fresh waiting slot and put it in the list
                // (not in the database).
                slotStates.add(new SlotState(slotID, SlotState.Status.WAITING));
            }
        }
        return Collections.unmodifiableList(slotStates);
    }

    /**
     * Returns the time from which slots of the given workflow are considered.
     *
     * @param wf the workflow
     * @param current the current time
     * @return the later of the sliding window start and the workflow's own start time,
     *         as determined by {@code Util.max}
     */
    public ScheduledTime getWorkflowStartTime(Workflow wf, ScheduledTime current) {
        ScheduledTime slidingWindowStartTime = getSlidingWindowStartTime(current);
        ScheduledTime workflowStartTime = wf.getStartTime();
        return Util.max(slidingWindowStartTime, workflowStartTime);
    }

    /**
     * Get scheduled slots from scheduling strategy and submit them to external system.
     * <p>
     * For each candidate the slot is submitted, its RUNNING state (carrying the
     * external ID) is written to the database, and only then is it started.
     *
     * @param wf the workflow being processed
     * @param slotStates the slot states offered to the scheduling strategy
     * @param connection connection to the state database
     * @throws IllegalStateException if the scheduling strategy returns a slot that is not READY
     * @throws Exception if the external service or the database fails
     */
    void runExternalWorkflows(Workflow wf, List<SlotState> slotStates, StateDatabaseConnection connection) throws Exception {
        List<SlotState> scheduledSlots = wf.getSchedulingStrategy().getSchedulingCandidates(slotStates);
        for (SlotState slotState : scheduledSlots) {
            if (!slotState.getStatus().equals(SlotState.Status.READY)) {
                throw new IllegalStateException("Scheduling strategy returned non-ready slot: " + slotState);
            }
            SlotID slotID = slotState.getSlotID();
            LOGGER.info("Submitting slot to external service: " + slotID);
            String externalID = wf.getExternalService().submit(slotID);
            // Persist the RUNNING state before starting the external job.
            connection.putSlotState(slotState.transitionToRunning(externalID));
            LOGGER.info("Starting slot: " + slotID + " with external ID: " + externalID);
            wf.getExternalService().start(slotID, externalID);
        }
    }

    /**
     * Check the trigger for all WAITING slots, and update them to READY if data is available.
     * <p>
     * Check the external status of all RUNNING slots, and update them to SUCCESS or FAILURE if they're finished.
     * <p>
     * A WAITING slot whose trigger is not satisfied is moved to the wait-timeout state
     * once the workflow's wait timeout has elapsed. A finished, unsuccessful RUNNING
     * slot is moved to the retry state while its retry count is below the workflow's
     * maximum retry count. Slots in any other status are left untouched.
     *
     * @param wf the workflow the slot belongs to
     * @param slotState the slot state to examine
     * @param current the current time
     * @param connection connection to the state database
     * @throws Exception if the trigger, the external service or the database fails
     */
    void updateSlotState(Workflow wf, SlotState slotState, ScheduledTime current, StateDatabaseConnection connection) throws Exception {
        SlotID slotID = slotState.getSlotID();
        SlotState.Status status = slotState.getStatus();
        if (status.equals(SlotState.Status.WAITING)) {
            if (callTrigger(wf, slotState, current, connection)) {
                LOGGER.info("Slot is ready: " + slotID);
                connection.putSlotState(slotState.transitionToReady());
            } else if (isSlotTimedOut(slotState.getScheduledTime(), current, wf.getWaitTimeoutSeconds())) {
                LOGGER.info("Slot timed out waiting: " + slotID);
                connection.putSlotState(slotState.transitionToWaitTimeout());
            } else {
                LOGGER.info("Waiting for slot: " + slotID);
            }
        } else if (status.equals(SlotState.Status.RUNNING)) {
            String externalID = slotState.getExternalID();
            ExternalStatus xStatus = wf.getExternalService().getStatus(slotID, externalID);
            if (xStatus.isRunning()) {
                LOGGER.info("Slot still running: " + slotID + " external ID: " + externalID);
            } else if (xStatus.isSuccess()) {
                LOGGER.info("Slot successful: " + slotID + " external ID: " + externalID);
                connection.putSlotState(slotState.transitionToSuccess());
            } else if (slotState.getRetryCount() < wf.getMaxRetryCount()) {
                LOGGER.info("Slot failed, preparing for retry: " + slotID + " external ID: " + externalID);
                connection.putSlotState(slotState.transitionToRetry());
            } else {
                LOGGER.info("Slot failed permanently: " + slotID + " external ID: " + externalID);
                connection.putSlotState(slotState.transitionToFailure());
            }
        }
    }

    /**
     * Asks the workflow's trigger whether data is available for the slot's scheduled time.
     */
    private boolean callTrigger(Workflow wf, SlotState slotState, ScheduledTime current, StateDatabaseConnection connection) throws Exception {
        Trigger trigger = wf.getTrigger();
        ScheduledTime scheduledTime = slotState.getScheduledTime();
        return trigger.isDataAvailable(connection, current, scheduledTime);
    }

    /**
     * Tells whether a slot has waited longer than its timeout.
     *
     * @param nominalTime the slot's scheduled time
     * @param current the current time
     * @param timeoutSeconds the timeout in seconds, counted from {@code nominalTime}
     * @return {@code true} if {@code current} is strictly after {@code nominalTime}
     *         plus {@code timeoutSeconds}
     */
    static boolean isSlotTimedOut(ScheduledTime nominalTime, ScheduledTime current, int timeoutSeconds) {
        ScheduledTime timeoutTime = nominalTime.plusSeconds(timeoutSeconds);
        return current.getDateTime().isAfter(timeoutTime.getDateTime());
    }

    /**
     * @return the size of the sliding window in hours, as passed to the constructor
     */
    public int getSlidingWindowHours() {
        return slidingWindowHours;
    }

    /**
     * @return the workflow configuration this scheduler was created with
     */
    public WorkflowConfiguration getWorkflowConfiguration() {
        return configuration;
    }
}