/*******************************************************************************
* Copyright 2013 Michael Marconi
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
******************************************************************************/
package oncue.backingstore;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.json.simple.JSONValue;
import com.typesafe.config.Config;
import akka.actor.ActorSystem;
import akka.event.Logging;
import akka.event.LoggingAdapter;
import oncue.common.messages.Job;
import oncue.common.messages.Job.State;
import oncue.common.settings.Settings;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
import redis.clients.jedis.Protocol;
import redis.clients.jedis.Transaction;
import redis.clients.jedis.exceptions.JedisException;
public class RedisBackingStore extends AbstractBackingStore {
/**
 * An AutoCloseable wrapper that manages fetching a connection from the redis connection pool
 * and safely returning it when finished.
 *
 * This class proxies methods from Jedis for convenience.
 */
public static class RedisConnection implements AutoCloseable {

	// The shared Redis connection pool, created lazily on first use
	private static JedisPool redisPool;

	// The pooled connection held by this wrapper until close()
	private Jedis connection;

	/**
	 * Borrow a connection from the shared pool, creating the pool first if
	 * necessary.
	 */
	public RedisConnection() {
		this.connection = pool().getResource();
	}

	/**
	 * Lazily initialise the shared connection pool using the statically
	 * configured host and port. Synchronised because the original
	 * check-then-act was not thread-safe: concurrent constructors could race
	 * and create (and leak) more than one pool.
	 */
	private static synchronized JedisPool pool() {
		if (redisPool == null) {
			redisPool = new JedisPool(new JedisPoolConfig(), host, port,
					Protocol.DEFAULT_TIMEOUT, null);
		}
		return redisPool;
	}

	/**
	 * Return this wrapper's connection to the pool.
	 */
	@Override
	public void close() {
		this.connection.close();
	}

	public Long incr(String key) {
		return this.connection.incr(key);
	}

	public Transaction multi() {
		return this.connection.multi();
	}

	public void lpush(String key, String value) {
		this.connection.lpush(key, value);
	}

	public List<String> lrange(String key, int start, int end) {
		return this.connection.lrange(key, start, end);
	}

	public String hget(String key, String field) {
		return this.connection.hget(key, field);
	}

	public void hset(String key, String field, String value) {
		this.connection.hset(key, field, value);
	}

	public void lrem(String key, int count, String value) {
		this.connection.lrem(key, count, value);
	}

	public void del(String key) {
		this.connection.del(key);
	}

	public Object rpoplpush(String srckey, String dstkey) {
		return this.connection.rpoplpush(srckey, dstkey);
	}

	/**
	 * Flush the entire redis database. This should only be used in tests.
	 */
	public void flushDB() {
		this.connection.flushDB();
	}

	public boolean exists(String key) {
		return this.connection.exists(key);
	}

	public Long llen(String key) {
		return this.connection.llen(key);
	}

	public List<String> brpop(int timeout, String key) {
		return this.connection.brpop(timeout, key);
	}
}
// Config key for overriding the Redis host (read in the constructor)
private static final String REDIS_HOST = "oncue.scheduler.backing-store.redis.host";
// Config key for overriding the Redis port (read in the constructor)
private static final String REDIS_PORT = "oncue.scheduler.backing-store.redis.port";
// Redis port; defaults to the protocol default, may be overridden by config
private static int port = Protocol.DEFAULT_PORT;
// Redis list of IDs of the jobs that have completed successfully
public static final String COMPLETED_JOBS = "oncue:jobs:complete";
// Redis list of IDs of the jobs that have failed
public static final String FAILED_JOBS = "oncue:jobs:failed";
// Redis host; defaults to localhost, may be overridden by config
private static String host = "localhost";
// Counter key: the total count of persisted jobs, used to allocate job IDs
public static final String JOB_COUNT_KEY = "oncue:job_count";
// Job hash field: the time the job was enqueued
public static final String JOB_ENQUEUED_AT = "job_enqueued_at";
// Job hash field: the time the job was started
public static final String JOB_STARTED_AT = "job_started_at";
// Job hash field: the time the job was completed
public static final String JOB_COMPLETED_AT = "job_completed_at";
// Job hash field: the message associated with a failed job
public static final String JOB_ERROR_MESSAGE = "job_failure_message";
// Job hash field: the ID of a job
public static final String JOB_ID = "job_id";
// Key template for the hash describing a particular job; formatted with the job ID
public static final String JOB_KEY = "oncue:jobs:%s";
// Job hash field: the job parameters, stored as a JSON object string
public static final String JOB_PARAMS = "job_params";
// Job hash field: the progress against a job
public static final String JOB_PROGRESS = "job_progress";
// Job hash field: the job state
public static final String JOB_STATE = "job_state";
// Job hash field: the worker type assigned to a job
public static final String JOB_WORKER_TYPE = "job_worker_type";
// Job hash field: the re-run status of a job
public static final String JOB_RERUN_STATUS = "job_rerun_status";
/*
 * The queue of jobs that acts as an external interface; the scheduler component will watch this
 * queue for new jobs
 */
public static final String NEW_JOBS = "oncue:jobs:new";
// Redis list of IDs of the scheduled jobs dispatched by the scheduler component
public static final String SCHEDULED_JOBS = "oncue:jobs:scheduled";
// Redis list of IDs of the unscheduled jobs held by the scheduler
public static final String UNSCHEDULED_JOBS = "oncue:jobs:unscheduled";
/**
 * Create a new {@linkplain Job}, assign it the next available job ID and
 * persist it in Redis on the new-jobs queue.
 *
 * @param workerType is the type of worker required to complete this job
 *
 * @param params is a map of job parameters (may be null)
 *
 * @return a new {@linkplain Job}
 */
public static Job createJob(String workerType, Map<String, String> params) {
	try (RedisConnection redis = new RedisConnection()) {
		// Allocate the next job ID from the global counter
		final Long jobId = redis.incr(RedisBackingStore.JOB_COUNT_KEY);

		// Build the job, attaching any supplied parameters
		final Job job = new Job(jobId, workerType);
		if (params != null) {
			job.setParams(params);
		}

		// Store the job hash and push its ID onto the new-jobs queue
		persistJob(job, RedisBackingStore.NEW_JOBS, redis);
		return job;
	}
}
/**
 * Construct a job from a given Job ID by re-hydrating the job hash in Redis.
 *
 * @param id is the id of the job
 * @param redis is a connection to Redis
 * @return a {@linkplain Job} that represents the job hash in Redis
 * @throws RuntimeException (wrapping the cause) if any field cannot be read
 *         or parsed
 */
@SuppressWarnings("unchecked")
public static Job loadJob(long id, RedisConnection redis) {
	String jobKey = String.format(JOB_KEY, id);
	Job job;
	try {
		// The enqueue time is mandatory; start and completion times are optional
		DateTime enqueuedAt = DateTime.parse(redis.hget(jobKey, JOB_ENQUEUED_AT));
		DateTime startedAt = null;
		String startedAtRaw = redis.hget(jobKey, JOB_STARTED_AT);
		if (startedAtRaw != null) {
			startedAt = DateTime.parse(startedAtRaw);
		}
		DateTime completedAt = null;
		String completedAtRaw = redis.hget(jobKey, JOB_COMPLETED_AT);
		if (completedAtRaw != null) {
			completedAt = DateTime.parse(completedAtRaw);
		}

		String workerType = redis.hget(jobKey, JOB_WORKER_TYPE);
		String state = redis.hget(jobKey, JOB_STATE);
		String progress = redis.hget(jobKey, JOB_PROGRESS);
		String params = redis.hget(jobKey, JOB_PARAMS);
		String errorMessage = redis.hget(jobKey, JOB_ERROR_MESSAGE);
		String rerunStatus = redis.hget(jobKey, JOB_RERUN_STATUS);

		// Long.valueOf replaces the deprecated new Long(long) boxing constructor
		job = new Job(Long.valueOf(id), workerType);
		job.setEnqueuedAt(enqueuedAt);
		if (startedAt != null) {
			job.setStartedAt(startedAt);
		}
		if (completedAt != null) {
			job.setCompletedAt(completedAt);
		}
		job.setRerun(Boolean.parseBoolean(rerunStatus));
		if (params != null) {
			// Parameters are stored as a JSON object of string keys and values
			job.setParams((Map<String, String>) JSONValue.parse(params));
		}
		if (state != null) {
			job.setState(State.valueOf(state.toUpperCase()));
		}
		if (progress != null) {
			// Double.parseDouble replaces the deprecated new Double(String) constructor
			job.setProgress(Double.parseDouble(progress));
		}
		if (errorMessage != null) {
			job.setErrorMessage(errorMessage);
		}
	} catch (Exception e) {
		throw new RuntimeException(
				String.format("Could not load job with id %s from Redis", id), e);
	}
	return job;
}
/**
 * Persist a job as a hash in Redis and push its ID onto the given queue,
 * atomically, inside a single Redis transaction.
 *
 * @param job is the {@linkplain Job} to persist
 * @param queueName is the name of the queue to push the job onto
 * @param redis is a connection to Redis
 */
public static void persistJob(Job job, String queueName, RedisConnection redis) {
	// All writes are queued on a MULTI transaction and applied by exec()
	try (Transaction transaction = redis.multi()) {
		String jobKey = String.format(JOB_KEY, job.getId());

		transaction.hset(jobKey, JOB_ENQUEUED_AT, job.getEnqueuedAt().toString());
		if (job.getStartedAt() != null) {
			transaction.hset(jobKey, JOB_STARTED_AT, job.getStartedAt().toString());
		}
		if (job.getCompletedAt() != null) {
			transaction.hset(jobKey, JOB_COMPLETED_AT, job.getCompletedAt().toString());
		}
		transaction.hset(jobKey, JOB_WORKER_TYPE, job.getWorkerType());
		transaction.hset(jobKey, JOB_RERUN_STATUS, Boolean.toString(job.isRerun()));

		if (job.getParams() != null) {
			// Finished jobs have their parameters filtered (getParams(false));
			// all other states store the full parameter map
			Map<String, String> jobParams;
			switch (job.getState()) {
			case COMPLETE:
			case FAILED:
				jobParams = job.getParams(false);
				break;
			default:
				jobParams = job.getParams();
				break;
			}
			transaction.hset(jobKey, JOB_PARAMS, JSONValue.toJSONString(jobParams));
		}

		if (job.getState() != null) {
			transaction.hset(jobKey, JOB_STATE, job.getState().toString());
		}
		transaction.hset(jobKey, JOB_PROGRESS, String.valueOf(job.getProgress()));
		if (job.getErrorMessage() != null) {
			transaction.hset(jobKey, JOB_ERROR_MESSAGE, job.getErrorMessage());
		}

		// Index the job on the specified queue
		transaction.lpush(queueName, Long.toString(job.getId()));

		// Apply all queued commands atomically
		transaction.exec();
	} catch (IOException e) {
		// Jedis' Transaction.close() method does not actually throw IOException, it just says
		// that it does. In fact it can only throw a JedisConnectionException, an instance of a
		// RuntimeException. Let's wrap this in a JedisException anyway to be sure.
		throw new JedisException(e);
	}
}
// Logger
private final LoggingAdapter log;

/**
 * Create the backing store. The static Redis host and port defaults are
 * overridden from the actor system's configuration when the corresponding
 * keys are present, and the effective connection details are logged.
 *
 * @param system is the actor system whose configuration is consulted
 * @param settings are the oncue settings passed to the superclass
 */
public RedisBackingStore(ActorSystem system, Settings settings) {
	super(system, settings);
	log = Logging.getLogger(system, this);

	// Allow configuration to override the Redis connection defaults
	Config config = system.settings().config();
	if (config.hasPath(REDIS_HOST)) {
		host = config.getString(REDIS_HOST);
	}
	if (config.hasPath(REDIS_PORT)) {
		port = config.getInt(REDIS_PORT);
	}

	log.info("Backing store expects Redis at: host={}, port={}", host, port);
}
/**
 * Push the ID of each given job onto the scheduled-jobs queue.
 *
 * @param jobs are the jobs that have been scheduled
 */
@Override
public void addScheduledJobs(List<Job> jobs) {
	try (RedisConnection redis = new RedisConnection()) {
		for (Job scheduledJob : jobs) {
			String jobId = Long.toString(scheduledJob.getId());
			redis.lpush(SCHEDULED_JOBS, jobId);
		}
	}
}
/**
 * Persist the given job and push its ID onto the unscheduled-jobs queue.
 *
 * @param job is the job awaiting scheduling
 */
@Override
public void addUnscheduledJob(Job job) {
	try (RedisConnection connection = new RedisConnection()) {
		persistJob(job, UNSCHEDULED_JOBS, connection);
	}
}
/**
 * @return all jobs whose IDs are on the completed-jobs list, re-hydrated
 *         from their Redis hashes
 */
@Override
public List<Job> getCompletedJobs() {
	return loadJobsFromList(COMPLETED_JOBS);
}

/**
 * @return all jobs whose IDs are on the failed-jobs list, re-hydrated from
 *         their Redis hashes
 */
@Override
public List<Job> getFailedJobs() {
	return loadJobsFromList(FAILED_JOBS);
}

/**
 * Load every job whose ID appears on the given Redis list. Extracted to
 * remove the duplication between the completed- and failed-jobs accessors.
 *
 * @param listKey is the Redis key of a list of job IDs
 * @return the corresponding jobs, in list order
 */
private static List<Job> loadJobsFromList(String listKey) {
	List<Job> jobs = new ArrayList<>();
	try (RedisConnection redis = new RedisConnection()) {
		for (String jobID : redis.lrange(listKey, 0, -1)) {
			// Long.parseLong replaces the deprecated new Long(String) constructor
			jobs.add(loadJob(Long.parseLong(jobID), redis));
		}
	}
	return jobs;
}
/**
 * Atomically increment the global job counter in Redis.
 *
 * @return the newly-allocated job ID
 */
@Override
public long getNextJobID() {
	try (RedisConnection redis = new RedisConnection()) {
		Long nextJobId = redis.incr(RedisBackingStore.JOB_COUNT_KEY);
		return nextJobId;
	}
}
/**
 * Persist the given job and push its ID onto the failed-jobs list.
 *
 * @param job is the job that has failed
 */
@Override
public void persistJobFailure(Job job) {
	try (RedisConnection connection = new RedisConnection()) {
		persistJob(job, FAILED_JOBS, connection);
	}
}
/**
 * Record the latest progress, state and timestamps of the given job on its
 * Redis hash. When the job has reached the COMPLETE state, its ID is also
 * pushed onto the completed-jobs list.
 *
 * @param job is the job whose progress is being persisted
 */
@Override
public void persistJobProgress(Job job) {
	try (RedisConnection redis = new RedisConnection()) {
		final String jobKey = String.format(JOB_KEY, job.getId());

		redis.hset(jobKey, JOB_PROGRESS, String.valueOf(job.getProgress()));
		redis.hset(jobKey, JOB_STATE, job.getState().toString());
		if (job.getStartedAt() != null) {
			redis.hset(jobKey, JOB_STARTED_AT, job.getStartedAt().toString());
		}

		// On completion, stamp the completion time and index the job
		if (job.getState() == Job.State.COMPLETE) {
			if (job.getCompletedAt() != null) {
				redis.hset(jobKey, JOB_COMPLETED_AT, job.getCompletedAt().toString());
			}
			redis.lpush(COMPLETED_JOBS, Long.toString(job.getId()));
		}
	}
}
/**
 * Remove a completed job: drop its ID from the completed-jobs list and
 * delete its job hash.
 *
 * @param jobId is the ID of the completed job to remove
 */
@Override
public void removeCompletedJobById(long jobId) {
	try (RedisConnection redis = new RedisConnection()) {
		String id = Long.toString(jobId);
		// count of 0 removes every occurrence of the ID from the list
		redis.lrem(COMPLETED_JOBS, 0, id);
		removeJobById(jobId, redis);
	}
}
/**
 * Remove a failed job: drop its ID from the failed-jobs list and delete its
 * job hash.
 *
 * @param jobId is the ID of the failed job to remove
 */
@Override
public void removeFailedJobById(long jobId) {
	try (RedisConnection redis = new RedisConnection()) {
		String id = Long.toString(jobId);
		// count of 0 removes every occurrence of the ID from the list
		redis.lrem(FAILED_JOBS, 0, id);
		removeJobById(jobId, redis);
	}
}
/**
 * Drop the given job's ID from the scheduled-jobs list. The job hash itself
 * is left untouched.
 *
 * @param jobId is the ID of the scheduled job to remove
 */
@Override
public void removeScheduledJobById(long jobId) {
	try (RedisConnection redis = new RedisConnection()) {
		// count of 0 removes every occurrence of the ID from the list
		redis.lrem(SCHEDULED_JOBS, 0, Long.toString(jobId));
	}
}
/**
 * Drop the given job's ID from the unscheduled-jobs list. The job hash
 * itself is left untouched.
 *
 * @param jobId is the ID of the unscheduled job to remove
 */
@Override
public void removeUnscheduledJobById(long jobId) {
	try (RedisConnection redis = new RedisConnection()) {
		// count of 0 removes every occurrence of the ID from the list
		redis.lrem(UNSCHEDULED_JOBS, 0, Long.toString(jobId));
	}
}
/**
 * Delete the Redis hash that stores the given job's details.
 *
 * @param jobId is the ID of the job whose hash should be deleted
 * @param redis is a connection to Redis
 */
public void removeJobById(long jobId, RedisConnection redis) {
	String jobKey = String.format(JOB_KEY, jobId);
	redis.del(jobKey);
}
/**
 * When restoring the jobs queue, we need to look for all the jobs that were on the scheduler
 * jobs queue in Redis, as well as the jobs that had been scheduled against agents, which we
 * assume are dead.
 *
 * @return every job now on the unscheduled-jobs queue, re-hydrated from Redis
 */
@Override
public List<Job> restoreJobs() {
	List<Job> jobs = new ArrayList<>();
	try (RedisConnection redis = new RedisConnection()) {
		// Pop all scheduled jobs back onto the unscheduled jobs queue
		while (redis.rpoplpush(SCHEDULED_JOBS, UNSCHEDULED_JOBS) != null) {
			// rpoplpush moves one ID per call; loop until the source list is empty
		}

		// Load all the unscheduled jobs
		List<String> jobIDs = redis.lrange(UNSCHEDULED_JOBS, 0, -1);
		for (String jobID : jobIDs) {
			// Long.parseLong replaces the deprecated new Long(String) constructor
			jobs.add(loadJob(Long.parseLong(jobID), redis));
		}
	}
	return jobs;
}
/**
 * Remove completed (and optionally failed) jobs whose completion time is
 * older than the given expiration age.
 *
 * @param includeFailedJobs when true, expired failed jobs are also removed
 * @param expirationAge is the age beyond which a finished job is expired
 * @return the number of jobs removed
 */
@Override
public int cleanupJobs(boolean includeFailedJobs, Duration expirationAge) {
	// The threshold is loop-invariant; compute it once instead of per iteration
	final DateTime expirationThreshold = DateTime.now().minus(expirationAge.getMillis());
	int cleanedJobsCount = 0;

	for (Job completedJob : getCompletedJobs()) {
		boolean isExpired = completedJob.getCompletedAt()
				.isBefore(expirationThreshold.toInstant());
		if (isExpired) {
			removeCompletedJobById(completedJob.getId());
			cleanedJobsCount++;
		}
	}

	if (!includeFailedJobs) {
		return cleanedJobsCount;
	}

	for (Job failedJob : getFailedJobs()) {
		if (failedJob.getCompletedAt() == null) {
			// A failed job should always carry a completion time; repair it now
			// and let a later clean-up pass expire it
			log.error(
					"Found a failed job with no completion time. Setting completion time to now and deferring to next clean up. ("
							+ failedJob.toString() + ")");
			failedJob.setCompletedAt(DateTime.now());
			persistJobFailure(failedJob);
			continue;
		}
		boolean isExpired = failedJob.getCompletedAt()
				.isBefore(expirationThreshold.toInstant());
		if (isExpired) {
			removeFailedJobById(failedJob.getId());
			cleanedJobsCount++;
		}
	}
	return cleanedJobsCount;
}
}