/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.job;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import com.addthis.basis.annotations.Scaling;
import com.addthis.basis.util.Parameter;
import com.addthis.basis.util.TokenReplacerOverflowException;

import com.addthis.codec.Codec;
import com.addthis.codec.json.CodecJSON;
import com.addthis.hydra.job.store.SpawnDataStore;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Histogram;
import com.yammer.metrics.core.Timer;
import com.yammer.metrics.core.TimerContext;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static com.addthis.basis.annotations.Scaling.Scale.SETUP;
import static com.addthis.hydra.job.store.SpawnDataStoreKeys.SPAWN_JOB_CONFIG_PATH;
import static com.google.common.base.Preconditions.checkArgument;
/**
 * Manages the job metadata that components other than Spawn care about, kept separate
 * from the rest of the Spawn state.
 * We assume that only Spawn updates the job znodes, and only through this class.
 */
// {queryconfig,config,jobtask/[n]} under job uuid
// Can't use ZkMessageProducer because this involves multiple znodes
public class JobConfigManager {

    private static final Logger logger = LoggerFactory.getLogger(JobConfigManager.class);

    private static final Codec codec = CodecJSON.INSTANCE;

    /* metrics */
    private static final Histogram jobSizePersistHisto =
            Metrics.newHistogram(JobConfigManager.class, "jobSizePersistHisto");
    private static final Timer addJobTimer = Metrics.newTimer(JobConfigManager.class, "addJobTimer");
    private static final Timer updateJobTimer = Metrics.newTimer(JobConfigManager.class, "updateJobTimer");

    /** Number of threads used by {@link #loadJobs()} to fetch job data in parallel. */
    private static final int loadThreads = Parameter.intValue("job.config.load.threads", 8);
    /** Number of job ids handed to each loader thread as one work unit. */
    private static final int jobChunkSize = Parameter.intValue("job.config.chunk.size", 30);

    // Names of the child znodes hung off each job's root path.
    private static final String configChildName = "/config";
    private static final String queryConfigChildName = "/queryconfig";
    private static final String alertChildName = "/alerts";
    private static final String tasksChildName = "/tasks";
    private static final String brokerInfoChildName = "/brokerinfo";
    private static final String taskChildName = "/task";

    @Nonnull
    private final SpawnDataStore spawnDataStore;

    // Null when constructed via the deprecated single-arg constructor;
    // getExpandedConfig(String, Collection) guards against that case.
    @Nullable
    private final JobExpander jobExpander;

    @Deprecated
    public JobConfigManager(SpawnDataStore spawnDataStore) {
        this.jobExpander = null;
        this.spawnDataStore = spawnDataStore;
    }

    public JobConfigManager(SpawnDataStore spawnDataStore, JobExpander jobExpander) {
        this.spawnDataStore = spawnDataStore;
        this.jobExpander = jobExpander;
    }

    /**
     * Expand the stored config of an existing job using that job's own parameters.
     *
     * @param jobUUID id of the job whose config should be expanded
     * @return the expanded job configuration
     * @throws FailedJobExpansionException if expansion fails
     * @throws IllegalArgumentException if no job with the given id exists
     */
    public String getExpandedConfig(String jobUUID) throws FailedJobExpansionException {
        IJob job = getJob(jobUUID);
        // template form defers message construction until the check actually fails
        checkArgument(job != null, "job with uid %s does not exist", jobUUID);
        String config = getConfig(jobUUID);
        return getExpandedConfig(config, job.getParameters());
    }

    /**
     * Expand a raw config string with the supplied parameters.
     *
     * @throws FailedJobExpansionException if expansion fails
     * @throws IllegalStateException if this instance was built without a JobExpander
     *                               (the deprecated single-arg constructor)
     */
    public String getExpandedConfig(String rawConfig, Collection<JobParameter> parameters)
            throws FailedJobExpansionException {
        if (jobExpander == null) {
            // previously this dereferenced a null field and threw an opaque NPE
            throw new IllegalStateException(
                    "JobConfigManager was constructed without a JobExpander; cannot expand configs");
        }
        return jobExpander.expandJob(rawConfig, parameters);
    }

    /** Write {@code data} to {@code path}; if data is null, log a warning and do nothing. */
    public void writeUpdateIfDataNotNull(String path, String data) throws Exception {
        if (data == null) {
            // single argument can be passed directly; no Object[] wrapper needed for slf4j
            logger.warn("Was going to update znode {} but data was null", path);
        } else {
            spawnDataStore.put(path, data);
        }
    }

    // todo: fail if already present?
    /**
     * Persist a new job: root data as a child of the job-config parent, plus the
     * config and query-config child nodes and a marker node for the new task layout.
     */
    public void addJob(IJob ijob) throws Exception {
        TimerContext addJobTimerContext = addJobTimer.time();
        ZnodeJob job = new ZnodeJob(ijob);
        String jobPath = getJobPath(ijob.getId());
        try {
            // pin the charset so persisted bytes do not depend on the platform default
            final String jobCodec = new String(codec.encode(job.getRootData()), StandardCharsets.UTF_8);
            jobSizePersistHisto.update(jobCodec.length());
            spawnDataStore.putAsChild(SPAWN_JOB_CONFIG_PATH, job.getId(), jobCodec);
            writeUpdateIfDataNotNull(jobPath + configChildName,
                                     (job.getConfig() == null) ? "" : job.getConfig());
            if (job.getQueryConfig() == null) {
                writeUpdateIfDataNotNull(jobPath + queryConfigChildName, "");
            } else {
                writeUpdateIfDataNotNull(jobPath + queryConfigChildName,
                                         new String(codec.encode(job.getQueryConfig()),
                                                    StandardCharsets.UTF_8));
            }
            // this is just a marker so that we know to use the 'new' configuration
            spawnDataStore.put(jobPath + tasksChildName, "");
        } finally {
            addJobTimerContext.stop();
        }
    }

    /**
     * Persist updated state for an existing job.
     *
     * @throws RuntimeException wrapping any datastore/codec failure
     */
    public void updateJob(IJob ijob) {
        TimerContext updateJobTimerContext = updateJobTimer.time();
        ZnodeJob job = new ZnodeJob(ijob);
        String jobPath = getJobPath(ijob.getId());
        // for transition, it's possible an already existing job does
        // not have a znode, (There should probably be a zkutils for
        // writedata and make sure it exists.
        try {
            final String jobCodec = new String(codec.encode(job.getRootData()), StandardCharsets.UTF_8);
            jobSizePersistHisto.update(jobCodec.length());
            spawnDataStore.putAsChild(SPAWN_JOB_CONFIG_PATH, job.getId(), jobCodec);
            writeUpdateIfDataNotNull(jobPath + queryConfigChildName,
                                     new String(codec.encode(job.getQueryConfig()),
                                                StandardCharsets.UTF_8));
            // this is just a marker so that we know to use the 'new' configuration
            spawnDataStore.put(jobPath + tasksChildName, "");
        } catch (Exception e) {
            logger.warn("Failing to update job, bailing", e);
            throw new RuntimeException(e);
        } finally {
            updateJobTimerContext.stop();
        }
    }

    /**
     * Use query data fetched from a SpawnDataStore to create the job object
     *
     * @param jobId The jobId in question
     * @param queryData Query data describing the job configuration. Data for other jobs will be ignored.
     * @return The reconstituted job object, or null if the job's root node is absent
     * @throws Exception on decode failure
     */
    @Nullable
    private IJob createJobFromQueryData(String jobId, Map<String, String> queryData) throws Exception {
        String jobPath = getJobPath(jobId);
        String rstring = spawnDataStore.getChild(SPAWN_JOB_CONFIG_PATH, jobId);
        if (rstring == null) {
            return null;
        }
        ZnodeJob.RootZnodeData rznd =
                codec.decode(ZnodeJob.RootZnodeData.class, rstring.getBytes(StandardCharsets.UTF_8));
        String config = queryData.get(jobPath + configChildName);
        String queryConfigString = queryData.get(jobPath + queryConfigChildName);
        // the queryconfig child may legitimately be missing; decoding null would NPE
        JobQueryConfig jqc = null;
        if (queryConfigString != null) {
            jqc = codec.decode(JobQueryConfig.class, queryConfigString.getBytes(StandardCharsets.UTF_8));
        }
        // Make sure job config path exists
        spawnDataStore.put(SPAWN_JOB_CONFIG_PATH, "");
        String tasksData = queryData.get(jobPath + tasksChildName);
        if (tasksData != null) {
            // load from new config where task data is stored on root node
            return new ZnodeJob(rznd, config, jqc);
        } else {
            String taskData = queryData.get(jobPath + taskChildName);
            if (taskData != null) {
                // old style, will be removed in future versions
                return loadLegacyTaskData(jobPath, rznd, config, jqc);
            }
        }
        logger.info("No tasks available for path: {}", jobPath);
        return new ZnodeJob(rznd, config, jqc);
    }

    /**
     * Fetch and reconstitute a single job.
     *
     * @return the job, or null if it does not exist
     * @throws RuntimeException wrapping any datastore/decode failure
     */
    public IJob getJob(String jobId) {
        try {
            Map<String, String> queryData = fetchJobData(jobId);
            if (queryData == null) {
                return null;
            }
            return createJobFromQueryData(jobId, queryData);
        } catch (Exception e) {
            logger.error("Failure creating job: {}", jobId, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Internal function to fetch the job config, query config, etc. for a job using a single SpawnDataStore operation.
     *
     * @param jobId The jobId to fetch
     * @return A map describing the results of querying all the relevant paths
     */
    private Map<String, String> fetchJobData(String jobId) {
        String jobPath = getJobPath(jobId);
        String[] queryPaths = {jobPath,
                               jobPath + configChildName,
                               jobPath + queryConfigChildName,
                               jobPath + alertChildName,
                               jobPath + tasksChildName,
                               jobPath + taskChildName};
        return spawnDataStore.get(queryPaths);
    }

    /**
     * Rebuild a job whose tasks are stored the old way: one child znode per task
     * under {@code jobPath/task}. Will be removed in future versions.
     */
    private IJob loadLegacyTaskData(String jobPath,
                                    ZnodeJob.RootZnodeData rznd,
                                    String config,
                                    JobQueryConfig jqc) throws Exception {
        List<JobTask> tasks = new ArrayList<>();
        List<String> children = spawnDataStore.getChildrenNames(jobPath + taskChildName);
        if (children != null) {
            // sort so task ordering is stable regardless of datastore iteration order
            Collections.sort(children);
            for (String taskId : children) {
                String taskString = spawnDataStore.get(jobPath + taskChildName + "/" + taskId);
                JobTask task = codec.decode(JobTask.class, taskString.getBytes(StandardCharsets.UTF_8));
                tasks.add(task);
            }
        }
        return new ZnodeJob(rznd, config, jqc, tasks);
    }

    /** Return the raw (unexpanded) config for a job, or null if absent. */
    public String getConfig(String jobUUID) {
        return spawnDataStore.get(getJobPath(jobUUID) + configChildName);
    }

    /** Store a job's raw config; silently ignores null ids/configs. */
    public void setConfig(String jobId, String config) throws Exception {
        if (jobId != null && config != null) {
            spawnDataStore.put(getJobPath(jobId) + configChildName, config);
        }
    }

    /**
     * Find all job ids in the SpawnDataStore, split them into chunks, and then load the jobs from each chunk in parallel
     *
     * @return A map of all jobs found in the SpawnDataStore
     */
    @Scaling(SETUP)
    public Map<String, IJob> loadJobs() {
        final Map<String, IJob> jobs = new HashMap<>();
        List<String> jobNodes = spawnDataStore.getChildrenNames(SPAWN_JOB_CONFIG_PATH);
        if (jobNodes != null) {
            logger.info("Using {} threads to pull data on {} jobs", loadThreads, jobNodes.size());
            // Use multiple threads to query the database, and gather the results together
            ExecutorService executorService = new ThreadPoolExecutor(
                    loadThreads, loadThreads, 1000L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<>(),
                    new ThreadFactoryBuilder().setDaemon(true).build());
            for (List<String> jobIdChunk : Lists.partition(jobNodes, jobChunkSize)) {
                executorService.submit(new MapChunkLoader(this, jobs, jobIdChunk));
            }
            logger.info("Waiting for job loading threads to finish...");
            // surface timeouts: a false return means some chunks may not have loaded
            boolean terminated =
                    MoreExecutors.shutdownAndAwaitTermination(executorService, 600, TimeUnit.SECONDS);
            if (!terminated) {
                logger.warn("Job loading did not finish within timeout; job map may be incomplete");
            }
            logger.info("Job loading complete");
        }
        return jobs;
    }

    /**
     * Internal class to fetch a chunk of jobIds, then push the results into a master map
     */
    private static class MapChunkLoader implements Runnable {

        private final Map<String, IJob> jobs;
        private final List<String> chunk;
        private final JobConfigManager jobConfigManager;

        private MapChunkLoader(JobConfigManager jobConfigManager, Map<String, IJob> jobs, List<String> chunk) {
            this.jobs = jobs;
            this.chunk = chunk;
            this.jobConfigManager = jobConfigManager;
        }

        @Override
        public void run() {
            try {
                // build locally, then merge once under the lock to minimize contention
                Map<String, IJob> rv = new HashMap<>();
                for (String jobId : chunk) {
                    loadChunk(rv, jobId);
                }
                synchronized (jobs) {
                    jobs.putAll(rv);
                }
            } catch (Exception e) {
                logger.error("While getting all jobs, error getting: {}", chunk, e);
                throw new RuntimeException(e);
            }
        }

        /** Load one job into {@code rv}; a failure is logged and skipped so the rest of the chunk still loads. */
        private void loadChunk(Map<String, IJob> rv, String jobId) {
            try {
                IJob jobFromQueryData =
                        jobConfigManager.createJobFromQueryData(jobId, jobConfigManager.fetchJobData(jobId));
                rv.put(jobId, jobFromQueryData);
            } catch (Exception ex) {
                logger.error("Failed while reconstituting job {}", jobId, ex);
            }
        }
    }

    /**
     * Delete a job's child znodes, its root node, and its entry under the job-config parent.
     *
     * @throws RuntimeException wrapping any datastore failure
     */
    public void deleteJob(String jobUUID) {
        try {
            String jobPath = getJobPath(jobUUID);
            // children first, then the job root itself
            for (String subnode : Arrays.asList(alertChildName, queryConfigChildName, taskChildName,
                                                tasksChildName, configChildName, brokerInfoChildName)) {
                spawnDataStore.delete(jobPath + subnode);
            }
            spawnDataStore.delete(jobPath);
            spawnDataStore.deleteChild(SPAWN_JOB_CONFIG_PATH, jobUUID);
        } catch (Exception e) {
            logger.warn("Failing to delete job, bailing", e);
            throw new RuntimeException(e);
        }
    }

    /** Root znode path for a job: {@code SPAWN_JOB_CONFIG_PATH/<jobId>}. */
    private static String getJobPath(String jobId) {
        return SPAWN_JOB_CONFIG_PATH + "/" + jobId;
    }
}