/** * Licensed to The Apereo Foundation under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * * The Apereo Foundation licenses this file to you under the Educational * Community License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of the License * at: * * http://opensource.org/licenses/ecl2.txt * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. * */ package org.opencastproject.serviceregistry.impl; import static com.entwinemedia.fn.Stream.$; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.commons.lang3.StringUtils.isBlank; import static org.opencastproject.job.api.AbstractJobProducer.ACCEPT_JOB_LOADS_EXCEEDING_PROPERTY; import static org.opencastproject.job.api.AbstractJobProducer.DEFAULT_ACCEPT_JOB_LOADS_EXCEEDING; import static org.opencastproject.job.api.Job.FailureReason.DATA; import static org.opencastproject.job.api.Job.Status.FAILED; import static org.opencastproject.job.jpa.JpaJob.fnToJob; import static org.opencastproject.security.api.SecurityConstants.ORGANIZATION_HEADER; import static org.opencastproject.security.api.SecurityConstants.USER_HEADER; import static org.opencastproject.serviceregistry.api.ServiceState.ERROR; import static org.opencastproject.serviceregistry.api.ServiceState.NORMAL; import static org.opencastproject.serviceregistry.api.ServiceState.WARNING; import static org.opencastproject.util.OsgiUtil.getOptContextProperty; import org.opencastproject.job.api.Job; import org.opencastproject.job.api.Job.Status; import org.opencastproject.job.jpa.JpaJob; import 
org.opencastproject.rest.RestConstants; import org.opencastproject.security.api.Organization; import org.opencastproject.security.api.OrganizationDirectoryService; import org.opencastproject.security.api.SecurityService; import org.opencastproject.security.api.TrustedHttpClient; import org.opencastproject.security.api.TrustedHttpClientException; import org.opencastproject.security.api.User; import org.opencastproject.security.api.UserDirectoryService; import org.opencastproject.serviceregistry.api.HostRegistration; import org.opencastproject.serviceregistry.api.IncidentService; import org.opencastproject.serviceregistry.api.Incidents; import org.opencastproject.serviceregistry.api.JaxbServiceStatistics; import org.opencastproject.serviceregistry.api.ServiceRegistration; import org.opencastproject.serviceregistry.api.ServiceRegistry; import org.opencastproject.serviceregistry.api.ServiceRegistryException; import org.opencastproject.serviceregistry.api.ServiceStatistics; import org.opencastproject.serviceregistry.api.SystemLoad; import org.opencastproject.serviceregistry.api.SystemLoad.NodeLoad; import org.opencastproject.serviceregistry.impl.jmx.HostsStatistics; import org.opencastproject.serviceregistry.impl.jmx.JobsStatistics; import org.opencastproject.serviceregistry.impl.jmx.ServicesStatistics; import org.opencastproject.serviceregistry.impl.jpa.HostRegistrationJpaImpl; import org.opencastproject.serviceregistry.impl.jpa.ServiceRegistrationJpaImpl; import org.opencastproject.systems.MatterhornConstants; import org.opencastproject.util.NotFoundException; import org.opencastproject.util.UrlSupport; import org.opencastproject.util.data.functions.Strings; import org.opencastproject.util.jmx.JmxUtil; import com.entwinemedia.fn.Fn; import com.entwinemedia.fn.Fn2; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.DateUtils; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; 
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpHead;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.message.BasicNameValuePair;
import org.osgi.framework.BundleContext;
import org.osgi.framework.InvalidSyntaxException;
import org.osgi.framework.ServiceReference;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.cm.ManagedService;
import org.osgi.service.component.ComponentContext;
import org.osgi.util.tracker.ServiceTracker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

import javax.management.ObjectInstance;
import javax.persistence.EntityManager;
import javax.persistence.EntityManagerFactory;
import javax.persistence.EntityTransaction;
import javax.persistence.LockModeType;
import javax.persistence.NoResultException;
import javax.persistence.PersistenceException;
import javax.persistence.Query;
import javax.persistence.RollbackException;
import javax.persistence.TemporalType;
import javax.persistence.TypedQuery;

/** JPA implementation of the {@link ServiceRegistry} */
public class ServiceRegistryJpaImpl implements ServiceRegistry, ManagedService {

  /** JPA persistence unit name */
  public static final String PERSISTENCE_UNIT = "org.opencastproject.common";

  /** Id of the workflow's start operation, needs to match the corresponding enum value in WorkflowServiceImpl */
  public static final String START_OPERATION = "START_OPERATION";

  /** Id of the workflow's start workflow operation, needs to match the corresponding enum value in
   * WorkflowServiceImpl */
  public static final String START_WORKFLOW = "START_WORKFLOW";

  /** Id of the workflow's resume operation, needs to match the corresponding enum value in WorkflowServiceImpl */
  public static final String RESUME = "RESUME";

  /** Identifier for the workflow service */
  public static final String TYPE_WORKFLOW = "org.opencastproject.workflow";

  /** The logging facility */
  static final Logger logger = LoggerFactory.getLogger(ServiceRegistryJpaImpl.class);

  /** The list of registered JMX beans */
  protected List<ObjectInstance> jmxBeans = new ArrayList<ObjectInstance>();

  /** Hosts statistics JMX type */
  private static final String JMX_HOSTS_STATISTICS_TYPE = "HostsStatistics";

  /** Services statistics JMX type */
  private static final String JMX_SERVICES_STATISTICS_TYPE = "ServicesStatistics";

  /** Jobs statistics JMX type */
  private static final String JMX_JOBS_STATISTICS_TYPE = "JobsStatistics";

  /** The JMX business object for hosts statistics */
  private HostsStatistics hostsStatistics;

  /** The JMX business object for services statistics */
  private ServicesStatistics servicesStatistics;

  /** The JMX business object for jobs statistics */
  private JobsStatistics jobsStatistics;

  /** The job currently being processed by the calling thread (one value per thread) */
  private static final ThreadLocal<Job> currentJob = new ThreadLocal<Job>();

  /** Configuration key for the maximum load */
  protected static final String OPT_MAXLOAD = "org.opencastproject.server.maxload";

  /** Configuration key for the dispatch interval in milliseconds */
  protected static final String OPT_DISPATCHINTERVAL = "dispatchinterval";

  /** Configuration key for the interval to check whether the hosts in the service registry are still alive [sec] */
  protected static final String OPT_HEARTBEATINTERVAL = "heartbeat.interval";

  /** Configuration key for the collection of job statistics */
  protected static final String OPT_JOBSTATISTICS = "jobstats.collect";

  /** Configuration key for the retrieval of service statistics: Do not consider jobs older than max_job_age
   * (in days) */
  protected static final String OPT_SERVICE_STATISTICS_MAX_JOB_AGE
          = "org.opencastproject.statistics.services.max_job_age";

  /** The http client to use when connecting to remote servers */
  protected TrustedHttpClient client = null;

  /** Minimum delay between job dispatching attempts, in milliseconds */
  static final long MIN_DISPATCH_INTERVAL = 1000;

  /** Default delay between job dispatching attempts, in milliseconds */
  static final long DEFAULT_DISPATCH_INTERVAL = 5000;

  /** Default jobs limit during dispatching
   * (larger value will fetch more entries from the database at the same time and increase RAM usage) */
  static final int DEFAULT_DISPATCH_JOBS_LIMIT = 100;

  /** Default setting on job statistics collection */
  static final boolean DEFAULT_JOB_STATISTICS = true;

  /** Default maximum age (in days) of jobs considered for service statistics retrieval */
  static final int DEFAULT_SERVICE_STATISTICS_MAX_JOB_AGE = 14;

  /** Default value for {@link #maxAttemptsBeforeErrorState} */
  private static final int MAX_FAILURE_BEFORE_ERROR_STATE = 1;

  /** The configuration key for setting {@link #maxAttemptsBeforeErrorState} */
  private static final String MAX_ATTEMPTS_CONFIG_KEY = "max.attempts";

  /** Number of failed jobs on a service before it is set to error state */
  protected int maxAttemptsBeforeErrorState = MAX_FAILURE_BEFORE_ERROR_STATE;

  /** Default delay between checking if hosts are still alive, in seconds */
  static final long DEFAULT_HEART_BEAT = 60;

  /** This host's base URL */
  protected String hostName;

  /** The base URL for job URLs */
  protected String jobHost;

  /** The factory used to generate the entity manager */
  protected EntityManagerFactory emf = null;

  /** Tracks services published locally and adds them to the service registry */
  protected RestServiceTracker tracker = null;

  /** The thread pool to use for dispatching queued jobs and checking on phantom services.
*/ protected ScheduledExecutorService scheduledExecutor = null; /** The security service */ protected SecurityService securityService = null; /** The user directory service */ protected UserDirectoryService userDirectoryService = null; /** The organization directory service */ protected OrganizationDirectoryService organizationDirectoryService = null; protected Incidents incidents; /** Whether to collect detailed job statistics */ protected boolean collectJobstats = DEFAULT_JOB_STATISTICS; /** Maximum age of jobs being considering for service statistics */ protected int maxJobAge = DEFAULT_SERVICE_STATISTICS_MAX_JOB_AGE; /** A static list of statuses that influence how load balancing is calculated */ protected static final List<Status> JOB_STATUSES_INFLUENCING_LOAD_BALANCING; static { JOB_STATUSES_INFLUENCING_LOAD_BALANCING = new ArrayList<Status>(); JOB_STATUSES_INFLUENCING_LOAD_BALANCING.add(Status.QUEUED); JOB_STATUSES_INFLUENCING_LOAD_BALANCING.add(Status.DISPATCHING); JOB_STATUSES_INFLUENCING_LOAD_BALANCING.add(Status.RUNNING); } /** The dispatcher priority list */ protected final Map<Long, String> dispatchPriorityList = new HashMap<>(); /** Whether to accept a job whose load exceeds the host’s max load */ protected Boolean acceptJobLoadsExeedingMaxLoad = true; /** OSGi DI */ void setEntityManagerFactory(EntityManagerFactory emf) { this.emf = emf; } public void activate(ComponentContext cc) { logger.info("Activate service registry"); // Find this host's url if (cc == null || StringUtils.isBlank(cc.getBundleContext().getProperty(MatterhornConstants.SERVER_URL_PROPERTY))) { hostName = UrlSupport.DEFAULT_BASE_URL; } else { hostName = cc.getBundleContext().getProperty(MatterhornConstants.SERVER_URL_PROPERTY); } // Clean all undispatchable jobs that were orphaned when this host was last deactivated cleanUndispatchableJobs(hostName); // Register JMX beans with statistics try { List<ServiceStatistics> serviceStatistics = getServiceStatistics(); hostsStatistics = new 
HostsStatistics(serviceStatistics); servicesStatistics = new ServicesStatistics(hostName, serviceStatistics); jobsStatistics = new JobsStatistics(hostName); jmxBeans.add(JmxUtil.registerMXBean(hostsStatistics, JMX_HOSTS_STATISTICS_TYPE)); jmxBeans.add(JmxUtil.registerMXBean(servicesStatistics, JMX_SERVICES_STATISTICS_TYPE)); jmxBeans.add(JmxUtil.registerMXBean(jobsStatistics, JMX_JOBS_STATISTICS_TYPE)); } catch (ServiceRegistryException e) { logger.error("Error registering JMX statistic beans {}", e); } // Find the jobs URL if (cc == null || StringUtils.isBlank(cc.getBundleContext().getProperty("org.opencastproject.jobs.url"))) { jobHost = hostName; } else { jobHost = cc.getBundleContext().getProperty("org.opencastproject.jobs.url"); } // Register this host try { float maxLoad = Runtime.getRuntime().availableProcessors(); if (cc != null && StringUtils.isNotBlank(cc.getBundleContext().getProperty(OPT_MAXLOAD))) { try { maxLoad = Float.parseFloat(cc.getBundleContext().getProperty(OPT_MAXLOAD)); logger.info("Max load has been manually to {}", maxLoad); } catch (NumberFormatException e) { logger.warn("Configuration key '{}' is not an integer. 
Falling back to the number of cores ({})", OPT_MAXLOAD, maxLoad); } } logger.info("Node maximum load set to {}", maxLoad); String address = InetAddress.getByName(URI.create(hostName).getHost()).getHostAddress(); long maxMemory = Runtime.getRuntime().maxMemory(); int cores = Runtime.getRuntime().availableProcessors(); registerHost(hostName, address, maxMemory, cores, maxLoad); } catch (Exception e) { throw new IllegalStateException("Unable to register host " + hostName + " in the service registry", e); } // Track any services from this host that need to be added to the service registry if (cc != null) { try { tracker = new RestServiceTracker(cc.getBundleContext()); tracker.open(true); } catch (InvalidSyntaxException e) { logger.error("Invalid filter syntax: {}", e); throw new IllegalStateException(e); } } // Whether a service accepts a job whose load exceeds the host’s max load if (cc != null) { acceptJobLoadsExeedingMaxLoad = getOptContextProperty(cc, ACCEPT_JOB_LOADS_EXCEEDING_PROPERTY).map(Strings.toBool) .getOrElse(DEFAULT_ACCEPT_JOB_LOADS_EXCEEDING); } } @Override public String getRegistryHostname() { return hostName; } public void deactivate() { logger.debug("deactivate"); for (ObjectInstance mbean : jmxBeans) { JmxUtil.unregisterMXBean(mbean); } if (tracker != null) { tracker.close(); } try { unregisterHost(hostName); } catch (ServiceRegistryException e) { throw new IllegalStateException("Unable to unregister host " + hostName + " from the service registry", e); } // Stop the job dispatcher if (scheduledExecutor != null) { scheduledExecutor.shutdownNow(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#createJob(java.lang.String, java.lang.String) */ @Override public Job createJob(String type, String operation) throws ServiceRegistryException { return createJob(this.hostName, type, operation, null, null, true, getCurrentJob(), 1.0f); } /** * {@inheritDoc} * * @see 
org.opencastproject.serviceregistry.api.ServiceRegistry#createJob(java.lang.String, java.lang.String, Float)
   */
  @Override
  public Job createJob(String type, String operation, Float jobLoad) throws ServiceRegistryException {
    // Created for this host, without arguments or payload, dispatchable, with the calling thread's current job as
    // parent.
    // NOTE(review): the host-based overload visible in this file takes a primitive float, so a null jobLoad would
    // fail while unboxing - confirm that callers never pass null.
    return createJob(this.hostName, type, operation, null, null, true, getCurrentJob(), jobLoad);
  }

  /**
   * {@inheritDoc}
   *
   * @see org.opencastproject.serviceregistry.api.ServiceRegistry#createJob(java.lang.String, java.lang.String,
   *      java.util.List)
   */
  @Override
  public Job createJob(String type, String operation, List<String> arguments) throws ServiceRegistryException {
    // Created for this host, without payload, dispatchable, parent is the calling thread's current job, load 1.0
    return createJob(this.hostName, type, operation, arguments, null, true, getCurrentJob(), 1.0f);
  }

  /**
   * {@inheritDoc}
   *
   * @see org.opencastproject.serviceregistry.api.ServiceRegistry#createJob(java.lang.String, java.lang.String,
   *      java.util.List, Float)
   */
  @Override
  public Job createJob(String type, String operation, List<String> arguments, Float jobLoad)
          throws ServiceRegistryException {
    // Created for this host, without payload, dispatchable, parent is the calling thread's current job
    return createJob(this.hostName, type, operation, arguments, null, true, getCurrentJob(), jobLoad);
  }

  /**
   * {@inheritDoc}
   *
   * @see org.opencastproject.serviceregistry.api.ServiceRegistry#createJob(java.lang.String, java.lang.String,
   *      java.util.List, java.lang.String)
   */
  @Override
  public Job createJob(String type, String operation, List<String> arguments, String payload)
          throws ServiceRegistryException {
    // Created for this host, dispatchable, parent is the calling thread's current job, load 1.0
    return createJob(this.hostName, type, operation, arguments, payload, true, getCurrentJob(), 1.0f);
  }

  /**
   * {@inheritDoc}
   *
   * @see org.opencastproject.serviceregistry.api.ServiceRegistry#createJob(java.lang.String, java.lang.String,
   *      java.util.List, java.lang.String, Float)
   */
  @Override
  public Job createJob(String type, String operation, List<String> arguments, String payload, Float jobLoad)
          throws ServiceRegistryException {
    // Created for this host, dispatchable, parent is the calling thread's current job
    return createJob(this.hostName, type, operation, arguments, payload, true, getCurrentJob(), jobLoad);
  }

  /**
   * {@inheritDoc}
   *
   * @see
org.opencastproject.serviceregistry.api.ServiceRegistry#createJob(java.lang.String, java.lang.String,
   *      java.util.List, String, boolean)
   */
  @Override
  public Job createJob(String type, String operation, List<String> arguments, String payload, boolean dispatchable)
          throws ServiceRegistryException {
    // Created for this host with the calling thread's current job as parent and a load of 1.0
    return createJob(this.hostName, type, operation, arguments, payload, dispatchable, getCurrentJob(), 1.0f);
  }

  /**
   * {@inheritDoc}
   *
   * @see org.opencastproject.serviceregistry.api.ServiceRegistry#createJob(java.lang.String, java.lang.String,
   *      java.util.List, java.lang.String, boolean, Float)
   */
  @Override
  public Job createJob(String type, String operation, List<String> arguments, String payload, boolean dispatchable,
          Float jobLoad) throws ServiceRegistryException {
    // Created for this host with the calling thread's current job as parent.
    // NOTE(review): the host-based overload visible in this file takes a primitive float, so a null jobLoad would
    // fail while unboxing - confirm that callers never pass null.
    return createJob(this.hostName, type, operation, arguments, payload, dispatchable, getCurrentJob(), jobLoad);
  }

  /**
   * {@inheritDoc}
   *
   * @see org.opencastproject.serviceregistry.api.ServiceRegistry#createJob(String, String, List, String, boolean, Job)
   */
  @Override
  public Job createJob(String type, String operation, List<String> arguments, String payload, boolean dispatchable,
          Job parentJob) throws ServiceRegistryException {
    // Created for this host with the explicitly given parent job and a load of 1.0
    return createJob(this.hostName, type, operation, arguments, payload, dispatchable, parentJob, 1.0f);
  }

  /**
   * {@inheritDoc}
   *
   * @see org.opencastproject.serviceregistry.api.ServiceRegistry#createJob(java.lang.String, java.lang.String,
   *      java.util.List, java.lang.String, boolean, org.opencastproject.job.api.Job, Float)
   */
  @Override
  public Job createJob(String type, String operation, List<String> arguments, String payload, boolean dispatchable,
          Job parentJob, Float jobLoad) throws ServiceRegistryException {
    // Created for this host with the explicitly given parent job and load
    return createJob(this.hostName, type, operation, arguments, payload, dispatchable, parentJob, jobLoad);
  }

  /**
   * Creates a job on a remote host with a jobLoad of 1.0.
*/ public Job createJob(String host, String serviceType, String operation, List<String> arguments, String payload, boolean dispatchable, Job parentJob) throws ServiceRegistryException { return createJob(host, serviceType, operation, arguments, payload, dispatchable, parentJob, 1.0f); } /** * Creates a job on a remote host. */ public Job createJob(String host, String serviceType, String operation, List<String> arguments, String payload, boolean dispatchable, Job parentJob, float jobLoad) throws ServiceRegistryException { if (StringUtils.isBlank(host)) { throw new IllegalArgumentException("Host can't be null"); } if (StringUtils.isBlank(serviceType)) { throw new IllegalArgumentException("Service type can't be null"); } if (StringUtils.isBlank(operation)) { throw new IllegalArgumentException("Operation can't be null"); } EntityManager em = null; EntityTransaction tx = null; try { em = emf.createEntityManager(); tx = em.getTransaction(); tx.begin(); ServiceRegistrationJpaImpl creatingService = getServiceRegistration(em, serviceType, host); if (creatingService == null) { throw new ServiceRegistryException("No service registration exists for type '" + serviceType + "' on host '" + host + "'"); } if (creatingService.getHostRegistration().isMaintenanceMode()) { logger.warn("Creating a job from {}, which is currently in maintenance mode.", creatingService.getHost()); } else if (!creatingService.getHostRegistration().isActive()) { logger.warn("Creating a job from {}, which is currently inactive.", creatingService.getHost()); } User currentUser = securityService.getUser(); Organization currentOrganization = securityService.getOrganization(); JpaJob jpaJob = new JpaJob(currentUser, currentOrganization, creatingService, operation, arguments, payload, dispatchable, jobLoad); // Bind the given parent job to the new job if (parentJob != null) { // Get the JPA instance of the parent job JpaJob jpaParentJob; try { jpaParentJob = getJpaJob(parentJob.getId()); } catch 
(NotFoundException e) { logger.error("{} not found in the persistence context", parentJob); throw new ServiceRegistryException(e); } jpaJob.setParentJob(jpaParentJob); // Get the JPA instance of the root job JpaJob jpaRootJob = jpaParentJob; if (parentJob.getRootJobId() != null) { try { jpaRootJob = getJpaJob(parentJob.getRootJobId()); } catch (NotFoundException e) { logger.error("job with id {} not found in the persistence context", parentJob.getRootJobId()); throw new ServiceRegistryException(e); } } jpaJob.setRootJob(jpaRootJob); } // if this job is not dispatchable, it must be handled by the host that has created it if (dispatchable) { jpaJob.setStatus(Status.QUEUED); } else { jpaJob.setProcessorServiceRegistration(creatingService); } em.persist(jpaJob); tx.commit(); setJobUri(jpaJob); Job job = jpaJob.toJob(); return job; } catch (RollbackException e) { if (tx != null && tx.isActive()) { tx.rollback(); } throw e; } finally { if (em != null) em.close(); } } @Override public void removeJob(long jobId) throws NotFoundException, ServiceRegistryException { if (jobId < 1) throw new NotFoundException("Job ID must be greater than zero (0)"); logger.debug("Start deleting job with ID '{}'", jobId); EntityManager em = null; EntityTransaction tx = null; try { em = emf.createEntityManager(); tx = em.getTransaction(); JpaJob job = em.find(JpaJob.class, jobId); if (job == null) throw new NotFoundException("Job with ID '" + jobId + "' not found"); deleteChildJobs(jobId); tx.begin(); em.remove(job); tx.commit(); logger.debug("Job with ID '{}' deleted", jobId); } catch (NotFoundException e) { throw e; } catch (Exception e) { logger.error("Unable to remove job {}: {}", jobId, e); if (tx.isActive()) { tx.rollback(); } throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } private void deleteChildJobs(long jobId) throws ServiceRegistryException { List<Job> childJobs = getChildJobs(jobId); if (childJobs.isEmpty()) { logger.debug("No child jobs of job 
'{}' found to delete.", jobId); return; } logger.debug("Start deleting child jobs of job '{}'", jobId); EntityManager em = null; EntityTransaction tx = null; try { em = emf.createEntityManager(); tx = em.getTransaction(); for (int i = childJobs.size() - 1; i >= 0; i--) { Job job = childJobs.get(i); JpaJob jobToDelete = em.find(JpaJob.class, job.getId()); tx.begin(); em.remove(jobToDelete); tx.commit(); logger.debug("Job '{}' deleted", job.getId()); } logger.debug("Deleted all child jobs of job '{}'", jobId); } catch (Exception e) { logger.error("Unable to remove child jobs from {}: {}", jobId, e); if (tx.isActive()) { tx.rollback(); } throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } @Override public void removeParentlessJobs(int lifetime) throws ServiceRegistryException { EntityManager em = null; EntityTransaction tx = null; Date d = DateUtils.addDays(new Date(), -lifetime); int count = 0; try { em = emf.createEntityManager(); tx = em.getTransaction(); TypedQuery<JpaJob> query = em.createNamedQuery("Job.withoutParent", JpaJob.class); List<JpaJob> jobs = query.getResultList(); tx.begin(); for (JpaJob jpaJob : jobs) { Job job = jpaJob.toJob(); if (job.getDateCreated().after(d)) continue; // DO NOT DELETE workflow instances and operations! 
if (START_OPERATION.equals(job.getOperation()) || START_WORKFLOW.equals(job.getOperation()) || RESUME.equals(job.getOperation())) continue; if (job.getStatus().isTerminated()) { try { removeJob(job.getId()); logger.debug("Parentless job '{}' removed", job.getId()); count++; } catch (NotFoundException e) { logger.debug("Parentless job '{} ' not found in database: {}", job.getId(), e); } } } tx.commit(); if (count > 0) logger.info("Successfully removed {} parentless jobs", count); else logger.info("No parentless jobs found to remove", count); } finally { if (em != null) em.close(); } return; } /** * {@inheritDoc} * * @see org.osgi.service.cm.ManagedService#updated(java.util.Dictionary) */ @Override @SuppressWarnings("rawtypes") public void updated(Dictionary properties) throws ConfigurationException { logger.info("Updating service registry"); String maxAttempts = StringUtils.trimToNull((String) properties.get(MAX_ATTEMPTS_CONFIG_KEY)); if (maxAttempts != null) { try { maxAttemptsBeforeErrorState = Integer.parseInt(maxAttempts); logger.info("Set max attempts before error state to {}", maxAttempts); } catch (NumberFormatException e) { logger.warn("Can not set max attempts before error state to {}. 
{} must be an integer", maxAttempts, MAX_ATTEMPTS_CONFIG_KEY); } } long dispatchInterval = DEFAULT_DISPATCH_INTERVAL; String dispatchIntervalString = StringUtils.trimToNull((String) properties.get(OPT_DISPATCHINTERVAL)); if (StringUtils.isNotBlank(dispatchIntervalString)) { try { dispatchInterval = Long.parseLong(dispatchIntervalString); } catch (Exception e) { logger.warn("Dispatch interval '{}' is malformed, setting to {}", dispatchIntervalString, MIN_DISPATCH_INTERVAL); dispatchInterval = MIN_DISPATCH_INTERVAL; } if (dispatchInterval == 0) { logger.info("Dispatching disabled"); } else if (dispatchInterval < MIN_DISPATCH_INTERVAL) { logger.warn("Dispatch interval {} ms too low, adjusting to {}", dispatchInterval, MIN_DISPATCH_INTERVAL); dispatchInterval = MIN_DISPATCH_INTERVAL; } else { logger.info("Dispatch interval set to {} ms", dispatchInterval); } } long heartbeatInterval = DEFAULT_HEART_BEAT; String heartbeatIntervalString = StringUtils.trimToNull((String) properties.get(OPT_HEARTBEATINTERVAL)); if (StringUtils.isNotBlank(heartbeatIntervalString)) { try { heartbeatInterval = Long.parseLong(heartbeatIntervalString); } catch (Exception e) { logger.warn("Heartbeat interval '{}' is malformed, setting to {}", heartbeatIntervalString, DEFAULT_HEART_BEAT); heartbeatInterval = DEFAULT_HEART_BEAT; } if (heartbeatInterval == 0) { logger.info("Heartbeat disabled"); } else if (heartbeatInterval < 0) { logger.warn("Heartbeat interval {} minutes too low, adjusting to {}", heartbeatInterval, DEFAULT_HEART_BEAT); heartbeatInterval = DEFAULT_HEART_BEAT; } else { logger.info("Dispatch interval set to {} minutes", heartbeatInterval); } } String jobStatsString = StringUtils.trimToNull((String) properties.get(OPT_JOBSTATISTICS)); if (StringUtils.isNotBlank(jobStatsString)) { try { collectJobstats = Boolean.valueOf(jobStatsString); } catch (Exception e) { logger.warn("Job statistics collection flag '{}' is malformed, setting to {}", jobStatsString, DEFAULT_JOB_STATISTICS); 
collectJobstats = DEFAULT_JOB_STATISTICS; } } String maxJobAgeString = StringUtils.trimToNull((String) properties.get(OPT_SERVICE_STATISTICS_MAX_JOB_AGE)); if (maxJobAgeString != null) { try { maxJobAge = Integer.parseInt(maxJobAgeString); logger.info("Set service statistics max job age to {}", maxJobAgeString); } catch (NumberFormatException e) { logger.warn("Can not set service statistics max job age to {}. {} must be an integer", maxJobAgeString, OPT_SERVICE_STATISTICS_MAX_JOB_AGE); } } // Stop the current scheduled executors so we can configure new ones if (scheduledExecutor != null) { scheduledExecutor.shutdown(); } scheduledExecutor = Executors.newScheduledThreadPool(2); // Schedule the service heartbeat if the interval is > 0 if (heartbeatInterval > 0) { logger.debug("Starting service heartbeat at a custom interval of {}s", heartbeatInterval); scheduledExecutor.scheduleWithFixedDelay(new JobProducerHeartbeat(), heartbeatInterval, heartbeatInterval, TimeUnit.SECONDS); } // Schedule the job dispatching. if (dispatchInterval > 0) { logger.debug("Starting job dispatching at a custom interval of {}s", dispatchInterval / 1000); scheduledExecutor.scheduleWithFixedDelay(new JobDispatcher(), dispatchInterval, dispatchInterval, TimeUnit.MILLISECONDS); } } /** * OSGI callback when the configuration is updated. This method is only here to prevent the * configuration admin service from calling the service deactivate and activate methods * for a config update. It does not have to do anything as the updates are handled by updated(). 
*/ public void modified(Map<String, Object> config) throws ConfigurationException { logger.debug("Modified serviceregistry"); } private JpaJob getJpaJob(long id) throws NotFoundException, ServiceRegistryException { EntityManager em = null; try { em = emf.createEntityManager(); JpaJob jpaJob = em.find(JpaJob.class, id); if (jpaJob == null) { throw new NotFoundException("Job " + id + " not found"); } // JPA's caches can be out of date if external changes (e.g. another node in the cluster) have been made to // this row in the database em.refresh(jpaJob); setJobUri(jpaJob); return jpaJob; } catch (Exception e) { if (e instanceof NotFoundException) { throw (NotFoundException) e; } else { throw new ServiceRegistryException(e); } } finally { if (em != null) em.close(); } } @Override public Job getJob(long id) throws NotFoundException, ServiceRegistryException { return getJpaJob(id).toJob(); } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getCurrentJob() */ @Override public Job getCurrentJob() { return currentJob.get(); } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#setCurrentJob(Job) */ @Override public void setCurrentJob(Job job) { currentJob.set(job); } private JpaJob updateJob(JpaJob job) throws ServiceRegistryException { EntityManager em = null; try { em = emf.createEntityManager(); Job oldJob = getJob(job.getId()); JpaJob jpaJob = updateInternal(em, job); // All WorkflowService Jobs will be ignored if (oldJob.getStatus() != job.getStatus() && !TYPE_WORKFLOW.equals(job.getJobType())) { updateServiceForFailover(job); } return jpaJob; } catch (PersistenceException e) { throw new ServiceRegistryException(e); } catch (NotFoundException e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } @Override public Job updateJob(Job job) throws ServiceRegistryException { JpaJob jpaJob = JpaJob.from(job); jpaJob.setProcessorServiceRegistration( 
(ServiceRegistrationJpaImpl) getServiceRegistration(job.getJobType(), job.getProcessingHost())); return updateJob(jpaJob).toJob(); } protected JpaJob setJobUri(JpaJob job) { try { job.setUri(new URI(jobHost + "/services/job/" + job.getId() + ".xml")); } catch (URISyntaxException e) { logger.warn("Can not set the job URI", e); } return job; } private Fn<JpaJob, JpaJob> fnSetJobUri() { return new Fn<JpaJob, JpaJob>() { @Override public JpaJob apply(JpaJob job) { return setJobUri(job); } }; } /** * Internal method to update a job, throwing unwrapped JPA exceptions. * * @param em * the current entity manager * @param job * the job to update * @return the updated job * @throws PersistenceException * if there is an exception thrown while persisting the job via JPA * @throws IllegalArgumentException */ protected JpaJob updateInternal(EntityManager em, JpaJob job) throws PersistenceException { EntityTransaction tx = em.getTransaction(); try { tx.begin(); JpaJob fromDb = em.find(JpaJob.class, job.getId()); if (fromDb == null) { throw new NoResultException(); } update(fromDb, job); em.merge(fromDb); tx.commit(); job.setVersion(fromDb.toJob().getVersion()); setJobUri(job); return job; } catch (PersistenceException e) { if (tx.isActive()) { tx.rollback(); } throw e; } } /** * Internal method to update the service registration state, throwing unwrapped JPA exceptions. 
* * @param em * the current entity manager * @param registration * the service registration to update * @return the updated service registration * @throws PersistenceException * if there is an exception thrown while persisting the job via JPA * @throws IllegalArgumentException */ private ServiceRegistration updateServiceState(EntityManager em, ServiceRegistrationJpaImpl registration) throws PersistenceException { EntityTransaction tx = em.getTransaction(); try { tx.begin(); ServiceRegistrationJpaImpl fromDb; fromDb = em.find(ServiceRegistrationJpaImpl.class, registration.getId()); if (fromDb == null) { throw new NoResultException(); } fromDb.setServiceState(registration.getServiceState()); fromDb.setStateChanged(registration.getStateChanged()); fromDb.setWarningStateTrigger(registration.getWarningStateTrigger()); fromDb.setErrorStateTrigger(registration.getErrorStateTrigger()); tx.commit(); servicesStatistics.updateService(registration); return registration; } catch (PersistenceException e) { if (tx.isActive()) { tx.rollback(); } throw e; } } /** * Sets the queue and runtimes and other elements of a persistent job based on a job that's been modified in memory. * Times on both the objects must be modified, since the in-memory job must not be stale. 
* * @param fromDb * The job from the database * @param jpaJob * The in-memory job */ private void update(JpaJob fromDb, JpaJob jpaJob) { final Job job = jpaJob.toJob(); final Date now = new Date(); final Status status = job.getStatus(); final Status fromDbStatus = fromDb.getStatus(); fromDb.setPayload(job.getPayload()); fromDb.setStatus(job.getStatus()); fromDb.setDispatchable(job.isDispatchable()); fromDb.setVersion(job.getVersion()); fromDb.setOperation(job.getOperation()); fromDb.setArguments(job.getArguments()); fromDb.setBlockedJobIds(job.getBlockedJobIds()); fromDb.setBlockingJobId(job.getBlockingJobId()); if (job.getDateCreated() == null) { jpaJob.setDateCreated(now); fromDb.setDateCreated(now); job.setDateCreated(now); } if (job.getProcessingHost() != null) { ServiceRegistrationJpaImpl processingService = (ServiceRegistrationJpaImpl) getServiceRegistration( job.getJobType(), job.getProcessingHost()); fromDb.setProcessorServiceRegistration(processingService); } if (Status.RUNNING.equals(status) && !Status.WAITING.equals(fromDbStatus)) { jpaJob.setDateStarted(now); jpaJob.setQueueTime(now.getTime() - job.getDateCreated().getTime()); fromDb.setDateStarted(now); fromDb.setQueueTime(now.getTime() - job.getDateCreated().getTime()); job.setDateStarted(now); job.setQueueTime(now.getTime() - job.getDateCreated().getTime()); } else if (Status.FAILED.equals(status)) { // failed jobs may not have even started properly fromDb.setDateCompleted(now); jpaJob.setDateCompleted(now); job.setDateCompleted(now); if (job.getDateStarted() != null) { jpaJob.setRunTime(now.getTime() - job.getDateStarted().getTime()); fromDb.setRunTime(now.getTime() - job.getDateStarted().getTime()); job.setRunTime(now.getTime() - job.getDateStarted().getTime()); } } else if (Status.FINISHED.equals(status)) { if (job.getDateStarted() == null) { // Some services (e.g. ingest) don't use job dispatching, since they start immediately and handle their own // lifecycle. 
In these cases, if the start date isn't set, use the date created as the start date jpaJob.setDateStarted(job.getDateCreated()); job.setDateStarted(job.getDateCreated()); } jpaJob.setDateCompleted(now); jpaJob.setRunTime(now.getTime() - job.getDateStarted().getTime()); fromDb.setDateCompleted(now); fromDb.setRunTime(now.getTime() - job.getDateStarted().getTime()); job.setDateCompleted(now); job.setRunTime(now.getTime() - job.getDateStarted().getTime()); } } /** * Fetches a host registration from persistence. * * @param em * an active entity manager * @param host * the host name * @return the host registration, or null if none exists */ protected HostRegistrationJpaImpl fetchHostRegistration(EntityManager em, String host) { Query query = em.createNamedQuery("HostRegistration.byHostName"); query.setParameter("host", host); try { return (HostRegistrationJpaImpl) query.getSingleResult(); } catch (NoResultException e) { logger.debug("No existing host registration for {}", host); return null; } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#registerHost(String, String, long, int, float) */ @Override public void registerHost(String host, String address, long memory, int cores, float maxLoad) throws ServiceRegistryException { EntityManager em = null; EntityTransaction tx = null; try { em = emf.createEntityManager(); tx = em.getTransaction(); tx.begin(); // Find the existing registrations for this host and if it exists, update it HostRegistrationJpaImpl hostRegistration = fetchHostRegistration(em, host); if (hostRegistration == null) { hostRegistration = new HostRegistrationJpaImpl(host, address, memory, cores, maxLoad, true, false); em.persist(hostRegistration); } else { hostRegistration.setIpAddress(address); hostRegistration.setMemory(memory); hostRegistration.setCores(cores); hostRegistration.setMaxLoad(maxLoad); hostRegistration.setOnline(true); em.merge(hostRegistration); } logger.info("Registering {} with a maximum load of {}", 
host, maxLoad); tx.commit(); hostsStatistics.updateHost(hostRegistration); } catch (Exception e) { if (tx != null && tx.isActive()) { tx.rollback(); } throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#unregisterHost(java.lang.String) */ @Override public void unregisterHost(String host) throws ServiceRegistryException { EntityManager em = null; EntityTransaction tx = null; try { em = emf.createEntityManager(); tx = em.getTransaction(); tx.begin(); HostRegistrationJpaImpl existingHostRegistration = fetchHostRegistration(em, host); if (existingHostRegistration == null) { throw new ServiceRegistryException("Host '" + host + "' is not currently registered, so it can not be unregistered"); } else { existingHostRegistration.setOnline(false); for (ServiceRegistration serviceRegistration : getServiceRegistrationsByHost(host)) { unRegisterService(serviceRegistration.getServiceType(), serviceRegistration.getHost()); } em.merge(existingHostRegistration); } logger.info("Unregistering {}", host); tx.commit(); hostsStatistics.updateHost(existingHostRegistration); } catch (Exception e) { if (tx != null && tx.isActive()) { tx.rollback(); } throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#enableHost(String) */ @Override public void enableHost(String host) throws ServiceRegistryException, NotFoundException { EntityManager em = null; EntityTransaction tx = null; try { em = emf.createEntityManager(); tx = em.getTransaction(); tx.begin(); // Find the existing registrations for this host and if it exists, update it HostRegistrationJpaImpl hostRegistration = fetchHostRegistration(em, host); if (hostRegistration == null) { throw new NotFoundException("Host '" + host + "' is currently not registered, so it can not be enabled"); } else { 
hostRegistration.setActive(true); em.merge(hostRegistration); } logger.info("Enabling {}", host); tx.commit(); tx.begin(); for (ServiceRegistration serviceRegistration : getServiceRegistrationsByHost(host)) { ServiceRegistrationJpaImpl registration = (ServiceRegistrationJpaImpl) serviceRegistration; registration.setActive(true); em.merge(registration); servicesStatistics.updateService(registration); } tx.commit(); hostsStatistics.updateHost(hostRegistration); } catch (NotFoundException e) { throw e; } catch (Exception e) { if (tx != null && tx.isActive()) { tx.rollback(); } throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#disableHost(String) */ @Override public void disableHost(String host) throws ServiceRegistryException, NotFoundException { EntityManager em = null; EntityTransaction tx = null; try { em = emf.createEntityManager(); tx = em.getTransaction(); tx.begin(); HostRegistrationJpaImpl hostRegistration = fetchHostRegistration(em, host); if (hostRegistration == null) { throw new NotFoundException("Host '" + host + "' is not currently registered, so it can not be disabled"); } else { hostRegistration.setActive(false); for (ServiceRegistration serviceRegistration : getServiceRegistrationsByHost(host)) { ServiceRegistrationJpaImpl registration = (ServiceRegistrationJpaImpl) serviceRegistration; registration.setActive(false); em.merge(registration); servicesStatistics.updateService(registration); } em.merge(hostRegistration); } logger.info("Disabling {}", host); tx.commit(); hostsStatistics.updateHost(hostRegistration); } catch (NotFoundException e) { throw e; } catch (Exception e) { if (tx != null && tx.isActive()) { tx.rollback(); } throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#registerService(java.lang.String, 
java.lang.String,
   *      java.lang.String)
   */
  @Override
  public ServiceRegistration registerService(String serviceType, String baseUrl, String path)
          throws ServiceRegistryException {
    // Services registered through this overload are not flagged as job producers
    final boolean jobProducer = false;
    return registerService(serviceType, baseUrl, path, jobProducer);
  }

  /**
   * {@inheritDoc}
   *
   * Jobs still assigned to this service on the given host are cleaned up first, then the service is set online.
   *
   * @see org.opencastproject.serviceregistry.api.ServiceRegistry#registerService(java.lang.String, java.lang.String,
   *      java.lang.String, boolean)
   */
  @Override
  public ServiceRegistration registerService(String serviceType, String baseUrl, String path, boolean jobProducer)
          throws ServiceRegistryException {
    cleanRunningJobs(serviceType, baseUrl);
    final ServiceRegistration onlineRegistration = setOnlineStatus(serviceType, baseUrl, path, true, jobProducer);
    return onlineRegistration;
  }

  /**
   * Looks up the registration of a service type on a host.
   *
   * @param em
   *          the entity manager used for the lookup
   * @param serviceType
   *          the service type
   * @param host
   *          the host the service runs on
   * @return the matching registration, or <code>null</code> if the query yields no result
   */
  protected ServiceRegistrationJpaImpl getServiceRegistration(EntityManager em, String serviceType, String host) {
    final Query lookup = em.createNamedQuery("ServiceRegistration.getRegistration");
    lookup.setParameter("serviceType", serviceType);
    lookup.setParameter("host", host);
    try {
      return (ServiceRegistrationJpaImpl) lookup.getSingleResult();
    } catch (NoResultException noMatch) {
      return null;
    }
  }

  /**
   * Sets the online status of a service registration.
* * @param serviceType * The job type * @param baseUrl * the host URL * @param online * whether the service is online or off * @param jobProducer * whether this service produces jobs for long running operations * @return the service registration */ protected ServiceRegistration setOnlineStatus(String serviceType, String baseUrl, String path, boolean online, Boolean jobProducer) throws ServiceRegistryException { if (isBlank(serviceType) || isBlank(baseUrl)) { throw new IllegalArgumentException("serviceType and baseUrl must not be blank"); } EntityManager em = null; EntityTransaction tx = null; try { em = emf.createEntityManager(); tx = em.getTransaction(); tx.begin(); HostRegistrationJpaImpl hostRegistration = fetchHostRegistration(em, baseUrl); if (hostRegistration == null) { throw new IllegalStateException( "A service registration can not be updated when it has no associated host registration"); } ServiceRegistrationJpaImpl registration = getServiceRegistration(em, serviceType, baseUrl); if (registration == null) { if (isBlank(path)) { // we can not create a new registration without a path throw new IllegalArgumentException("path must not be blank when registering new services"); } if (jobProducer == null) { // if we are not provided a value, consider it to be false registration = new ServiceRegistrationJpaImpl(hostRegistration, serviceType, path, false); } else { registration = new ServiceRegistrationJpaImpl(hostRegistration, serviceType, path, jobProducer); } em.persist(registration); } else { if (StringUtils.isNotBlank(path)) registration.setPath(path); registration.setOnline(online); if (jobProducer != null) { // if we are not provided a value, don't update the persistent value registration.setJobProducer(jobProducer); } em.merge(registration); } tx.commit(); hostsStatistics.updateHost(hostRegistration); servicesStatistics.updateService(registration); return registration; } catch (Exception e) { if (tx != null && tx.isActive()) { tx.rollback(); } throw new 
ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#unRegisterService(java.lang.String, java.lang.String) */ @Override public void unRegisterService(String serviceType, String baseUrl) throws ServiceRegistryException { logger.info("Unregistering Service " + serviceType + "@" + baseUrl); // TODO: create methods that accept an entity manager, so we can execute multiple queries using the same em and tx setOnlineStatus(serviceType, baseUrl, null, false, null); cleanRunningJobs(serviceType, baseUrl); } /** Find all undispatchable jobs that were orphaned when this host was last deactivated and set them to CANCELED. */ private void cleanUndispatchableJobs(String hostName) { EntityManager em = null; EntityTransaction tx = null; try { em = emf.createEntityManager(); tx = em.getTransaction(); tx.begin(); Query query = em.createNamedQuery("Job.undispatchable.status"); List<Integer> statuses = new ArrayList<Integer>(); statuses.add(Status.INSTANTIATED.ordinal()); statuses.add(Status.RUNNING.ordinal()); query.setParameter("statuses", statuses); @SuppressWarnings("unchecked") List<JpaJob> undispatchableJobs = query.getResultList(); for (JpaJob job : undispatchableJobs) { // Make sure the job was processed on this host String jobHost = ""; if (job.getProcessorServiceRegistration() != null) { jobHost = job.getProcessorServiceRegistration().getHost(); } if (!jobHost.equals(hostName)) { logger.debug("Will not cancel undispatchable job {}, it is running on a different host", job); } else { logger.info("Cancelling the running undispatchable job {}, it was orphaned on this host", job); job.setStatus(Status.CANCELED); em.merge(job); } } tx.commit(); } catch (Exception e) { logger.error("Unable to clean undispatchable jobs! 
{}", e.getMessage()); if (tx != null && tx.isActive()) { tx.rollback(); } } finally { if (em != null) em.close(); } } /** * Find all running jobs on this service and set them to RESET or CANCELED. * * @param serviceType * the service type * @param baseUrl * the base url * @throws ServiceRegistryException * if there is a problem communicating with the jobs database */ private void cleanRunningJobs(String serviceType, String baseUrl) throws ServiceRegistryException { EntityManager em = null; EntityTransaction tx = null; try { em = emf.createEntityManager(); tx = em.getTransaction(); tx.begin(); TypedQuery<JpaJob> query = em.createNamedQuery("Job.processinghost.status", JpaJob.class); query.setLockMode(LockModeType.PESSIMISTIC_WRITE); List<Integer> statuses = new ArrayList<Integer>(); statuses.add(Status.RUNNING.ordinal()); statuses.add(Status.DISPATCHING.ordinal()); statuses.add(Status.WAITING.ordinal()); query.setParameter("statuses", statuses); query.setParameter("host", baseUrl); query.setParameter("serviceType", serviceType); List<JpaJob> unregisteredJobs = query.getResultList(); for (JpaJob job : unregisteredJobs) { if (job.isDispatchable()) { em.refresh(job); // If this job has already been treated if (Status.CANCELED.equals(job.getStatus()) || Status.RESTART.equals(job.getStatus())) continue; if (job.getRootJob() != null && Status.PAUSED.equals(job.getRootJob().getStatus())) { JpaJob rootJob = job.getRootJob(); cancelAllChildren(rootJob, em); rootJob.setStatus(Status.RESTART); rootJob.setOperation(START_OPERATION); em.merge(rootJob); continue; } logger.info("Marking child jobs from job {} as canceled", job); cancelAllChildren(job, em); logger.info("Rescheduling lost job {}", job); job.setStatus(Status.RESTART); job.setProcessorServiceRegistration(null); } else { logger.info("Marking lost job {} as failed", job); job.setStatus(Status.FAILED); } em.merge(job); } tx.commit(); } catch (Exception e) { if (tx != null && tx.isActive()) { tx.rollback(); } throw new 
ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * Go through all the children recursively to set them in {@link Status#CANCELED} status * * @param job * the parent job * @param em * the entity manager */ private void cancelAllChildren(JpaJob job, EntityManager em) { for (JpaJob child : job.getChildJobs()) { em.refresh(child); if (Status.CANCELED.equals(job.getStatus())) continue; cancelAllChildren(child, em); child.setStatus(Status.CANCELED); em.merge(child); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#setMaintenanceStatus(java.lang.String, boolean) */ @Override public void setMaintenanceStatus(String baseUrl, boolean maintenance) throws NotFoundException { EntityManager em = null; EntityTransaction tx = null; try { em = emf.createEntityManager(); tx = em.getTransaction(); tx.begin(); HostRegistrationJpaImpl reg = fetchHostRegistration(em, baseUrl); if (reg == null) { throw new NotFoundException("Can not set maintenance mode on a host that has not been registered"); } reg.setMaintenanceMode(maintenance); em.merge(reg); tx.commit(); hostsStatistics.updateHost(reg); } catch (RollbackException e) { if (tx != null && tx.isActive()) { tx.rollback(); } throw e; } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getServiceRegistrations() */ @Override public List<ServiceRegistration> getServiceRegistrations() { EntityManager em = null; try { em = emf.createEntityManager(); return getServiceRegistrations(em); } finally { if (em != null) em.close(); } } @Override public Incidents incident() { return incidents; } @SuppressWarnings("unchecked") private List<ServiceRegistration> getOnlineServiceRegistrations() { EntityManager em = null; try { em = emf.createEntityManager(); return em.createNamedQuery("ServiceRegistration.getAllOnline").getResultList(); } finally { if (em != null) em.close(); } } /** * Gets all service 
registrations. * * @param em * the current entity manager * @return the list of service registrations */ @SuppressWarnings("unchecked") protected List<ServiceRegistration> getServiceRegistrations(EntityManager em) { return em.createNamedQuery("ServiceRegistration.getAll").getResultList(); } /** * Gets all host registrations * * @return the list of host registrations */ @Override public List<HostRegistration> getHostRegistrations() { EntityManager em = null; try { em = emf.createEntityManager(); return getHostRegistrations(em); } finally { if (em != null) em.close(); } } /** * Gets all host registrations * * @param em * the current entity manager * @return the list of host registrations */ @SuppressWarnings("unchecked") protected List<HostRegistration> getHostRegistrations(EntityManager em) { return em.createNamedQuery("HostRegistration.getAll").getResultList(); } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getChildJobs(long) */ @Override public List<Job> getChildJobs(long id) throws ServiceRegistryException { EntityManager em = null; try { em = emf.createEntityManager(); TypedQuery<JpaJob> query = em.createNamedQuery("Job.root.children", JpaJob.class); query.setParameter("id", id); List<JpaJob> jobs = query.getResultList(); if (jobs.size() == 0) { jobs = getChildren(em, id); } return $(jobs).sort(new Comparator<JpaJob>() { @Override public int compare(JpaJob job1, JpaJob job2) { if (job1.getDateCreated() == null || job2.getDateCreated() == null) { return 0; } else { return job1.getDateCreated().compareTo(job2.getDateCreated()); } } }).map(fnSetJobUri()).map(fnToJob()).toList(); } catch (Exception e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } @SuppressWarnings("unchecked") private List<JpaJob> getChildren(EntityManager em, long id) throws Exception { Query query = em.createNamedQuery("Job.children"); query.setParameter("id", id); List<JpaJob> childJobs = query.getResultList(); 
List<JpaJob> resultJobs = new ArrayList<>(childJobs); for (JpaJob childJob : childJobs) { resultJobs.addAll(getChildren(em, childJob.getId())); } return resultJobs; } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getJobs(java.lang.String, Status) */ @Override public List<Job> getJobs(String type, Status status) throws ServiceRegistryException { TypedQuery<JpaJob> query = null; EntityManager em = null; try { em = emf.createEntityManager(); if (type == null && status == null) { query = em.createNamedQuery("Job.all", JpaJob.class); } else if (type == null) { query = em.createNamedQuery("Job.status", JpaJob.class); query.setParameter("status", status.ordinal()); } else if (status == null) { query = em.createNamedQuery("Job.type", JpaJob.class); query.setParameter("serviceType", type); } else { query = em.createNamedQuery("Job", JpaJob.class); query.setParameter("status", status.ordinal()); query.setParameter("serviceType", type); } List<JpaJob> jobs = query.getResultList(); for (JpaJob job : jobs) { setJobUri(job); } return $(jobs).map(fnToJob()).toList(); } catch (Exception e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getActiveJobs() */ @Override public List<Job> getActiveJobs() throws ServiceRegistryException { List<Status> statuses = new ArrayList<Status>(); for (Status status : Status.values()) { if (status.isActive()) statuses.add(status); } EntityManager em = null; try { em = emf.createEntityManager(); List<JpaJob> jpaJobs = getJobsByStatus(em, statuses.toArray(new Status[statuses.size()])); List<Job> jobs = new ArrayList<Job>(jpaJobs.size()); for (JpaJob jpaJob : jpaJobs) { jobs.add(jpaJob.toJob()); } return jobs; } catch (Exception e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * Get the list of jobs with status from the given statuses. 
* * @param em * the entity manager * @param statuses * variable sized array of status values to test on jobs * @return list of jobs with status from statuses * @throws ServiceRegistryException * if there is a problem communicating with the jobs database */ public List<JpaJob> getJobsByStatus(EntityManager em, Status... statuses) throws ServiceRegistryException { if (statuses == null || statuses.length < 1) throw new IllegalArgumentException("At least one job status must be given."); List<Integer> ordinalStatuses = new ArrayList<>(); for (Status status : statuses) { ordinalStatuses.add(status.ordinal()); } TypedQuery<JpaJob> query = null; try { query = em.createNamedQuery("Job.statuses", JpaJob.class); query.setParameter("statuses", ordinalStatuses); List<JpaJob> jpaJobs = query.getResultList(); for (JpaJob jpaJob : jpaJobs) { setJobUri(jpaJob); } return jpaJobs; } catch (Exception e) { throw new ServiceRegistryException(e); } } /** * Gets jobs of all types that are in the given state. * * @param em the entity manager * @param offset apply offset to the db query if offset > 0 * @param limit apply limit to the db query if limit > 0 * @param statuses the job status should be one from the given statuses * @return the list of jobs waiting for dispatch * @throws ServiceRegistryException if there is a problem communicating with the jobs database */ protected List<JpaJob> getDispatchableJobsWithStatus(EntityManager em, int offset, int limit, Status... 
statuses) throws ServiceRegistryException { if (statuses == null) return Collections.EMPTY_LIST; List<Integer> statusesOrdinal = new ArrayList<>(statuses.length); for (Status status : statuses) { statusesOrdinal.add(status.ordinal()); } TypedQuery<JpaJob> query = null; try { query = em.createNamedQuery("Job.dispatchable.status", JpaJob.class); query.setParameter("statuses", statusesOrdinal); if (offset > 0) query.setFirstResult(offset); if (limit > 0) query.setMaxResults(limit); return query.getResultList(); } catch (Exception e) { throw new ServiceRegistryException(e); } } /** * Return dispatchable job ids, where the job status is RESTART or QUEUED and the job id is listed in the given set. * * @param em the entity manager * @param jobIds set with job id's interested in * @return list with dispatchable job id's from the given set, with job status RESTART or QUEUED * @throws ServiceRegistryException if there is a problem communicating with the jobs database */ protected List<Long> getDispatchableJobsWithIdFilter(EntityManager em, Set<Long> jobIds) throws ServiceRegistryException { if (jobIds == null || jobIds.isEmpty()) return Collections.EMPTY_LIST; Query query = null; try { query = em.createNamedQuery("Job.dispatchable.status.idfilter"); query.setParameter("jobids", dispatchPriorityList.keySet()); query.setParameter("statuses", Arrays.asList(Status.RESTART.ordinal(), Status.QUEUED.ordinal())); return query.getResultList(); } catch (Exception e) { throw new ServiceRegistryException(e); } } @SuppressWarnings("unchecked") protected List<Object[]> getAvgOperations(EntityManager em) throws ServiceRegistryException { Query query = null; try { query = em.createNamedQuery("Job.avgOperation"); return query.getResultList(); } catch (Exception e) { throw new ServiceRegistryException(e); } } @SuppressWarnings("unchecked") List<Object[]> getCountPerHostService(EntityManager em) throws ServiceRegistryException { Query query = null; try { query = 
em.createNamedQuery("Job.countPerHostService"); return query.getResultList(); } catch (Exception e) { throw new ServiceRegistryException(e); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#count(java.lang.String, Status) */ @Override public long count(String serviceType, Status status) throws ServiceRegistryException { EntityManager em = null; try { em = emf.createEntityManager(); Query query; if (serviceType == null && status == null) { query = em.createNamedQuery("Job.count.all"); } else if (serviceType == null) { query = em.createNamedQuery("Job.count.nullType"); query.setParameter("status", status.ordinal()); } else if (status == null) { query = em.createNamedQuery("Job.count.nullStatus"); query.setParameter("serviceType", serviceType); } else { query = em.createNamedQuery("Job.count"); query.setParameter("status", status.ordinal()); query.setParameter("serviceType", serviceType); } Number countResult = (Number) query.getSingleResult(); return countResult.longValue(); } catch (Exception e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#countByHost(java.lang.String, java.lang.String, * Status) */ @Override public long countByHost(String serviceType, String host, Status status) throws ServiceRegistryException { EntityManager em = null; try { em = emf.createEntityManager(); Query query = em.createNamedQuery("Job.countByHost"); query.setParameter("status", status.ordinal()); query.setParameter("serviceType", serviceType); query.setParameter("host", host); Number countResult = (Number) query.getSingleResult(); return countResult.longValue(); } catch (Exception e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#countByOperation(java.lang.String, java.lang.String, * Status) */ @Override 
public long countByOperation(String serviceType, String operation, Status status) throws ServiceRegistryException { EntityManager em = null; try { em = emf.createEntityManager(); Query query = em.createNamedQuery("Job.countByOperation"); query.setParameter("status", status.ordinal()); query.setParameter("serviceType", serviceType); query.setParameter("operation", operation); Number countResult = (Number) query.getSingleResult(); return countResult.longValue(); } catch (Exception e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#count(java.lang.String, java.lang.String, * java.lang.String, Status) */ @Override public long count(String serviceType, String host, String operation, Status status) throws ServiceRegistryException { if (StringUtils.isBlank(serviceType) || StringUtils.isBlank(host) || StringUtils.isBlank(operation) || status == null) throw new IllegalArgumentException("service type, host, operation, and status must be provided"); Query query = null; EntityManager em = null; try { em = emf.createEntityManager(); query = em.createNamedQuery("Job.fullMonty"); query.setParameter("status", status.ordinal()); query.setParameter("serviceType", serviceType); query.setParameter("operation", operation); Number countResult = (Number) query.getSingleResult(); return countResult.longValue(); } catch (Exception e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#countOfAbnormalServices() */ @Override public long countOfAbnormalServices() throws ServiceRegistryException { EntityManager em = null; try { em = emf.createEntityManager(); Query query = em.createNamedQuery("ServiceRegistration.countNotNormal"); Number count = (Number) query.getSingleResult(); return count.longValue(); } catch (Exception e) { throw new 
ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getServiceStatistics() */ @Override public List<ServiceStatistics> getServiceStatistics() throws ServiceRegistryException { Date now = new Date(); return getServiceStatistics( DateUtils.addDays(now, -maxJobAge), DateUtils.addDays(now, 1)); // Avoid glitches around 'now' by setting the endDate to 'tomorrow' } /** * Gets performance and runtime statistics for each known service registration. * For the statistics, only jobs created within the time interval [startDate, endDate] are being considered * * @param startDate * Only jobs created after this data are considered for statistics * @param endDate * Only jobs created before this data are considered for statistics * @return the service statistics * @throws ServiceRegistryException * if there is a problem accessing the service registry */ private List<ServiceStatistics> getServiceStatistics(Date startDate, Date endDate) throws ServiceRegistryException { EntityManager em = null; try { em = emf.createEntityManager(); Map<Long, JaxbServiceStatistics> statsMap = new HashMap<Long, JaxbServiceStatistics>(); // Make sure we also include the services that have no processing history so far List<ServiceRegistrationJpaImpl> services = em.createNamedQuery("ServiceRegistration.getAll").getResultList(); for (ServiceRegistrationJpaImpl s : services) { statsMap.put(s.getId(), new JaxbServiceStatistics(s)); } Query query = em.createNamedQuery("ServiceRegistration.statistics"); query.setParameter("minDateCreated", startDate, TemporalType.TIMESTAMP); query.setParameter("maxDateCreated", endDate, TemporalType.TIMESTAMP); List queryResults = query.getResultList(); for (Object result : queryResults) { Object[] oa = (Object[]) result; Number serviceRegistrationId = ((Number) oa[0]); if (serviceRegistrationId == null || serviceRegistrationId.longValue() == 0) continue; Status status = 
Status.values()[((Number) oa[1]).intValue()]; Number count = (Number) oa[2]; Number meanQueueTime = (Number) oa[3]; Number meanRunTime = (Number) oa[4]; // The statistics query returns a cartesian product, so we need to iterate over them to build up the objects JaxbServiceStatistics stats = statsMap.get(serviceRegistrationId.longValue()); if (stats == null) continue; // the status will be null if there are no jobs at all associated with this service registration if (status != null) { switch (status) { case RUNNING: stats.setRunningJobs(count.intValue()); break; case QUEUED: case DISPATCHING: stats.setQueuedJobs(count.intValue()); break; case FINISHED: stats.setMeanRunTime(meanRunTime.longValue()); stats.setMeanQueueTime(meanQueueTime.longValue()); stats.setFinishedJobs(count.intValue()); break; default: break; } } } List<ServiceStatistics> stats = new ArrayList<ServiceStatistics>(statsMap.values()); Collections.sort(stats, new Comparator<ServiceStatistics>() { @Override public int compare(ServiceStatistics o1, ServiceStatistics o2) { ServiceRegistration reg1 = o1.getServiceRegistration(); ServiceRegistration reg2 = o2.getServiceRegistration(); int typeComparison = reg1.getServiceType().compareTo(reg2.getServiceType()); return typeComparison == 0 ? reg1.getHost().compareTo(reg2.getHost()) : typeComparison; } }); return stats; } catch (Exception e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * Do not look at this, it will burn your eyes! This is due to JPA's inability to do a left outer join with join * conditions. 
* * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getServiceRegistrationsByLoad(java.lang.String) */ @Override public List<ServiceRegistration> getServiceRegistrationsByLoad(String serviceType) throws ServiceRegistryException { EntityManager em = null; try { em = emf.createEntityManager(); SystemLoad loadByHost = getHostLoads(em, true); List<HostRegistration> hostRegistrations = getHostRegistrations(); List<ServiceRegistration> serviceRegistrations = getServiceRegistrationsByType(serviceType); return getServiceRegistrationsByLoad(serviceType, serviceRegistrations, hostRegistrations, loadByHost); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getCurrentHostLoads(boolean) */ @Override public SystemLoad getCurrentHostLoads(boolean activeOnly) { EntityManager em = null; try { em = emf.createEntityManager(); return getHostLoads(em, activeOnly); } finally { if (em != null) em.close(); } } /** * Gets a map of hosts to the number of jobs currently loading that host * * @param em * the entity manager * @param activeOnly * if true, the map will include only hosts that are online and have non-maintenance mode services * @return the map of hosts to job counts */ SystemLoad getHostLoads(EntityManager em, boolean activeOnly) { final SystemLoad systemLoad = new SystemLoad(); // Find all jobs that are currently running on any given host, or get all of them Query q = em.createNamedQuery("ServiceRegistration.hostloads"); List<Integer> statuses = new LinkedList<Integer>(); for (Status status : JOB_STATUSES_INFLUENCING_LOAD_BALANCING) { statuses.add(status.ordinal()); } q.setParameter("statuses", statuses); // Accumulate the numbers for relevant job statuses per host for (Object result : q.getResultList()) { Object[] resultArray = (Object[]) result; ServiceRegistrationJpaImpl service = (ServiceRegistrationJpaImpl) resultArray[0]; // Workflow related jobs are not counting. 
Workflows are load balanced by the workflow service directly if (TYPE_WORKFLOW.equals(service.getServiceType())) continue; Status status = Status.values()[(int) resultArray[1]]; float load = ((Number) resultArray[2]).floatValue(); if (activeOnly && (service.isInMaintenanceMode() || !service.isOnline())) { continue; } // Only queued, running and dispatching jobs are adding to the load, so every other status is discarded if (status == null || !JOB_STATUSES_INFLUENCING_LOAD_BALANCING.contains(status)) { load = 0.0f; } String host = service.getHost(); // Add the service registration NodeLoad serviceLoad; if (systemLoad.containsHost(host)) { serviceLoad = systemLoad.get(host); serviceLoad.setLoadFactor(serviceLoad.getLoadFactor() + load); } else { serviceLoad = new NodeLoad(host, load); } systemLoad.addNodeLoad(serviceLoad); } // This is important, otherwise services which have no current load are not listed in the output! for (HostRegistration h : getHostRegistrations(em)) { if (!systemLoad.containsHost(h.getBaseUrl())) { systemLoad.addNodeLoad(new NodeLoad(h.getBaseUrl(), 0.0f)); } } return systemLoad; } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getServiceRegistrationsByType(java.lang.String) */ @SuppressWarnings("unchecked") @Override public List<ServiceRegistration> getServiceRegistrationsByType(String serviceType) throws ServiceRegistryException { EntityManager em = null; try { em = emf.createEntityManager(); return em.createNamedQuery("ServiceRegistration.getByType").setParameter("serviceType", serviceType) .getResultList(); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getServiceRegistrationsByHost(java.lang.String) */ @SuppressWarnings("unchecked") @Override public List<ServiceRegistration> getServiceRegistrationsByHost(String host) throws ServiceRegistryException { EntityManager em = null; try { em = emf.createEntityManager(); return 
em.createNamedQuery("ServiceRegistration.getByHost").setParameter("host", host).getResultList(); } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getServiceRegistration(java.lang.String, * java.lang.String) */ @Override public ServiceRegistration getServiceRegistration(String serviceType, String host) { EntityManager em = null; try { em = emf.createEntityManager(); return getServiceRegistration(em, serviceType, host); } finally { if (em != null) em.close(); } } /** * A custom ServiceTracker that registers all locally published servlets so clients can find the most appropriate * service on the network to handle new jobs. */ class RestServiceTracker extends ServiceTracker { protected static final String FILTER = "(&(objectClass=javax.servlet.Servlet)(" + RestConstants.SERVICE_PATH_PROPERTY + "=*))"; protected BundleContext bundleContext = null; RestServiceTracker(BundleContext bundleContext) throws InvalidSyntaxException { super(bundleContext, bundleContext.createFilter(FILTER), null); this.bundleContext = bundleContext; } /** * {@inheritDoc} * * @see org.osgi.util.tracker.ServiceTracker#open(boolean) */ @Override public void open(boolean trackAllServices) { super.open(trackAllServices); try { ServiceReference[] references = bundleContext.getAllServiceReferences(null, FILTER); if (references != null) { for (ServiceReference ref : references) { addingService(ref); } } } catch (InvalidSyntaxException e) { throw new IllegalStateException("The tracker filter '" + FILTER + "' has syntax errors", e); } } @Override public Object addingService(ServiceReference reference) { String serviceType = (String) reference.getProperty(RestConstants.SERVICE_TYPE_PROPERTY); String servicePath = (String) reference.getProperty(RestConstants.SERVICE_PATH_PROPERTY); boolean publishFlag = (Boolean) reference.getProperty(RestConstants.SERVICE_PUBLISH_PROPERTY); boolean jobProducer = (Boolean) 
reference.getProperty(RestConstants.SERVICE_JOBPRODUCER_PROPERTY); // Only register services that have the "publish" flag set to "true" if (publishFlag) { try { registerService(serviceType, hostName, servicePath, jobProducer); } catch (ServiceRegistryException e) { logger.warn("Unable to register job producer of type " + serviceType + " on host " + hostName); } } else { logger.debug("Not registering service " + serviceType + " in service registry by configuration"); } return super.addingService(reference); } @Override public void removedService(ServiceReference reference, Object service) { String serviceType = (String) reference.getProperty(RestConstants.SERVICE_TYPE_PROPERTY); boolean publishFlag = (Boolean) reference.getProperty(RestConstants.SERVICE_PUBLISH_PROPERTY); // Services that have the "publish" flag set to "true" have been registered before. if (publishFlag) { try { unRegisterService(serviceType, hostName); } catch (ServiceRegistryException e) { logger.warn("Unable to unregister job producer of type " + serviceType + " on host " + hostName); } } else { logger.trace("Service " + reference + " was never registered"); } super.removedService(reference, service); } } /** * Sets the trusted http client. * * @param client * the trusted http client */ void setTrustedHttpClient(TrustedHttpClient client) { this.client = client; } /** * Callback for setting the security service. * * @param securityService * the securityService to set */ public void setSecurityService(SecurityService securityService) { this.securityService = securityService; } /** * Callback for setting the user directory service. * * @param userDirectoryService * the userDirectoryService to set */ public void setUserDirectoryService(UserDirectoryService userDirectoryService) { this.userDirectoryService = userDirectoryService; } /** * Sets a reference to the organization directory service. 
* * @param organizationDirectory * the organization directory */ public void setOrganizationDirectoryService(OrganizationDirectoryService organizationDirectory) { this.organizationDirectoryService = organizationDirectory; } /** OSGi DI. */ public void setIncidentService(IncidentService incidentService) { // Manually resolve the cyclic dependency between the incident service and the service registry ((OsgiIncidentService) incidentService).setServiceRegistry(this); this.incidents = new Incidents(this, incidentService); } /** * Update the jobs failure history and the service status with the given information. All these data are then use for * the jobs failover strategy. Only the terminated job (with FAILED or FINISHED status) are taken into account. * * @param job * the current job that failed/succeeded * @throws ServiceRegistryException * @throws IllegalArgumentException */ private void updateServiceForFailover(JpaJob job) throws IllegalArgumentException, ServiceRegistryException { if (job.getStatus() != Status.FAILED && job.getStatus() != Status.FINISHED) return; job.setStatus(job.getStatus(), job.getFailureReason()); // At this point, the only possible states for the current service are NORMAL and WARNING, // the services in ERROR state will not be chosen by the dispatcher ServiceRegistrationJpaImpl currentService = job.getProcessorServiceRegistration(); if (currentService == null) return; EntityManager em = emf.createEntityManager(); try { em = emf.createEntityManager(); // Job is finished with a failure if (job.getStatus() == FAILED && !DATA.equals(job.getFailureReason())) { // Services in WARNING or ERROR state triggered by current job List<ServiceRegistrationJpaImpl> relatedWarningOrErrorServices = getRelatedWarningErrorServices(job); // Before this job failed there was at least one job failed with this job signature on any service if (relatedWarningOrErrorServices.size() > 0) { for (ServiceRegistrationJpaImpl relatedService : relatedWarningOrErrorServices) { 
// Skip current service from the list if (currentService.equals(relatedService)) continue; // Reset the WARNING job to NORMAL if (relatedService.getServiceState() == WARNING) { logger.info("State reset to NORMAL for related service {} on host {}", relatedService.getServiceType(), relatedService.getHost()); relatedService.setServiceState(NORMAL, job.toJob().getSignature()); } // Reset the ERROR job to WARNING else if (relatedService.getServiceState() == ERROR) { logger.info("State reset to WARNING for related service {} on host {}", relatedService.getServiceType(), relatedService.getHost()); relatedService.setServiceState(WARNING, relatedService.getWarningStateTrigger()); } updateServiceState(em, relatedService); } } // This is the first job with this signature failing on any service else { // Set the current service to WARNING state if (currentService.getServiceState() == NORMAL) { logger.info("State set to WARNING for current service {} on host {}", currentService.getServiceType(), currentService.getHost()); currentService.setServiceState(WARNING, job.toJob().getSignature()); updateServiceState(em, currentService); } // The current service already is in WARNING state and max attempts is reached else if (getHistorySize(currentService) >= maxAttemptsBeforeErrorState) { logger.info("State set to ERROR for current service {} on host {}", currentService.getServiceType(), currentService.getHost()); currentService.setServiceState(ERROR, job.toJob().getSignature()); updateServiceState(em, currentService); } } } // Job is finished without failure else if (job.getStatus() == Status.FINISHED) { // If the service was in warning state reset to normal state if (currentService.getServiceState() == WARNING) { logger.info("State reset to NORMAL for current service {} on host {}", currentService.getServiceType(), currentService.getHost()); currentService.setServiceState(NORMAL); updateServiceState(em, currentService); } // Services in WARNING state triggered by current job 
List<ServiceRegistrationJpaImpl> relatedWarningServices = getRelatedWarningServices(job); // Sets all related services to error state for (ServiceRegistrationJpaImpl relatedService : relatedWarningServices) { logger.info("State set to ERROR for related service {} on host {}", currentService.getServiceType(), currentService.getHost()); relatedService.setServiceState(ERROR, job.toJob().getSignature()); updateServiceState(em, relatedService); } } } finally { if (em != null) em.close(); } } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#sanitize(java.lang.String, java.lang.String) */ @Override public void sanitize(String serviceType, String host) throws NotFoundException { EntityManager em = null; try { em = emf.createEntityManager(); ServiceRegistrationJpaImpl service = getServiceRegistration(em, serviceType, host); if (service == null) throw new NotFoundException(""); logger.info("State reset to NORMAL for service {} on host {} through santize method", service.getServiceType(), service.getHost()); service.setServiceState(NORMAL); updateServiceState(em, service); } finally { if (em != null) em.close(); } } /** * Gets the failed jobs history for the given service registration * * @param serviceRegistration * @return the failed jobs history size * @throws IllegalArgumentException * if parameter is null * @throws ServiceRegistryException */ private int getHistorySize(ServiceRegistration serviceRegistration) throws IllegalArgumentException, ServiceRegistryException { if (serviceRegistration == null) throw new IllegalArgumentException("serviceRegistration must not be null!"); Query query = null; EntityManager em = null; logger.debug("Try to get the number of jobs who failed on the service {}", serviceRegistration.toString()); try { em = emf.createEntityManager(); query = em.createNamedQuery("Job.count.history.failed"); query.setParameter("serviceType", serviceRegistration.getServiceType()); query.setParameter("host", 
serviceRegistration.getHost()); Number number = (Number) query.getSingleResult(); return number.intValue(); } catch (Exception e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * Gets the services in WARNING state triggered by this job * * @param job * the given job to get the related services * @return a list of services triggered by the job * @throws IllegalArgumentException * if the given job was null * @throws ServiceRegistryException * if the there was a problem with the query */ private List<ServiceRegistrationJpaImpl> getRelatedWarningServices(JpaJob job) throws IllegalArgumentException, ServiceRegistryException { if (job == null) throw new IllegalArgumentException("job must not be null!"); Query query = null; EntityManager em = null; logger.debug("Try to get the services in WARNING state triggered by this job {} failed", job.toJob().getSignature()); try { em = emf.createEntityManager(); // TODO: modify the query to avoid to go through the list here query = em.createNamedQuery("ServiceRegistration.relatedservices.warning"); query.setParameter("serviceType", job.getJobType()); List<ServiceRegistrationJpaImpl> jpaServices = new ArrayList<ServiceRegistrationJpaImpl>(); @SuppressWarnings("unchecked") List<ServiceRegistrationJpaImpl> jobResults = query.getResultList(); for (ServiceRegistrationJpaImpl relatedService : jobResults) { if (relatedService.getWarningStateTrigger() == job.toJob().getSignature()) { jpaServices.add(relatedService); } } return jpaServices; } catch (NoResultException e) { return null; } catch (Exception e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * Gets the services in WARNING or ERROR state triggered by this job * * @param job * the given job to get the related services * @return a list of services triggered by the job * @throws IllegalArgumentException * if the given job was null * @throws ServiceRegistryException * if the there was a problem with the 
query */ private List<ServiceRegistrationJpaImpl> getRelatedWarningErrorServices(JpaJob job) throws ServiceRegistryException { if (job == null) throw new IllegalArgumentException("job must not be null!"); Query query = null; EntityManager em = null; logger.debug("Try to get the services in WARNING or ERROR state triggered by this job {} failed", job.toJob().getSignature()); try { em = emf.createEntityManager(); // TODO: modify the query to avoid to go through the list here query = em.createNamedQuery("ServiceRegistration.relatedservices.warning_error"); query.setParameter("serviceType", job.getJobType()); List<ServiceRegistrationJpaImpl> jpaServices = new ArrayList<ServiceRegistrationJpaImpl>(); @SuppressWarnings("unchecked") List<ServiceRegistrationJpaImpl> serviceResults = query.getResultList(); for (ServiceRegistrationJpaImpl relatedService : serviceResults) { if (relatedService.getServiceState() == WARNING && relatedService.getWarningStateTrigger() == job.toJob().getSignature()) { jpaServices.add(relatedService); } if (relatedService.getServiceState() == ERROR && relatedService.getErrorStateTrigger() == job.toJob().getSignature()) { jpaServices.add(relatedService); } } return jpaServices; } catch (NoResultException e) { return null; } catch (Exception e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } /** * Returns a filtered list of service registrations, containing only those that are online, not in maintenance mode, * and with a specific service type that are running on a host which is not already maxed out. 
* * @param serviceRegistrations * the complete list of service registrations * @param hostRegistrations * the complete list of available host registrations * @param systemLoad * the map of hosts to the number of running jobs * @param jobType * the job type for which the services registrations are filtered */ protected List<ServiceRegistration> getServiceRegistrationsWithCapacity(String jobType, List<ServiceRegistration> serviceRegistrations, List<HostRegistration> hostRegistrations, final SystemLoad systemLoad) { final List<String> hostBaseUrls = $(hostRegistrations).map(toBaseUrl).toList(); final List<ServiceRegistration> filteredList = new ArrayList<ServiceRegistration>(); for (ServiceRegistration service : serviceRegistrations) { // Skip service if host not available if (!hostBaseUrls.contains(service.getHost())) { logger.trace("Not considering {} because it's host {} is not available for dispatching", service, service.getHost()); continue; } // Skip services that are not of the requested type if (!jobType.equals(service.getServiceType())) { logger.trace("Not considering {} because it is of the wrong job type", service); continue; } // Skip services that are in error state if (service.getServiceState() == ERROR) { logger.trace("Not considering {} because it is in error state", service); continue; } // Skip services that are in maintenance mode if (service.isInMaintenanceMode()) { logger.trace("Not considering {} because it is in maintenance mode", service); continue; } // Skip services that are marked as offline if (!service.isOnline()) { logger.trace("Not considering {} because it is currently offline", service); continue; } // Determine the maximum load for this host Float hostLoadMax = null; for (HostRegistration host : hostRegistrations) { if (host.getBaseUrl().equals(service.getHost())) { hostLoadMax = host.getMaxLoad(); break; } } if (hostLoadMax == null) logger.warn("Unable to determine max load for host {}", service.getHost()); // Determine the current 
load for this host Float hostLoad = systemLoad.get(service.getHost()).getLoadFactor(); if (hostLoad == null) logger.warn("Unable to determine current load for host {}", service.getHost()); // Is this host suited for processing? if (hostLoad == null || hostLoadMax == null || hostLoad < hostLoadMax) { logger.debug("Adding candidate service {} for processing of jobs of type '{}'", service, jobType); filteredList.add(service); } } // Sort the list by capacity Collections.sort(filteredList, new LoadComparator(systemLoad)); return filteredList; } /** * Returns a filtered list of service registrations, containing only those that are online, not in maintenance mode, * and with a specific service type, ordered by load. * * @param jobType * the job type for which the services registrations are filtered * @param serviceRegistrations * the complete list of service registrations * @param hostRegistrations * the complete list of available host registrations * @param systemLoad * */ protected List<ServiceRegistration> getServiceRegistrationsByLoad(String jobType, List<ServiceRegistration> serviceRegistrations, List<HostRegistration> hostRegistrations, final SystemLoad systemLoad) { final List<String> hostBaseUrls = $(hostRegistrations).map(toBaseUrl).toList(); final List<ServiceRegistration> filteredList = new ArrayList<ServiceRegistration>(); logger.debug("Finding services to dispatch job of type {}", jobType); for (ServiceRegistration service : serviceRegistrations) { // Skip service if host not available if (!hostBaseUrls.contains(service.getHost())) { logger.trace("Not considering {} because it's host {} is not available for dispatching", service, service.getHost()); continue; } // Skip services that are not of the requested type if (!jobType.equals(service.getServiceType())) { logger.trace("Not considering {} because it is of the wrong job type", service); continue; } // Skip services that are in error state if (service.getServiceState() == ERROR) { logger.trace("Not 
considering {} because it is in error state", service); continue; } // Skip services that are in maintenance mode if (service.isInMaintenanceMode()) { logger.trace("Not considering {} because it is in maintenance mode", service); continue; } // Skip services that are marked as offline if (!service.isOnline()) { logger.trace("Not considering {} because it is currently offline", service); continue; } // We found a candidate service logger.debug("Adding candidate service {} for processing of job of type '{}'", service, jobType); filteredList.add(service); } // Sort the list by capacity Collections.sort(filteredList, new LoadComparator(systemLoad)); return filteredList; } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getMaxLoads() */ @Override public SystemLoad getMaxLoads() throws ServiceRegistryException { final SystemLoad loads = new SystemLoad(); for (HostRegistration host : getHostRegistrations()) { NodeLoad load = new NodeLoad(host.getBaseUrl(), host.getMaxLoad()); loads.addNodeLoad(load); } return loads; } /** * {@inheritDoc} * * @see org.opencastproject.serviceregistry.api.ServiceRegistry#getMaxLoadOnNode(java.lang.String) */ @Override public NodeLoad getMaxLoadOnNode(String host) throws ServiceRegistryException, NotFoundException { Query query = null; EntityManager em = null; try { em = emf.createEntityManager(); query = em.createNamedQuery("HostRegistration.getMaxLoadByHostName"); query.setParameter("host", host); return new NodeLoad(host, ((Number) query.getSingleResult()).floatValue()); } catch (NoResultException e) { throw new NotFoundException(e); } catch (Exception e) { throw new ServiceRegistryException(e); } finally { if (em != null) em.close(); } } private final Fn<HostRegistration, String> toBaseUrl = new Fn<HostRegistration, String>() { @Override public String apply(HostRegistration h) { return h.getBaseUrl(); } }; /** * This dispatcher implementation will check for jobs in the QUEUED {@link Status}. 
If * new jobs are found, the dispatcher will attempt to dispatch each job to the least loaded service. */
  class JobDispatcher implements Runnable {

    /**
     * Signatures (<code>jobType@operation</code>) of jobs that could not be dispatched during the current round. Only
     * valid while {@link #run()} is executing; reset to <code>null</code> afterwards.
     */
    private List<String> undispatchableJobTypes = null;

    /**
     * Runs one round of dispatching: optionally refreshes the job statistics, prunes the priority list, dispatches
     * all non-workflow jobs in state {@link Status#RESTART}, then those in state {@link Status#QUEUED}, and finally
     * the workflow jobs collected along the way. Any error is logged and ends the round.
     *
     * @see java.lang.Runnable#run()
     */
    @Override
    public void run() {
      logger.debug("Starting job dispatching");

      undispatchableJobTypes = new ArrayList<String>();
      EntityManager em = null;
      try {
        em = emf.createEntityManager();

        // FIXME: the stats are not currently used and the queries are very
        // expensive in database time.
        if (collectJobstats) {
          jobsStatistics.updateAvg(getAvgOperations(em));
          jobsStatistics.updateJobCount(getCountPerHostService(em));
        }

        if (!dispatchPriorityList.isEmpty()) {
          // Remove outdated jobs from priority list. Iterate over a copy of the key set since entries are removed
          // from the underlying map inside the loop.
          List<Long> jobIds = getDispatchableJobsWithIdFilter(em, dispatchPriorityList.keySet());
          for (Long jobId : new HashSet<>(dispatchPriorityList.keySet())) {
            if (!jobIds.contains(jobId)) {
              dispatchPriorityList.remove(jobId);
            }
          }
        }

        // Workflow jobs are collected while paging through the other jobs and are dispatched last
        List<JpaJob> workflowJobs = new ArrayList<>();
        dispatchNonWorkflowJobs(em, Status.RESTART, workflowJobs);
        dispatchNonWorkflowJobs(em, Status.QUEUED, workflowJobs);

        if (!workflowJobs.isEmpty())
          dispatchDispatchableJobs(em, workflowJobs);
      } catch (Throwable t) {
        logger.warn("Error dispatching jobs", t);
      } finally {
        undispatchableJobTypes = null;
        if (em != null)
          em.close();
      }

      logger.debug("Finished job dispatching");
    }

    /**
     * Pages through the dispatchable jobs with the given status, dispatching every job that is not of type workflow.
     * Workflow jobs are not dispatched here but appended to <code>workflowJobs</code>.
     *
     * @param em
     *          the entity manager
     * @param status
     *          the status of the jobs to look up
     * @param workflowJobs
     *          accumulator receiving the workflow jobs that have been skipped
     * @throws ServiceRegistryException
     *           if looking up the dispatchable jobs fails
     */
    private void dispatchNonWorkflowJobs(EntityManager em, Status status, List<JpaJob> workflowJobs)
            throws ServiceRegistryException {
      int jobsOffset = 0;
      boolean jobsFound = false;
      do {
        List<JpaJob> dispatchableJobs = getDispatchableJobsWithStatus(em, jobsOffset, DEFAULT_DISPATCH_JOBS_LIMIT,
                status);
        jobsOffset += DEFAULT_DISPATCH_JOBS_LIMIT;
        jobsFound = !dispatchableJobs.isEmpty();

        // skip all jobs of type workflow, they are handled by the caller
        for (JpaJob job : dispatchableJobs) {
          if (TYPE_WORKFLOW.equals(job.getJobType())) {
            workflowJobs.add(job);
          }
        }
        if (dispatchableJobs.removeAll(workflowJobs) && dispatchableJobs.isEmpty())
          continue;

        dispatchDispatchableJobs(em, dispatchableJobs);
      } while (jobsFound);
    }

    /**
     * Dispatch the given jobs. For each job the security context is set to the job's organization and creator for the
     * duration of the attempt and is cleared again afterwards, regardless of the outcome.
     *
     * @param em
     *          the entity manager
     * @param jobsToDispatch
     *          list with dispatchable jobs to dispatch
     */
    private void dispatchDispatchableJobs(EntityManager em, List<JpaJob> jobsToDispatch) {
      for (JpaJob job : jobsToDispatch) {

        // Remember the job type
        String jobType = job.getJobType();

        // Skip jobs that we already know can't be dispatched except of jobs in the priority list
        String jobSignature = new StringBuilder(jobType).append('@').append(job.getOperation()).toString();
        if (undispatchableJobTypes.contains(jobSignature) && !dispatchPriorityList.containsKey(job.getId())) {
          logger.trace("Skipping dispatching of jobs {} with type '{}' for this round of dispatching", job.getId(),
                  jobType);
          continue;
        }

        // Set the job's user and organization prior to dispatching
        String creator = job.getCreator();
        String creatorOrganization = job.getOrganization();

        // Try to load the organization.
        try {
          Organization organization = organizationDirectoryService.getOrganization(creatorOrganization);
          securityService.setOrganization(organization);
        } catch (NotFoundException e) {
          logger.debug("Skipping dispatching of job for non-existing organization '{}'", creatorOrganization);
          continue;
        }

        // From here on the organization is set, so every exit path has to run through the finally block below
        try {
          // Try to load the user
          User user = userDirectoryService.loadUser(creator);
          if (user == null) {
            logger.warn("Unable to dispatch job {}: creator '{}' is not available", job.getId(), creator);
            continue;
          }
          securityService.setUser(user);

          // Start dispatching
          SystemLoad systemLoad = getHostLoads(em, true);
          List<ServiceRegistration> services = getServiceRegistrations(em);
          List<HostRegistration> hosts = $(getHostRegistrations(em)).filter(filterOutPriorityHosts._2(job.getId()))
                  .toList();
          List<ServiceRegistration> candidateServices = null;

          // Depending on whether this running job is trying to reach out to other services or whether this is an
          // attempt to execute the next operation in a workflow, choose either from a limited or from the full list
          // of services
          Job parentJob = null;
          try {
            if (job.getParentJob() != null)
              parentJob = getJob(job.getParentJob().getId());
          } catch (NotFoundException e) {
            // That's ok
          }

          // When a job A starts a series of child jobs, then those child jobs should only be dispatched at the
          // same time if there is processing capacity available.
          boolean parentHasRunningChildren = false;
          if (parentJob != null) {
            for (Job child : getChildJobs(parentJob.getId())) {
              if (Status.RUNNING.equals(child.getStatus())) {
                parentHasRunningChildren = true;
                break;
              }
            }
          }

          // If this is a root job (a new workflow or a new workflow operation), then only dispatch if there is
          // capacity, i. e. the workflow service is ok dispatching the next workflow or the next workflow operation.
          if (parentJob == null || TYPE_WORKFLOW.equals(jobType) || parentHasRunningChildren) {
            logger.trace("Using available capacity only for dispatching of {} to a service of type '{}'", job,
                    jobType);
            candidateServices = getServiceRegistrationsWithCapacity(jobType, services, hosts, systemLoad);
          } else {
            logger.trace("Using full list of services for dispatching of {} to a service of type '{}'", job, jobType);
            candidateServices = getServiceRegistrationsByLoad(jobType, services, hosts, systemLoad);
          }

          // Try to dispatch the job
          String hostAcceptingJob = null;
          try {
            hostAcceptingJob = dispatchJob(em, job, candidateServices);
            dispatchPriorityList.remove(job.getId());
          } catch (ServiceUnavailableException e) {
            logger.debug("Jobs of type {} currently cannot be dispatched", job.getOperation());
            // Don't mark workflow jobs as undispatchable to not impact workflow operations
            if (!TYPE_WORKFLOW.equals(jobType))
              undispatchableJobTypes.add(jobSignature);
            continue;
          } catch (UndispatchableJobException e) {
            logger.debug("Job {} currently cannot be dispatched", job.getId());
            continue;
          }

          logger.debug("Job {} dispatched to {}", job.getId(), hostAcceptingJob);
        } catch (ServiceRegistryException e) {
          Throwable cause = (e.getCause() != null) ? e.getCause() : e;
          logger.error("Error dispatching job " + job, cause);
        } finally {
          securityService.setUser(null);
          securityService.setOrganization(null);
        }
      }
    }

    /**
     * Dispatches the job to the least loaded service that will accept the job, or throws a
     * <code>ServiceUnavailableException</code> if there is no such service.
     * <p>
     * The given services are tried in list order. If the job's load is higher than the highest max load among the
     * services' hosts, only services on hosts having that highest max load are tried.
     *
     * @param em
     *          the current entity manager
     * @param job
     *          the job to dispatch
     * @param services
     *          a list of service registrations
     * @return the host that accepted the dispatched job
     * @throws ServiceRegistryException
     *           if the service registrations are unavailable
     * @throws ServiceUnavailableException
     *           if the list of services is empty
     * @throws UndispatchableJobException
     *           if the current job cannot be processed, e. g. because it is already being dispatched, a service
     *           rejected it or none of the services accepted it
     */
    protected String dispatchJob(EntityManager em, JpaJob job, List<ServiceRegistration> services)
            throws ServiceRegistryException, ServiceUnavailableException, UndispatchableJobException {

      if (services.isEmpty()) {
        logger.debug("No service is currently available to handle jobs of type '{}'", job.getJobType());
        throw new ServiceUnavailableException("No service of type " + job.getJobType() + " available");
      }

      // Try the service registrations, after the first one finished, we quit;
      job.setStatus(Status.DISPATCHING);

      boolean triedDispatching = false;

      // If none of the available hosts is able to take the job's load, restrict the attempt to the hosts offering
      // the highest max load
      final Float highestMaxLoad = $(services).map(toHostRegistration).map(toMaxLoad).sort(sortFloatValuesDesc)
              .head2();
      final boolean onlyHighestMaxLoadHosts = job.getJobLoad() > highestMaxLoad;

      for (ServiceRegistration registration : services) {
        job.setProcessorServiceRegistration((ServiceRegistrationJpaImpl) registration);

        // Skip registration of host with less max load than highest available max load. Float.compare compares by
        // value, independent of whether the operands are boxed.
        if (onlyHighestMaxLoadHosts
                && Float.compare(job.getProcessorServiceRegistration().getHostRegistration().getMaxLoad(),
                        highestMaxLoad) != 0) {
          continue;
        }

        try {
          job = updateInternal(em, job);
        } catch (Exception e) {
          // In theory, we should catch javax.persistence.OptimisticLockException. Unfortunately, eclipselink throws
          // org.eclipse.persistence.exceptions.OptimisticLockException. In order to avoid importing the implementation
          // specific APIs, we just catch Exception.
          logger.debug("Unable to dispatch {}. This is likely caused by another service registry dispatching the job",
                  job);
          throw new UndispatchableJobException("Job " + job.getId() + " is already being dispatched");
        }

        triedDispatching = true;

        String serviceUrl = UrlSupport.concat(registration.getHost(), registration.getPath(), "dispatch");
        HttpPost post = new HttpPost(serviceUrl);

        // Add current organization and user so they can be used during execution at the remote end
        post.addHeader(ORGANIZATION_HEADER, securityService.getOrganization().getId());
        post.addHeader(USER_HEADER, securityService.getUser().getUsername());

        List<BasicNameValuePair> params = new ArrayList<BasicNameValuePair>();
        params.add(new BasicNameValuePair("id", Long.toString(job.getId())));
        params.add(new BasicNameValuePair("operation", job.getOperation()));
        post.setEntity(new UrlEncodedFormEntity(params, UTF_8));

        // Post the request
        HttpResponse response = null;
        try {
          logger.debug("Trying to dispatch job {} of type '{}' to {}", new String[] { Long.toString(job.getId()),
                  job.getJobType(), registration.getHost() });
          if (!START_WORKFLOW.equals(job.getOperation()))
            setCurrentJob(job.toJob());
          response = client.execute(post);
          int responseStatusCode = response.getStatusLine().getStatusCode();
          if (responseStatusCode == HttpStatus.SC_NO_CONTENT) {
            return registration.getHost();
          } else if (responseStatusCode == HttpStatus.SC_SERVICE_UNAVAILABLE) {
            logger.debug("Service {} is currently refusing to accept jobs of type {}", registration,
                    job.getOperation());
            continue;
          } else if (responseStatusCode == HttpStatus.SC_PRECONDITION_FAILED) {
            job.setStatus(Status.FAILED);
            job = updateJob(job);
            logger.debug("Service {} refused to accept {}", registration, job);
            // Decode the response body explicitly as UTF-8 rather than with the platform's default charset
            throw new UndispatchableJobException(IOUtils.toString(response.getEntity().getContent(), UTF_8.name()));
          } else if (responseStatusCode == HttpStatus.SC_METHOD_NOT_ALLOWED) {
            logger.debug("Service {} is not yet reachable", registration);
            continue;
          } else {
            logger.warn("Service {} failed ({}) accepting {}", new Object[] { registration, responseStatusCode, job });
            continue;
          }
        } catch (UndispatchableJobException e) {
          throw e;
        } catch (Exception e) {
          // Any other failure only affects this registration, so go on with the next one
          logger.warn("Unable to dispatch job {}", job.getId(), e);
        } finally {
          client.close(response);
          setCurrentJob(null);
        }
      }

      // We've tried dispatching to every online service that can handle this type of job, with no luck.
      if (triedDispatching) {
        // Workflow type jobs are not set to priority list, because they handle accepting jobs not based on the job load
        // If the system doesn't accept jobs whose load exceeds the host's max load we can't make use of the priority
        // list
        if (acceptJobLoadsExeedingMaxLoad && !dispatchPriorityList.containsKey(job.getId())
                && !TYPE_WORKFLOW.equals(job.getJobType()) && job.getProcessorServiceRegistration() != null) {
          String host = job.getProcessorServiceRegistration().getHost();
          dispatchPriorityList.put(job.getId(), host);
        }

        try {
          job.setStatus(Status.QUEUED);
          job.setProcessorServiceRegistration(null);
          job = updateJob(job);
        } catch (Exception e) {
          logger.error("Unable to put job back into queue", e);
        }
      }

      logger.debug("Unable to dispatch {}, no service is currently ready to accept the job", job);
      throw new UndispatchableJobException("Job " + job.getId() + " is currently undispatchable");
    }

    /**
     * Rejects hosts that appear in the dispatch priority list unless the entry of the given job names that host.
     */
    private final Fn2<HostRegistration, Long, Boolean> filterOutPriorityHosts = new Fn2<HostRegistration, Long, Boolean>() {
      @Override
      public Boolean apply(HostRegistration host, Long jobId) {
        if (dispatchPriorityList.values().contains(host.getBaseUrl())
                && !host.getBaseUrl().equals(dispatchPriorityList.get(jobId))) {
          return false;
        }
        return true;
      }
    };

    /** Maps a service registration to the registration of its host. */
    private final Fn<ServiceRegistration, HostRegistration> toHostRegistration = new Fn<ServiceRegistration, HostRegistration>() {
      @Override
      public HostRegistration apply(ServiceRegistration s) {
        return ((ServiceRegistrationJpaImpl) s).getHostRegistration();
      }
    };

    /** Maps a host registration to its max load. */
    private final Fn<HostRegistration, Float> toMaxLoad = new Fn<HostRegistration, Float>() {
      @Override
      public Float apply(HostRegistration h) {
        return h.getMaxLoad();
      }
    };

    /** Sorts float values in descending order. */
    private final Comparator<Float> sortFloatValuesDesc = new Comparator<Float>() {
      @Override
      public int compare(Float o1, Float o2) {
        return o2.compareTo(o1);
      }
    };
  }

  /** A periodic check on each service registration to ensure that it is still alive. */
  class JobProducerHeartbeat implements Runnable {

    /** List of service registrations that have been found unresponsive last time we checked */
    private final List<ServiceRegistration> unresponsive = new ArrayList<ServiceRegistration>();

    /**
     * Sends a HEAD request to the dispatch endpoint of every online job producer that is not in maintenance mode. A
     * service failing the check is put on a watch list; if it fails again while on the watch list it gets
     * unregistered.
     *
     * @see java.lang.Runnable#run()
     */
    @Override
    public void run() {

      logger.debug("Checking for unresponsive services");

      List<ServiceRegistration> serviceRegistrations = getOnlineServiceRegistrations();

      for (ServiceRegistration service : serviceRegistrations) {
        hostsStatistics.updateHost(((ServiceRegistrationJpaImpl) service).getHostRegistration());
        servicesStatistics.updateService(service);
        if (!service.isJobProducer())
          continue;
        if (service.isInMaintenanceMode())
          continue;

        // We think this service is online and available. Prove it.
        String serviceUrl = UrlSupport.concat(service.getHost(), service.getPath(), "dispatch");

        HttpHead head = new HttpHead(serviceUrl);
        HttpResponse response = null;
        try {
          try {
            response = client.execute(head);
            if (response != null) {
              switch (response.getStatusLine().getStatusCode()) {
                case HttpStatus.SC_OK:
                  // this service is reachable, continue checking other services
                  logger.trace("Service {} is responsive: {}", service, response.getStatusLine());
                  if (unresponsive.remove(service)) {
                    logger.info("Service {} is still online", service);
                  } else if (!service.isOnline()) {
                    try {
                      setOnlineStatus(service.getServiceType(), service.getHost(), service.getPath(), true, true);
                      logger.info("Service {} is back online", service);
                    } catch (ServiceRegistryException e) {
                      logger.warn("Error setting online status for {}", service, e);
                    }
                  }
                  continue;
                default:
                  if (!service.isOnline())
                    continue;
                  logger.warn("Service {} is not working as expected: {}", service, response.getStatusLine());
              }
            } else {
              logger.warn("Service {} does not respond", service);
            }
          } catch (TrustedHttpClientException e) {
            if (!service.isOnline())
              continue;
            logger.warn("Unable to reach {} : {}", service, e);
          }

          // If we get here, the service did not respond as expected
          try {
            if (unresponsive.contains(service)) {
              unRegisterService(service.getServiceType(), service.getHost());
              unresponsive.remove(service);
              logger.warn("Marking {} as offline", service);
            } else {
              unresponsive.add(service);
              logger.warn("Added {} to the watch list", service);
            }
          } catch (ServiceRegistryException e) {
            logger.warn("Unable to unregister unreachable service: {} : {}", service, e);
          }
        } finally {
          client.close(response);
        }
      }

      logger.debug("Finished checking for unresponsive services");
    }
  }

  /**
   * Comparator that will sort service registrations by the load factor of the host they are running on, hosts with a
   * lower load factor first.
   */
  private static final class LoadComparator implements Comparator<ServiceRegistration> {

    /** The current work load by host */
    private final SystemLoad loadByHost;

    /**
     * Creates a new comparator which is using the given system load to look up the load of the services' hosts.
     *
     * @param loadByHost
     *          the current work load by host
     */
    LoadComparator(SystemLoad loadByHost) {
      this.loadByHost = loadByHost;
    }

    @Override
    public int compare(ServiceRegistration serviceA, ServiceRegistration serviceB) {
      String hostA = serviceA.getHost();
      String hostB = serviceB.getHost();
      return Float.compare(loadByHost.get(hostA).getLoadFactor(), loadByHost.get(hostB).getLoadFactor());
    }
  }

  /**
   * Comparator that will sort jobs for dispatching: jobs in state restart come first, non-workflow jobs come before
   * workflow jobs and, everything else being equal, older jobs come before newer ones.
   */
  static final class DispatchableComparator implements Comparator<JpaJob> {

    @Override
    public int compare(JpaJob jobA, JpaJob jobB) {

      // Jobs that are in "restart" mode should be handled first
      if (Status.RESTART.equals(jobA.getStatus()) && !Status.RESTART.equals(jobB.getStatus())) {
        return -1;
      } else if (Status.RESTART.equals(jobB.getStatus()) && !Status.RESTART.equals(jobA.getStatus())) {
        return 1;
      }

      // Regular jobs should be processed prior to workflow and workflow operation jobs
      if (TYPE_WORKFLOW.equals(jobA.getJobType()) && !TYPE_WORKFLOW.equals(jobB.getJobType())) {
        return 1;
      } else if (TYPE_WORKFLOW.equals(jobB.getJobType()) && !TYPE_WORKFLOW.equals(jobA.getJobType())) {
        return -1;
      }

      // Use created date
      if (jobA.getDateCreated() != null && jobB.getDateCreated() != null) {
        if (jobA.getDateCreated().getTime() < jobB.getDateCreated().getTime())
          return -1;
        else if (jobA.getDateCreated().getTime() > jobB.getDateCreated().getTime())
          return 1;
      }

      // undecided
      return 0;
    }

  }

}