/** * Copyright © 2013 enioka. All rights reserved * Authors: Marc-Antoine GOUILLART (marc-antoine.gouillart@enioka.com) * Pierre COPPEE (pierre.coppee@enioka.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.enioka.jqm.tools; import java.lang.management.ManagementFactory; import java.util.Calendar; import java.util.Date; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import javax.management.InstanceNotFoundException; import javax.management.MBeanServer; import javax.management.ObjectName; import org.apache.log4j.Logger; import com.enioka.jqm.jdbc.DbConn; import com.enioka.jqm.jdbc.QueryResult; import com.enioka.jqm.model.DeploymentParameter; import com.enioka.jqm.model.JobInstance; import com.enioka.jqm.model.Queue; /** * A thread that polls a queue according to the parameters defined inside a {@link DeploymentParameter}. */ class QueuePoller implements Runnable, QueuePollerMBean { private static Logger jqmlogger = Logger.getLogger(QueuePoller.class); private Queue queue = null; private JqmEngine engine; private int maxNbThread = 10; private int pollingInterval = 10000; private boolean run = true; private AtomicInteger actualNbThread = new AtomicInteger(0); private boolean hasStopped = true; private Calendar lastLoop = null; private Map<Integer, Date> peremption = new ConcurrentHashMap<Integer, Date>(); private ObjectName name = null; private Thread localThread = null; private Semaphore loop; @Override public void stop() { jqmlogger.info("Poller " + queue.getName() + " has received a stop order"); run = false; if (localThread != null) { localThread.interrupt(); } } /** * Will make the thread ready to run once again after it has stopped. */ void reset() { if (!hasStopped) { throw new IllegalStateException("cannot reset a non stopped queue poller"); } hasStopped = false; run = true; lastLoop = null; loop = new Semaphore(0); } QueuePoller(JqmEngine engine, Queue q, int nbThreads, int interval) { jqmlogger.info("Engine " + engine.getNode().getName() + " will poll JobInstances on queue " + q.getName() + " every " + interval / 1000 + "s with " + nbThreads + " threads for concurrent instances"); this.engine = engine; this.queue = q; this.pollingInterval = interval; this.maxNbThread = nbThreads; reset(); registerMBean(); } private void registerMBean() { try { if (this.engine.loadJmxBeans) { MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); name = new ObjectName( "com.enioka.jqm:type=Node.Queue,Node=" + this.engine.getNode().getName() + ",name=" + this.queue.getName()); // Unregister MBean if it already exists. This may happen during frequent DP modifications. try { mbs.getMBeanInfo(name); mbs.unregisterMBean(name); } catch (InstanceNotFoundException e) { // Nothing to do, this should be the normal case. } mbs.registerMBean(this, name); } } catch (Exception e) { throw new JqmInitError("Could not create JMX beans", e); } } protected List<JobInstance> dequeue(DbConn cnx, int level) { // Free room? int usedSlots = actualNbThread.get(); if (usedSlots >= maxNbThread) { return null; } // Get the list of all jobInstance within the defined queue, ordered by position QueryResult qr = cnx.runUpdate("ji_update_poll", this.engine.getNode().getId(), queue.getId(), maxNbThread - usedSlots); if (qr.nbUpdated > 0) { jqmlogger.debug("Poller has found " + qr.nbUpdated + " JI to run"); } if (qr.nbUpdated > 0) { List<JobInstance> res = JobInstance.select(cnx, "ji_select_to_run", this.engine.getNode().getId(), queue.getId()); if (res.size() == qr.nbUpdated) { cnx.commit(); return res; } else if (level <= 3) { // Try again. This means the jobs marked for exec on previous loop have not already started. // So they were still in the SELECT WHERE STATE='ATTRIBUTED'. // Happens when loop interval is too low (ms and not s), so only happens during tests. jqmlogger.info("Polling interval seems too low"); cnx.rollback(); Thread.yield(); return dequeue(cnx, level + 1); } else { // Simply give up. The engine is under such a heavy load that we should first deal with the backlog. return null; } } else { return null; } } @Override synchronized public void run() // sync: avoid race condition on run when restarting after failure. { this.localThread = Thread.currentThread(); this.localThread.setName("QUEUE_POLLER;polling;" + this.queue.getName()); DbConn cnx = null; while (true) { lastLoop = Calendar.getInstance(); jqmlogger.trace("poller loop"); try { // Get a JI to run cnx = Helpers.getNewDbSession(); List<JobInstance> newInstances = dequeue(cnx, 1); if (newInstances != null) { for (JobInstance ji : newInstances) { // We will run this JI! jqmlogger.trace("JI number " + ji.getId() + " will be run by this poller this loop (already " + actualNbThread + "/" + maxNbThread + " on " + this.queue.getName() + ")"); actualNbThread.incrementAndGet(); if (ji.getJD().getMaxTimeRunning() != null) { this.peremption.put(ji.getId(), new Date((new Date()).getTime() + ji.getJD().getMaxTimeRunning() * 60 * 1000)); } // Run it if (!ji.getJD().isExternal()) { (new Thread(new Loader(ji, this.engine, this, this.engine.getClassloaderManager()))).start(); } else { (new Thread(new LoaderExternal(cnx, ji, this))).start(); } } } } catch (RuntimeException e) { if (Helpers.testDbFailure(e)) { jqmlogger.error("connection to database lost - stopping poller"); jqmlogger.trace("connection error was:", e.getCause()); this.hasStopped = true; this.engine.pollerRestartNeeded(this); break; } else { jqmlogger.error("Queue poller has failed! It will stop.", e); this.run = false; this.hasStopped = true; break; } } finally { // Reset the connection on each loop. Helpers.closeQuietly(cnx); } // Wait according to the deploymentParameter try { loop.tryAcquire(this.pollingInterval, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { run = false; break; } // Exit if asked to if (!run) { break; } } if (!run) { // Run is true only if the loop has exited abnormally, in which case the engine should try to restart the poller // So only do the graceful shutdown procedure if normal shutdown. jqmlogger .info("Poller loop on queue " + this.queue.getName() + " is stopping [engine " + this.engine.getNode().getName() + "]"); waitForAllThreads(60L * 1000); // JMX if (this.engine.loadJmxBeans) { try { ManagementFactory.getPlatformMBeanServer().unregisterMBean(name); } catch (Exception e) { jqmlogger.error("Could not unregister JMX beans", e); } } // Let the engine decide if it should stop completely this.hasStopped = true; // BEFORE check jqmlogger.info("Poller on queue " + this.queue.getName() + " has ended normally"); this.engine.checkEngineEnd(); } else { // else => Abnormal stop (DB failure only). Set booleans to reflect this. jqmlogger.error("Poller on queue " + this.queue.getName() + " has ended abnormally"); this.run = false; this.hasStopped = true; // Do not check for engine end - we do not want to shut down the engine on a poller failure. } } @Override public Integer getCurrentActiveThreadCount() { return actualNbThread.get(); } /** * Called when a payload thread has ended. This notifies the poller to free a slot and poll once again. */ void decreaseNbThread(int jobId) { this.peremption.remove(jobId); this.actualNbThread.decrementAndGet(); loop.release(1); this.engine.signalEndOfRun(); } boolean isRunning() { return !this.hasStopped; } private void waitForAllThreads(long timeOutMs) { long timeWaitedMs = 0; long stepMs = 1000; while (timeWaitedMs <= timeOutMs) { jqmlogger.trace("Waiting the end of " + actualNbThread + " job(s)"); if (actualNbThread.get() == 0) { break; } if (timeWaitedMs == 0) { jqmlogger.info("Waiting for the end of " + actualNbThread + " jobs on queue " + this.queue.getName() + " - timeout is " + timeOutMs + "ms"); } try { Thread.sleep(stepMs); } catch (InterruptedException e) { // Interruption => stop right now jqmlogger.warn("Some job instances did not finish in time - wait was interrupted"); Thread.currentThread().interrupt(); return; } timeWaitedMs += stepMs; } if (timeWaitedMs > timeOutMs) { jqmlogger.warn("Some job instances did not finish in time - they will be killed for the poller to be able to stop"); } } Queue getQueue() { return this.queue; } JqmEngine getEngine() { return this.engine; } void setMaxThreads(int max) { if (this.maxNbThread > 0 && max == 0) { jqmlogger.info("Poller is being paused - it won't fetch any new job instances until it is resumed."); } else if (this.maxNbThread == 0 && max > 0) { jqmlogger.info("Poller is being resumed"); } this.maxNbThread = max; } void setPollingInterval(int ms) { this.pollingInterval = ms; } // ////////////////////////////////////////////////////////// // JMX // ////////////////////////////////////////////////////////// @Override public long getCumulativeJobInstancesCount() { DbConn em2 = Helpers.getNewDbSession(); try { return em2.runSelectSingle("history_select_count_for_poller", Long.class, this.queue.getId(), this.engine.getNode().getId()); } finally { Helpers.closeQuietly(em2); } } @Override public float getJobsFinishedPerSecondLastMinute() { DbConn em2 = Helpers.getNewDbSession(); try { return em2.runSelectSingle("history_select_count_last_mn_for_poller", Float.class, this.queue.getId(), this.engine.getNode().getId()); } finally { Helpers.closeQuietly(em2); } } @Override public long getCurrentlyRunningJobCount() { return this.actualNbThread.get(); } @Override public Integer getPollingIntervalMilliseconds() { return this.pollingInterval; } @Override public Integer getMaxConcurrentJobInstanceCount() { return this.maxNbThread; } @Override public boolean isActuallyPolling() { // 100ms is a rough estimate of the time taken to do the actual poll. If it's more, there is a huge issue elsewhere. return (Calendar.getInstance().getTimeInMillis() - this.lastLoop.getTimeInMillis()) <= pollingInterval + 100; } @Override public boolean isFull() { return this.actualNbThread.get() >= maxNbThread; } @Override public int getLateJobs() { int i = 0; Date now = new Date(); for (Date d : this.peremption.values()) { if (now.after(d)) { i++; } } return i; } }