/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.ipc; import java.lang.ref.WeakReference; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.AbstractQueue; import java.util.HashMap; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.Condition; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang.NotImplementedException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.metrics2.util.MBeans; /** * A queue with multiple levels for each priority. */ public class FairCallQueue<E extends Schedulable> extends AbstractQueue<E> implements BlockingQueue<E> { @Deprecated public static final int IPC_CALLQUEUE_PRIORITY_LEVELS_DEFAULT = 4; @Deprecated public static final String IPC_CALLQUEUE_PRIORITY_LEVELS_KEY = "faircallqueue.priority-levels"; public static final Log LOG = LogFactory.getLog(FairCallQueue.class); /* The queues */ private final ArrayList<BlockingQueue<E>> queues; /* Read locks */ private final ReentrantLock takeLock = new ReentrantLock(); private final Condition notEmpty = takeLock.newCondition(); private void signalNotEmpty() { takeLock.lock(); try { notEmpty.signal(); } finally { takeLock.unlock(); } } /* Multiplexer picks which queue to draw from */ private RpcMultiplexer multiplexer; /* Statistic tracking */ private final ArrayList<AtomicLong> overflowedCalls; /** * Create a FairCallQueue. * @param capacity the maximum size of each sub-queue * @param ns the prefix to use for configuration * @param conf the configuration to read from * Notes: the FairCallQueue has no fixed capacity. Rather, it has a minimum * capacity of `capacity` and a maximum capacity of `capacity * number_queues` */ public FairCallQueue(int priorityLevels, int capacity, String ns, Configuration conf) { if(priorityLevels < 1) { throw new IllegalArgumentException("Number of Priority Levels must be " + "at least 1"); } int numQueues = priorityLevels; LOG.info("FairCallQueue is in use with " + numQueues + " queues."); this.queues = new ArrayList<BlockingQueue<E>>(numQueues); this.overflowedCalls = new ArrayList<AtomicLong>(numQueues); for(int i=0; i < numQueues; i++) { this.queues.add(new LinkedBlockingQueue<E>(capacity)); this.overflowedCalls.add(new AtomicLong(0)); } this.multiplexer = new WeightedRoundRobinMultiplexer(numQueues, ns, conf); // Make this the active source of metrics MetricsProxy mp = MetricsProxy.getInstance(ns); mp.setDelegate(this); } /** * Returns the first non-empty queue with equal or lesser priority * than <i>startIdx</i>. Wraps around, searching a maximum of N * queues, where N is this.queues.size(). * * @param startIdx the queue number to start searching at * @return the first non-empty queue with less priority, or null if * everything was empty */ private BlockingQueue<E> getFirstNonEmptyQueue(int startIdx) { final int numQueues = this.queues.size(); for(int i=0; i < numQueues; i++) { int idx = (i + startIdx) % numQueues; // offset and wrap around BlockingQueue<E> queue = this.queues.get(idx); if (queue.size() != 0) { return queue; } } // All queues were empty return null; } /* AbstractQueue and BlockingQueue methods */ /** * Put and offer follow the same pattern: * 1. Get the assigned priorityLevel from the call by scheduler * 2. Get the nth sub-queue matching this priorityLevel * 3. delegate the call to this sub-queue. * * But differ in how they handle overflow: * - Put will move on to the next queue until it lands on the last queue * - Offer does not attempt other queues on overflow */ @Override public void put(E e) throws InterruptedException { int priorityLevel = e.getPriorityLevel(); final int numLevels = this.queues.size(); while (true) { BlockingQueue<E> q = this.queues.get(priorityLevel); boolean res = q.offer(e); if (!res) { // Update stats this.overflowedCalls.get(priorityLevel).getAndIncrement(); // If we failed to insert, try again on the next level priorityLevel++; if (priorityLevel == numLevels) { // That was the last one, we will block on put in the last queue // Delete this line to drop the call this.queues.get(priorityLevel-1).put(e); break; } } else { break; } } signalNotEmpty(); } @Override public boolean offer(E e, long timeout, TimeUnit unit) throws InterruptedException { int priorityLevel = e.getPriorityLevel(); BlockingQueue<E> q = this.queues.get(priorityLevel); boolean ret = q.offer(e, timeout, unit); signalNotEmpty(); return ret; } @Override public boolean offer(E e) { int priorityLevel = e.getPriorityLevel(); BlockingQueue<E> q = this.queues.get(priorityLevel); boolean ret = q.offer(e); signalNotEmpty(); return ret; } @Override public E take() throws InterruptedException { int startIdx = this.multiplexer.getAndAdvanceCurrentIndex(); takeLock.lockInterruptibly(); try { // Wait while queue is empty for (;;) { BlockingQueue<E> q = this.getFirstNonEmptyQueue(startIdx); if (q != null) { // Got queue, so return if we can poll out an object E e = q.poll(); if (e != null) { return e; } } notEmpty.await(); } } finally { takeLock.unlock(); } } @Override public E poll(long timeout, TimeUnit unit) throws InterruptedException { int startIdx = this.multiplexer.getAndAdvanceCurrentIndex(); long nanos = unit.toNanos(timeout); takeLock.lockInterruptibly(); try { for (;;) { BlockingQueue<E> q = this.getFirstNonEmptyQueue(startIdx); if (q != null) { E e = q.poll(); if (e != null) { // Escape condition: there might be something available return e; } } if (nanos <= 0) { // Wait has elapsed return null; } try { // Now wait on the condition for a bit. If we get // spuriously awoken we'll re-loop nanos = notEmpty.awaitNanos(nanos); } catch (InterruptedException ie) { notEmpty.signal(); // propagate to a non-interrupted thread throw ie; } } } finally { takeLock.unlock(); } } /** * poll() provides no strict consistency: it is possible for poll to return * null even though an element is in the queue. */ @Override public E poll() { int startIdx = this.multiplexer.getAndAdvanceCurrentIndex(); BlockingQueue<E> q = this.getFirstNonEmptyQueue(startIdx); if (q == null) { return null; // everything is empty } // Delegate to the sub-queue's poll, which could still return null return q.poll(); } /** * Peek, like poll, provides no strict consistency. */ @Override public E peek() { BlockingQueue<E> q = this.getFirstNonEmptyQueue(0); if (q == null) { return null; } else { return q.peek(); } } /** * Size returns the sum of all sub-queue sizes, so it may be greater than * capacity. * Note: size provides no strict consistency, and should not be used to * control queue IO. */ @Override public int size() { int size = 0; for (BlockingQueue<E> q : this.queues) { size += q.size(); } return size; } /** * Iterator is not implemented, as it is not needed. */ @Override public Iterator<E> iterator() { throw new NotImplementedException(); } /** * drainTo defers to each sub-queue. Note that draining from a FairCallQueue * to another FairCallQueue will likely fail, since the incoming calls * may be scheduled differently in the new FairCallQueue. Nonetheless this * method is provided for completeness. */ @Override public int drainTo(Collection<? super E> c, int maxElements) { int sum = 0; for (BlockingQueue<E> q : this.queues) { sum += q.drainTo(c, maxElements); } return sum; } @Override public int drainTo(Collection<? super E> c) { int sum = 0; for (BlockingQueue<E> q : this.queues) { sum += q.drainTo(c); } return sum; } /** * Returns maximum remaining capacity. This does not reflect how much you can * ideally fit in this FairCallQueue, as that would depend on the scheduler's * decisions. */ @Override public int remainingCapacity() { int sum = 0; for (BlockingQueue<E> q : this.queues) { sum += q.remainingCapacity(); } return sum; } /** * MetricsProxy is a singleton because we may init multiple * FairCallQueues, but the metrics system cannot unregister beans cleanly. */ private static final class MetricsProxy implements FairCallQueueMXBean { // One singleton per namespace private static final HashMap<String, MetricsProxy> INSTANCES = new HashMap<String, MetricsProxy>(); // Weakref for delegate, so we don't retain it forever if it can be GC'd private WeakReference<FairCallQueue<? extends Schedulable>> delegate; // Keep track of how many objects we registered private int revisionNumber = 0; private MetricsProxy(String namespace) { MBeans.register(namespace, "FairCallQueue", this); } public static synchronized MetricsProxy getInstance(String namespace) { MetricsProxy mp = INSTANCES.get(namespace); if (mp == null) { // We must create one mp = new MetricsProxy(namespace); INSTANCES.put(namespace, mp); } return mp; } public void setDelegate(FairCallQueue<? extends Schedulable> obj) { this.delegate = new WeakReference<FairCallQueue<? extends Schedulable>>(obj); this.revisionNumber++; } @Override public int[] getQueueSizes() { FairCallQueue<? extends Schedulable> obj = this.delegate.get(); if (obj == null) { return new int[]{}; } return obj.getQueueSizes(); } @Override public long[] getOverflowedCalls() { FairCallQueue<? extends Schedulable> obj = this.delegate.get(); if (obj == null) { return new long[]{}; } return obj.getOverflowedCalls(); } @Override public int getRevision() { return revisionNumber; } } // FairCallQueueMXBean public int[] getQueueSizes() { int numQueues = queues.size(); int[] sizes = new int[numQueues]; for (int i=0; i < numQueues; i++) { sizes[i] = queues.get(i).size(); } return sizes; } public long[] getOverflowedCalls() { int numQueues = queues.size(); long[] calls = new long[numQueues]; for (int i=0; i < numQueues; i++) { calls[i] = overflowedCalls.get(i).get(); } return calls; } @VisibleForTesting public void setMultiplexer(RpcMultiplexer newMux) { this.multiplexer = newMux; } }