/*
 * Copyright 2014 Ben Manes. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.benmanes.caffeine;

import static java.util.Objects.requireNonNull;

import java.io.InvalidObjectException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.util.AbstractQueue;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import com.github.benmanes.caffeine.SingleConsumerQueue.Node;
import com.github.benmanes.caffeine.base.UnsafeAccess;

/**
 * A lock-free unbounded queue based on linked nodes that supports concurrent producers and is
 * restricted to a single consumer. This queue orders elements FIFO (first-in-first-out). The
 * <em>head</em> of the queue is the element that has been on the queue the longest time. The
 * <em>tail</em> of the queue is the element that has been on the queue the shortest time. New
 * elements are inserted at the tail of the queue, and the queue retrieval operations obtain
 * elements at the head of the queue. Like most other concurrent collection implementations, this
 * class does not permit the use of {@code null} elements.
 * <p>
 * A {@code SingleConsumerQueue} is an appropriate choice when many producer threads will share
 * access to a common collection and a single consumer thread drains it. This collection is useful
 * in scenarios such as implementing flat combining, actors, or lock amortization.
 * <p>
 * This implementation employs combination to transfer elements between threads that are producing
 * concurrently. This approach avoids contention on the queue by combining colliding operations
 * that have identical semantics. When a pair of producers collide, the task of performing the
 * combined set of operations is delegated to one of the threads and the other thread optionally
 * waits for its operation to be completed. This decision of whether to wait for completion is
 * determined by constructing either a <em>linearizable</em> or <em>optimistic</em> queue.
 * <p>
 * Iterators are <i>weakly consistent</i>, returning elements reflecting the state of the queue at
 * some point at or since the creation of the iterator. They do <em>not</em> throw {@link
 * java.util.ConcurrentModificationException}, and may proceed concurrently with other operations.
 * Elements contained in the queue since the creation of the iterator will be returned exactly once.
 * <p>
 * Beware that it is the responsibility of the caller to ensure that a consumer has exclusive read
 * access to the queue. This implementation does <em>not</em> include fail-fast behavior to guard
 * against incorrect consumer usage.
 * <p>
 * Beware that, unlike in most collections, the {@code size} method is <em>NOT</em> a
 * constant-time operation. Because of the asynchronous nature of these queues, determining the
 * current number of elements requires a traversal of the elements, and so may report inaccurate
 * results if this collection is modified during traversal.
 * <p>
 * <b>Warning:</b> This class is scheduled for removal in version {@code 3.0.0}.
 *
 * @author ben.manes@gmail.com (Ben Manes)
 * @param <E> the type of elements held in this collection
 */
public final class SingleConsumerQueue<E> extends SCQHeader.HeadAndTailRef<E>
    implements Queue<E>, Serializable {

  /*
   * The queue is represented as a singly-linked list with an atomic head and tail reference. It is
   * based on the non-intrusive multi-producer / single-consumer node queue described by
   * Dmitriy Vyukov [1].
   *
   * The backoff strategy of combining operations with identical semantics is based on inverting
   * the elimination technique [2]. Elimination allows pairs of operations with reverse semantics,
   * like pushes and pops on a stack, to complete without any central coordination and therefore
   * substantially aids scalability. The approach of applying elimination and reversing its
   * semantics was explored in [3, 4]. Unlike other approaches, this implementation does not use
   * opcodes or a background thread.
   *
   * This implementation borrows optimizations from {@link java.util.concurrent.Exchanger} for
   * choosing an arena location and awaiting a match [5].
   *
   * [1] Non-intrusive MPSC node-based queue
   * http://www.1024cores.net/home/lock-free-algorithms/queues/non-intrusive-mpsc-node-based-queue
   * [2] A Scalable Lock-free Stack Algorithm
   * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.156.8728
   * [3] Using Elimination to Implement Scalable and Lock-Free FIFO Queues
   * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.108.6422
   * [4] A Dynamic Elimination-Combining Stack Algorithm
   * http://www.cs.bgu.ac.il/~hendlerd/papers/DECS.pdf
   * [5] A Scalable Elimination-based Exchange Channel
   * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.59.7396
   */

  /** The number of CPUs. */
  static final int NCPU = Runtime.getRuntime().availableProcessors();

  /** The number of slots in the elimination array; always a power of two so it can be masked. */
  static final int ARENA_LENGTH = ceilingPowerOfTwo((NCPU + 1) / 2);

  /** The mask value for indexing into the arena. */
  static final int ARENA_MASK = ARENA_LENGTH - 1;

  /**
   * The factory for creating an optimistic node. The queue's mode is detected by comparing the
   * {@code factory} field against this constant by identity (see {@code poll} and {@code append}),
   * so the optimistic queue must always be built with this exact instance. The identifier's
   * spelling is kept as-is.
   */
  static final Function<?, ?> OPTIMISIC = Node<Object>::new;

  /**
   * The number of times to spin (doing nothing except polling a memory location) before giving up
   * while waiting to eliminate an operation. Should be zero on uniprocessors. On multiprocessors,
   * this value should be large enough so that two threads exchanging items as fast as possible
   * block only when one of them is stalled (due to GC or preemption), but not much longer, to avoid
   * wasting CPU resources. Seen differently, this value is a little over half the number of cycles
   * of an average context switch time on most systems. The value here is approximately the average
   * of those across a range of tested systems.
   */
  static final int SPINS = (NCPU == 1) ? 0 : 2000;

  /** The offset to the thread-specific probe field. */
  static final long PROBE = UnsafeAccess.objectFieldOffset(Thread.class, "threadLocalRandomProbe");

  /**
   * Rounds {@code x} up to a power of two.
   *
   * @param x the value to round; the callers in this class pass a positive number
   * @return a power of two that is greater than or equal to {@code x}
   */
  static int ceilingPowerOfTwo(int x) {
    // From Hacker's Delight, Chapter 3, Harry S. Warren Jr.
    return 1 << -Integer.numberOfLeadingZeros(x - 1);
  }

  /** The slots through which colliding producers hand their pending nodes to one another. */
  final AtomicReference<Node<E>>[] arena;

  /** Creates the node for an added element; its identity also selects the backoff strategy. */
  final Function<E, Node<E>> factory;

  /**
   * Creates an empty queue whose head and tail both reference a single value-less sentinel node.
   *
   * @param factory the function used to wrap each offered element in a node
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  private SingleConsumerQueue(Function<E, Node<E>> factory) {
    arena = new AtomicReference[ARENA_LENGTH];
    for (int i = 0; i < ARENA_LENGTH; i++) {
      arena[i] = new AtomicReference<>();
    }
    Node<E> node = new Node<>(null);
    this.factory = factory;
    lazySetTail(node);
    head = node;
  }

  /**
   * Creates a queue with an optimistic backoff strategy. A thread completes its operation
   * without waiting after it successfully hands off the additional element(s) to another producing
   * thread for batch insertion. This optimistic behavior may result in additions not appearing in
   * FIFO order due to the backoff strategy trying to compensate for queue contention.
   *
   * @param <E> the type of elements held in this collection
   * @return a new queue where producers complete their operation immediately if combined with
   *         another producing thread's
   */
  public static <E> SingleConsumerQueue<E> optimistic() {
    @SuppressWarnings("unchecked")
    Function<E, Node<E>> factory = (Function<E, Node<E>>) OPTIMISIC;
    return new SingleConsumerQueue<>(factory);
  }

  /**
   * Creates a queue with a linearizable backoff strategy. A thread waits for a completion
   * signal if it successfully hands off the additional element(s) to another producing
   * thread for batch insertion.
   *
   * @param <E> the type of elements held in this collection
   * @return a new queue where producers wait for a completion signal after combining its addition
   *         with another producing thread's
   */
  public static <E> SingleConsumerQueue<E> linearizable() {
    return new SingleConsumerQueue<>(LinearizableNode<E>::new);
  }

  /** Returns {@code true} when the head and tail reference the same (sentinel) node. */
  @Override
  public boolean isEmpty() {
    return (head == tail);
  }

  /**
   * Counts the nodes linked between the head and the tail that was observed on entry, saturating
   * at {@link Integer#MAX_VALUE}. This walks the list, so it is not a constant-time operation.
   */
  @Override
  public int size() {
    Node<E> cursor = head;
    Node<E> t = tail;
    int size = 0;
    while ((cursor != t) && (size != Integer.MAX_VALUE)) {
      Node<E> next = cursor.getNextRelaxed();
      if (next == null) {
        // The cursor is not the observed tail, so a successor is expected; spin on the volatile
        // field until the producer's link becomes visible.
        while ((next = cursor.next) == null) {}
      }
      cursor = next;
      size++;
    }
    return size;
  }

  /**
   * Returns whether an element equal to {@code o} is found by iterating the queue. A {@code null}
   * argument always yields {@code false}.
   */
  @Override
  public boolean contains(Object o) {
    if (o == null) {
      return false;
    }
    for (Iterator<E> it = iterator(); it.hasNext();) {
      if (o.equals(it.next())) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns the value of the node following the head without removing it, or {@code null} if the
   * head and tail are the same node.
   */
  @Override
  public E peek() {
    Node<E> h = head;
    Node<E> t = tail;
    if (h == t) {
      return null;
    }
    Node<E> next = h.getNextRelaxed();
    if (next == null) {
      // The tail has moved past the head but the link is not visible yet; wait for it.
      while ((next = h.next) == null) {}
    }
    return next.value;
  }

  /**
   * Wraps the element in a node created by the factory and appends it to the queue.
   *
   * @return always {@code true}
   * @throws NullPointerException if the element is {@code null}
   */
  @Override
  public boolean offer(E e) {
    requireNonNull(e);

    Node<E> node = factory.apply(e);
    append(node, node);
    return true;
  }

  /**
   * Removes and returns the value of the node following the head, or returns {@code null} if
   * there is no such node and the head is the tail. The dequeued node has its value cleared and
   * becomes the new head.
   */
  @Override
  public E poll() {
    Node<E> h = head;
    Node<E> next = h.getNextRelaxed();
    if (next == null) {
      if (h == tail) {
        return null;
      } else {
        // A producer has advanced the tail but not yet linked its node; wait for the link.
        while ((next = h.next) == null) {}
      }
    }

    E e = next.value;
    next.value = null;
    head = next;
    if (factory == OPTIMISIC) {
      // prevent nepotism
      h.next = null;
    }
    return e;
  }

  /** Delegates to {@link #offer(Object)}. */
  @Override
  public boolean add(E e) {
    return offer(e);
  }

  /**
   * Links all of the collection's elements into a private chain and appends that chain to the
   * queue in a single operation. Only the first node is created by the factory; the remaining
   * nodes are plain {@link Node} instances.
   *
   * @return {@code false} if the collection was empty, otherwise {@code true}
   * @throws NullPointerException if the collection or any of its elements is {@code null}
   */
  @Override
  public boolean addAll(Collection<? extends E> c) {
    requireNonNull(c);

    Node<E> first = null;
    Node<E> last = null;
    for (E e : c) {
      requireNonNull(e);
      if (first == null) {
        first = factory.apply(e);
        last = first;
      } else {
        Node<E> newLast = new Node<>(e);
        last.lazySetNext(newLast);
        last = newLast;
      }
    }
    if (first == null) {
      return false;
    }
    append(first, last);
    return true;
  }

  /**
   * Adds the linked list of nodes to the queue.
   * <p>
   * Each attempt tries to swing the tail to {@code last}. On success the previous tail is linked
   * to {@code first} and, unless the queue is optimistic, every node in the chain is notified via
   * {@code complete()}. On failure the chain is offered to {@link #transferOrCombine}: if it was
   * handed to another producer this thread calls {@code first.await()} and returns, and if more
   * nodes were received they extend the chain before the next attempt.
   *
   * @param first the first node of the chain to add
   * @param last the last node of the chain to add
   */
  void append(@Nonnull Node<E> first, @Nonnull Node<E> last) {
    for (;;) {
      Node<E> t = tail;
      if (casTail(t, last)) {
        t.lazySetNext(first);
        if (factory == OPTIMISIC) {
          return;
        }
        for (;;) {
          first.complete();
          if (first == last) {
            return;
          }
          Node<E> next = first.getNextRelaxed();
          if (next.value == null) {
            // reduce nepotism
            first.next = null;
          }
          first = next;
        }
      }
      Node<E> node = transferOrCombine(first, last);
      if (node == null) {
        first.await();
        return;
      } else if (node != first) {
        last = node;
      }
    }
  }

  /**
   * Attempts to receive a linked list from a waiting producer or transfer the specified linked list
   * to an arriving producer.
   *
   * @param first the first node in the linked list to try to transfer
   * @param last the last node in the linked list to try to transfer
   * @return either {@code null} if the element was transferred, the first node if neither a
   *         transfer nor receive were successful, or the received last element from a producer
   */
  @Nullable Node<E> transferOrCombine(@Nonnull Node<E> first, Node<E> last) {
    int index = index();
    AtomicReference<Node<E>> slot = arena[index];

    for (;;) {
      Node<E> found = slot.get();
      if (found == null) {
        // The slot is free: publish our chain and spin for a bounded time waiting for a taker.
        if (slot.compareAndSet(null, first)) {
          for (int spin = 0; spin < SPINS; spin++) {
            if (slot.get() != first) {
              return null;
            }
          }
          // Try to withdraw the chain; if the CAS fails then another producer took it meanwhile.
          return slot.compareAndSet(first, null) ? first : null;
        }
      } else if (slot.compareAndSet(found, null)) {
        // Claimed a waiting producer's chain: link it after ours, then sweep the other slots.
        last.lazySetNext(found);
        last = findLast(found);
        for (int i = 1; i < ARENA_LENGTH; i++) {
          slot = arena[(i + index) & ARENA_MASK];
          found = slot.get();
          if ((found != null) && slot.compareAndSet(found, null)) {
            last.lazySetNext(found);
            last = findLast(found);
          }
        }
        return last;
      }
    }
  }

  /** Returns the arena index for the current thread. */
  static int index() {
    int probe = UnsafeAccess.UNSAFE.getInt(Thread.currentThread(), PROBE);
    if (probe == 0) {
      ThreadLocalRandom.current(); // force initialization
      probe = UnsafeAccess.UNSAFE.getInt(Thread.currentThread(), PROBE);
    }
    return (probe & ARENA_MASK);
  }

  /** Returns the last node in the linked list. */
  @Nonnull static <E> Node<E> findLast(@Nonnull Node<E> node) {
    Node<E> next;
    while ((next = node.getNextRelaxed()) != null) {
      node = next;
    }
    return node;
  }

  /**
   * Returns an iterator that walks from the head to the tail that was observed when the iterator
   * was created. It supports {@code remove()}, which clears the node's value and unlinks it.
   */
  @Override
  public Iterator<E> iterator() {
    return new Iterator<E>() {
      /** The node preceding the cursor that is still linked; used to unlink on removal. */
      Node<E> prev;
      /** The tail captured at creation; iteration stops once the cursor reaches it. */
      Node<E> t = tail;
      /** The node whose value was most recently returned (initially the head sentinel). */
      Node<E> cursor = head;
      /** Whether {@code remove()} must fail: before the first {@code next()} or after a removal. */
      boolean failOnRemoval = true;

      @Override
      public boolean hasNext() {
        return (cursor != t);
      }

      @Override
      public E next() {
        if (!hasNext()) {
          throw new NoSuchElementException();
        }
        advance();
        failOnRemoval = false;
        return cursor.value;
      }

      /** Moves the cursor forward, keeping {@code prev} in place if the cursor was just removed. */
      private void advance() {
        if ((prev == null) || !failOnRemoval) {
          prev = cursor;
        }
        cursor = awaitNext();
      }

      @Override
      public void remove() {
        if (failOnRemoval) {
          throw new IllegalStateException();
        }
        failOnRemoval = true;
        cursor.value = null;

        if (t == cursor) {
          // Removing the captured tail: try to make the predecessor the queue's tail instead.
          prev.lazySetNext(null);
          if (casTail(t, prev)) {
            return;
          }
        }
        // Otherwise bypass the removed node by linking its predecessor to its successor.
        prev.lazySetNext(awaitNext());
      }

      /** Returns the cursor's successor, spinning until the link has been published. */
      Node<E> awaitNext() {
        if (cursor.getNextRelaxed() == null) {
          while (cursor.next == null) {}
        }
        return cursor.getNextRelaxed();
      }
    };
  }

  /* ---------------- Serialization Support -------------- */

  static final long serialVersionUID = 1;

  /** Substitutes a {@link SerializationProxy} for this queue when it is serialized. */
  Object writeReplace() {
    return new SerializationProxy<>(this);
  }

  /** Rejects direct deserialization of the queue; only the proxy form is accepted. */
  private void readObject(ObjectInputStream stream) throws InvalidObjectException {
    throw new InvalidObjectException("Proxy required");
  }

  /** A proxy that is serialized instead of the queue. */
  static final class SerializationProxy<E> implements Serializable {
    /** Whether the source queue's factory produces {@link LinearizableNode} instances. */
    final boolean linearizable;
    /** A copy of the queue's elements taken when the proxy was created. */
    final List<E> elements;

    SerializationProxy(SingleConsumerQueue<E> queue) {
      // Probe the factory with a throwaway node to discover which strategy the queue uses.
      linearizable = (queue.factory.apply(null) instanceof LinearizableNode<?>);
      elements = new ArrayList<>(queue);
    }

    /** Rebuilds a queue of the recorded strategy and re-adds the copied elements. */
    Object readResolve() {
      SingleConsumerQueue<E> queue = linearizable ? linearizable() : optimistic();
      queue.addAll(elements);
      return queue;
    }

    static final long serialVersionUID = 1;
  }

  /** A list node holding an element and a link to its successor. */
  static class Node<E> {
    /** The offset of the {@code next} field, used for the Unsafe-based accessors below. */
    static final long NEXT_OFFSET = UnsafeAccess.objectFieldOffset(Node.class, "next");

    E value;
    volatile Node<E> next;

    Node(@Nullable E value) {
      this.value = value;
    }

    /** Reads {@code next} through {@code Unsafe.getObject} rather than the volatile field. */
    @SuppressWarnings("unchecked")
    @Nullable Node<E> getNextRelaxed() {
      return (Node<E>) UnsafeAccess.UNSAFE.getObject(this, NEXT_OFFSET);
    }

    /** Writes {@code next} through {@code Unsafe.putOrderedObject} rather than the volatile field. */
    void lazySetNext(@Nullable Node<E> newNext) {
      UnsafeAccess.UNSAFE.putOrderedObject(this, NEXT_OFFSET, newNext);
    }

    /** A no-op notification that the element was added to the queue. */
    void complete() {}

    /** A no-op wait until the operation has completed. */
    void await() {}

    /** Always returns that the operation completed. */
    boolean isDone() {
      return true;
    }

    @Override
    public String toString() {
      return getClass().getSimpleName() + "[" + value + "]";
    }
  }

  /** A node whose producer can wait until another thread signals that it was added. */
  static final class LinearizableNode<E> extends Node<E> {
    volatile boolean done;

    LinearizableNode(@Nullable E value) {
      super(value);
    }

    /** A notification that the element was added to the queue. */
    @Override
    void complete() {
      done = true;
    }

    /** A busy wait until the operation has completed. */
    @Override
    void await() {
      while (!done) {}
    }

    /** Returns whether the operation completed. */
    @Override
    boolean isDone() {
      return done;
    }
  }
}

/** The namespace for field padding through inheritance. */
final class SCQHeader {

  /** Declares filler fields ahead of the head reference in the class hierarchy. */
  abstract static class PadHead<E> extends AbstractQueue<E> {
    long p00, p01, p02, p03, p04, p05, p06, p07;
    long p10, p11, p12, p13, p14, p15, p16;
  }

  /** Enforces a memory layout to avoid false sharing by padding the head node. */
  abstract static class HeadRef<E> extends PadHead<E> {
    Node<E> head;
  }

  /** Declares filler fields between the head and tail references in the class hierarchy. */
  abstract static class PadHeadAndTail<E> extends HeadRef<E> {
    long p20, p21, p22, p23, p24, p25, p26, p27;
    long p30, p31, p32, p33, p34, p35, p36;
  }

  /** Enforces a memory layout to avoid false sharing by padding the tail node. */
  abstract static class HeadAndTailRef<E> extends PadHeadAndTail<E> {
    /** The offset of the {@code tail} field, used for the Unsafe-based accessors below. */
    static final long TAIL_OFFSET = UnsafeAccess.objectFieldOffset(HeadAndTailRef.class, "tail");

    volatile Node<E> tail;

    /** Writes {@code tail} through {@code Unsafe.putOrderedObject} rather than the volatile field. */
    void lazySetTail(Node<E> next) {
      UnsafeAccess.UNSAFE.putOrderedObject(this, TAIL_OFFSET, next);
    }

    /** Atomically sets {@code tail} to {@code update} if it currently equals {@code expect}. */
    boolean casTail(Node<E> expect, Node<E> update) {
      return UnsafeAccess.UNSAFE.compareAndSwapObject(this, TAIL_OFFSET, expect, update);
    }
  }
}