/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zookeeper.server.quorum;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.LinkedBlockingQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.zookeeper.ZooDefs.OpCode;
import org.apache.zookeeper.server.Request;
import org.apache.zookeeper.server.RequestProcessor;
import org.apache.zookeeper.server.WorkerService;
import org.apache.zookeeper.server.ZooKeeperCriticalThread;
import org.apache.zookeeper.server.ZooKeeperServerListener;
/**
* This RequestProcessor matches the incoming committed requests with the
* locally submitted requests. The trick is that locally submitted requests that
* change the state of the system will come back as incoming committed requests,
* so we need to match them up. Instead of just waiting for the committed requests,
* we process the uncommitted requests that belong to other sessions.
*
* The CommitProcessor is multi-threaded. Communication between threads is
* handled via queues, atomics, and wait/notifyAll synchronized on the
* processor. The CommitProcessor acts as a gateway for allowing requests to
* continue with the remainder of the processing pipeline. It will allow many
* read requests but only a single write request to be in flight simultaneously,
* thus ensuring that write requests are processed in transaction id order.
*
* - 1 commit processor main thread, which watches the request queues and
* assigns requests to worker threads based on their sessionId so that
* read and write requests for a particular session are always assigned
* to the same thread (and hence are guaranteed to run in order).
* - 0-N worker threads, which run the rest of the request processor pipeline
* on the requests. If configured with 0 worker threads, the primary
* commit processor thread runs the pipeline directly.
*
* Typical (default) thread counts are: on a 32 core machine, 1 commit
* processor thread and 32 worker threads.
*
* Multi-threading constraints:
* - Each session's requests must be processed in order.
* - Write requests must be processed in zxid order
* - Must ensure no race condition between writes in one session that would
* trigger a watch being set by a read request in another session
*
* The current implementation solves the third constraint by simply allowing no
* read requests to be processed in parallel with write requests.
*/
public class CommitProcessor extends ZooKeeperCriticalThread implements
RequestProcessor {
private static final Logger LOG = LoggerFactory.getLogger(CommitProcessor.class);

/**
 * System property giving the number of worker threads. When unset,
 * {@link #start()} falls back to the number of available processors.
 */
public static final String ZOOKEEPER_COMMIT_PROC_NUM_WORKER_THREADS =
    "zookeeper.commitProcessor.numWorkerThreads";

/**
 * System property giving the worker pool shutdown timeout in milliseconds.
 * When unset, {@link #start()} falls back to 5000 (5s).
 */
public static final String ZOOKEEPER_COMMIT_PROC_SHUTDOWN_TIMEOUT =
    "zookeeper.commitProcessor.shutdownTimeout";

/**
 * Incoming requests, added by {@link #processRequest(Request)} and drained
 * by the main loop in {@link #run()}.
 */
protected LinkedBlockingQueue<Request> queuedRequests =
    new LinkedBlockingQueue<Request>();

/**
 * Requests that have been committed, added by {@link #commit(Request)} and
 * consumed one at a time by the main loop in {@link #run()}.
 */
protected final LinkedBlockingQueue<Request> committedRequests =
    new LinkedBlockingQueue<Request>();

/**
 * Requests that we are holding until commit comes in. Keys represent
 * session ids, each value is a linked list of the session's requests.
 * The main loop removes a session's entry as soon as its list is empty.
 */
protected final HashMap<Long, LinkedList<Request>> pendingRequests =
    new HashMap<Long, LinkedList<Request>>(10000);

/**
 * The number of requests currently being processed: incremented when a
 * request is handed to the worker pool and decremented when its work ends.
 */
protected final AtomicInteger numRequestsProcessing = new AtomicInteger(0);

/** Processor that receives the requests released by this one. */
RequestProcessor nextProcessor;

/**
 * For testing purposes, we use a separated stopping condition for the
 * outer loop.
 */
protected volatile boolean stoppedMainLoop = true;

/** True until {@link #start()} is called and again once the processor is halted. */
protected volatile boolean stopped = true;

/** Worker pool join timeout in milliseconds; read from system properties in {@link #start()}. */
private long workerShutdownTimeoutMS;

/** Pool that runs the downstream pipeline; created in {@link #start()} unless already set. */
protected WorkerService workerPool;

/**
 * Monitor used to wait for, and signal, the moment no request is being
 * processed. It is final so that every thread is guaranteed to
 * synchronize on the very same object.
 */
private final Object emptyPoolSync = new Object();

/**
 * This flag indicates whether we need to wait for a response to come back
 * from the leader or we just let the sync operation flow through like a
 * read. The flag will be true if the CommitProcessor is in a Leader
 * pipeline.
 */
boolean matchSyncs;
/**
 * Builds a commit processor; the processor does nothing until
 * {@link #start()} is called.
 *
 * @param nextProcessor processor that receives requests released by this one
 * @param id            suffix appended to "CommitProcessor:" to form the
 *                      name handed to the superclass
 * @param matchSyncs    whether sync requests have to wait for a commit
 * @param listener      listener handed to the superclass constructor
 */
public CommitProcessor(RequestProcessor nextProcessor, String id,
        boolean matchSyncs, ZooKeeperServerListener listener) {
    super("CommitProcessor:" + id, listener);
    this.matchSyncs = matchSyncs;
    this.nextProcessor = nextProcessor;
}
/**
 * Tells whether at least one request handed to the next processor has not
 * finished yet.
 *
 * @return true when the in-flight counter is non-zero
 */
private boolean isProcessingRequest() {
    final int inFlight = numRequestsProcessing.get();
    return inFlight != 0;
}
/**
 * Decides whether a request has to be held back until its commit arrives.
 *
 * @param request the request to classify by its op code
 * @return true for state-changing operations; the value of
 *         {@code matchSyncs} for sync; for session creation and closing,
 *         true unless the request reports a local session; false otherwise
 */
protected boolean needCommit(Request request) {
    final int opCode = request.type;

    // Operations whose answer depends on configuration or on the request.
    if (opCode == OpCode.sync) {
        return matchSyncs;
    }
    if (opCode == OpCode.createSession || opCode == OpCode.closeSession) {
        return !request.isLocalSession();
    }

    // Operations that always (or never) need a commit.
    switch (opCode) {
        case OpCode.create:
        case OpCode.create2:
        case OpCode.createTTL:
        case OpCode.createContainer:
        case OpCode.delete:
        case OpCode.deleteContainer:
        case OpCode.setData:
        case OpCode.reconfig:
        case OpCode.multi:
        case OpCode.setACL:
            return true;
        default:
            return false;
    }
}
/**
 * Main loop of the commit processor thread.
 *
 * Each iteration (1) sleeps until there is a queued or a committed request,
 * (2) drains a bounded number of queued requests, parking those that need a
 * commit (or that follow a parked request of the same session) and
 * forwarding the others, and (3) handles at most one committed request once
 * no other request is in flight.
 *
 * Any Throwable escaping the loop, including the InterruptedException
 * thrown by wait(), is passed to handleException.
 */
@Override
public void run() {
    try {
        /*
         * In each iteration of the following loop we process at most
         * requestsToProcess requests of queuedRequests. We have to limit
         * the number of requests we poll from queuedRequests, since it is
         * possible to endlessly poll read requests from queuedRequests, and
         * that will lead to a starvation of non-local committed requests.
         */
        int requestsToProcess = 0;
        boolean commitIsWaiting = false;
        do {
            /*
             * Since requests are placed in the queue before being sent to
             * the leader, if commitIsWaiting = true, the commit belongs to
             * the first update operation in the queuedRequests or to a
             * request from a client on another server (i.e., the order of
             * the following two lines is important!).
             */
            commitIsWaiting = !committedRequests.isEmpty();
            requestsToProcess = queuedRequests.size();
            // Avoid sync if we have something to do
            if (requestsToProcess == 0 && !commitIsWaiting) {
                // Waiting for requests to process
                synchronized (this) {
                    /*
                     * Re-read both queues now that we hold the monitor. The
                     * values sampled above were taken without the lock, so
                     * a request may have been added (and its notifyAll
                     * already delivered to nobody) in the meantime; waiting
                     * on those stale values would miss that wakeup.
                     */
                    commitIsWaiting = !committedRequests.isEmpty();
                    requestsToProcess = queuedRequests.size();
                    while (!stopped && requestsToProcess == 0
                            && !commitIsWaiting) {
                        wait();
                        commitIsWaiting = !committedRequests.isEmpty();
                        requestsToProcess = queuedRequests.size();
                    }
                }
            }
            /*
             * Processing up to requestsToProcess requests from the incoming
             * queue (queuedRequests), possibly less if a committed request
             * is present along with a pending local write. After the loop,
             * we process one committed request if commitIsWaiting.
             */
            Request request = null;
            while (!stopped && requestsToProcess > 0
                    && (request = queuedRequests.poll()) != null) {
                requestsToProcess--;
                if (needCommit(request)
                        || pendingRequests.containsKey(request.sessionId)) {
                    // Add request to pending
                    LinkedList<Request> requests = pendingRequests
                            .get(request.sessionId);
                    if (requests == null) {
                        requests = new LinkedList<Request>();
                        pendingRequests.put(request.sessionId, requests);
                    }
                    requests.addLast(request);
                } else {
                    sendToNextProcessor(request);
                }
                /*
                 * Stop feeding the pool if there is a local pending update
                 * and a committed request that is ready. Once we have a
                 * pending request with a waiting committed request, we know
                 * we can process the committed one. This is because commits
                 * for local requests arrive in the order they appeared in
                 * the queue, so if we have a pending request and a
                 * committed request, the committed request must be for that
                 * pending write or for a write originating at a different
                 * server.
                 */
                if (!pendingRequests.isEmpty() && !committedRequests.isEmpty()) {
                    /*
                     * We set commitIsWaiting so that we won't check
                     * committedRequests again.
                     */
                    commitIsWaiting = true;
                    break;
                }
            }
            // Handle a single committed request
            if (commitIsWaiting && !stopped) {
                // No other request may be in flight while a write is applied.
                waitForEmptyPool();
                if (stopped) {
                    return;
                }
                // Process committed head
                if ((request = committedRequests.poll()) == null) {
                    throw new IOException("Error: committed head is null");
                }
                /*
                 * Check if request is pending, if so, update it with the
                 * committed info
                 */
                LinkedList<Request> sessionQueue = pendingRequests
                        .get(request.sessionId);
                if (sessionQueue != null) {
                    // If session queue != null, then it is also not empty.
                    Request topPending = sessionQueue.poll();
                    if (request.cxid != topPending.cxid) {
                        LOG.error(
                                "Got cxid 0x"
                                        + Long.toHexString(request.cxid)
                                        + " expected 0x" + Long.toHexString(
                                                topPending.cxid)
                                        + " for client session id "
                                        + Long.toHexString(request.sessionId));
                        throw new IOException("Error: unexpected cxid for"
                                + " client session");
                    }
                    /*
                     * We want to send our own version of the request (it
                     * holds the pointer to the connection), updated with
                     * the committed header, txn and zxid.
                     */
                    topPending.setHdr(request.getHdr());
                    topPending.setTxn(request.getTxn());
                    topPending.zxid = request.zxid;
                    request = topPending;
                }
                sendToNextProcessor(request);
                // Let the write finish before releasing any following reads.
                waitForEmptyPool();
                /*
                 * Process following reads if any, remove session queue if
                 * empty.
                 */
                if (sessionQueue != null) {
                    while (!stopped && !sessionQueue.isEmpty()
                            && !needCommit(sessionQueue.peek())) {
                        sendToNextProcessor(sessionQueue.poll());
                    }
                    // Remove empty queues
                    if (sessionQueue.isEmpty()) {
                        pendingRequests.remove(request.sessionId);
                    }
                }
            }
        } while (!stoppedMainLoop);
    } catch (Throwable e) {
        handleException(this.getName(), e);
    }
    LOG.info("CommitProcessor exited loop!");
}
/**
 * Blocks the caller until no request is being processed or the processor
 * has been stopped, whichever comes first.
 *
 * @throws InterruptedException if the thread is interrupted while waiting
 */
private void waitForEmptyPool() throws InterruptedException {
    synchronized (emptyPoolSync) {
        for (;;) {
            // Re-checked after every wakeup, as with any guarded wait.
            if (stopped || !isProcessingRequest()) {
                return;
            }
            emptyPoolSync.wait();
        }
    }
}
/**
 * Reads the worker count and shutdown timeout from system properties,
 * creates the worker pool unless one is already set, clears both stop
 * flags and finally starts the thread.
 */
@Override
public void start() {
    final int defaultWorkers = Runtime.getRuntime().availableProcessors();
    final int workerCount = Integer.getInteger(
            ZOOKEEPER_COMMIT_PROC_NUM_WORKER_THREADS, defaultWorkers);
    workerShutdownTimeoutMS = Long.getLong(
            ZOOKEEPER_COMMIT_PROC_SHUTDOWN_TIMEOUT, 5000);

    final String workerLabel =
            workerCount > 0 ? String.valueOf(workerCount) : "no";
    LOG.info("Configuring CommitProcessor with "
            + workerLabel
            + " worker threads.");

    // Keep a pool that was set before start() was called.
    if (workerPool == null) {
        workerPool = new WorkerService(
                "CommitProcWork", workerCount, true);
    }

    // The flags must be cleared before the main loop starts running.
    stopped = false;
    stoppedMainLoop = false;
    super.start();
}
/**
 * Hands a request over for final processing. The request is counted as
 * in flight first and then scheduled on the worker pool under its session
 * id; if a worker thread pool is not being used, processing is done
 * directly by this thread.
 *
 * @param request the request to pass to the next processor
 */
private void sendToNextProcessor(Request request) {
    final CommitWorkRequest work = new CommitWorkRequest(request);
    // Count the request before scheduling it, so it is never missed.
    numRequestsProcessing.incrementAndGet();
    workerPool.schedule(work, request.sessionId);
}
/**
 * Small adapter that lets the downstream processing of one request run
 * through the WorkerService.
 */
private class CommitWorkRequest extends WorkerService.WorkRequest {
    /** The request to pass to the next processor. */
    private final Request request;

    CommitWorkRequest(Request request) {
        this.request = request;
    }

    /**
     * Halts the commit processor unless it is already stopped.
     * NOTE(review): presumably called by the WorkerService when the work
     * failed or could not be run -- confirm against WorkerService.
     */
    @Override
    public void cleanup() {
        if (stopped) {
            return;
        }
        LOG.error("Exception thrown by downstream processor,"
                + " unable to continue.");
        CommitProcessor.this.halt();
    }

    /**
     * Passes the request to the next processor. Whatever the outcome, the
     * in-flight counter is decremented, and the thread waiting for an
     * empty pool is woken up when the counter reaches zero.
     */
    public void doWork() throws RequestProcessorException {
        try {
            nextProcessor.processRequest(request);
        } finally {
            final int stillInFlight = numRequestsProcessing.decrementAndGet();
            if (stillInFlight == 0) {
                wakeupOnEmpty();
            }
        }
    }
}
/**
 * Wakes up every thread waiting on this processor's monitor, i.e. the
 * main loop when it sleeps for lack of work.
 */
private void wakeup() {
    synchronized (this) {
        notifyAll();
    }
}
/**
 * Wakes up every thread blocked in waitForEmptyPool().
 */
private void wakeupOnEmpty() {
    final Object poolMonitor = emptyPoolSync;
    synchronized (poolMonitor) {
        poolMonitor.notifyAll();
    }
}
/**
 * Records a committed request and wakes up the main loop. Null requests
 * are ignored, and so is every request once the processor is stopped.
 *
 * @param request the committed request, may be null
 */
public void commit(Request request) {
    if (request == null) {
        return;
    }
    if (stopped) {
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Committing request:: " + request);
    }
    committedRequests.add(request);
    wakeup();
}
/**
 * Queues an incoming request for the main loop and wakes the loop up.
 * Requests received while the processor is stopped are dropped.
 *
 * @param request the incoming request
 */
@Override
public void processRequest(Request request) {
    if (stopped) {
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Processing request:: " + request);
    }
    queuedRequests.add(request);
    wakeup();
}
/**
 * Stops the processor: raises both stop flags, wakes up every waiting
 * thread so it can observe them, discards the queued requests and stops
 * the worker pool if there is one.
 */
private void halt() {
    // Raise the flags first so that woken threads see them.
    stoppedMainLoop = true;
    stopped = true;

    wakeupOnEmpty();
    wakeup();

    queuedRequests.clear();

    if (workerPool == null) {
        return;
    }
    workerPool.stop();
}
/**
 * Shuts this processor down: halts it, waits up to the configured
 * shutdown timeout for the worker pool (if any) to finish, and then shuts
 * down the next processor (if any).
 */
@Override
public void shutdown() {
    LOG.info("Shutting down");

    halt();

    if (workerPool != null) {
        workerPool.join(workerShutdownTimeoutMS);
    }

    if (nextProcessor != null) {
        nextProcessor.shutdown();
    }
}
}