/*
* Copyright (c) 2008-2012 EMC Corporation
* All Rights Reserved
*/
package com.emc.storageos.coordinator.client.service.impl;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadPoolExecutor;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.api.CuratorEvent;
import org.apache.curator.framework.api.CuratorEventType;
import org.apache.curator.framework.api.CuratorListener;
import org.apache.curator.framework.recipes.queue.QueueSerializer;
import org.apache.curator.framework.state.ConnectionState;
import org.apache.curator.framework.state.ConnectionStateListener;
import org.apache.curator.utils.EnsurePath;
import org.apache.curator.utils.ZKPaths;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.emc.storageos.coordinator.client.service.DistributedQueue;
import com.emc.storageos.coordinator.common.impl.ZkConnection;
import com.emc.storageos.coordinator.common.impl.ZkPath;
import com.emc.storageos.coordinator.exceptions.CoordinatorException;
import com.emc.storageos.services.util.NamedThreadPoolExecutor;
/**
 * ZK backed distributed queue implementation. Differs from the Netflix Curator distributed queue
 * recipe in the following manner:
 * <p/>
 * 1. Multithreaded queue consumer callbacks
 * <p/>
 * 2. A queue item is always processed with lock safety (meaning it is not removed from the queue
 * until successfully processed / the lock is released)
 */
public class DistributedQueueImpl<T> implements DistributedQueue<T> {
private static final Logger _log = LoggerFactory.getLogger(DistributedQueueImpl.class);
private static final String _queuePrefix = "queue-";
private static final String WORKER_POOL_NAME = "DQWorkers";
private static final String STATE_LISTENER_POOL_NAME = "DQStateListener";
// default max of 100K requests
private static final int DEFAULT_MAX_ITEM = 100000;
private final CuratorFramework _zkClient;
private final DistributedQueueConsumer<T> _consumer;
private final QueueSerializer<T> _serializer;
private final String _name;
private final String _queuePath;
private final String _queueName;
private final String _lockPath;
private final ExecutorService _notifyExecutor;
private final ThreadPoolExecutor _workers;
private int _maxItem = DEFAULT_MAX_ITEM;
private int _maxThreads = 10; // this is for distributed queue consumer threads
/**
* Responds to connection drops / reconnects.
*/
private final ConnectionStateListener _connectionListener = new ConnectionStateListener() {
@Override
public void stateChanged(final CuratorFramework client, final ConnectionState newState) {
_log.info("Entering stateChanged method : {}", newState);
// Curator would send RECONNECTED event to listeners in the following two cases.
// a) session reconnected with original session id.
// b) a new session created and connected with the zookeeper.
if (newState == ConnectionState.CONNECTED || newState == ConnectionState.RECONNECTED) {
_workers.submit(new Callable<Object>() {
@Override
public Object call() throws Exception {
_log.debug("Inside call method");
final long sessionId = _zkClient.getZookeeperClient().getZooKeeper().getSessionId();
List<String> locks = _zkClient.getChildren().forPath(_lockPath);
for (int i = 0; i < locks.size(); i++) {
String lock = locks.get(i);
Stat stat = _zkClient.checkExists().forPath(ZKPaths.makePath(_lockPath, lock));
if (stat == null || stat.getEphemeralOwner() != sessionId) {
continue;
}
/*
* If there are some tasks need to be re-tasked, we should find the root cause
* and find a proper way instead of retrying during quick session reconnection.
*/
String lockPath = ZKPaths.makePath(_lockPath, lock);
_log.debug("Leave alone lock {} during quick session reconnection", lockPath);
}
// needs to wake up consumer thread and rearm
// the watch since last watch most likely has
// gone away due to connection loss
_notifyExecutor.execute(new Runnable() {
@Override
public void run() {
notifyPendingChange();
}
});
return null;
}
});
}
}
};
/**
* Reacts to additional queue items and/or changes in their lock state
*/
private final CuratorListener _childListener = new CuratorListener() {
@Override
public void eventReceived(CuratorFramework client, CuratorEvent event) throws Exception {
if (event.getType() == CuratorEventType.WATCHED) {
if (event.getWatchedEvent().getType() == Watcher.Event.EventType.NodeChildrenChanged &&
(event.getPath().startsWith(_queuePath) || event.getPath().startsWith(_lockPath))) {
_notifyExecutor.execute(new Runnable() {
@Override
public void run() {
notifyPendingChange();
}
});
}
}
}
};
/**
* Wakes up main dispatch loop
*/
private synchronized void notifyPendingChange() {
// This is not a naked notify, we do status mutation outside this method, safe to suppress
notifyAll(); // NOSONAR("findbugs:NN_NAKED_NOTIFY")
}
/**
* Constructor
*
* @param conn ZK connection
* @param consumer task consumer
* @param serializer task serializer
* @param name name of the distributed queue
* @param maxThreads maximum number of threads for dispatching work
* @param maxItem maximum number of items in queue
*/
public DistributedQueueImpl(ZkConnection conn, DistributedQueueConsumer<T> consumer,
QueueSerializer<T> serializer, String name, int maxThreads, int maxItem) {
_zkClient = conn.curator();
_consumer = consumer;
_serializer = serializer;
_name = name;
_queuePath = String.format("%1$s/%2$s/queue", ZkPath.QUEUE.toString(), name);
_queueName = getQueueName(_queuePath);
_notifyExecutor = new NamedThreadPoolExecutor(DistributedQueueImpl.class.getSimpleName()
+ "_Notification", 1);
_maxThreads = maxThreads;
// _workers thread pool is for distributed queue framework,
// it needs to has at least two threads -- one for dispatch, one for state change.
_workers = new NamedThreadPoolExecutor(WORKER_POOL_NAME, 2);
_lockPath = String.format("%1$s/%2$s/lock", ZkPath.QUEUE.toString(), name);
_maxItem = maxItem;
}
/**
* Construct distributed queue with default max item (of 100K)
*/
public DistributedQueueImpl(
ZkConnection conn, DistributedQueueConsumer<T> consumer, QueueSerializer<T> serializer,
String name, int maxThreads) {
this(conn, consumer, serializer, name, maxThreads, DEFAULT_MAX_ITEM);
}
public DistributedQueueConsumer<T> getConsumer() {
return _consumer;
}
@Override
public synchronized void start() {
if (_workers.isTerminated()) {
throw CoordinatorException.fatals.failedToStartDistributedQueue();
}
_zkClient.getConnectionStateListenable().addListener(_connectionListener);
_zkClient.getCuratorListenable().addListener(_childListener, new NamedThreadPoolExecutor(STATE_LISTENER_POOL_NAME, 1));
try {
EnsurePath path = new EnsurePath(_queuePath);
path.ensure(_zkClient.getZookeeperClient());
path = new EnsurePath(_lockPath);
path.ensure(_zkClient.getZookeeperClient());
} catch (Exception e) {
throw CoordinatorException.fatals.failedToStartDistributedQueue(e);
}
if (_consumer != null) {
_consumer.init(_queueName, this, _maxThreads);
_workers.submit(new Callable<Object>() {
@Override
public Object call() throws Exception {
dispatch();
return null;
}
});
}
}
@Override
public synchronized boolean stop(long timeoutMs) {
if (_workers.isTerminated()) {
return true;
}
_log.info("Stopping consumer with timeout: {}", timeoutMs);
if (_consumer != null) {
if (!_consumer.uninit(timeoutMs)) {
return false;
}
}
_log.info("Stopping dispatcher");
_zkClient.getConnectionStateListenable().removeListener(_connectionListener);
_zkClient.getCuratorListenable().removeListener(_childListener);
_workers.shutdownNow();
return true;
}
@Override
public void put(T item) throws Exception {
Stat stat = _zkClient.checkExists().forPath(_queuePath);
if (stat.getNumChildren() > _maxItem) {
_log.error("Queue is too busy. Found " + stat.getNumChildren() + " items. Max allowed items are " + _maxItem);
throw CoordinatorException.retryables.queueTooBusy();
}
String path = ZKPaths.makePath(_queuePath, _queuePrefix);
byte[] data = _serializer.serialize(item);
_zkClient.create().withMode(CreateMode.PERSISTENT_SEQUENTIAL).forPath(path, data);
}
/**
* Removes the specified item from the distributed queue.
* Removes associated lock.
*
* @param itemName Name of the item to be removed from the queue.
*
* @throws Exception
*/
public void remove(String itemName) throws Exception {
String itemPath = null;
try {
itemPath = ZKPaths.makePath(_queuePath, itemName);
_zkClient.delete().guaranteed().forPath(itemPath);
} catch (Exception e) {
_log.warn("Problem deleting queue item: {} e={}", itemPath, e);
} finally {
String lockPath = ZKPaths.makePath(_lockPath, itemName);
try {
_log.info("delete lock {}", lockPath);
_zkClient.delete().guaranteed().forPath(lockPath);
} catch (KeeperException.NoNodeException ex) {
_log.warn("The lock {} has been removed e={}", lockPath, ex);
} catch (Exception ex) {
_log.warn("Problem deleting lock item: {} e={}", lockPath, ex);
}
}
}
/**
* Dispatcher loop that
* <p/>
* 1. scans queued items and attempts to lock 2. if locked, queues it to pending item list
*/
private void dispatch() throws Exception {
while (!Thread.currentThread().isInterrupted()) {
try {
List<String> children;
List<String> locks;
synchronized (this) {
// Need to re-scan the zk for any possible changes happened before it is waked up.
boolean needRescan = false;
do {
locks = _zkClient.getChildren().watched().forPath(_lockPath);
children = _zkClient.getChildren().watched().forPath(_queuePath);
_log.info("Processing queue {} - #items: {}, #locks: {}",
new Object[] { _name, children.size(), locks.size() });
children.removeAll(locks);
if (children.isEmpty()) {
wait();
needRescan = true;
} else if (_consumer.isBusy(_queueName)) {
// Wait till consumer has enough resources.
// Note:
// It needs to ALWAYS wait under "watch armed". Because WATCH is one-time trigger,
// after it is waked, it needs to re-arm watch during getChildren() again.
_log.info("The consumer {} is busy", _consumer);
wait();
needRescan = true;
} else {
needRescan = false;
}
} while (needRescan);
}
if (!children.isEmpty()) {
// Note: multiple zkClients might see the same child at the same time,
// if one processChildren finish(deleted both queue item and lock) quickly,
// the other zkClients still be able to create lock but failed while handling
// the queue item, it is harmless but will raise a warning in spawnWork.
processChildren(children);
}
} catch (KeeperException e) {
// In load env, the KeeperException would be thrown occasionally.
// So we need to keep monitoring the queue with the dispatch loop
_log.warn("KeeperException in dispatch loop, retrying in dispatch loop", e);
continue;
} catch (Exception e) {
_log.error("Exception in dispatch loop, quiting", e);
throw e;
}
}
}
/**
* Starts working on an item after lock is successfully obtained
*
* @param child queue item name
*/
private void spawnWork(final String child) {
if (_consumer == null) {
return;
}
final String itemPath = ZKPaths.makePath(_queuePath, child);
byte[] data = null;
try {
data = _zkClient.getData().forPath(itemPath);
} catch (Exception e) {
// 1. free the lock if there is any issue reading the item.
// 2. it also might be raised because the item has been handled by others quickly.
_log.warn("Problem seen while processing queue item which might be already handled by other workers. ", e);
final String lockPath = ZKPaths.makePath(_lockPath, child);
try {
_log.info("delete lock {}", lockPath);
_zkClient.delete().guaranteed().inBackground().forPath(lockPath);
} catch (KeeperException.NoNodeException ex) {
_log.warn("The lock {} has been removed e={}", lockPath, ex);
} catch (Exception ex) {
_log.warn("Problem deleting lock item: {} e={}", lockPath, ex);
}
data = null;
}
if (data != null) {
final T item = _serializer.deserialize(data);
_consumer.startConsumeItem(_queueName, child, item);
}
}
/**
* Process each queued item by submitting them to worker queue
*
* @param children children to process
* @throws Exception
*/
private void processChildren(List<String> children) throws Exception {
Collections.sort(children,
new Comparator<String>() {
public int compare(String o1, String o2) {
return o1.compareTo(o2);
}
}
);
for (int i = 0; i < children.size(); i++) {
// only grab tasks when the consumer is not busy
// we need to check it before each one is processed.
if (_consumer.isBusy(_queueName)) {
_log.info("The consumer {} is busy", _consumer);
return;
}
final String child = children.get(i);
final String lockPath = ZKPaths.makePath(_lockPath, child);
try {
_zkClient.create().withMode(CreateMode.EPHEMERAL).forPath(lockPath);
_log.info("processChildren(): Created lock zNode {} for Queue {}", child, _queuePath);
spawnWork(child);
} catch (KeeperException.NodeExistsException nee) {
_log.info("processChildren(): For Queue: {}, ZNodes already exist", _queuePath, nee);
} catch (KeeperException ke) {
_log.info("processChildren(): For Queue: {}, Problem while creating ZNodes: {}",
new Object[] { _queuePath, lockPath }, ke);
} catch (Exception e) {
_log.info("processChildren(): For Queue: {}, Failed processing ZNodes: {}",
new Object[] { _queuePath, lockPath }, e);
}
}
}
private String getQueueName(String queuePath) {
// Extract queue name from ZPath
// ZPath format for queue: /queue/<queuename>/queue
String[] tmparray = queuePath.split("/");
if (tmparray.length != 4) {
return null;
}
return tmparray[2];
}
/* (non-Javadoc)
* @see com.emc.storageos.coordinator.client.service.DistributedQueue#getQueuedItems()
*/
@Override
public List<T> getQueuedItems() {
List<T> items = new ArrayList<T>();
try {
synchronized (this) {
List<String> activeItems = _zkClient.getChildren().watched().forPath(_lockPath);
List<String> queuedItems = _zkClient.getChildren().watched().forPath(_queuePath);
queuedItems.removeAll(activeItems);
for (String queuedItem : queuedItems) {
try {
final String itemPath = ZKPaths.makePath(_queuePath, queuedItem);
byte[] data = _zkClient.getData().forPath(itemPath);
if (data != null) {
final T itemOnQueue = _serializer.deserialize(data);
if (itemOnQueue != null) {
items.add(itemOnQueue);
}
}
} catch (Exception e) {
_log.warn("Exception thrown getting queued items from queue " + _queuePath, e);
}
}
}
} catch (Exception e) {
_log.warn("Exception thrown getting queued items from queue " + _queuePath, e);
}
return items;
}
/* (non-Javadoc)
* @see com.emc.storageos.coordinator.client.service.DistributedQueue#getActiveItems()
*/
@Override
public List<T> getActiveItems() {
List<T> items = new ArrayList<T>();
try {
synchronized (this) {
List<String> activeItems = _zkClient.getChildren().watched().forPath(_lockPath);
for (String activeItem : activeItems) {
try {
final String itemPath = ZKPaths.makePath(_queuePath, activeItem);
byte[] data = _zkClient.getData().forPath(itemPath);
if (data != null) {
final T itemOnQueue = _serializer.deserialize(data);
if (itemOnQueue != null) {
items.add(itemOnQueue);
}
}
} catch (Exception e) {
_log.warn("Exception thrown getting active items from queue " + _queuePath, e);
}
}
}
} catch (Exception e) {
_log.warn("Exception thrown getting active items from queue " + _queuePath, e);
}
return items;
}
}