/*
* Copyright [2013-2014] PayPal Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ml.shifu.guagua.coordinator.zk;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import ml.shifu.guagua.GuaguaConstants;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* ZooKeeper provides only atomic operations. GuaguaZooKeeper provides additional non-atomic operations that are useful.
* It also provides wrappers to deal with ConnectionLossException. All methods of this class should be thread-safe.
*/
public class GuaguaZooKeeper {
/** Internal logger */
private static final Logger LOG = LoggerFactory.getLogger(GuaguaZooKeeper.class);
/**
 * Length of the ZK sequence number: the number of trailing characters of a child name that
 * {@code SequenceComparator} parses as the sequence number.
 */
private static final int SEQUENCE_NUMBER_LENGTH = 10;
/** Internal ZooKeeper client that every operation of this wrapper is delegated to */
private final ZooKeeper zooKeeper;
/** Number of max attempts to retry when failing due to connection loss; validated to be positive by the constructor */
private final int maxRetryAttempts;
/** Milliseconds to wait before trying again due to connection loss; multiplied by the attempt index in retryDelay */
private final long retryWaitMsecs;
/** Shared comparator used by the sequence-sorted {@code getChildrenExt} overloads to order children by sequence number */
private static final SequenceComparator sequenceComparator = new SequenceComparator();
/**
 * Validates the retry settings and then opens the underlying {@link ZooKeeper} client.
 *
 * @param connectString
 *            Comma separated host:port pairs, one per zk server, e.g.
 *            "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". An optional chroot suffix may follow, e.g.
 *            "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002/app/a"; the client is then rooted at "/app/a" and
 *            every path is relative to that root, so "/foo/bar" is served as "/app/a/foo/bar" from the server
 *            perspective.
 * @param sessionTimeout
 *            Session timeout in milliseconds
 * @param maxRetryAttempts
 *            Max attempts made by {@link #retryOperation(GuaguaZooKeeperOperation)} on connection loss; must be
 *            positive
 * @param retryWaitMsecs
 *            Base wait in milliseconds between retries on connection loss; must be positive
 * @param watcher
 *            A watcher object which will be notified of state changes, may also be notified for node events
 * @throws IOException
 *             In case of any io exception to connect to zookeeper server.
 * @throws IllegalArgumentException
 *             If {@code maxRetryAttempts} or {@code retryWaitMsecs} is not larger than 0.
 */
public GuaguaZooKeeper(String connectString, int sessionTimeout, int maxRetryAttempts, int retryWaitMsecs,
        Watcher watcher) throws IOException {
    // Reject unusable retry settings before any connection is attempted.
    if(maxRetryAttempts < 1) {
        throw new IllegalArgumentException("'maxRetryAttempts' should be larger than 0.");
    }
    if(retryWaitMsecs < 1) {
        throw new IllegalArgumentException("'retryWaitMsecs' should be larger than 0.");
    }
    this.maxRetryAttempts = maxRetryAttempts;
    this.retryWaitMsecs = retryWaitMsecs;
    this.zooKeeper = new ZooKeeper(connectString, sessionTimeout, watcher);
}
/**
 * Creates a znode and, when asked to, every missing ancestor on the way to it (not atomic). With
 * {@code recursive} set to false this behaves exactly like a plain create().
 *
 * @param path
 *            path to create
 * @param data
 *            data to set on the final znode
 * @param acl
 *            acls on each znode created
 * @param createMode
 *            only affects the final znode; ancestors are always created as PERSISTENT
 * @param recursive
 *            if true, creates all ancestors
 * @return Actual created path
 * @throws KeeperException
 * @throws InterruptedException
 *             Both KeeperException InterruptedException are thrown from {@link ZooKeeper} methods.
 * @throws NullPointerException
 *             Presumably if {@code path} is null -- TODO confirm against the ZooKeeper client's own path
 *             validation.
 */
public String createExt(final String path, final byte[] data, final List<ACL> acl, final CreateMode createMode,
        final boolean recursive) throws KeeperException, InterruptedException {
    GuaguaZooKeeperOperation<String> createOperation = new GuaguaZooKeeperOperation<String>() {
        @Override
        public String execute() throws KeeperException, InterruptedException {
            try {
                // Optimistic attempt: works directly whenever the parent znode is already there.
                return getZooKeeper().create(path, data, acl, createMode);
            } catch (KeeperException.NoNodeException e) {
                if(!recursive) {
                    // Non-recursive mode is a plain create(), so the failure goes back to the caller.
                    throw e;
                }
                LOG.warn("createExt: Cannot directly create node {} because of NoNodeException.", path);
            }

            // Walk the path separator by separator (skipping the leading one) and create each ancestor.
            int separatorIndex = path.indexOf(GuaguaConstants.ZOOKEEPER_SEPARATOR, 1);
            while(separatorIndex != -1) {
                String ancestor = path.substring(0, separatorIndex);
                try {
                    // Ancestors are PERSISTENT because EPHEMERAL or SEQUENTIAL znodes should be attached to a
                    // PERSISTENT znode.
                    getZooKeeper().create(ancestor, null, acl, CreateMode.PERSISTENT);
                } catch (KeeperException.NodeExistsException e) {
                    LOG.warn("createExt: Znode {} already exists", ancestor);
                }
                separatorIndex = path.indexOf(GuaguaConstants.ZOOKEEPER_SEPARATOR, separatorIndex + 1);
            }
            return getZooKeeper().create(path, data, acl, createMode);
        }
    };
    return retryOperation(createOperation);
}
/**
 * Immutable holder for the outcome of {@code createOrSetExt()} and {@code createOnceExt()}: the path of the
 * znode when it was created, or the stat returned when an existing znode was set instead.
 */
public static class PathStat {
    /** Path to created znode (if any); fixed at construction */
    private final String path;
    /** Stat from set znode (if any); fixed at construction */
    private final Stat stat;

    /**
     * Stores the results of a create-or-set style call.
     *
     * @param path
     *            Path to created znode (or null)
     * @param stat
     *            Stat from set znode (or null if nothing was set)
     */
    public PathStat(String path, Stat stat) {
        this.path = path;
        this.stat = stat;
    }

    /**
     * Get the path of the created znode if it was created.
     *
     * @return Path of created znode or null if not created
     */
    public String getPath() {
        return path;
    }

    /**
     * Get the stat of the set znode if set
     *
     * @return Stat of set znode or null if not set
     */
    public Stat getStat() {
        return stat;
    }
}
/**
 * Create a znode. If the znode already exists, set its data instead. Both steps go through
 * {@link #retryOperation(GuaguaZooKeeperOperation)}, so the fallback set is retried on connection loss just like
 * the create.
 *
 * @param path
 *            path to create
 * @param data
 *            data to set on the final znode
 * @param acl
 *            acls on each znode created
 * @param createMode
 *            only affects the final znode
 * @param recursive
 *            if true, creates all ancestors
 * @param version
 *            Version to set if setting
 * @return a {@link PathStat} carrying the created path (stat is null) when the znode was created, or the stat
 *         of the set znode (path is null) when it already existed
 * @throws InterruptedException
 * @throws KeeperException
 *             Both KeeperException InterruptedException are thrown from {@link ZooKeeper} methods.
 */
public PathStat createOrSetExt(final String path, final byte[] data, List<ACL> acl, CreateMode createMode,
        boolean recursive, final int version) throws KeeperException, InterruptedException {
    try {
        return new PathStat(createExt(path, data, acl, createMode, recursive), null);
    } catch (KeeperException.NodeExistsException e) {
        LOG.warn("createOrSet: Node exists on path {}", path);
    }

    // The znode is already there: overwrite its data, with the same connection-loss retry as other operations.
    // NOTE(review): with an explicit (non -1) version, a retry after a write that did reach the server
    // presumably fails with a bad-version error -- confirm this is acceptable for callers.
    Stat setStat = retryOperation(new GuaguaZooKeeperOperation<Stat>() {
        @Override
        public Stat execute() throws KeeperException, InterruptedException {
            return getZooKeeper().setData(path, data, version);
        }
    });
    return new PathStat(null, setStat);
}
/**
 * Create a znode only if no znode exists at that path yet; an existing znode is left untouched.
 *
 * @param path
 *            path to create
 * @param data
 *            data to set on the final znode
 * @param acl
 *            acls on each znode created
 * @param createMode
 *            only affects the final znode
 * @param recursive
 *            if true, creates all ancestors
 * @return a {@link PathStat} holding the created path (its stat is null), or null if the znode already existed
 * @throws InterruptedException
 * @throws KeeperException
 *             Both KeeperException InterruptedException are thrown from {@link ZooKeeper} methods.
 */
public PathStat createOnceExt(final String path, byte[] data, List<ACL> acl, CreateMode createMode,
        boolean recursive) throws KeeperException, InterruptedException {
    PathStat created = null;
    try {
        String createdPath = createExt(path, data, acl, createMode, recursive);
        created = new PathStat(createdPath, null);
    } catch (KeeperException.NodeExistsException e) {
        // Somebody else created it first; report that by returning null.
        LOG.warn("createOnceExt: Node already exists on path {}", path);
    }
    return created;
}
/**
 * Delete a path, optionally together with everything below it. The recursive form is a sequence of separate
 * deletes and therefore not atomic, which is why it is not part of ZooKeeper itself.
 *
 * @param path
 *            path to remove (i.e. /tmp will remove /tmp/1 and /tmp/2)
 * @param version
 *            expected version (-1 for all); applied to {@code path} only, children are removed with -1
 * @param recursive
 *            if true, remove all children, otherwise behave like remove()
 * @throws InterruptedException
 * @throws KeeperException
 *             Both KeeperException InterruptedException are thrown from {@link ZooKeeper} methods.
 */
public void deleteExt(final String path, final int version, final boolean recursive) throws InterruptedException,
        KeeperException {
    GuaguaZooKeeperOperation<Void> deleteOperation = new GuaguaZooKeeperOperation<Void>() {
        @Override
        public Void execute() throws KeeperException, InterruptedException {
            try {
                // A znode without children can be removed in one step.
                getZooKeeper().delete(path, version);
                return null;
            } catch (KeeperException.NotEmptyException e) {
                if(!recursive) {
                    // Plain delete semantics: a non-empty znode is the caller's problem.
                    throw e;
                }
                LOG.warn("deleteExt: Cannot directly remove node {}", path);
            }

            // Remove each subtree first, then retry the znode itself.
            for(String childName: getZooKeeper().getChildren(path, false)) {
                deleteExt(path + GuaguaConstants.ZOOKEEPER_SEPARATOR + childName, -1, true);
            }
            getZooKeeper().delete(path, version);
            return null;
        }
    };
    retryOperation(deleteOperation);
}
/**
 * Look up the stat of the znode at the given path, retrying on connection loss.
 * <p>
 * If the watch is true and the call is successful (no exception is thrown), a watch will be left on the node with
 * the given path. The watch will be triggered by a successful operation that creates/delete the node or sets the
 * data on the node.
 *
 * @param path
 *            the node path
 * @param watch
 *            whether need to watch this node
 * @return the stat of the node of the given path; return null if no such a node exists.
 * @throws KeeperException
 *             If the server signals an error
 * @throws InterruptedException
 *             If the server transaction is interrupted.
 */
public Stat exists(final String path, final boolean watch) throws KeeperException, InterruptedException {
    GuaguaZooKeeperOperation<Stat> existsOperation = new GuaguaZooKeeperOperation<Stat>() {
        @Override
        public Stat execute() throws KeeperException, InterruptedException {
            return getZooKeeper().exists(path, watch);
        }
    };
    return retryOperation(existsOperation);
}
/**
 * Look up the stat of the znode at the given path with an explicit watcher, retrying on connection loss.
 * <p>
 * If the watch is non-null and the call is successful (no exception is thrown), a watch will be left on the node
 * with the given path. The watch will be triggered by a successful operation that creates/delete the node or sets
 * the data on the node.
 *
 * @param path
 *            the node path
 * @param watcher
 *            explicit watcher
 * @return the stat of the node of the given path; return null if no such a node exists.
 * @throws KeeperException
 *             If the server signals an error
 * @throws InterruptedException
 *             If the server transaction is interrupted.
 * @throws IllegalArgumentException
 *             if an invalid path is specified
 */
public Stat exists(final String path, final Watcher watcher) throws KeeperException, InterruptedException {
    GuaguaZooKeeperOperation<Stat> existsOperation = new GuaguaZooKeeperOperation<Stat>() {
        @Override
        public Stat execute() throws KeeperException, InterruptedException {
            return getZooKeeper().exists(path, watcher);
        }
    };
    return retryOperation(existsOperation);
}
/**
 * Read the data and the stat of the znode at the given path with an explicit watcher, retrying on connection
 * loss.
 * <p>
 * If the watch is non-null and the call is successful (no exception is thrown), a watch will be left on the node
 * with the given path. The watch will be triggered by a successful operation that sets data on the node, or
 * deletes the node.
 * <p>
 * A KeeperException with error code KeeperException.NoNode will be thrown if no node with the given path exists.
 *
 * @param path
 *            the given path
 * @param watcher
 *            explicit watcher
 * @param stat
 *            the stat of the node
 * @return the data of the node
 * @throws KeeperException
 *             If the server signals an error with a non-zero error code
 * @throws InterruptedException
 *             If the server transaction is interrupted.
 * @throws IllegalArgumentException
 *             if an invalid path is specified
 */
public byte[] getData(final String path, final Watcher watcher, final Stat stat) throws KeeperException,
        InterruptedException {
    GuaguaZooKeeperOperation<byte[]> readOperation = new GuaguaZooKeeperOperation<byte[]>() {
        @Override
        public byte[] execute() throws KeeperException, InterruptedException {
            return getZooKeeper().getData(path, watcher, stat);
        }
    };
    return retryOperation(readOperation);
}
/**
 * Read the data and the stat of the znode at the given path, retrying on connection loss.
 * <p>
 * If the watch is true and the call is successful (no exception is thrown), a watch will be left on the node with
 * the given path. The watch will be triggered by a successful operation that sets data on the node, or deletes the
 * node.
 * <p>
 * A KeeperException with error code KeeperException.NoNode will be thrown if no node with the given path exists.
 *
 * @param path
 *            the given path
 * @param watch
 *            whether need to watch this node
 * @param stat
 *            the stat of the node
 * @return the data of the node
 * @throws KeeperException
 *             If the server signals an error with a non-zero error code
 * @throws InterruptedException
 *             If the server transaction is interrupted.
 */
public byte[] getData(final String path, final boolean watch, final Stat stat) throws KeeperException,
        InterruptedException {
    GuaguaZooKeeperOperation<byte[]> readOperation = new GuaguaZooKeeperOperation<byte[]>() {
        @Override
        public byte[] execute() throws KeeperException, InterruptedException {
            return getZooKeeper().getData(path, watch, stat);
        }
    };
    return retryOperation(readOperation);
}
/**
 * Get the children of the path with extensions.
 * Extension 1: Sort the children with {@code childComparator}, when one is given
 * Extension 2: Get the full path instead of relative path
 *
 * @param path
 *            path to znode
 * @param watch
 *            set the watch?
 * @param fullPath
 *            if true, get the fully znode path back
 * @param childComparator
 *            comparator to sort children; null leaves the order returned by ZooKeeper untouched.
 * @return list of children
 * @throws InterruptedException
 * @throws KeeperException
 *             Both KeeperException InterruptedException are thrown from {@link ZooKeeper} methods.
 */
public List<String> getChildrenExt(final String path, final boolean watch, final boolean fullPath,
        final Comparator<String> childComparator) throws KeeperException, InterruptedException {
    GuaguaZooKeeperOperation<List<String>> listOperation = new GuaguaZooKeeperOperation<List<String>>() {
        @Override
        public List<String> execute() throws KeeperException, InterruptedException {
            List<String> children = getZooKeeper().getChildren(path, watch);
            if(childComparator != null) {
                Collections.sort(children, childComparator);
            }
            if(!fullPath) {
                return children;
            }
            // Plain loop on purpose: no guava here, to avoid making core depend on too many libs.
            List<String> absoluteChildren = new ArrayList<String>();
            String parentPrefix = path + GuaguaConstants.ZOOKEEPER_SEPARATOR;
            for(String childName: children) {
                absoluteChildren.add(parentPrefix + childName);
            }
            return absoluteChildren;
        }
    };
    return retryOperation(listOperation);
}
/**
 * Get the children of the path with extensions.
 * Extension 1: Sort the children based on sequence number
 * Extension 2: Get the full path instead of relative path
 *
 * @param path
 *            path to znode
 * @param watch
 *            set the watch?
 * @param sequenceSorted
 *            sort by the sequence number
 * @param fullPath
 *            if true, get the fully znode path back
 * @return list of children
 * @throws InterruptedException
 * @throws KeeperException
 *             Both KeeperException InterruptedException are thrown from {@link ZooKeeper} methods.
 */
public List<String> getChildrenExt(final String path, final boolean watch, final boolean sequenceSorted,
        final boolean fullPath) throws KeeperException, InterruptedException {
    GuaguaZooKeeperOperation<List<String>> listOperation = new GuaguaZooKeeperOperation<List<String>>() {
        @Override
        public List<String> execute() throws KeeperException, InterruptedException {
            List<String> children = getZooKeeper().getChildren(path, watch);
            if(sequenceSorted) {
                // Order by the trailing sequence number of each child name.
                Collections.sort(children, sequenceComparator);
            }
            if(!fullPath) {
                return children;
            }
            List<String> absoluteChildren = new ArrayList<String>();
            String parentPrefix = path + GuaguaConstants.ZOOKEEPER_SEPARATOR;
            for(String childName: children) {
                absoluteChildren.add(parentPrefix + childName);
            }
            return absoluteChildren;
        }
    };
    return retryOperation(listOperation);
}
/**
 * Get the children of the path with extensions.
 * Extension 1: Sort the children based on sequence number
 * Extension 2: Get the full path instead of relative path
 * Extension 3: Filter some path out
 *
 * @param path
 *            path to znode
 * @param watch
 *            set the watch?
 * @param sequenceSorted
 *            sort by the sequence number
 * @param fullPath
 *            if true, get the fully znode path back
 * @param filter
 *            if not null, every path for which it returns true is left out of the result; it is given the full
 *            path when {@code fullPath} is true and the bare child name otherwise.
 * @return list of children
 * @throws InterruptedException
 * @throws KeeperException
 *             Both KeeperException InterruptedException are thrown from {@link ZooKeeper} methods.
 */
public List<String> getChildrenExt(final String path, final boolean watch, final boolean sequenceSorted,
        final boolean fullPath, final Filter filter) throws KeeperException, InterruptedException {
    GuaguaZooKeeperOperation<List<String>> listOperation = new GuaguaZooKeeperOperation<List<String>>() {
        @Override
        public List<String> execute() throws KeeperException, InterruptedException {
            List<String> children = getZooKeeper().getChildren(path, watch);
            if(sequenceSorted) {
                // Order by the trailing sequence number of each child name.
                Collections.sort(children, sequenceComparator);
            }
            List<String> kept = new ArrayList<String>();
            for(String childName: children) {
                String candidate;
                if(fullPath) {
                    candidate = path + GuaguaConstants.ZOOKEEPER_SEPARATOR + childName;
                } else {
                    candidate = childName;
                }
                if(filter != null && filter.filter(candidate)) {
                    // The filter asked for this path to be dropped.
                    continue;
                }
                kept.add(candidate);
            }
            return kept;
        }
    };
    return retryOperation(listOperation);
}
/**
 * Orders znode names by the number formed by their last {@code SEQUENCE_NUMBER_LENGTH} characters. Names that are
 * not strictly longer than that are rejected with a {@link RuntimeException}.
 */
private static class SequenceComparator implements Comparator<String>, Serializable {
    private static final long serialVersionUID = 4555088814306270860L;

    @Override
    public int compare(String s1, String s2) {
        final int firstLength = s1.length();
        final int secondLength = s2.length();
        if(firstLength <= SEQUENCE_NUMBER_LENGTH || secondLength <= SEQUENCE_NUMBER_LENGTH) {
            throw new RuntimeException(String.format(
                    "getChildrenExt: Invalid length for sequence sorting > %s for s1 (%s) or s2 (%s)",
                    SEQUENCE_NUMBER_LENGTH, firstLength, secondLength));
        }
        // Only the trailing digits take part in the comparison; the name prefix is ignored.
        final int firstSequence = Integer.parseInt(s1.substring(firstLength - SEQUENCE_NUMBER_LENGTH));
        final int secondSequence = Integer.parseInt(s2.substring(secondLength - SEQUENCE_NUMBER_LENGTH));
        if(firstSequence < secondSequence) {
            return -1;
        }
        return firstSequence == secondSequence ? 0 : 1;
    }
}
/**
 * Close the wrapped client. Once the client is closed, its session becomes invalid. All the ephemeral nodes in the
 * ZooKeeper server associated with the session will be removed. The watches left on those nodes (and on their
 * parents) will be triggered.
 *
 * @throws InterruptedException
 *             in case of InterruptedException from {@code ZooKeeper#close()};
 */
public void close() throws InterruptedException {
    final ZooKeeper client = getZooKeeper();
    client.close();
}
/**
 * Perform the given operation, retrying when it fails with a connection loss.
 * <p>
 * The operation is attempted at most {@link #getMaxRetryAttempts()} times. After a failed attempt that will be
 * followed by another one, {@link #retryDelay(int)} is called with the index of the failed attempt; no delay is
 * made after the last attempt, since nothing is retried afterwards. A session expiry is never retried.
 *
 * @param operation
 *            the operation to run
 * @param <T>
 *            result type of the operation
 * @return whatever the operation returns on its first successful attempt
 * @throws KeeperException
 *             the session-expired exception as soon as it occurs, the first connection-loss exception once all
 *             attempts are used up, or any other KeeperException raised by the operation (not retried)
 * @throws InterruptedException
 *             if the operation is interrupted
 */
protected <T> T retryOperation(GuaguaZooKeeperOperation<T> operation) throws KeeperException, InterruptedException {
    final int maxAttempts = this.getMaxRetryAttempts();
    // The first connection loss is the one reported if every attempt fails.
    KeeperException exception = null;
    for(int i = 0; i < maxAttempts; i++) {
        try {
            return operation.execute();
        } catch (KeeperException.SessionExpiredException e) {
            // Not recoverable here: log it and hand it straight back to the caller.
            LOG.warn("Session expired so reconnecting due to:", e);
            throw e;
        } catch (KeeperException.ConnectionLossException e) {
            if(exception == null) {
                exception = e;
            }
            LOG.debug("Attempt {} failed with connection loss so attempting to reconnect. Exception is: {} ", i, e);
            // Wait only when another attempt follows; sleeping after the last one just delays the failure.
            if(i + 1 < maxAttempts) {
                retryDelay(i);
            }
        }
    }
    throw exception;
}
/**
 * Sleeps before the next retry, unless this is the first attempt. The pause is {@code attemptCount} times
 * {@link #getRetryWaitMsecs()}. If the sleep is interrupted, the thread's interrupt flag is set again and the
 * method returns.
 *
 * @param attemptCount
 *            the number of the attempts performed so far
 */
protected void retryDelay(int attemptCount) {
    if(attemptCount <= 0) {
        return;
    }
    final long pauseMsecs = attemptCount * getRetryWaitMsecs();
    try {
        Thread.sleep(pauseMsecs);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
    }
}
/**
 * @return the wrapped {@link ZooKeeper} client created by the constructor
 */
public ZooKeeper getZooKeeper() {
    return this.zooKeeper;
}
/**
 * @return the max number of attempts made when an operation fails due to connection loss
 */
public int getMaxRetryAttempts() {
    return this.maxRetryAttempts;
}
/**
 * @return the base wait in milliseconds before trying again after a connection loss
 */
public long getRetryWaitMsecs() {
    return this.retryWaitMsecs;
}
/**
 * Decides which child paths {@code getChildrenExt} leaves out of its result: a path is filtered out when
 * {@link #filter(String)} returns true for it.
 */
public interface Filter {
    /**
     * @param path
     *            the child path to test
     * @return true if the path should be filtered out, false to keep it
     */
    boolean filter(String path);
}
}