/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode.bookkeeper.zk;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Stat;

import java.util.List;
import java.util.concurrent.TimeUnit;

/**
 * A wrapper around other {@link ZooKeeperIface} implementations
 * (e.g., {@link BasicZooKeeper}) that will correctly recover from common
 * transient issues e.g., connection loss, timeouts. Ported from
 * jcommon-zookeeper:
 * https://github.com/facebook/jcommon/blob/master/zookeeper/src/main/java/com/facebook/zookeeper/RecoveringZooKeeper.java
 *
 * <p>Every retried operation follows the same policy: a
 * {@code CONNECTIONLOSS} or {@code OPERATIONTIMEOUT} error is retried up to
 * {@code maxRetries} times, sleeping {@code retryIntervalMillis} between
 * attempts; any other {@link KeeperException} is rethrown immediately.
 */
public class RecoveringZooKeeper implements ZooKeeperIface {
  private static final Log LOG = LogFactory.getLog(RecoveringZooKeeper.class);

  private final ZooKeeperIface zk;
  private final RetryCounterFactory retryCounterFactory;

  /**
   * @param zk the underlying client that all operations are delegated to
   * @param maxRetries number of retries allowed per operation (in addition
   *                   to the initial attempt)
   * @param retryIntervalMillis time to sleep before each retry, in
   *                            milliseconds
   */
  public RecoveringZooKeeper(
      ZooKeeperIface zk, int maxRetries, int retryIntervalMillis
  ) {
    this.zk = zk;
    this.retryCounterFactory =
        new RetryCounterFactory(maxRetries, retryIntervalMillis);
  }

  /** Delegates directly to the wrapped client; never retried. */
  @Override
  public long getSessionId() {
    return zk.getSessionId();
  }

  /** Delegates directly to the wrapped client; never retried. */
  @Override
  public void close() throws InterruptedException {
    zk.close();
  }

  /**
   * Creates a node, choosing a recovery strategy based on the create mode.
   *
   * <ul>
   *   <li>{@code EPHEMERAL} / {@code PERSISTENT}: retried; a
   *       {@code NODEEXISTS} error is reported as success.</li>
   *   <li>{@code EPHEMERAL_SEQUENTIAL}: retried; before each retry the
   *       parent is scanned for a matching node owned by this session.</li>
   *   <li>{@code PERSISTENT_SEQUENTIAL}: delegated without any retry.</li>
   * </ul>
   *
   * @return the path of the created node
   * @throws IllegalArgumentException if {@code createMode} is not one of
   *         the four modes above
   */
  @Override
  public String create(String path, byte[] data, List<ACL> acl,
                       CreateMode createMode)
      throws KeeperException, InterruptedException {
    switch (createMode) {
      case EPHEMERAL:
      case PERSISTENT:
        return createNonSequential(path, data, acl, createMode);
      case EPHEMERAL_SEQUENTIAL:
        // NOTE: this does not reliably support creating multiple ephemeral
        // sequential nodes with the same prefix under the same path
        return createEphemeralSequential(path, data, acl, createMode);
      case PERSISTENT_SEQUENTIAL:
        // No recovery for persistent sequential b/c no way to verify
        // insertion after disconnect w/o application help
        return zk.create(path, data, acl, createMode);
      default:
        throw new IllegalArgumentException(
            "Unrecognized CreateMode: " + createMode);
    }
  }

  /**
   * Deletes a node with retries. A {@code NONODE} error on any attempt
   * (including the first) is treated as a successful delete.
   */
  @Override
  public void delete(String path, int version)
      throws InterruptedException, KeeperException {
    RetryCounter retryCounter = retryCounterFactory.create();
    while (true) {
      try {
        zk.delete(path, version);
        return;
      } catch (KeeperException e) {
        if (e.code() == KeeperException.Code.NONODE) {
          return; // Delete was successful
        }
        waitForRetryOrRethrow(e, retryCounter, "delete");
      }
    }
  }

  /** Retrying variant of {@code exists} taking an explicit watcher. */
  @Override
  public Stat exists(String path, Watcher watcher)
      throws KeeperException, InterruptedException {
    RetryCounter retryCounter = retryCounterFactory.create();
    while (true) {
      try {
        return zk.exists(path, watcher);
      } catch (KeeperException e) {
        waitForRetryOrRethrow(e, retryCounter, "exists");
      }
    }
  }

  /** Retrying variant of {@code exists} taking a watch flag. */
  @Override
  public Stat exists(String path, boolean watch)
      throws KeeperException, InterruptedException {
    RetryCounter retryCounter = retryCounterFactory.create();
    while (true) {
      try {
        return zk.exists(path, watch);
      } catch (KeeperException e) {
        waitForRetryOrRethrow(e, retryCounter, "exists");
      }
    }
  }

  /** Retrying variant of {@code getData} taking an explicit watcher. */
  @Override
  public byte[] getData(String path, Watcher watcher, Stat stat)
      throws KeeperException, InterruptedException {
    RetryCounter retryCounter = retryCounterFactory.create();
    while (true) {
      try {
        return zk.getData(path, watcher, stat);
      } catch (KeeperException e) {
        waitForRetryOrRethrow(e, retryCounter, "getData");
      }
    }
  }

  /** Retrying variant of {@code getData} taking a watch flag. */
  @Override
  public byte[] getData(String path, boolean watch, Stat stat)
      throws KeeperException, InterruptedException {
    RetryCounter retryCounter = retryCounterFactory.create();
    while (true) {
      try {
        return zk.getData(path, watch, stat);
      } catch (KeeperException e) {
        waitForRetryOrRethrow(e, retryCounter, "getData");
      }
    }
  }

  /**
   * Sets node data with retries. The same {@code version} is passed on
   * every attempt.
   */
  @Override
  public Stat setData(String path, byte[] data, int version)
      throws KeeperException, InterruptedException {
    RetryCounter retryCounter = retryCounterFactory.create();
    while (true) {
      try {
        return zk.setData(path, data, version);
      } catch (KeeperException e) {
        waitForRetryOrRethrow(e, retryCounter, "setData");
      }
    }
  }

  /** Retrying variant of {@code getChildren} taking an explicit watcher. */
  @Override
  public List<String> getChildren(String path, Watcher watcher)
      throws KeeperException, InterruptedException {
    RetryCounter retryCounter = retryCounterFactory.create();
    while (true) {
      try {
        return zk.getChildren(path, watcher);
      } catch (KeeperException e) {
        waitForRetryOrRethrow(e, retryCounter, "getChildren");
      }
    }
  }

  /** Retrying variant of {@code getChildren} taking a watch flag. */
  @Override
  public List<String> getChildren(String path, boolean watch)
      throws KeeperException, InterruptedException {
    RetryCounter retryCounter = retryCounterFactory.create();
    while (true) {
      try {
        return zk.getChildren(path, watch);
      } catch (KeeperException e) {
        waitForRetryOrRethrow(e, retryCounter, "getChildren");
      }
    }
  }

  /** Delegates directly to the wrapped client; never retried. */
  @Override
  public ZooKeeper.States getState() {
    return zk.getState();
  }

  // ------------------------- Internal Helpers ------------------------ //

  /**
   * Shared failure handling for all retried operations.
   *
   * <p>If {@code e} is a {@code CONNECTIONLOSS} or {@code OPERATIONTIMEOUT}
   * and the counter still has retries left, logs, sleeps for the retry
   * interval, consumes one retry and returns normally so the caller's loop
   * can try again. Otherwise rethrows {@code e}.
   *
   * @param e the failure raised by the last attempt
   * @param retryCounter the per-operation retry budget
   * @param opName operation name used in the "failed after N retries" log
   * @throws KeeperException {@code e} itself, when it is not retryable or
   *         the retry budget is exhausted
   * @throws InterruptedException if interrupted while sleeping
   */
  private static void waitForRetryOrRethrow(KeeperException e,
                                            RetryCounter retryCounter,
                                            String opName)
      throws KeeperException, InterruptedException {
    switch (e.code()) {
      case CONNECTIONLOSS:
      case OPERATIONTIMEOUT:
        LOG.warn("Possibly transient ZooKeeper exception: " + e);
        if (!retryCounter.shouldRetry()) {
          LOG.error("ZooKeeper " + opName + " failed after " +
              retryCounter.getMaxRetries() + " retries");
          throw e;
        }
        break;
      default:
        throw e;
    }
    LOG.info("Retrying ZooKeeper after sleeping...");
    retryCounter.sleepUntilNextRetry();
    retryCounter.useRetry();
  }

  /**
   * Creates an {@code EPHEMERAL} or {@code PERSISTENT} node with retries.
   * A {@code NODEEXISTS} error on any attempt (including the first) is
   * reported as success by returning {@code path}; the existing node's
   * data is not compared against {@code data}.
   */
  private String createNonSequential(String path, byte[] data,
                                     List<ACL> acl, CreateMode createMode)
      throws KeeperException, InterruptedException {
    RetryCounter retryCounter = retryCounterFactory.create();
    while (true) {
      try {
        return zk.create(path, data, acl, createMode);
      } catch (KeeperException e) {
        if (e.code() == KeeperException.Code.NODEEXISTS) {
          // Non-sequential node was successfully created
          return path;
        }
        waitForRetryOrRethrow(e, retryCounter, "create");
      }
    }
  }

  /**
   * Creates an {@code EPHEMERAL_SEQUENTIAL} node with retries. On every
   * attempt after the first, the parent is scanned for a node with the
   * requested prefix owned by this session; if one is found it is returned
   * instead of creating another node.
   */
  private String createEphemeralSequential(String path, byte[] data,
                                           List<ACL> acl,
                                           CreateMode createMode)
      throws KeeperException, InterruptedException {
    RetryCounter retryCounter = retryCounterFactory.create();
    boolean first = true;
    while (true) {
      try {
        if (!first) {
          // Check if we succeeded on a previous attempt
          String myNode = findMyEphemeralSequentialNode(path);
          if (myNode != null) {
            return myNode;
          }
        }
        first = false;
        return zk.create(path, data, acl, createMode);
      } catch (KeeperException e) {
        waitForRetryOrRethrow(e, retryCounter, "create");
      }
    }
  }

  /**
   * Looks under the parent of {@code path} for a child whose name starts
   * with the last path component and whose ephemeral owner is the current
   * session.
   *
   * @param path the prefix path that was passed to {@code create}
   * @return the full path of the first matching node, or {@code null} if
   *         none is found
   * @throws IllegalArgumentException if {@code path} contains no '/'
   */
  private String findMyEphemeralSequentialNode(String path)
      throws KeeperException, InterruptedException {
    int lastSlashIdx = path.lastIndexOf('/');
    if (lastSlashIdx == -1) {
      // Explicit check: a bare assert is skipped when assertions are off
      throw new IllegalArgumentException(
          "Path does not contain a '/': " + path);
    }
    // A node directly under the root has parent "/" (substring(0, 0) would
    // give "", which is not a valid znode path)
    String parent = lastSlashIdx == 0 ? "/" : path.substring(0, lastSlashIdx);
    // Prefix including the trailing slash, so root children do not become
    // "//child"
    String childPathPrefix = path.substring(0, lastSlashIdx + 1);
    String nodePrefix = path.substring(lastSlashIdx + 1);
    List<String> nodes = zk.getChildren(parent, false);
    List<String> matching = ZkUtil.filterByPrefix(nodes, nodePrefix);
    for (String node : matching) {
      String nodePath = childPathPrefix + node;
      Stat stat = zk.exists(nodePath, false);
      if (stat != null && stat.getEphemeralOwner() == zk.getSessionId()) {
        return nodePath;
      }
    }
    return null;
  }

  /** Produces a fresh {@link RetryCounter} for each operation. */
  private static class RetryCounterFactory {
    private final int maxRetries;
    private final int retryIntervalMillis;

    private RetryCounterFactory(int maxRetries, int retryIntervalMillis) {
      this.maxRetries = maxRetries;
      this.retryIntervalMillis = retryIntervalMillis;
    }

    /** @return a counter with the full retry budget, sleeping in millis */
    public RetryCounter create() {
      return new RetryCounter(
          maxRetries, retryIntervalMillis, TimeUnit.MILLISECONDS
      );
    }
  }

  /** Tracks the remaining retry budget for a single operation. */
  private static class RetryCounter {
    private final int maxRetries;
    private int retriesRemaining;
    private final int retryIntervalMillis;
    private final TimeUnit timeUnit;

    private RetryCounter(
        int maxRetries, int retryIntervalMillis, TimeUnit timeUnit
    ) {
      this.maxRetries = maxRetries;
      this.retriesRemaining = maxRetries;
      this.retryIntervalMillis = retryIntervalMillis;
      this.timeUnit = timeUnit;
    }

    /** @return the retry budget this counter started with */
    public int getMaxRetries() {
      return maxRetries;
    }

    /** Sleeps for one retry interval, expressed in {@code timeUnit}. */
    public void sleepUntilNextRetry() throws InterruptedException {
      timeUnit.sleep(retryIntervalMillis);
    }

    /** @return true while at least one retry remains */
    public boolean shouldRetry() {
      return retriesRemaining > 0;
    }

    /** Consumes one retry from the budget. */
    public void useRetry() {
      retriesRemaining--;
    }
  }
}