/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.net.InetSocketAddress; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.AvatarZKShell; import org.apache.hadoop.hdfs.AvatarZooKeeperClient; import org.apache.hadoop.hdfs.protocol.AvatarConstants.Avatar; import org.apache.hadoop.hdfs.protocol.AvatarConstants.StartupOption; import org.apache.hadoop.hdfs.protocol.AvatarConstants.ZookeeperKey; import org.apache.hadoop.hdfs.protocol.FSConstants; import org.apache.hadoop.net.NetUtils; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.data.Stat; public class AvatarNodeZkUtil { private static final Log LOG = LogFactory.getLog(AvatarNodeZkUtil.class); /** * Verifies whether we are in a consistent state before we perform a failover * * @param startupConf * the startup configuration * @param confg * the current configuration * @param noverification * whether or not to skip some zookeeper based verification * @return the session id and last transaction id information from zookeeper * @throws IOException */ static ZookeeperTxId checkZooKeeperBeforeFailover(Configuration startupConf, Configuration confg, boolean noverification) throws IOException { AvatarZooKeeperClient zk = null; String fsname = startupConf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY); int maxTries = startupConf.getInt("dfs.avatarnode.zk.retries", 3); Exception lastException = null; for (int i = 0; i < maxTries; i++) { try { zk = new AvatarZooKeeperClient(confg, null, false); LOG.info("Failover: Checking if the primary is empty"); String zkRegistry = zk.getPrimaryAvatarAddress(fsname, new Stat(), false, i > 0); if (zkRegistry != null) { throw new IOException( "Can't switch the AvatarNode to primary since " + "zookeeper record is not clean. Either use shutdownAvatar to kill " + "the current primary and clean the ZooKeeper entry, " + "or clear out the ZooKeeper entry if the primary is dead"); } if (noverification) { return null; } LOG.info("Failover: Obtaining last transaction id from ZK"); String address = startupConf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY); long sessionId = zk.getPrimarySsId(address, i > 0); ZookeeperTxId zkTxId = zk.getPrimaryLastTxId(address, i > 0); if (sessionId != zkTxId.getSessionId()) { throw new IOException("Session Id in the ssid node : " + sessionId + " does not match the session Id in the txid node : " + zkTxId.getSessionId()); } return zkTxId; } catch (Exception e) { LOG.error("Got Exception reading primary node registration " + "from ZooKeeper. Will retry...", e); lastException = e; } finally { shutdownZkClient(zk); } } throw new IOException(lastException); } /** * Performs some operations after failover such as writing a new session id * and registering to zookeeper as the new primary. * * @param startupConf * the startup configuration * @param confg * the current configuration * @return the session id for the new node after failover * @throws IOException */ static long writeToZooKeeperAfterFailover(Configuration startupConf, Configuration confg) throws IOException { AvatarZooKeeperClient zk = null; // Register client port address. String address = startupConf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY); String realAddress = confg.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY); int maxTries = startupConf.getInt("dfs.avatarnode.zk.retries", 3); for (int i = 0; i < maxTries; i++) { try { zk = new AvatarZooKeeperClient(confg, null, false); LOG.info("Failover: Registering to ZK as primary"); final boolean toOverwrite = true; zk.registerPrimary(address, realAddress, toOverwrite); registerClientProtocolAddress(zk, startupConf, confg, toOverwrite); registerDnProtocolAddress(zk, startupConf, confg, toOverwrite); registerHttpAddress(zk, startupConf, confg, toOverwrite); LOG.info("Failover: Writting session id to ZK"); return writeSessionIdToZK(startupConf, zk); } catch (Exception e) { LOG.error("Got Exception registering the new primary " + "with ZooKeeper. Will retry...", e); } finally { shutdownZkClient(zk); } } throw new IOException("Cannot connect to zk"); } /** * Writes the last transaction id of the primary avatarnode to zookeeper. */ static void writeLastTxidToZookeeper(long lastTxid, long totalBlocks, long totalInodes, long ssid, Configuration startupConf, Configuration confg) throws IOException { AvatarZooKeeperClient zk = null; LOG.info("Writing lastTxId: " + lastTxid + ", total blocks: " + totalBlocks + ", total inodes: " + totalInodes); if (lastTxid < 0) { LOG.warn("Invalid last transaction id : " + lastTxid + " skipping write to zookeeper."); return; } ZookeeperTxId zkTxid = new ZookeeperTxId(ssid, lastTxid, totalBlocks, totalInodes); int maxTries = startupConf.getInt("dfs.avatarnode.zk.retries", 3); for (int i = 0; i < maxTries; i++) { try { zk = new AvatarZooKeeperClient(confg, null, false); zk.registerLastTxId(startupConf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY), zkTxid); return; } catch (Exception e) { LOG.error("Got Exception when syncing last txid to zk. Will retry...", e); } finally { shutdownZkClient(zk); } } throw new IOException("Cannot connect to zk"); } static long writeSessionIdToZK(Configuration conf, AvatarZooKeeperClient zk) throws IOException { long ssid = AvatarNode.now(); zk.registerPrimarySsId(conf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY), ssid); return ssid; } /** * Generates a new session id for the cluster and writes it to zookeeper. Some * other data in zookeeper (like the last transaction id) is written to * zookeeper with the sessionId so that we can easily determine in which * session was this data written. The sessionId is unique since it uses the * current time. * * @return the session id that it wrote to ZooKeeper * @throws IOException */ static long writeSessionIdToZK(Configuration conf) throws IOException { AvatarZooKeeperClient zk = null; long ssid = -1; int maxTries = conf.getInt("dfs.avatarnode.zk.retries", 3); boolean mismatch = false; Long ssIdInZk = -1L; for (int i = 0; i < maxTries; i++) { try { zk = new AvatarZooKeeperClient(conf, null, false); ssid = writeSessionIdToZK(conf, zk); return ssid; } catch (Exception e) { LOG.error("Got Exception when writing session id to zk. Will retry...", e); } finally { shutdownZkClient(zk); } } if (mismatch) throw new IOException("Session Id in the NameNode : " + ssid + " does not match the session Id in Zookeeper : " + ssIdInZk); throw new IOException("Cannot connect to zk"); } /** * Obtain the registration of the primary from zk. */ static String getPrimaryRegistration(Configuration startupConf, Configuration conf, String fsname) throws IOException { AvatarZooKeeperClient zk = null; int maxTries = startupConf.getInt("dfs.avatarnode.zk.retries", 3); for (int i = 0; i < maxTries; i++) { try { zk = new AvatarZooKeeperClient(conf, null, false); String zkRegistry = zk.getPrimaryAvatarAddress(fsname, new Stat(), false); return zkRegistry; } catch (Exception e) { LOG.error( "Got Exception when reading primary registration. Will retry...", e); } finally { shutdownZkClient(zk); } } throw new IOException("Cannot connect to zk"); } // helpers static void shutdownZkClient(AvatarZooKeeperClient zk) { if (zk != null) { try { zk.shutdown(); } catch (InterruptedException e) { LOG.error("Error shutting down ZooKeeper client", e); } } } /** * This method tries to update the information in ZooKeeper For every address * of the NameNode it is being run for (fs.default.name, * dfs.namenode.dn-address, dfs.namenode.http.address) if they are present. It * also creates information for aliases in ZooKeeper for lists of strings in * fs.default.name.aliases, dfs.namenode.dn-address.aliases and * dfs.namenode.http.address.aliases * * Each address it transformed to the address of the zNode to be created by * substituting all . and : characters to /. The slash is also added in the * front to make it a valid zNode address. So dfs.domain.com:9000 will be * /dfs/domain/com/9000 * * If any part of the path does not exist it is created automatically */ public static void updateZooKeeper(Configuration originalConf, Configuration conf, boolean toOverwrite, String serviceName, String primaryInstance) throws IOException { String connection = conf.get(FSConstants.FS_HA_ZOOKEEPER_QUORUM); if (connection == null) return; AvatarZooKeeperClient zk = new AvatarZooKeeperClient(conf, null); if (registerClientProtocolAddress(zk, originalConf, conf, toOverwrite)) { return; } registerDnProtocolAddress(zk, originalConf, conf, toOverwrite); registerHttpAddress(zk, originalConf, conf, toOverwrite); for (ZookeeperKey key : ZookeeperKey.values()) { zk.registerPrimary(getZnodeName(conf, serviceName, Avatar.ACTIVE, key), key.getIpPortString(conf), true); } if(primaryInstance.equalsIgnoreCase(StartupOption.NODEZERO.getName())){ primaryInstance = StartupOption.NODEONE.getName(); } else { primaryInstance = StartupOption.NODEZERO.getName(); } Configuration tempConf = AvatarZKShell.updateConf(primaryInstance, originalConf); for (ZookeeperKey key : ZookeeperKey.values()) { zk.registerPrimary(getZnodeName(tempConf, serviceName, Avatar.STANDBY, key), key.getIpPortString(tempConf), true); } } private static String getZnodeName(Configuration conf, String serviceName, Avatar primaryOrStandby, ZookeeperKey typeOfAddress) { return (conf.get(FSConstants.DFS_CLUSTER_NAME, "no-cluster") + "/" + (serviceName == null ? "no-service" : serviceName) + "/" + primaryOrStandby.toString() + typeOfAddress).toLowerCase(); } /** * Registers namenode's address in zookeeper */ private static boolean registerClientProtocolAddress(AvatarZooKeeperClient zk, Configuration originalConf, Configuration conf, boolean toOverwrite) throws UnsupportedEncodingException, IOException { LOG.info("Updating Client Address information in ZooKeeper"); InetSocketAddress addr = NameNode.getClientProtocolAddress(conf); if (addr == null) { LOG.error(FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY + " for primary service is not defined"); return true; } InetSocketAddress defaultAddr = NameNode.getClientProtocolAddress(originalConf); if (defaultAddr == null) { LOG.error(FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY + " for default service is not defined"); return true; } registerSocketAddress(zk, originalConf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY), conf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY), toOverwrite); /** TODO later: need to handle alias leave it as it is now */ registerAliases(zk, conf, FSConstants.FS_NAMENODE_ALIASES, conf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY), toOverwrite); return false; } /** * Registers the datanode protocol address in the zookeeper */ private static void registerDnProtocolAddress(AvatarZooKeeperClient zk, Configuration originalConf, Configuration conf, boolean toOverwrite) throws UnsupportedEncodingException, IOException { LOG.info("Updating Service Address information in ZooKeeper"); registerSocketAddress(zk, originalConf.get(NameNode.DATANODE_PROTOCOL_ADDRESS), conf.get(NameNode.DATANODE_PROTOCOL_ADDRESS), toOverwrite); registerAliases(zk, conf, FSConstants.DFS_NAMENODE_DN_ALIASES, conf.get(NameNode.DATANODE_PROTOCOL_ADDRESS), toOverwrite); } /** * Registers the http address of the namenode in the zookeeper */ private static void registerHttpAddress(AvatarZooKeeperClient zk, Configuration originalConf, Configuration conf, boolean toOverwrite) throws UnsupportedEncodingException, IOException { LOG.info("Updating Http Address information in ZooKeeper"); String addr = conf.get(FSConstants.DFS_NAMENODE_HTTP_ADDRESS_KEY); String defaultAddr = originalConf .get(FSConstants.DFS_NAMENODE_HTTP_ADDRESS_KEY); registerSocketAddress(zk, defaultAddr, addr, toOverwrite); registerAliases(zk, conf, FSConstants.DFS_HTTP_ALIASES, addr, toOverwrite); } private static void registerAliases(AvatarZooKeeperClient zk, Configuration conf, String key, String value, boolean toOverwrite) throws UnsupportedEncodingException, IOException { String[] aliases = conf.getStrings(key); if (aliases == null) { return; } for (String alias : aliases) { zk.registerPrimary(alias, value, toOverwrite); } } public static String toIpPortString(InetSocketAddress addr) { return addr.getAddress().getHostAddress() + ":" + addr.getPort(); } private static void registerSocketAddress(AvatarZooKeeperClient zk, String key, String value, boolean toOverwrite) throws UnsupportedEncodingException, IOException { if (key == null || value == null) { return; } zk.registerPrimary(key, value, toOverwrite); } public static void clearZookeeper(Configuration originalConf, Configuration conf, String serviceName) throws IOException { String connection = conf.get(FSConstants.FS_HA_ZOOKEEPER_QUORUM); if (connection == null) { return; } AvatarZooKeeperClient zk = new AvatarZooKeeperClient(conf, null); // Clear NameNode address in ZK InetSocketAddress defaultAddr; String[] aliases; defaultAddr = NameNode.getClientProtocolAddress(originalConf); String defaultName = defaultAddr.getHostName() + ":" + defaultAddr.getPort(); LOG.info("Clear Client Address information in ZooKeeper: " + defaultName); zk.clearPrimary(defaultName); aliases = conf.getStrings(FSConstants.FS_NAMENODE_ALIASES); if (aliases != null) { for (String alias : aliases) { zk.clearPrimary(alias); } } LOG.info("Clear Service Address information in ZooKeeper"); defaultAddr = NameNode.getDNProtocolAddress(originalConf); if (defaultAddr != null) { String defaultServiceName = defaultAddr.getHostName() + ":" + defaultAddr.getPort(); zk.clearPrimary(defaultServiceName); } aliases = conf.getStrings(FSConstants.DFS_NAMENODE_DN_ALIASES); if (aliases != null) { for (String alias : aliases) { zk.clearPrimary(alias); } } LOG.info("Clear Http Address information in ZooKeeper"); // Clear http address in ZK // Stolen from NameNode so we have the same code in both places defaultAddr = NetUtils.createSocketAddr(originalConf .get(FSConstants.DFS_NAMENODE_HTTP_ADDRESS_KEY)); String defaultHttpAddress = defaultAddr.getHostName() + ":" + defaultAddr.getPort(); zk.clearPrimary(defaultHttpAddress); aliases = conf.getStrings(FSConstants.DFS_HTTP_ALIASES); if (aliases != null) { for (String alias : aliases) { zk.clearPrimary(alias); } } for(Avatar avatar : Avatar.avatars) { for (ZookeeperKey key : ZookeeperKey.values()) { zk.clearPrimary(getZnodeName(conf, serviceName, avatar, key)); } } } public static void printZookeeperEntries(Configuration originalConf, Configuration conf, String serviceName, PrintStream outputStream) throws IOException, KeeperException, InterruptedException { String connection = conf.get(FSConstants.FS_HA_ZOOKEEPER_QUORUM); if (connection == null) return; AvatarZooKeeperClient zk = new AvatarZooKeeperClient(conf, null); outputStream.println("ZooKeeper entries:"); // client protocol InetSocketAddress defaultAddr = NameNode.getClientProtocolAddress(originalConf); String defaultName = defaultAddr.getHostName() + ":" + defaultAddr.getPort(); outputStream.println("Default name is " + defaultName); String registration = zk.getPrimaryAvatarAddress(defaultName, new Stat(), false); outputStream.println("Primary node according to ZooKeeper: " + registration); // datanode protocol defaultAddr = NameNode.getDNProtocolAddress(originalConf); defaultName = defaultAddr.getHostName() + ":" + defaultAddr.getPort(); registration = zk.getPrimaryAvatarAddress(defaultName, new Stat(), false); outputStream.println("Primary node DN protocol : " + registration); // http address defaultAddr = NetUtils.createSocketAddr(originalConf.get("dfs.http.address")); defaultName = defaultAddr.getHostName() + ":" + defaultAddr.getPort(); registration = zk.getPrimaryAvatarAddress(defaultName, new Stat(), false); outputStream.println("Primary node http address : " + registration); for (Avatar anAvatar : Avatar.avatars) { outputStream.println(anAvatar + " entries: "); for (ZookeeperKey key : ZookeeperKey.values()) { String keyInZookeeper = getZnodeName(conf, serviceName, anAvatar, key); outputStream.println(keyInZookeeper + " : " + zk.getPrimaryAvatarAddress(keyInZookeeper, new Stat(), false)); } } } }