/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs; import java.io.FileOutputStream; import java.io.File; import java.io.IOException; import java.util.Collection; import java.util.Properties; import java.util.ArrayList; import java.net.InetSocketAddress; import junit.framework.Assert; import org.apache.zookeeper.server.ZooKeeperServer; import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException; import org.apache.zookeeper.server.persistence.FileTxnSnapLog; import org.apache.zookeeper.server.NIOServerCnxn; import org.apache.zookeeper.server.ServerConfig; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.AvatarConstants; import org.apache.hadoop.hdfs.protocol.FSConstants; import org.apache.hadoop.hdfs.server.common.HdfsConstants; import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption; import org.apache.hadoop.hdfs.server.namenode.AvatarNode; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.datanode.AvatarDataNode; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.fs.FileSystem; import 
org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.net.StaticMapping; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.DNSToSwitchMapping; /** * This class manages a Avatar/HDFS cluster with all nodes running * locally. * To synchronize the AvatarNodes, it uses a local ZooKeeper * server. */ public class MiniAvatarCluster { public static final String NAMESERVICE_ID_PREFIX = "nameserviceId"; public static int currNSId = 0; public static class DataNodeProperties { public AvatarDataNode datanode; public Configuration conf; public String[] dnArgs; DataNodeProperties(AvatarDataNode node, Configuration conf, String[] args) { this.datanode = node; this.conf = conf; this.dnArgs = args; } } private static enum AvatarState { ACTIVE, STANDBY, DEAD } public static class AvatarInfo { public AvatarNode avatar; AvatarState state; int nnPort; int nnDnPort; int httpPort; int rpcPort; String startupOption; AvatarInfo(AvatarNode avatar, AvatarState state, int nnPort, int nnDnPort, int httpPort, int rpcPort, String startupOption) { this.avatar = avatar; this.state = state; this.nnPort = nnPort; this.nnDnPort = nnDnPort; this.httpPort = httpPort; this.rpcPort = rpcPort; this.startupOption = startupOption; } } private static final Log LOG = LogFactory.getLog(MiniAvatarCluster.class); private static final String DEFAULT_TEST_DIR = "build/contrib/highavailability/test/data"; private static final String TEST_DIR = new File(System.getProperty("test.build.data", DEFAULT_TEST_DIR)). 
getAbsolutePath(); private static final String ZK_DATA_DIR = TEST_DIR + "/zk.data"; private static final String ZK_CONF_FILE = TEST_DIR + "/zk.conf"; private static final int zkClientPort = MiniDFSCluster.getFreePort(); private static String baseAvatarDir; private static String dataDir; private int numDataNodes; private boolean format; private String[] racks; private String[] hosts; private boolean federation; private NameNodeInfo[] nameNodes; private Configuration conf; public class NameNodeInfo { Configuration conf; public ArrayList<AvatarInfo> avatars = null; private final String fsimagelocal0Dir; private final String fsimagelocal1Dir; private final String fsimage0Dir; private final String fsimage1Dir; private final String fseditslocal0Dir; private final String fseditslocal1Dir; private final String fsedits0Dir; private final String fsedits1Dir; private final int nnPort; private final int nn0Port; private final int nn1Port; private final int nnDnPort; private final int nnDn0Port; private final int nnDn1Port; private final int httpPort; private final int http0Port; private final int http1Port; private final int rpcPort; private final int rpc0Port; private final int rpc1Port; private Configuration clientConf; private Configuration a0Conf; private Configuration a1Conf; private final String avatarDir; String nameserviceId; NameNodeInfo(int nnIndex) { avatarDir = baseAvatarDir; fsimagelocal0Dir = avatarDir + "/fsimagelocal0"; fsimagelocal1Dir = avatarDir + "/fsimagelocal1"; fsimage0Dir = avatarDir + "/fsimage0"; fsimage1Dir = avatarDir + "/fsimage1"; fseditslocal0Dir = avatarDir + "/fseditslocal0"; fseditslocal1Dir = avatarDir + "/fseditslocal1"; fsedits0Dir = avatarDir + "/fsedits0"; fsedits1Dir = avatarDir + "/fsedits1"; rpcPort = nnPort = MiniDFSCluster.getFreePort(); nnDnPort = MiniDFSCluster.getFreePort(); httpPort = MiniDFSCluster.getFreePort(); rpc0Port = nn0Port = MiniDFSCluster.getFreePorts(2); nnDn0Port = MiniDFSCluster.getFreePort(); http0Port = 
MiniDFSCluster.getFreePort(); rpc1Port = nn1Port = MiniDFSCluster.getFreePorts(2); nnDn1Port = MiniDFSCluster.getFreePort(); http1Port = MiniDFSCluster.getFreePort(); } public void setAvatarNodes(ArrayList<AvatarInfo> avatars) { this.avatars = avatars; } public void initClientConf(Configuration conf) { clientConf = new Configuration(conf); clientConf.set("fs.default.name", "hdfs://localhost:" + nnPort); clientConf.set("fs.default.name0", "hdfs://localhost:" + nn0Port); clientConf.set("fs.default.name1", "hdfs://localhost:" + nn1Port); clientConf.set("dfs.namenode.dn-address", "localhost:" + nnDnPort); clientConf.set("dfs.namenode.dn-address0", "localhost:" + nnDn0Port); clientConf.set("dfs.namenode.dn-address1", "localhost:" + nnDn1Port); clientConf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedAvatarFileSystem"); clientConf.setBoolean("fs.hdfs.impl.disable.cache", true); // Lower the number of retries to close connections quickly. clientConf.setInt("ipc.client.connect.max.retries", 3); } public void initGeneralConf(Configuration conf, String nameserviceId) { // overwrite relevant settings initClientConf(conf); this.nameserviceId = nameserviceId; // avatar nodes if (federation) { conf.set("dfs.namenode.rpc-address0", "localhost:" + rpc0Port); conf.set("dfs.namenode.rpc-address1", "localhost:" + rpc1Port); } else { conf.set("fs.default.name", "hdfs://localhost:" + nnPort); conf.set("fs.default.name0", "hdfs://localhost:" + nn0Port); conf.set("fs.default.name1", "hdfs://localhost:" + nn1Port); conf.set("dfs.namenode.dn-address", "localhost:" + nnDnPort); conf.set("dfs.http.address", "localhost:" + httpPort); } // Enable avatar testing framework for unit tests. 
conf.setFloat("dfs.avatarnode.failover.sample.percent", 1.0f); conf.set("dfs.namenode.dn-address0", "localhost:" + nnDn0Port); conf.set("dfs.namenode.dn-address1", "localhost:" + nnDn1Port); conf.set("dfs.http.address0", "localhost:" + http0Port); conf.set("dfs.http.address1", "localhost:" + http1Port); conf.set("dfs.name.dir.shared0", fsimage0Dir); conf.set("dfs.name.dir.shared1", fsimage1Dir); conf.set("dfs.name.edits.dir.shared0", fsedits0Dir); conf.set("dfs.name.edits.dir.shared1", fsedits1Dir); conf.setInt("dfs.safemode.extension", 1000); // These two ipc parameters help RPC connections to shut down quickly in // unit tests. conf.setInt("ipc.client.connect.max.retries", 3); conf.setInt("ipc.client.connect.timeout", 2000); // We need to disable the filesystem cache so that unit tests and // MiniAvatarCluster don't end up sharing FileSystem objects. if (federation) { for (String key: AvatarNode.AVATARSERVICE_SPECIFIC_KEYS) { String value = conf.get(key); if (value != null) { String newKey = DFSUtil.getNameServiceIdKey(key, nameserviceId); conf.set(newKey, value); conf.set(key, ""); } } String rpcKey = DFSUtil.getNameServiceIdKey( AvatarNode.DFS_NAMENODE_RPC_ADDRESS_KEY, nameserviceId); conf.set(rpcKey, "localhost:" + rpcPort); String dnKey = DFSUtil.getNameServiceIdKey( NameNode.DATANODE_PROTOCOL_ADDRESS, nameserviceId); conf.set(dnKey, "localhost:" + nnDnPort); String httpKey = DFSUtil.getNameServiceIdKey( NameNode.DFS_NAMENODE_HTTP_ADDRESS_KEY, nameserviceId); conf.set(httpKey, "localhost:" + httpPort); } } public void updateAvatarConf(Configuration newConf) { conf = new Configuration(newConf); if (federation) { conf.set(FSConstants.DFS_FEDERATION_NAMESERVICE_ID, nameserviceId); } // server config for avatar nodes a0Conf = new Configuration(conf); a1Conf = new Configuration(conf); a0Conf.set("dfs.name.dir", fsimagelocal0Dir); a0Conf.set("dfs.name.edits.dir", fseditslocal0Dir); a0Conf.set("fs.checkpoint.dir", avatarDir + "/checkpoint0"); 
a1Conf.set("dfs.name.dir", fsimagelocal1Dir); a1Conf.set("dfs.name.edits.dir", fseditslocal1Dir); a1Conf.set("fs.checkpoint.dir", avatarDir + "/checkpoint1"); } public void createAvatarDirs() { new File(fsimagelocal0Dir).mkdirs(); new File(fsimagelocal1Dir).mkdirs(); new File(fsimage0Dir).mkdirs(); new File(fsimage1Dir).mkdirs(); new File(fseditslocal0Dir).mkdirs(); new File(fseditslocal1Dir).mkdirs(); new File(fsedits0Dir).mkdirs(); new File(fsedits1Dir).mkdirs(); } public void cleanupAvatarDirs() throws IOException { String[] files = new String[] {fsimagelocal0Dir, fsimagelocal1Dir, fsimage0Dir, fsimage1Dir, fseditslocal0Dir, fseditslocal1Dir, fsedits0Dir, fsedits1Dir }; for (String filename : files) { FileUtil.fullyDelete(new File(filename)); } } } private static ZooKeeperServer zooKeeper; private static NIOServerCnxn.Factory cnxnFactory; private ArrayList<DataNodeProperties> dataNodes = new ArrayList<DataNodeProperties>(); public MiniAvatarCluster(Configuration conf, int numDataNodes, boolean format, String[] racks, String[] hosts) throws IOException, ConfigException, InterruptedException { this(conf, numDataNodes, format, racks, hosts, 1, false); } /** * Modify the config and start up the servers. The rpc and info ports for * servers are guaranteed to use free ports. * <p> * NameNode and DataNode directory creation and configuration will be * managed by this class. * * @param conf the base configuration to use in starting the servers. This * will be modified as necessary. 
* @param numDataNodes Number of DataNodes to start; may be zero * @param format if true, format the NameNode and DataNodes before starting up * @param racks array of strings indicating the rack that each DataNode is on * @param hosts array of strings indicating the hostname of each DataNode * @param numNameNodes Number of NameNodes to start; * @param federation if true, we start it with federation configure; */ public MiniAvatarCluster(Configuration conf, int numDataNodes, boolean format, String[] racks, String[] hosts, int numNameNodes, boolean federation) throws IOException, ConfigException, InterruptedException { final String testDir = TEST_DIR + "/" + conf.get(MiniDFSCluster.DFS_CLUSTER_ID, ""); baseAvatarDir = testDir + "/avatar"; dataDir = testDir + "/data"; this.conf = conf; this.numDataNodes = numDataNodes; this.format = format; this.racks = racks; this.hosts = hosts; conf.setInt("dfs.secondary.info.port", 0); conf.set("fs.ha.zookeeper.prefix", "/hdfs"); conf.set("fs.ha.zookeeper.quorum", "localhost:" + zkClientPort); // datanodes conf.set("dfs.datanode.address", "localhost:0"); conf.set("dfs.datanode.http.address", "localhost:0"); conf.set("dfs.datanode.ipc.address", "localhost:0"); conf.set("dfs.datanode.dns.interface", "lo"); conf.set("dfs.namenode.dns.interface", "lo"); // other settings conf.setBoolean("dfs.permissions", false); conf.setBoolean("dfs.persist.blocks", true); conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedAvatarFileSystem"); conf.setLong("dfs.blockreport.initialDelay", 0); conf.setClass("topology.node.switch.mapping.impl", StaticMapping.class, DNSToSwitchMapping.class); this.federation = federation; Collection<String> nameserviceIds = DFSUtil.getNameServiceIds(conf); if(nameserviceIds.size() > 1) this.federation = true; if (!federation && numNameNodes != 1) { throw new IOException("Only 1 namenode is allowed in non-federation cluster."); } nameNodes = new NameNodeInfo[numNameNodes]; for (int nnIndex = 0; nnIndex < 
numNameNodes; nnIndex++) { nameNodes[nnIndex] = new NameNodeInfo(nnIndex); if (format) nameNodes[nnIndex].cleanupAvatarDirs(); nameNodes[nnIndex].createAvatarDirs(); } if (!federation) { nameNodes[0].initGeneralConf(conf, null); } else { if (nameserviceIds.isEmpty()) { for (int i = 0; i < nameNodes.length; i++) { nameserviceIds.add(NAMESERVICE_ID_PREFIX + getNSId()); } } initFederationConf(conf, nameserviceIds); } if (this.format) { File data_dir = new File(dataDir); if (data_dir.exists() && !FileUtil.fullyDelete(data_dir)) { throw new IOException("Cannot remove data directory: " + data_dir); } } // Need to start datanodes before avatarnodes, since the primary starts up // in safemode and when the standby starts up, it waits for the primary to // exit safemode. So if we start avatarnodes first with non-empty FSImage // and FSEdits, the primary avatar would wait for datanode block reports and // the standby would wait for the primary to exit safemode and since we // wouldn't return from the standby initialization we would never start the // datanodes and hence we enter a deadlock. 
registerZooKeeperNodes(); startDataNodes(); startAvatarNodes(); waitAvatarNodesActive(); waitDataNodesActive(); waitExitSafeMode(); } private void initFederationConf(Configuration conf, Collection<String> nameserviceIds) { String nameserviceIdList = ""; int nnIndex = 0; for (String nameserviceId : nameserviceIds) { // Create comma separated list of nameserviceIds if (nameserviceIdList.length() > 0) { nameserviceIdList += ","; } nameserviceIdList += nameserviceId; nameNodes[nnIndex].initGeneralConf(conf, nameserviceId); nnIndex++; } conf.set(FSConstants.DFS_FEDERATION_NAMESERVICES, nameserviceIdList); } private static ServerConfig createZooKeeperConf() throws IOException, ConfigException { // create conf file File zkConfDir = new File(TEST_DIR); zkConfDir.mkdirs(); File zkConfFile = new File(ZK_CONF_FILE); zkConfFile.delete(); zkConfFile.createNewFile(); Properties zkConfProps = new Properties(); zkConfProps.setProperty("tickTime", "2000"); zkConfProps.setProperty("dataDir", ZK_DATA_DIR); zkConfProps.setProperty("clientPort", new Integer(zkClientPort).toString()); zkConfProps.setProperty("maxClientCnxns", "30"); zkConfProps.store(new FileOutputStream(zkConfFile), ""); // create config object ServerConfig zkConf = new ServerConfig(); zkConf.parse(ZK_CONF_FILE); return zkConf; } private static ServerConfig getZooKeeperConf() throws Exception { if (new File(ZK_CONF_FILE).exists()) { ServerConfig zkConf = new ServerConfig(); zkConf.parse(ZK_CONF_FILE); return zkConf; } else { return createZooKeeperConf(); } } public static boolean clearZooKeeperData() throws Exception { ServerConfig zkConf = getZooKeeperConf(); File dataLogDir = new File(zkConf.getDataLogDir()); File dataDir = new File(zkConf.getDataDir()); return (FileUtil.fullyDelete(dataLogDir) && FileUtil.fullyDelete(dataDir)); } public static void createAndStartZooKeeper() throws IOException, ConfigException, InterruptedException { ServerConfig zkConf = createZooKeeperConf(); zooKeeper = new ZooKeeperServer(); 
FileTxnSnapLog ftxn = new FileTxnSnapLog(new File(zkConf.getDataLogDir()), new File(zkConf.getDataDir())); zooKeeper.setTxnLogFactory(ftxn); zooKeeper.setTickTime(zkConf.getTickTime()); zooKeeper.setMinSessionTimeout(zkConf.getMinSessionTimeout()); zooKeeper.setMaxSessionTimeout(zkConf.getMaxSessionTimeout()); cnxnFactory = new NIOServerCnxn.Factory(zkConf.getClientPortAddress(), zkConf.getMaxClientCnxns()); cnxnFactory.startup(zooKeeper); } private void registerZooKeeperNode(int nnPrimaryPort, int nnDnPrimaryPort, int httpPrimaryPort, int rpcPrimaryPort, NameNodeInfo nni) throws IOException { AvatarZooKeeperClient zkClient = new AvatarZooKeeperClient(nni.conf, null); zkClient.registerPrimary("localhost:" + nni.nnPort, "localhost:" + nnPrimaryPort); zkClient.registerPrimary("localhost:" + nni.nnDnPort, "localhost:" + nnDnPrimaryPort); zkClient.registerPrimary("localhost:" + nni.httpPort, "localhost:" + httpPrimaryPort); zkClient.registerPrimary("localhost:" + nni.rpcPort, "localhost:" + rpcPrimaryPort); try { zkClient.shutdown(); } catch (InterruptedException ie) { throw new IOException("zkClient.shutdown() interrupted"); } LOG.info("Closed zk client connection for registerZookeeper"); } void clearZooKeeperNode(int nnIndex) throws IOException { NameNodeInfo nni = this.nameNodes[nnIndex]; AvatarZooKeeperClient zkClient = new AvatarZooKeeperClient(nni.conf, null); zkClient.clearPrimary("localhost:" + nni.httpPort); zkClient.clearPrimary("localhost:" + nni.nnPort); zkClient.clearPrimary("localhost:" + nni.nnDnPort); zkClient.clearPrimary("localhost:" + nni.rpcPort); try { zkClient.shutdown(); } catch (InterruptedException ie) { throw new IOException("zkClient.shutdown() interrupted"); } LOG.info("Closed zk client connection for clearZKNode"); } static Configuration getServerConf(String startupOption, NameNodeInfo nni) { // namenode should use DFS, not DAFS if (startupOption. 
equals(AvatarConstants.StartupOption.NODEZERO.getName())) { return new Configuration(nni.a0Conf); } else if (startupOption. equals(AvatarConstants.StartupOption.NODEONE.getName())) { return new Configuration(nni.a1Conf); } else { throw new IllegalArgumentException("invalid avatar"); } } private void registerZooKeeperNodes() throws IOException { for (NameNodeInfo nni : this.nameNodes) { nni.updateAvatarConf(this.conf); registerZooKeeperNode(nni.nn0Port, nni.nnDn0Port, nni.http0Port, nni.rpc0Port, nni); } } private void startAvatarNodes() throws IOException { for (NameNodeInfo nni: this.nameNodes) { nni.updateAvatarConf(this.conf); startAvatarNode(nni, null); } } private void startAvatarNode(NameNodeInfo nni, StartupOption operation) throws IOException { registerZooKeeperNode(nni.nn0Port, nni.nnDn0Port, nni.http0Port, nni.rpc0Port, nni); if (format) { LOG.info("formatting"); // Start the NameNode String[] a0FormatArgs; ArrayList<String> argList = new ArrayList<String>(); argList.add(AvatarConstants.StartupOption. NODEZERO.getName()); argList.add(AvatarConstants.StartupOption. FORMATFORCE.getName()); if (federation) { argList.add(StartupOption.SERVICE.getName()); argList.add(nni.nameserviceId); } a0FormatArgs = new String[argList.size()]; argList.toArray(a0FormatArgs); AvatarNode.createAvatarNode(a0FormatArgs, getServerConf(AvatarConstants.StartupOption. NODEZERO.getName(), nni)); } ArrayList<AvatarInfo> avatars = new ArrayList<AvatarInfo>(2); { LOG.info("starting avatar 0"); String[] a0Args; ArrayList<String> argList = new ArrayList<String>(); if (operation != null) { argList.add(operation.getName()); } argList.add(AvatarConstants.StartupOption.NODEZERO.getName()); if (federation) { argList.add(StartupOption.SERVICE.getName()); argList.add(nni.nameserviceId); } a0Args = new String[argList.size()]; argList.toArray(a0Args); AvatarNode a0 = AvatarNode. createAvatarNode(a0Args, getServerConf(AvatarConstants. StartupOption. NODEZERO. 
getName(), nni)); avatars.add(new AvatarInfo(a0, AvatarState.ACTIVE, nni.nn0Port, nni.nnDn0Port, nni.http0Port, nni.rpc0Port, AvatarConstants.StartupOption.NODEZERO. getName())); // wait for up to 10 seconds until the ACTIVE is initialized for (int i = 0; i < 10; i++) { if (a0.isInitialized()) break; LOG.info("Waiting for the ACTIVE to be initialized..."); try { Thread.sleep(1000); } catch (InterruptedException e) { throw new IOException( "Received interruption when initializing ACTIVE node"); } } if (!a0.isInitialized()) { throw new IOException("The ACTIVE cannot be initialized"); } } { LOG.info("starting avatar 1"); String[] a1Args; ArrayList<String> argList = new ArrayList<String>(); argList.add(AvatarConstants.StartupOption.NODEONE.getName()); argList.add(AvatarConstants.StartupOption.STANDBY.getName()); argList.add(AvatarConstants.StartupOption.REGULAR.getName()); if (federation) { argList.add(StartupOption.SERVICE.getName()); argList.add(nni.nameserviceId); } a1Args = new String[argList.size()]; argList.toArray(a1Args); avatars.add(new AvatarInfo(AvatarNode. createAvatarNode(a1Args, getServerConf(AvatarConstants. StartupOption. NODEONE. getName(), nni)), AvatarState.STANDBY, nni.nn1Port, nni.nnDn1Port, nni.http1Port, nni.rpc1Port, AvatarConstants.StartupOption.NODEONE. 
getName())); } for (AvatarInfo avatar: avatars) { if (avatar.avatar == null) { throw new IOException("Cannot create avatar nodes"); } Assert.assertTrue( avatar.avatar.getConf().getBoolean("dfs.persist.blocks", false)); } nni.setAvatarNodes(avatars); DFSUtil.setGenericConf(nni.conf, nni.nameserviceId, AvatarNode.AVATARSERVICE_SPECIFIC_KEYS); nni.updateAvatarConf(nni.conf); } public void restartAvatarNodes() throws Exception { shutDownAvatarNodes(); for (NameNodeInfo nni : this.nameNodes) { nni.avatars.clear(); } this.format = false; Thread.sleep(10000); startAvatarNodes(); waitAvatarNodesActive(); waitDataNodesActive(); waitExitSafeMode(); } public void shutDownDataNode(int i) throws IOException, InterruptedException { dataNodes.get(i).datanode.shutdown(); } public void shutDownDataNodes() throws IOException, InterruptedException { for (int i = 0; i < dataNodes.size(); i++) { LOG.info("shutting down data node " + i); shutDownDataNode(i); LOG.info("data node " + i + " shut down"); } } public void shutDownAvatarNodes() throws IOException, InterruptedException { for (NameNodeInfo nni : this.nameNodes) { for (AvatarInfo avatar: nni.avatars) { if (avatar.state == AvatarState.ACTIVE || avatar.state == AvatarState.STANDBY) { LOG.info("shutdownAvatar"); avatar.avatar.shutdown(true); } } } try { Thread.sleep(1000); } catch (InterruptedException ignore) { // do nothing } } public static void shutDownZooKeeper() throws IOException, InterruptedException { cnxnFactory.shutdown(); cnxnFactory.join(); LOG.info("Zookeeper Connection Factory shutdown"); if (zooKeeper.isRunning()) { zooKeeper.shutdown(); } LOG.info("Zookeepr Server shutdown"); } /** * Shut down the cluster */ public void shutDown() throws IOException, InterruptedException { System.out.println("Shutting down the Mini Avatar Cluster"); // this doesn't work, so just leave the datanodes running, // they won't interfere with the next run shutDownDataNodes(); shutDownAvatarNodes(); } private void startDataNodes() throws 
IOException { if (racks != null && numDataNodes > racks.length ) { throw new IllegalArgumentException( "The length of racks [" + racks.length + "] is less than the number " + "of datanodes [" + numDataNodes + "]."); } if (hosts != null && numDataNodes > hosts.length ) { throw new IllegalArgumentException( "The length of hosts [" + hosts.length + "] is less than the number " + "of datanodes [" + numDataNodes + "]."); } //Generate some hostnames if required if (racks != null && hosts == null) { LOG.info("Generating host names for datanodes"); hosts = new String[numDataNodes]; for (int i = 0; i < numDataNodes; i++) { hosts[i] = "host" + i + ".foo.com"; } } String[] dnArgs = { HdfsConstants.StartupOption.REGULAR.getName() }; for (int i = 0; i < numDataNodes; i++) { Configuration dnConf = new Configuration(conf); File dir1 = new File(dataDir, "data"+(2*i+1)); File dir2 = new File(dataDir, "data"+(2*i+2)); dir1.mkdirs(); dir2.mkdirs(); if (!dir1.isDirectory() || !dir2.isDirectory()) { throw new IOException("Mkdirs failed to create directory for DataNode " + i + ": " + dir1 + " or " + dir2); } dnConf.set("dfs.data.dir", dir1.getPath() + "," + dir2.getPath()); LOG.info("Starting DataNode " + i + " with dfs.data.dir: " + dnConf.get("dfs.data.dir")); if (hosts != null) { dnConf.set("slave.host.name", hosts[i]); LOG.info("Starting DataNode " + i + " with hostname set to: " + dnConf.get("slave.host.name")); } if (racks != null) { String name = hosts[i]; LOG.info("Adding node with hostname : " + name + " to rack "+ racks[i]); StaticMapping.addNodeToRack(name, racks[i]); } Configuration newconf = new Configuration(dnConf); // save config if (hosts != null) { NetUtils.addStaticResolution(hosts[i], "localhost"); } AvatarDataNode dn = AvatarDataNode.instantiateDataNode(dnArgs, dnConf); //since the HDFS does things based on IP:port, we need to add the mapping //for IP:port to rackId String ipAddr = dn.getSelfAddr().getAddress().getHostAddress(); if (racks != null) { int port = 
dn.getSelfAddr().getPort(); System.out.println("Adding node with IP:port : " + ipAddr + ":" + port+ " to rack " + racks[i]); StaticMapping.addNodeToRack(ipAddr + ":" + port, racks[i]); } dn.runDatanodeDaemon(); dataNodes.add(new DataNodeProperties(dn, newconf, dnArgs)); } } public void waitAvatarNodesActive() { for (int nnIndex = 0; nnIndex < this.nameNodes.length; nnIndex++) { waitAvatarNodesActive(nnIndex); } } public void waitAvatarNodesActive(int nnIndex) { NameNodeInfo nni = this.nameNodes[nnIndex]; for (AvatarInfo avatar: nni.avatars) { while (avatar.avatar.getNameNodeDNAddress() == null) { try { LOG.info("waiting for avatar"); Thread.sleep(200); } catch (InterruptedException ignore) { // do nothing } } } } /* wait Datanodes active for all namespaces */ public void waitDataNodesActive() throws IOException { for (int nnIndex = 0; nnIndex < this.nameNodes.length; nnIndex++) { waitDataNodesActive(nnIndex); } } /* wait Datanodes active for specific namespaces */ public void waitDataNodesActive(int nnIndex) throws IOException { DistributedAvatarFileSystem dafs = null; int liveDataNodes = 0; // make sure all datanodes are alive while(liveDataNodes != numDataNodes) { try { dafs = getFileSystem(nnIndex); LOG.info("waiting for data nodes... 
"); Thread.sleep(200); LOG.info("waiting for data nodes : live=" + liveDataNodes + ", total=" + numDataNodes); liveDataNodes = dafs.getLiveDataNodeStats(false).length; } catch (Exception e) { LOG.warn("Exception waiting for datanodes : ", e); } finally { if (dafs != null) { dafs.close(); } } } } private void checkSingleNameNode() { if (nameNodes.length != 1) { throw new IllegalArgumentException("It's not a single namenode cluster, use index instead."); } } public AvatarInfo getPrimaryAvatar(int nnIndex) { return getAvatarByState(nnIndex, AvatarState.ACTIVE); } public AvatarInfo getStandbyAvatar(int nnIndex) { return getAvatarByState(nnIndex, AvatarState.STANDBY); } private AvatarInfo getDeadAvatar(int nnIndex) { return getAvatarByState(nnIndex, AvatarState.DEAD); } private AvatarInfo getAvatarByState(int nnIndex, AvatarState state) { for (AvatarInfo avatar: this.nameNodes[nnIndex].avatars) { if (avatar.state == state) { return avatar; } } return null; } /** * Return true if primary avatar has left safe mode */ private boolean hasLeftSafeMode(int nnIndex) throws IOException { AvatarInfo primary = getPrimaryAvatar(nnIndex); return (primary != null && !primary.avatar.isInSafeMode() && primary.avatar.getStats()[0] != 0); } private void waitExitSafeMode() throws IOException { for (int nnIndex=0; nnIndex < this.nameNodes.length; nnIndex++) { // make sure all datanodes are alive while(!hasLeftSafeMode(nnIndex)) { try { LOG.info("waiting until avatar0 has left safe mode"); Thread.sleep(50); } catch (InterruptedException ignore) { // do nothing } } } } public DistributedAvatarFileSystem getFileSystem() throws IOException { checkSingleNameNode(); return getFileSystem(0); } /** * Get DAFS. 
*/ public DistributedAvatarFileSystem getFileSystem(int nnIndex) throws IOException { FileSystem fs = FileSystem .get(this.nameNodes[nnIndex].clientConf); if (!(fs instanceof DistributedAvatarFileSystem)) { throw new IOException("fs is not avatar fs"); } return (DistributedAvatarFileSystem) fs; } /** * Kill the primary avatar node. * @param updateZK clear zookeeper? */ public void killPrimary() throws IOException { checkSingleNameNode(); killPrimary(0, true); } public void killPrimary(int nnIndex) throws IOException { killPrimary(nnIndex, true); } public void killPrimary(boolean clearZK) throws IOException { checkSingleNameNode(); killPrimary(0, clearZK); } /** * Kill the primary avatar node. * @param clearZK clear zookeeper? */ public void killPrimary(int nnIndex, boolean clearZK) throws IOException { AvatarInfo primary = getPrimaryAvatar(nnIndex); if (primary != null) { if (clearZK) { clearZooKeeperNode(nnIndex); } primary.avatar.shutdown(true); primary.avatar = null; primary.state = AvatarState.DEAD; try { Thread.sleep(1000); } catch (InterruptedException ignore) { // do nothing } } else { throw new IOException("can't kill primary avatar, already dead"); } } public void killStandby() throws IOException { checkSingleNameNode(); killStandby(0); } /** * Kill the standby avatar node. */ public void killStandby(int nnIndex) throws IOException { AvatarInfo standby = getStandbyAvatar(nnIndex); if (standby != null) { standby.avatar.shutdown(true); standby.avatar = null; standby.state = AvatarState.DEAD; try { Thread.sleep(1000); } catch (InterruptedException ignore) { // do nothing } } else { LOG.info("can't kill standby avatar, already dead"); } } public void failOver() throws IOException { failOver(false); } public void failOver(boolean force) throws IOException { checkSingleNameNode(); failOver(0, force); } /** * Make standby avatar the new primary avatar. Kill the old * primary avatar first if necessary. 
*/ public void failOver(int nnIndex) throws IOException { failOver(nnIndex, false); } public void failOver(int nnIndex, boolean force) throws IOException { if (getPrimaryAvatar(nnIndex) != null) { LOG.info("killing primary avatar before failover"); killPrimary(nnIndex); } AvatarInfo standby = getStandbyAvatar(nnIndex); if (standby == null) { throw new IOException("no standby avatar running"); } standby.avatar.setAvatar(AvatarConstants.Avatar.ACTIVE, force); standby.state = AvatarState.ACTIVE; registerZooKeeperNode(standby.nnPort, standby.nnDnPort, standby.httpPort, standby.rpcPort, this.nameNodes[nnIndex]); } public void restartStandby() throws IOException { checkSingleNameNode(); restartStandby(0); } /** * Restart a dead avatar node as a standby avatar. */ public void restartStandby(int nnIndex) throws IOException { AvatarInfo dead = getDeadAvatar(nnIndex); if (getPrimaryAvatar(nnIndex) == null || dead == null) { throw new IOException("cannot start standby avatar: " + "primary or dead avatar not found"); } LOG.info("restarting " + dead.startupOption + " as standby"); NameNodeInfo nni = this.nameNodes[nnIndex]; String[] args; ArrayList<String> argList = new ArrayList<String>(); argList.add(dead.startupOption); argList.add(AvatarConstants.StartupOption.STANDBY.getName()); argList.add(AvatarConstants.StartupOption.REGULAR.getName()); if (federation) { argList.add(StartupOption.SERVICE.getName()); argList.add(nni.nameserviceId); } args = new String[argList.size()]; argList.toArray(args); dead.avatar = AvatarNode.createAvatarNode(args, getServerConf(dead.startupOption, nni)); dead.state = AvatarState.STANDBY; if (dead.avatar == null) { throw new IOException("cannot start avatar node"); } } /** * return NameNodeInfo */ public NameNodeInfo getNameNode(int nnIndex) { return this.nameNodes[nnIndex]; } public ArrayList<DataNodeProperties> getDataNodeProperties() { return dataNodes; } /** * Gets a list of the started DataNodes. May be empty. 
*/ public ArrayList<AvatarDataNode> getDataNodes() { ArrayList<AvatarDataNode> list = new ArrayList<AvatarDataNode>(); for (int i = 0; i < dataNodes.size(); i++) { AvatarDataNode node = dataNodes.get(i).datanode; list.add(node); } return list; } /* * return number of namenodes */ public int getNumNameNodes() { return this.nameNodes.length; } /** * Add a namenode to cluster and start it. Configuration of datanodes * in the cluster is refreshed to register with the new namenode. * @return newly started namenode */ public NameNodeInfo addNameNode(Configuration conf) throws IOException { if(!federation) { throw new IOException("cannot add namenode to non-federated cluster"); } int nnIndex = nameNodes.length; int numNameNodes = nameNodes.length + 1; NameNodeInfo[] newlist = new NameNodeInfo[numNameNodes]; System.arraycopy(nameNodes, 0, newlist, 0, nameNodes.length); nameNodes = newlist; nameNodes[nnIndex] = new NameNodeInfo(nnIndex); NameNodeInfo nni = nameNodes[nnIndex]; nni.createAvatarDirs(); String nameserviceId = NAMESERVICE_ID_PREFIX + getNSId(); String nameserviceIds = conf.get(FSConstants.DFS_FEDERATION_NAMESERVICES); nameserviceIds += "," + nameserviceId; nni.initGeneralConf(conf, nameserviceId); conf.set(FSConstants.DFS_FEDERATION_NAMESERVICES, nameserviceIds); nni.updateAvatarConf(conf); startAvatarNode(nni, null); // Refresh datanodes with the newly started namenode for (DataNodeProperties dn : dataNodes) { DataNode datanode = dn.datanode; datanode.refreshNamenodes(conf); } // Wait for new namenode to get registrations from all the datanodes waitDataNodesActive(nnIndex); return nni; } private void updateAvatarConfWithServiceId(Configuration dstConf, Configuration srcConf, String nameserviceId) { for (String key: AvatarNode.AVATARSERVICE_SPECIFIC_KEYS) { String federationKey = DFSUtil.getNameServiceIdKey( key, nameserviceId); String value = srcConf.get(federationKey); if (value != null) { dstConf.set(federationKey, value); } } for (String key: 
NameNode.NAMESERVICE_SPECIFIC_KEYS) { String federationKey = DFSUtil.getNameServiceIdKey( key, nameserviceId); String value = srcConf.get(federationKey); if (value != null) { dstConf.set(federationKey, value); } } } /** * Add another cluster to current cluster and start it. Configuration of datanodes * in the cluster is refreshed to register with the new namenodes; */ public void addCluster(MiniAvatarCluster cluster, boolean format) throws IOException, InterruptedException{ if(!federation || !cluster.federation) { throw new IOException("Cannot handle non-federated cluster"); } if (cluster.dataNodes.size() > this.dataNodes.size()) { throw new IOException("Cannot merge: new cluster has more datanodes the old one."); } this.shutDown(); cluster.shutDown(); int nnIndex = nameNodes.length; int numNameNodes = nameNodes.length + cluster.nameNodes.length; NameNodeInfo[] newlist = new NameNodeInfo[numNameNodes]; System.arraycopy(nameNodes, 0, newlist, 0, nameNodes.length); System.arraycopy(cluster.nameNodes, 0, newlist, nameNodes.length, cluster.nameNodes.length); nameNodes = newlist; String newNameserviceIds = cluster.conf.get(FSConstants.DFS_FEDERATION_NAMESERVICES); String nameserviceIds = conf.get(FSConstants.DFS_FEDERATION_NAMESERVICES); nameserviceIds += "," + newNameserviceIds; this.format = format; conf.set(FSConstants.DFS_FEDERATION_NAMESERVICES, nameserviceIds); int i; for (i = 0; i < nameNodes.length; i++) { NameNodeInfo nni = nameNodes[i]; String nameserviceId = nni.nameserviceId; nni.initGeneralConf(nni.conf, nni.nameserviceId); nni.updateAvatarConf(nni.conf); for (int dnIndex = 0; dnIndex < dataNodes.size(); dnIndex++) { Configuration dstConf = dataNodes.get(dnIndex).conf; if (i >= nnIndex) { String dataStr = cluster.dataNodes.get(dnIndex).conf.get("dfs.data.dir"); dstConf.set("dfs.merge.data.dir." 
+ nameserviceId, dataStr); } updateAvatarConfWithServiceId(dstConf, nni.conf, nameserviceId); } } for (DataNodeProperties dn : dataNodes) { dn.conf.set(FSConstants.DFS_FEDERATION_NAMESERVICES, nameserviceIds); dn.datanode = AvatarDataNode.instantiateDataNode(dn.dnArgs, new Configuration(dn.conf)); dn.datanode.runDatanodeDaemon(); } for (i = 0; i < nameNodes.length; i++) { NameNodeInfo nni = nameNodes[i]; if (i < nnIndex) { startAvatarNode(nni, StartupOption.UPGRADE); } else { startAvatarNode(nni, null); } } waitAvatarNodesActive(); waitDataNodesActive(); waitExitSafeMode(); } public synchronized boolean restartDataNodes() throws IOException, InterruptedException { return restartDataNodes(true); } /* * Restart all datanodes */ public synchronized boolean restartDataNodes(boolean waitActive) throws IOException, InterruptedException { shutDownDataNodes(); int i = 0; for (DataNodeProperties dn : dataNodes) { i++; LOG.info("Restart Datanode " + i); // Use the same port since dn is identified by host:port. int port = dn.datanode.getSelfAddr().getPort(); dn.conf.set("dfs.datanode.address", "localhost:" + port); dn.datanode = AvatarDataNode.instantiateDataNode(dn.dnArgs, new Configuration(dn.conf)); dn.datanode.runDatanodeDaemon(); if (waitActive) { waitDataNodeInitialized(dn.datanode); } } if (waitActive) { waitDataNodesActive(); } return true; } /** * Wait until the Datanode is initialized, or it throws an IOException * @param AvatarDataNode dn; * @throws IOException when some ServicePair threads are dead. 
*/
  public synchronized void waitDataNodeInitialized(AvatarDataNode dn)
      throws IOException {
    if (dn == null) {
      return;
    }

    while (true) {
      // The datanode counts as initialized only if it reports so for the
      // first avatar of every namenode.
      boolean initialized = true;
      for (int i = 0; i < nameNodes.length; i++) {
        InetSocketAddress nameNodeAddr = new InetSocketAddress(
            "localhost", getNameNode(i).avatars.get(0).nnDnPort);
        if (!dn.initialized(nameNodeAddr)) {
          initialized = false;
          break;
        }
      }
      if (initialized) {
        // Done; no need to sleep once more before returning.
        return;
      }

      try {
        Thread.sleep(100);
      } catch (InterruptedException e) {
        // Do not lose the interrupt: restore the flag and stop polling,
        // otherwise this unbounded loop could never be cancelled.
        Thread.currentThread().interrupt();
        IOException ioe = new IOException(
            "Interrupted while waiting for datanode to initialize");
        ioe.initCause(e);
        throw ioe;
      }
    }
  }

  /**
   * Returns the namespace id reported by the first avatar of the namenode
   * at the given index.
   *
   * @param index index into the cluster's namenode array
   */
  public int getNamespaceId(int index) {
    return this.nameNodes[index].avatars.get(0).avatar.getNamespaceID();
  }

  /**
   * Returns the current value of {@code currNSId} and then increments it.
   * The counter is a plain static field and this method is not synchronized.
   */
  public static int getNSId() {
    return MiniAvatarCluster.currNSId++;
  }
}