/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs; import java.io.FileOutputStream; import java.io.File; import java.io.PrintWriter; import java.io.IOException; import java.util.Properties; import java.util.ArrayList; import java.net.InetSocketAddress; import java.net.InetAddress; import java.net.ServerSocket; import org.apache.zookeeper.server.ZooKeeperServer; import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException; import org.apache.zookeeper.server.ZKDatabase; import org.apache.zookeeper.server.persistence.FileTxnSnapLog; import org.apache.zookeeper.server.NIOServerCnxn; import org.apache.zookeeper.server.ServerConfig; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.FSConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.AvatarConstants; import org.apache.hadoop.hdfs.server.common.HdfsConstants; import org.apache.hadoop.hdfs.server.namenode.AvatarNode; import org.apache.hadoop.hdfs.server.datanode.AvatarDataNode; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.net.StaticMapping; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.DNSToSwitchMapping; import org.apache.hadoop.util.StringUtils; /** * This class manages a Avatar/HDFS cluster with all nodes running * locally. * To synchronize the AvatarNodes, it uses a local ZooKeeper * server. */ public class MiniAvatarCluster { // start handing out ports from this number private static final int PORT_START = 10000; // the next port that will be handed out (if it is free) private volatile static int nextPort = PORT_START; /** * Check whether a port is free. */ static boolean isPortFree(int port) { ServerSocket socket = null; try { socket = new ServerSocket(); socket.bind(new InetSocketAddress(port)); } catch (IOException e) { return false; } finally { try { if (socket != null) { socket.close(); } } catch (IOException ignore) { // do nothing } } return true; } /** * Get a free port. */ private static int getFreePort() { return getFreePorts(1); } /** * Get the specified number of consecutive free ports. * @return the first free port of the range */ private static int getFreePorts(int num) { int port = nextPort; boolean found = true; do { for (int i = port; i < port + num; i++) { if (!isPortFree(i)) { port = i + 1; found = false; break; // from for loop } } } while (!found); nextPort = port + num; LOG.info("using free port " + port + "(+" + (num - 1) + ")"); return port; } private static class DataNodeProperties { AvatarDataNode datanode; Configuration conf; String[] dnArgs; DataNodeProperties(AvatarDataNode node, Configuration conf, String[] args) { this.datanode = node; this.conf = conf; this.dnArgs = args; } } private static enum AvatarState { ACTIVE, STANDBY, DEAD } private static class AvatarInfo { AvatarNode avatar; AvatarState state; int nnPort; int nnDnPort; int httpPort; String startupOption; AvatarInfo(AvatarNode avatar, AvatarState state, int nnPort, int nnDnPort, int httpPort, String startupOption) { this.avatar = avatar; this.state = state; this.nnPort = nnPort; this.nnDnPort = nnDnPort; this.httpPort = httpPort; this.startupOption = startupOption; } } private static final Log LOG = LogFactory.getLog(MiniAvatarCluster.class); private static final String DEFAULT_TEST_DIR = "build/contrib/highavailability/test/data"; private static final String TEST_DIR = new File(System.getProperty("test.build.data", DEFAULT_TEST_DIR)). getAbsolutePath(); private static final String ZK_DATA_DIR = TEST_DIR + "/zk.data"; private static final String ZK_CONF_FILE = TEST_DIR + "/zk.conf"; private static final int zkClientPort = getFreePort(); private final String avatarDir; private final String dataDir; private final String fsimagelocal0Dir; private final String fsimagelocal1Dir; private final String fsimage0Dir; private final String fsimage1Dir; private final String fseditslocal0Dir; private final String fseditslocal1Dir; private final String fsedits0Dir; private final String fsedits1Dir; private final int nnPort; private final int nn0Port; private final int nn1Port; private final int nnDnPort; private final int nnDn0Port; private final int nnDn1Port; private final int httpPort; private final int http0Port; private final int http1Port; private Configuration conf; private Configuration a0Conf; private Configuration a1Conf; private int numDataNodes; private boolean format; private String[] racks; private String[] hosts; private static ZooKeeperServer zooKeeper; private static NIOServerCnxn.Factory cnxnFactory; ArrayList<AvatarInfo> avatars = new ArrayList<AvatarInfo>(2); private ArrayList<DataNodeProperties> dataNodes = new ArrayList<DataNodeProperties>(); /** * Modify the config and start up the servers. The rpc and info ports for * servers are guaranteed to use free ports. * <p> * NameNode and DataNode directory creation and configuration will be * managed by this class. * * @param conf the base configuration to use in starting the servers. This * will be modified as necessary. * @param numDataNodes Number of DataNodes to start; may be zero * @param format if true, format the NameNode and DataNodes before starting up * @param racks array of strings indicating the rack that each DataNode is on * @param hosts array of strings indicating the hostname of each DataNode */ public MiniAvatarCluster(Configuration conf, int numDataNodes, boolean format, String[] racks, String[] hosts) throws IOException, ConfigException, InterruptedException { final String testDir = TEST_DIR + "/" + System.currentTimeMillis(); avatarDir = testDir + "/avatar"; dataDir = testDir + "/data"; fsimagelocal0Dir = avatarDir + "/fsimagelocal0"; fsimagelocal1Dir = avatarDir + "/fsimagelocal1"; fsimage0Dir = avatarDir + "/fsimage0"; fsimage1Dir = avatarDir + "/fsimage1"; fseditslocal0Dir = avatarDir + "/fseditslocal0"; fseditslocal1Dir = avatarDir + "/fseditslocal1"; fsedits0Dir = avatarDir + "/fsedits0"; fsedits1Dir = avatarDir + "/fsedits1"; nnPort = getFreePort(); nn0Port = getFreePorts(2); nn1Port = getFreePorts(2); nnDnPort = getFreePort(); nnDn0Port = getFreePort(); nnDn1Port = getFreePort(); httpPort = getFreePort(); http0Port = getFreePort(); http1Port = getFreePort(); this.conf = conf; this.numDataNodes = numDataNodes; this.format = format; this.racks = racks; this.hosts = hosts; configureAvatar(); createAvatarDirs(); startAvatarNodes(); waitAvatarNodesActive(); startDataNodes(); waitDataNodesActive(); waitExitSafeMode(); } private static ServerConfig createZooKeeperConf() throws IOException, ConfigException { // create conf file File zkConfDir = new File(TEST_DIR); zkConfDir.mkdirs(); File zkConfFile = new File(ZK_CONF_FILE); zkConfFile.delete(); zkConfFile.createNewFile(); Properties zkConfProps = new Properties(); zkConfProps.setProperty("tickTime", "2000"); zkConfProps.setProperty("dataDir", ZK_DATA_DIR); zkConfProps.setProperty("clientPort", new Integer(zkClientPort).toString()); zkConfProps.store(new FileOutputStream(zkConfFile), ""); // create config object ServerConfig zkConf = new ServerConfig(); zkConf.parse(ZK_CONF_FILE); return zkConf; } public static void createAndStartZooKeeper() throws IOException, ConfigException, InterruptedException { ServerConfig zkConf = createZooKeeperConf(); zooKeeper = new ZooKeeperServer(); FileTxnSnapLog ftxn = new FileTxnSnapLog(new File(zkConf.getDataLogDir()), new File(zkConf.getDataDir())); zooKeeper.setTxnLogFactory(ftxn); zooKeeper.setTickTime(zkConf.getTickTime()); zooKeeper.setMinSessionTimeout(zkConf.getMinSessionTimeout()); zooKeeper.setMaxSessionTimeout(zkConf.getMaxSessionTimeout()); cnxnFactory = new NIOServerCnxn.Factory(zkConf.getClientPortAddress(), zkConf.getMaxClientCnxns()); cnxnFactory.startup(zooKeeper); } private void registerZooKeeperNode(int nnPrimaryPort, int nnDnPrimaryPort, int httpPrimaryPort) throws IOException { AvatarZooKeeperClient zkClient = new AvatarZooKeeperClient(conf, null); zkClient.registerPrimary("localhost:" + nnPort, "localhost:" + nnPrimaryPort); zkClient.registerPrimary("localhost:" + nnDnPort, "localhost:" + nnDnPrimaryPort); zkClient.registerPrimary("localhost:" + httpPort, "localhost:" + httpPrimaryPort); } private void clearZooKeeperNode() throws IOException { AvatarZooKeeperClient zkClient = new AvatarZooKeeperClient(conf, null); zkClient.clearPrimary("localhost:" + httpPort); zkClient.clearPrimary("localhost:" + nnPort); zkClient.clearPrimary("localhost:" + nnDnPort); } private void createAvatarDirs() { new File(fsimagelocal0Dir).mkdirs(); new File(fsimagelocal1Dir).mkdirs(); new File(fsimage0Dir).mkdirs(); new File(fsimage1Dir).mkdirs(); new File(fseditslocal0Dir).mkdirs(); new File(fseditslocal1Dir).mkdirs(); new File(fsedits0Dir).mkdirs(); new File(fsedits1Dir).mkdirs(); } private void configureAvatar() throws IOException { // overwrite relevant settings // avatar nodes conf.setInt("dfs.secondary.info.port", 0); conf.set("fs.default.name", "hdfs://localhost:" + nnPort); conf.set("fs.default.name0", "hdfs://localhost:" + nn0Port); conf.set("fs.default.name1", "hdfs://localhost:" + nn1Port); conf.set("dfs.namenode.dn-address", "localhost:" + nnDnPort); conf.set("dfs.namenode.dn-address0", "localhost:" + nnDn0Port); conf.set("dfs.namenode.dn-address1", "localhost:" + nnDn1Port); conf.set("dfs.http.address", "localhost:" + httpPort); conf.set("dfs.http.address0", "localhost:" + http0Port); conf.set("dfs.http.address1", "localhost:" + http1Port); conf.set("fs.ha.zookeeper.prefix", "/hdfs"); conf.set("fs.ha.zookeeper.quorum", "localhost:" + zkClientPort); conf.set("dfs.name.dir.shared0", fsimage0Dir); conf.set("dfs.name.dir.shared1", fsimage1Dir); conf.set("dfs.name.edits.dir.shared0", fsedits0Dir); conf.set("dfs.name.edits.dir.shared1", fsedits1Dir); // datanodes conf.set("dfs.datanode.address", "localhost:0"); conf.set("dfs.datanode.http.address", "localhost:0"); conf.set("dfs.datanode.ipc.address", "localhost:0"); conf.set("dfs.datanode.dns.interface", "lo"); conf.set("dfs.namenode.dns.interface", "lo"); // other settings conf.setBoolean("dfs.permissions", false); conf.setBoolean("dfs.persist.blocks", true); conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedAvatarFileSystem"); conf.setLong("dfs.blockreport.initialDelay", 0); conf.setClass("topology.node.switch.mapping.impl", StaticMapping.class, DNSToSwitchMapping.class); // server config for avatar nodes a0Conf = new Configuration(conf); a1Conf = new Configuration(conf); a0Conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem"); a0Conf.set("dfs.name.dir", fsimagelocal0Dir); a0Conf.set("dfs.name.edits.dir", fseditslocal0Dir); a0Conf.set("fs.checkpoint.dir", avatarDir + "/checkpoint0"); a1Conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem"); a1Conf.set("dfs.name.dir", fsimagelocal1Dir); a1Conf.set("dfs.name.edits.dir", fseditslocal1Dir); a1Conf.set("fs.checkpoint.dir", avatarDir + "/checkpoint1"); } private Configuration getServerConf(String startupOption) { // namenode should use DFS, not DAFS if (startupOption. equals(AvatarConstants.StartupOption.NODEZERO.getName())) { return new Configuration(a0Conf); } else if (startupOption. equals(AvatarConstants.StartupOption.NODEONE.getName())) { return new Configuration(a1Conf); } else { throw new IllegalArgumentException("invalid avatar"); } } private void startAvatarNodes() throws IOException { registerZooKeeperNode(nn0Port, nnDn0Port, http0Port); if (format) { LOG.info("formatting"); String[] a0FormatArgs = { AvatarConstants.StartupOption. NODEZERO.getName(), AvatarConstants.StartupOption. FORMATFORCE.getName() }; AvatarNode.createAvatarNode(a0FormatArgs, getServerConf(AvatarConstants.StartupOption. NODEZERO.getName())); } { LOG.info("starting avatar 0"); String[] a0Args = { AvatarConstants.StartupOption.NODEZERO.getName() }; avatars.add(new AvatarInfo(AvatarNode. createAvatarNode(a0Args, getServerConf(AvatarConstants. StartupOption. NODEZERO. getName())), AvatarState.ACTIVE, nn0Port, nnDn0Port, http0Port, AvatarConstants.StartupOption.NODEZERO. getName())); } { LOG.info("starting avatar 1"); String[] a1Args = { AvatarConstants.StartupOption.NODEONE.getName(), AvatarConstants.StartupOption.STANDBY.getName(), AvatarConstants.StartupOption.SYNC.getName() }; avatars.add(new AvatarInfo(AvatarNode. createAvatarNode(a1Args, getServerConf(AvatarConstants. StartupOption. NODEONE. getName())), AvatarState.STANDBY, nn1Port, nnDn1Port, http1Port, AvatarConstants.StartupOption.NODEONE. getName())); } for (AvatarInfo avatar: avatars) { if (avatar.avatar == null) { throw new IOException("Cannot create avatar nodes"); } } } private void shutDownDataNodes() throws IOException, InterruptedException { int i = 0; for (DataNodeProperties dn : dataNodes) { i++; LOG.info("shutting down data node " + i); dn.datanode.shutdown(); LOG.info("data node " + i + " shut down"); } } private void shutDownAvatarNodes() throws IOException, InterruptedException { for (AvatarInfo avatar: avatars) { if (avatar.state == AvatarState.ACTIVE || avatar.state == AvatarState.STANDBY) { LOG.info("shutdownAvatar"); avatar.avatar.shutdownAvatar(); avatar.avatar.stopRPC(); } } try { Thread.sleep(1000); } catch (InterruptedException ignore) { // do nothing } } public static void shutDownZooKeeper() throws IOException, InterruptedException { cnxnFactory.shutdown(); cnxnFactory.join(); if (zooKeeper.isRunning()) { zooKeeper.shutdown(); } } /** * Shut down the cluster */ public void shutDown() throws IOException, InterruptedException { System.out.println("Shutting down the Mini Avatar Cluster"); shutDownAvatarNodes(); // this doesn't work, so just leave the datanodes running, // they won't interfere with the next run // shutDownDataNodes(); } private void startDataNodes() throws IOException { if (racks != null && numDataNodes > racks.length ) { throw new IllegalArgumentException( "The length of racks [" + racks.length + "] is less than the number " + "of datanodes [" + numDataNodes + "]."); } if (hosts != null && numDataNodes > hosts.length ) { throw new IllegalArgumentException( "The length of hosts [" + hosts.length + "] is less than the number " + "of datanodes [" + numDataNodes + "]."); } //Generate some hostnames if required if (racks != null && hosts == null) { LOG.info("Generating host names for datanodes"); hosts = new String[numDataNodes]; for (int i = 0; i < numDataNodes; i++) { hosts[i] = "host" + i + ".foo.com"; } } String[] dnArgs = { HdfsConstants.StartupOption.REGULAR.getName() }; for (int i = 0; i < numDataNodes; i++) { Configuration dnConf = new Configuration(conf); File dir1 = new File(dataDir, "data"+(2*i+1)); File dir2 = new File(dataDir, "data"+(2*i+2)); dir1.mkdirs(); dir2.mkdirs(); if (!dir1.isDirectory() || !dir2.isDirectory()) { throw new IOException("Mkdirs failed to create directory for DataNode " + i + ": " + dir1 + " or " + dir2); } dnConf.set("dfs.data.dir", dir1.getPath() + "," + dir2.getPath()); LOG.info("Starting DataNode " + i + " with dfs.data.dir: " + dnConf.get("dfs.data.dir")); if (hosts != null) { dnConf.set("slave.host.name", hosts[i]); LOG.info("Starting DataNode " + i + " with hostname set to: " + dnConf.get("slave.host.name")); } if (racks != null) { String name = hosts[i]; LOG.info("Adding node with hostname : " + name + " to rack "+ racks[i]); StaticMapping.addNodeToRack(name, racks[i]); } Configuration newconf = new Configuration(dnConf); // save config if (hosts != null) { NetUtils.addStaticResolution(hosts[i], "localhost"); } AvatarDataNode dn = AvatarDataNode.instantiateDataNode(dnArgs, dnConf); //since the HDFS does things based on IP:port, we need to add the mapping //for IP:port to rackId String ipAddr = dn.getSelfAddr().getAddress().getHostAddress(); if (racks != null) { int port = dn.getSelfAddr().getPort(); System.out.println("Adding node with IP:port : " + ipAddr + ":" + port+ " to rack " + racks[i]); StaticMapping.addNodeToRack(ipAddr + ":" + port, racks[i]); } AvatarDataNode.runDatanodeDaemon(dn); dataNodes.add(new DataNodeProperties(dn, newconf, dnArgs)); } } private void waitAvatarNodesActive() { for (AvatarInfo avatar: avatars) { while (avatar.avatar.getNameNodeAddress() == null) { try { LOG.info("waiting for avatar"); Thread.sleep(200); } catch (InterruptedException ignore) { // do nothing } } } } private void waitDataNodesActive() throws IOException { InetSocketAddress addr = new InetSocketAddress("localhost", nn0Port); DFSClient client = new DFSClient(addr, conf); // make sure all datanodes are alive while(client.datanodeReport(DatanodeReportType.LIVE).length != numDataNodes) { try { LOG.info("waiting for data nodes"); Thread.sleep(200); } catch (InterruptedException ignore) { // do nothing } } client.close(); } private AvatarInfo getPrimaryAvatar() { return getAvatarByState(AvatarState.ACTIVE); } private AvatarInfo getStandbyAvatar() { return getAvatarByState(AvatarState.STANDBY); } private AvatarInfo getDeadAvatar() { return getAvatarByState(AvatarState.DEAD); } private AvatarInfo getAvatarByState(AvatarState state) { for (AvatarInfo avatar: avatars) { if (avatar.state == state) { return avatar; } } return null; } /** * Return true if primary avatar has left safe mode */ private boolean hasLeftSafeMode() throws IOException { AvatarInfo primary = getPrimaryAvatar(); return (primary != null && !primary.avatar.isInSafeMode() && primary.avatar.getStats()[0] != 0); } private void waitExitSafeMode() throws IOException { // make sure all datanodes are alive while(!hasLeftSafeMode()) { try { LOG.info("waiting until avatar0 has left safe mode"); Thread.sleep(50); } catch (InterruptedException ignore) { // do nothing } } } /** * Get DAFS. */ public DistributedAvatarFileSystem getFileSystem() throws IOException { FileSystem fs = FileSystem.get(conf); if (!(fs instanceof DistributedAvatarFileSystem)) { throw new IOException("fs is not avatar fs"); } return (DistributedAvatarFileSystem) fs; } /** * Kill the primary avatar node. */ public void killPrimary() throws IOException { AvatarInfo primary = getPrimaryAvatar(); if (primary != null) { clearZooKeeperNode(); primary.avatar.shutdownAvatar(); primary.avatar.stopRPC(); primary.avatar = null; primary.state = AvatarState.DEAD; try { Thread.sleep(1000); } catch (InterruptedException ignore) { // do nothing } } else { throw new IOException("can't kill primary avatar, already dead"); } } /** * Kill the standby avatar node. */ public void killStandby() throws IOException { AvatarInfo standby = getStandbyAvatar(); if (standby != null) { standby.avatar.shutdownAvatar(); standby.avatar.stopRPC(); standby.avatar = null; standby.state = AvatarState.DEAD; try { Thread.sleep(1000); } catch (InterruptedException ignore) { // do nothing } } else { LOG.info("can't kill standby avatar, already dead"); } } /** * Make standby avatar the new primary avatar. Kill the old * primary avatar first if necessary. */ public void failOver() throws IOException { if (getPrimaryAvatar() != null) { LOG.info("killing primary avatar before failover"); killPrimary(); } AvatarInfo standby = getStandbyAvatar(); if (standby == null) { throw new IOException("no standby avatar running"); } standby.avatar.setAvatar(AvatarConstants.Avatar.ACTIVE); standby.state = AvatarState.ACTIVE; registerZooKeeperNode(standby.nnPort, standby.nnDnPort, standby.httpPort); } /** * Restart a dead avatar node as a standby avatar. */ public void restartStandby() throws IOException { AvatarInfo dead = getDeadAvatar(); if (getPrimaryAvatar() == null || dead == null) { throw new IOException("cannot start standby avatar: " + "primary or dead avatar not found"); } LOG.info("restarting " + dead.startupOption + " as standby"); String[] args = { dead.startupOption, AvatarConstants.StartupOption.STANDBY.getName(), AvatarConstants.StartupOption.SYNC.getName() }; dead.avatar = AvatarNode.createAvatarNode(args, getServerConf(dead.startupOption)); dead.state = AvatarState.STANDBY; if (dead.avatar == null) { throw new IOException("cannot start avatar node"); } } }