/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.namenode; import java.io.FilenameFilter; import java.io.IOException; import java.io.File; import java.io.FileOutputStream; import java.io.DataOutputStream; import java.io.DataInputStream; import java.io.FileInputStream; import java.net.InetSocketAddress; import java.net.URI; import java.util.Arrays; import java.util.Comparator; import java.util.Date; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.ArrayList; import java.util.Iterator; import java.util.Collection; import java.text.ParseException; import java.text.SimpleDateFormat; import javax.management.NotCompliantMBeanException; import javax.management.StandardMBean; import org.apache.hadoop.ipc.*; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.metrics.util.MBeanUtil; import org.apache.hadoop.hdfs.AvatarZooKeeperClient; import org.apache.hadoop.hdfs.DFSUtil; import 
org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.FSConstants; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.hdfs.protocol.AvatarProtocol; import org.apache.hadoop.hdfs.protocol.AvatarConstants.Avatar; import org.apache.hadoop.hdfs.protocol.AvatarConstants.StartupOption; import org.apache.hadoop.hdfs.protocol.AvatarConstants.InstanceId; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.server.protocol.AvatarDatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.BlockReport; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.ReceivedBlockInfo; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; import org.apache.hadoop.hdfs.server.common.HdfsConstants; import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo; import org.apache.hadoop.hdfs.server.namenode.metrics.AvatarNodeStatusMBean; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.data.Stat; /** * This is an implementation of the AvatarNode, a hot * standby for the NameNode. * This is really cool, believe me! * The AvatarNode has two avatars.. the Standby avatar and the Active * avatar. * * In the Standby avatar, the AvatarNode is consuming transaction logs * generated by the primary (via a transaction log stored in a shared device). * Typically, the primary Namenode is writing transactions to a NFS filesystem * and the Standby is reading the log from the same NFS filesystem. The * Standby is also making periodic checkpoints to the primary namenode. * * A manual command can switch the AvatarNode from the Standby avatar * to the Active avatar. In the Active avatar, the AvatarNode performs precisely * the same functionality as a real usual Namenode. 
The switching from
 * Standby avatar to the Active avatar is fast and can typically occur
 * within seconds.
 *
 * Typically, an administrator will require two shared mount points for
 * transaction logs. They have to be set in dfs.name.dir.shared0 and
 * dfs.name.dir.shared1 (similarly dfs.name.edits.dir.shared0 and
 * dfs.name.edits.dir.shared1 for edits). Then the administrator starts
 * the AvatarNode on two different machines as follows:
 *
 * bin/hadoop org.apache.hadoop.hdfs.server.namenode.AvatarNode -zero -active
 * bin/hadoop org.apache.hadoop.hdfs.server.namenode.AvatarNode -one -standby
 * The first AvatarNode uses dfs.name.dir.shared0 while the second
 * AvatarNode uses dfs.name.dir.shared1 to write its transaction logs.
 * Also, at startup, the first instance is the primary Namenode and the
 * second instance is the Standby.
 *
 * After a while, the administrator decides to change the avatar of the
 * second instance to Active. In this case, he/she has to first ensure that the
 * first instance is really dead. This code does not handle the
 * split-brain scenario where there are two active namenodes in one cluster.
 *
 */
public class AvatarNode extends NameNode
                        implements AvatarProtocol, AvatarNodeStatusMBean {

  // Make the avatar-specific resources part of every Configuration's defaults.
  static {
    Configuration.addDefaultResource("avatar-default.xml");
    Configuration.addDefaultResource("avatar-site.xml");
  }

  public static final Log LOG = LogFactory.getLog(AvatarNode.class.getName());

  // Sleep between two runs of the InvalidatesCleaner, in milliseconds.
  private static final int INVALIDATES_CLEANUP_INTERVAL = 60 * 1000;

  // File names, relative to a storage directory.
  private static final String STORAGE_FILE_LOCK = "in_use.lock";
  private static final String EDITSFILE = "/current/edits";
  private static final String EDITSNEW = "/current/edits.new";
  private static final String TIMEFILE = "/current/fstime";
  private static final String IMAGENEW ="/current/fsimage.ckpt";

  // NOTE(review): SimpleDateFormat is not thread-safe and this instance is
  // shared statically - confirm that all users run on a single thread.
  static final SimpleDateFormat dateForm =
    new SimpleDateFormat("yyyy-MM-dd-HH:mm:ss.SSS");

  // The instanceId is assigned at startup time and does not change for
  // the lifetime of the Node. The administrator has to name each instance
  // of the AvatarNode with a different instanceId. The node number is used
  // by the AvatarNode to determine which shared devices it should use to
  // checkpoint the image.
  //
  private InstanceId instance;

  // The time when (and if) the fsimage was sync-ed from the remote AvatarNode
  volatile private long startCheckpointTime;

  private Server server;                   // RPC server
  private InetSocketAddress serverAddress; // RPC server address
  private Avatar currentAvatar;        // the current incarnation of this node
  private Standby standby;             // the standby object
  private Configuration confg;         // config for the standby namenode
  private Configuration startupConf;   // config for the namenode
  private Thread standbyThread;        // the standby daemon thread
  private InvalidatesCleaner cleaner;  // The thread cleaning up invalidates
  private Thread cleanerThread;        // thread that runs the cleaner

  private RunInfo runInfo;             // shutdown / isRunning flags of this node

  /**
   * Creates the underlying NameNode from {@code conf} and starts the avatar
   * RPC server. This constructor assigns neither {@code instance} nor
   * {@code currentAvatar}.
   *
   * @param conf the configuration
   * @throws IOException if the NameNode or the RPC server cannot be created
   */
  AvatarNode(Configuration conf) throws IOException {
    super(conf);
    runInfo = new RunInfo();
    initialize(conf);
  }

  /**
   * The startup Conf is the original configuration of the AvatarNode. It is
   * handed, together with conf, to the Standby object created below.
   * The conf is the modified configuration that is used by the underlying
   * namenode and by this node when it runs as standby.
   *
   * @param startupConf the configuration as it was at startup
   * @param conf the modified configuration the NameNode is created from
   * @param startInfo instance id, standby flag and service name from the
   *        command line
   * @param runInfo run-state flags for this node
   * @throws IOException if the NameNode or the RPC server cannot be created
   */
  AvatarNode(Configuration startupConf, Configuration conf,
             StartupInfo startInfo, RunInfo runInfo) throws IOException {
    super(conf);
    this.runInfo = runInfo;
    this.instance = startInfo.instance;

    // if we are starting as the standby then
    // record the fstime of the checkpoint that we are about to sync from
    if (startInfo.isStandby) {
      // Set the checkpoint time to the fstime of the image and edits
      // that were copied
      setStartCheckpointTime(readLocalFstime(conf));
    }

    // Starts the RPC server and registers the MBean; note that
    // currentAvatar is still unset while this runs.
    initialize(conf);
    currentAvatar = startInfo.isStandby ? Avatar.STANDBY : Avatar.ACTIVE;
    this.startupConf = startupConf;
    this.confg = conf;
    this.nameserviceId = startInfo.serviceName;

    if (currentAvatar == Avatar.STANDBY) {
      // Standby has a different property for the max buffered transactions
      // to replay the log faster
      int maxStandbyBufferedTransactions =
        confg.getInt("dfs.max.standby.buffered.transactions",
                     HdfsConstants.DEFAULT_MAX_BUFFERED_TRANSACTIONS);
      FSEditLog.setMaxBufferedTransactions(maxStandbyBufferedTransactions);

      // do not allow anybody else to directly contact the underlying
      // namenode object to exit safemode.
      super.namesystem.setSafeModeManualOverride(true);

      // Create a standby object which does the actual work of
      // processing transactions from the primary and checkpointing
      standby = new Standby(this, startupConf, confg);
      standbyThread = new Thread(standby);
      standbyThread.start();

      // Periodically drop the invalidates queues while in standby mode.
      cleaner = new InvalidatesCleaner();
      cleanerThread = new Thread(cleaner);
      cleanerThread.start();
    }
  }

  /**
   * Wait for the StandbyNode to exit. If it does and this node is still in
   * the Standby avatar, stop the underlying namenode and the standby
   * machinery; otherwise just wait for the underlying namenode.
   */
  void waitForRestart() {
    if (standbyThread != null) {
      try {
        // if this is the standby avatarnode, then wait for the Standby to exit
        standbyThread.join();
      } catch (InterruptedException ie) {
        //eat it up
      }
      standbyThread = null;
      LOG.info("waitForRestart Standby thread exited.");

      // if we are still in standbymode, that means we need to restart from scratch.
      if (getAvatar() == Avatar.STANDBY) {
        runInfo.isRunning = false;
        LOG.info("waitForRestart Stopping encapsulated namenode.");
        super.stop();            // terminate encapsulated namenode
        super.join();            // wait for encapsulated namenode to exit
        shutdownStandby();
        LOG.info("waitForRestart exiting");
        return;
      }
    }
    super.join();            // wait for encapsulated namenode
  }

  /**
   * Registers this node with JMX under the AvatarNodeStatusMBean interface.
   * A non-compliant MBean is logged and otherwise ignored.
   */
  public void registerMBean() {
    StandardMBean avatarNodeBean;
    try {
      avatarNodeBean = new StandardMBean(this, AvatarNodeStatusMBean.class);
      MBeanUtil.registerMBean("AvatarNode", "AvatarNodeState", avatarNodeBean);
    } catch (NotCompliantMBeanException mex) {
      LOG.error("Error registering mbean with JMX", mex);
    }
  }

  /**
   * @return the string form of this node's instance id
   */
  // NOTE(review): instance is only assigned by the four-argument
  // constructor, so this throws NullPointerException on a node built
  // with the single-argument one.
  @Override
  public String getInstance() {
    return this.instance.toString();
  }

  /**
   * @return the string form of the current avatar
   */
  // NOTE(review): the MBean is registered from initialize(), before
  // currentAvatar is assigned, so an early call can hit a null here.
  @Override
  public String getState() {
    return this.currentAvatar.toString();
  }

  /**
   * @return the lag reported by the Standby object, or 0 when this node
   *         has no Standby object
   */
  @Override
  public long getLagBytes() {
    if (this.standby == null)
      return 0;
    return this.standby.getLagBytes();
  }

  /**
   * Initialize AvatarNode: create the avatar RPC server, record the address
   * it actually listens on, register the MBean and start the server.
   *
   * @param conf the configuration
   * @throws IOException if the RPC server cannot be created
   */
  private void initialize(Configuration conf) throws IOException {
    InetSocketAddress socAddr = AvatarNode.getAddress(conf);
    int handlerCount = conf.getInt("hdfs.avatarnode.handler.count", 3);

    // create rpc server
    this.server = RPC.getServer(this, socAddr.getHostName(), socAddr.getPort(),
                                handlerCount, false, conf);

    // The rpc-server port can be ephemeral... ensure we have the
    // correct info
    this.serverAddress = this.server.getListenerAddress();
    LOG.info("AvatarNode up at: " + this.serverAddress);
    this.registerMBean();
    this.server.start();
  }

  /**
   * If the specified protocol is AvatarProtocol, then return the
   * AvatarProtocol version id, otherwise delegate to the underlying
   * namenode.
*/ public long getProtocolVersion(String protocol, long clientVersion) throws IOException { if (protocol.equals(AvatarProtocol.class.getName())) { return AvatarProtocol.versionID; } else { return super.getProtocolVersion(protocol, clientVersion); } } // // methods to support Avatar Protocol // /** * @inheritDoc */ public synchronized Avatar getAvatar() { return currentAvatar; } @Override public void shutdownAvatar() throws IOException { runInfo.shutdown = true; LOG.info("Got shutdown message"); super.stop(); super.join(); // wait for encapsulated namenode to exit if (getAvatar() == Avatar.STANDBY) { try { standby.quiesce(); } catch (Throwable e) { LOG.warn("Standby: ", e); } shutdownStandby(); } else { stopRPC(); } } public void shutdownStandby() { standby.shutdown(); if (server != null) { // shutdown the AvatarNode LOG.info("Stopping avatarnode rpcserver."); server.stop(); try { server.join(); } catch (InterruptedException ie) { //eat it up } } if (cleaner != null) { // Shut down the cleaner thread as it will keep // the process from shutting down cleaner.stop(); cleanerThread.interrupt(); try { cleanerThread.join(); } catch (InterruptedException iex) { Thread.currentThread().interrupt(); } } } private void stopRPC() throws IOException { this.server.stop(); try { this.server.join(); } catch (InterruptedException ex) { Thread.currentThread().interrupt(); } } /** * @inheritDoc */ public synchronized void setAvatar(Avatar avatar) throws IOException { if (avatar == currentAvatar) { LOG.info("Trying to change avatar to " + avatar + " but am already in that state."); return; } if (avatar == Avatar.STANDBY) { // ACTIVE to STANDBY String msg = "Changing state from active to standby is not allowed." + "If you really want to pause your primary, put it in safemode."; LOG.warn(msg); throw new IOException(msg); } else { // STANDBY to ACTIVE // Check to see if the primary is somehow checkpointing itself. If so, then // refuse to switch to active mode. 
This check is not foolproof but is a // defensive mechanism to prevent administrator errors. try { if (!zkIsEmpty()) { throw new IOException("Can't switch the AvatarNode to primary since " + "zookeeper record is not clean. Either use shutdownAvatar to kill " + "the current primary and clean the ZooKeeper entry, " + "or clear out the ZooKeeper entry if the primary is dead"); } } catch (Exception ex) { throw new IOException("Cancelling setAvatar because of Exception", ex); } if (standby.hasStaleCheckpoint()) { String msg = "Failed to change avatar from " + currentAvatar + " to " + avatar + " because the Standby has not yet consumed all transactions."; LOG.warn(msg); throw new IOException(msg); } standby.quiesce(); cleaner.stop(); cleanerThread.interrupt(); try { cleanerThread.join(); } catch (InterruptedException iex) { Thread.currentThread().interrupt(); } clearInvalidates(); // change the value to the one for the primary int maxStandbyBufferedTransactions = confg.getInt("dfs.max.buffered.transactions", HdfsConstants.DEFAULT_MAX_BUFFERED_TRANSACTIONS); FSEditLog.setMaxBufferedTransactions(maxStandbyBufferedTransactions); super.namesystem.setSafeModeManualOverride(false); setSafeMode(SafeModeAction.SAFEMODE_LEAVE); } LOG.info("Changed avatar from " + currentAvatar + " to " + avatar); currentAvatar = avatar; } /* * As the AvatarNode is running in Standby mode it fills up * invalidates queues for each datanode with blocks it * assumes have to be deleted. This information is not * entirely accurate and fills up memory as well as leads * to dataloss since those queues are flushed to the datanodes * on failover and valid blocks may be deleted. * * To help prevent filling up the memory we clear these queues * periodically. And we do a final cleanup jsut before switching * to primary. 
*/ private class InvalidatesCleaner implements Runnable { volatile boolean running = true; @Override public void run() { while (running) { clearInvalidates(); try { Thread.sleep(INVALIDATES_CLEANUP_INTERVAL); } catch (InterruptedException iex) { if (running == false) return; Thread.currentThread().interrupt(); } } } public void stop() { running = false; } } private void clearInvalidates() { try { DatanodeInfo[] nodes = super.getDatanodeReport(DatanodeReportType.ALL); assert namesystem.isInSafeMode(); super.namesystem.writeLock(); try { for (DatanodeInfo node : nodes) { super.namesystem.removeFromInvalidates(node.getStorageID()); } } finally { super.namesystem.writeUnlock(); } } catch (IOException e) { e.printStackTrace(); } } private boolean ignoreDatanodes() { return currentAvatar == Avatar.STANDBY && (standby == null || standby.fellBehind()); } public DatanodeCommand[] sendHeartbeatNew(DatanodeRegistration registration, long capacity, long dfsUsed, long remaining, long namespaceUsed, int xmitsInProgress, int xceiverCount) throws IOException { DatanodeCommand[] cmds = super.sendHeartbeat( registration, capacity, dfsUsed, remaining, namespaceUsed, xmitsInProgress, xceiverCount); if (ignoreDatanodes()) { if (cmds == null) { return new DatanodeCommand[]{AvatarDatanodeCommand.BACKOFF}; } else { DatanodeCommand[] newCmds = Arrays.copyOf(cmds, cmds.length+1); newCmds[cmds.length] = AvatarDatanodeCommand.BACKOFF; return newCmds; } } else { return cmds; } } public DatanodeCommand blockReportNew(DatanodeRegistration nodeReg, BlockReport rep) throws IOException { if (runInfo.shutdown || !runInfo.isRunning) { return null; } if (ignoreDatanodes()) { LOG.info("Standby fell behind. 
Telling " + nodeReg.toString() + " to back off"); // Do not process block reports yet as the ingest thread is catching up return AvatarDatanodeCommand.BACKOFF; } return super.blockReport(nodeReg, rep); } /** * @inheritDoc */ public Block[] blockReceivedAndDeletedNew(DatanodeRegistration nodeReg, Block blocksReceivedAndDeleted[]) throws IOException { if (runInfo.shutdown || !runInfo.isRunning) { // Do not attempt to process blocks when // the namenode is not running return new ReceivedBlockInfo[0]; } if (ignoreDatanodes()) { LOG.info("Standby fell behind. Telling " + nodeReg.toString() + " to retry incremental block report of " + blocksReceivedAndDeleted.length + " blocks later."); return blocksReceivedAndDeleted; } List<Block> failed = new ArrayList<Block>(); HashSet<Long> failedIds; if (currentAvatar == Avatar.STANDBY) { failedIds = new HashSet<Long>(); namesystem.writeLock(); try { for (int index = 0; index < blocksReceivedAndDeleted.length; index++) { Block blockRD = blocksReceivedAndDeleted[index]; if(failedIds.contains(blockRD.getBlockId())){ // check if there was no other blocking failed request blocksReceivedAndDeleted[index] = null; failed.add(blockRD); continue; } BlockInfo storedBlock = namesystem.blocksMap.getStoredBlock(blockRD); if (!DFSUtil.isDeleted(blockRD) && (storedBlock == null) && (!namesystem.getPersistBlocks() || blockRD.getGenerationStamp() >= namesystem.getGenerationStamp())) { // If this block does not belong to anyfile and its GS // is no less than the avatar node's GS, // AvatarNode may not consume the file/block creation edit log yet, // so adding it to the failed list. // - do not process any requestes for blocks with the same block id // (also add them to the failed list. 
// - do not block other requests blocksReceivedAndDeleted[index] = null; failed.add(blockRD); failedIds.add(blockRD.getBlockId()); } } } finally { namesystem.writeUnlock(); if (!failed.isEmpty()) { LOG.info("*BLOCK* NameNode.blockReceivedAndDeleted: " + "from " + nodeReg.getName() + " has to retry " + failed.size() + " blocks."); } for (Block blockRD : failed) { LOG.info("blockReceivedDeleted " + (DFSUtil.isDeleted(blockRD) ? "DELETED" : "RECEIVED") + " request received for " + blockRD + " on " + nodeReg.getName() + " size " + blockRD.getNumBytes() + " But it does not belong to any file." + " Retry later."); } } } super.blockReceivedAndDeleted(nodeReg, blocksReceivedAndDeleted); return failed.toArray(new Block[failed.size()]); } /** * @inheritDoc */ public ReceivedDeletedBlockInfo[] blockReceivedAndDeletedNew(DatanodeRegistration nodeReg, ReceivedDeletedBlockInfo blocksReceivedAndDeleted[]) throws IOException { if (runInfo.shutdown || !runInfo.isRunning) { // Do not attempt to process blocks when // the namenode is not running return new ReceivedDeletedBlockInfo[0]; } if (ignoreDatanodes()) { LOG.info("Standby fell behind. 
Telling " + nodeReg.toString() + " to retry incremental block report of " + blocksReceivedAndDeleted.length + " blocks later."); return blocksReceivedAndDeleted; } List<ReceivedDeletedBlockInfo> failed = new ArrayList<ReceivedDeletedBlockInfo>(); HashSet<Long> failedIds; if (currentAvatar == Avatar.STANDBY) { failedIds = new HashSet<Long>(); namesystem.writeLock(); try { for (int index = 0; index < blocksReceivedAndDeleted.length; index++) { ReceivedDeletedBlockInfo blockRD = blocksReceivedAndDeleted[index]; if(failedIds.contains(blockRD.getBlock().getBlockId())){ // check if there was no other blocking failed request blocksReceivedAndDeleted[index] = null; failed.add(blockRD); continue; } Block blkToDelete = blockRD.getBlock(); BlockInfo storedBlock = namesystem.blocksMap.getStoredBlock(blkToDelete); if (!blockRD.isDeletedBlock() && (storedBlock == null) && (!namesystem.getPersistBlocks() || blkToDelete.getGenerationStamp() >= namesystem.getGenerationStamp())) { // If this block does not belong to anyfile and its GS // is no less than the avatar node's GS, // AvatarNode may not consume the file/block creation edit log yet, // so adding it to the failed list. // - do not process any requestes for blocks with the same block id // (also add them to the failed list. // - do not block other requests blocksReceivedAndDeleted[index] = null; failed.add(blockRD); failedIds.add(blockRD.getBlock().getBlockId()); } } } finally { namesystem.writeUnlock(); if (!failed.isEmpty()) { LOG.info("*BLOCK* NameNode.blockReceivedAndDeleted: " + "from " + nodeReg.getName() + " has to retry " + failed.size() + " blocks."); } for (ReceivedDeletedBlockInfo blockRD : failed) { LOG.info("blockReceivedDeleted " + (blockRD.isDeletedBlock() ? "DELETED" : "RECEIVED") + " request received for " + blockRD.getBlock() + " on " + nodeReg.getName() + " size " + blockRD.getBlock().getNumBytes() + " But it does not belong to any file." 
+ " Retry later."); } } } super.blockReceivedAndDeleted(nodeReg, blocksReceivedAndDeleted); return failed.toArray(new ReceivedDeletedBlockInfo[failed.size()]); } /** * Returns the hostname:port for the AvatarNode. The default * port for the AvatarNode is one more than the port of the * underlying namenode. */ public static InetSocketAddress getAddress(Configuration conf) { InetSocketAddress u = NameNode.getAddress(conf); int port = conf.getInt(AvatarNode.DFS_AVATARNODE_PORT_KEY, u.getPort() + 1); return new InetSocketAddress(u.getHostName(), port); } /** * Help message for a user */ private static void printUsage() { System.err.println( "Usage: java AvatarNode [" + StartupOption.STANDBY.getName() + "] | [" + StartupOption.NODEZERO.getName() + "] | [" + StartupOption.NODEONE.getName() + "] | [" + StartupOption.FORMAT.getName() + "] | [" + StartupOption.UPGRADE.getName() + "] | [" + StartupOption.ROLLBACK.getName() + "] | [" + StartupOption.FINALIZE.getName() + "] | [" + StartupOption.IMPORT.getName() + "]"); } /** * validates command line arguments */ static void validateStartupOptions(StartupInfo startInfo) throws IOException { // sync cannot be specified along with format or finalize if (startInfo.isStandby) { if (startInfo.startOpt == StartupOption.FORMAT || startInfo.startOpt == StartupOption.FINALIZE || startInfo.startOpt == StartupOption.ROLLBACK || startInfo.startOpt == StartupOption.UPGRADE) { throw new IOException("Standby avatar node cannot be started with " + startInfo.startOpt + " option."); } } } private static class StartupInfo { StartupOption startOpt; InstanceId instance; boolean isStandby; String serviceName; public StartupInfo(StartupOption startOpt, InstanceId instance, boolean isStandby, String serviceName) { this.startOpt = startOpt; this.instance = instance; this.isStandby = isStandby; this.serviceName = serviceName; } } /** * Analyze the command line options */ private static StartupInfo parseArguments(String args[]) { InstanceId instance = 
InstanceId.NODEZERO; StartupOption startOpt = StartupOption.REGULAR; boolean isStandby= false; String serviceName = null; int argsLen = (args == null) ? 0 : args.length; for (int i=0; i < argsLen; i++) { String cmd = args[i]; if (StartupOption.SERVICE.getName().equalsIgnoreCase(cmd)) { if (++i < argsLen) { serviceName = args[i]; } else { return null; } } else if (StartupOption.STANDBY.getName().equalsIgnoreCase(cmd)) { isStandby = true; } else if (StartupOption.NODEZERO.getName().equalsIgnoreCase(cmd)) { instance = InstanceId.NODEZERO; } else if (StartupOption.NODEONE.getName().equalsIgnoreCase(cmd)) { instance = InstanceId.NODEONE; } else if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) { startOpt = StartupOption.FORMAT; } else if (StartupOption.FORMATFORCE.getName().equalsIgnoreCase(cmd)) { startOpt = StartupOption.FORMATFORCE; } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) { startOpt = StartupOption.REGULAR; } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) { startOpt = StartupOption.UPGRADE; } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) { startOpt = StartupOption.ROLLBACK; } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) { startOpt = StartupOption.FINALIZE; } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) { startOpt = StartupOption.IMPORT; } else { startOpt = null; } } return new StartupInfo(startOpt, instance, isStandby, serviceName); } /** * Records the startup command in the configuration */ private static void setStartupOption(Configuration conf, StartupOption opt) { conf.set("dfs.namenode.startup", opt.toString()); } public static AvatarNode createAvatarNode(String argv[], Configuration conf) throws IOException { return createAvatarNode(argv, conf, new RunInfo()); } /** * HDFS federation configuration that is specific to a name service. * This keys are suffixed with nameserviceId in the configuration. 
For example,
   * "dfs.namenode.rpc-address.nameservice1".
   *
   * Following are nameservice specific keys.
   */
  private static final String DFS_AVATARNODE_PORT_KEY = "dfs.avatarnode.port";

  // Shared image and edits directories of instance zero and instance one.
  private static final String DFS_SHARED_NAME_DIR0_KEY = "dfs.name.dir.shared0";
  private static final String DFS_SHARED_NAME_DIR1_KEY = "dfs.name.dir.shared1";
  private static final String DFS_SHARED_EDITS_DIR0_KEY =
    "dfs.name.edits.dir.shared0";
  private static final String DFS_SHARED_EDITS_DIR1_KEY =
    "dfs.name.edits.dir.shared1";

  // Suffixes that select the instance in a per-instance key.
  private static final String ZERO = "0";
  private static final String ONE = "1";

  public static final String DFS_NAMENODE_RPC_ADDRESS0_KEY =
    DFS_NAMENODE_RPC_ADDRESS_KEY+ZERO;
  public static final String DFS_NAMENODE_RPC_ADDRESS1_KEY =
    DFS_NAMENODE_RPC_ADDRESS_KEY+ONE;

  // Keys that initializeGenericKeys passes to DFSUtil.setGenericConf.
  public static final String[] AVATARSERVICE_SPECIFIC_KEYS = {
    DFS_AVATARNODE_PORT_KEY,
    DFS_NAMENODE_RPC_ADDRESS0_KEY,
    DFS_NAMENODE_RPC_ADDRESS1_KEY,
    DATANODE_PROTOCOL_ADDRESS+ZERO,
    DATANODE_PROTOCOL_ADDRESS+ONE,
    DFS_NAMENODE_HTTP_ADDRESS_KEY+ZERO,
    DFS_NAMENODE_HTTP_ADDRESS_KEY+ONE,
  };

  /**
   * In federation, configuration is set for a set of
   * avatarnodes, namenodes etc, which are
   * grouped under a logical nameservice ID. The configuration keys specific
   * to them have suffix set to configured nameserviceId.
   *
   * This method copies the value from specific key of format key.nameserviceId
   * to key, to set up the generic configuration. Once this is done, only
   * generic version of the configuration is read in rest of the code, for
   * backward compatibility and simpler code changes.
   *
   * @param conf
   *          Configuration object to lookup specific key and to set the value
   *          to the key passed. Note the conf object is modified
   * @param serviceKey
   *          the nameservice id; when null or empty the configuration is
   *          left untouched
   * @see DFSUtil#setGenericConf(Configuration, String, String...)
*/ public static void initializeGenericKeys(Configuration conf, String serviceKey) { if ((serviceKey == null) || serviceKey.isEmpty()) { return; } NameNode.initializeGenericKeys(conf, serviceKey); // adjust meta directory names for this service adjustMetaDirectoryNames(conf, serviceKey); DFSUtil.setGenericConf(conf, serviceKey, AVATARSERVICE_SPECIFIC_KEYS); } /** Append service name to each avatar meta directory name * * @param conf configuration of NameNode * @param serviceKey the non-empty name of the name node service */ protected static void adjustMetaDirectoryNames(Configuration conf, String serviceKey) { adjustMetaDirectoryName(conf, DFS_SHARED_NAME_DIR0_KEY, serviceKey); adjustMetaDirectoryName(conf, DFS_SHARED_NAME_DIR1_KEY, serviceKey); adjustMetaDirectoryName(conf, DFS_SHARED_EDITS_DIR0_KEY, serviceKey); adjustMetaDirectoryName(conf, DFS_SHARED_EDITS_DIR1_KEY, serviceKey); } public static AvatarNode createAvatarNode(String argv[], Configuration conf, RunInfo runInfo) throws IOException { if (conf == null) { conf = new Configuration(); } Configuration startupConf = conf; // save configuration at startup StartupInfo startInfo = parseArguments(argv); StartupOption startOpt = startInfo.startOpt; if (startOpt == null) { printUsage(); return null; } if (!validateServiceName(conf, startInfo.serviceName)) { return null; } initializeGenericKeys(conf, startInfo.serviceName); setStartupOption(conf, startOpt); conf = updateAddressConf(conf, startInfo.instance); NameNode.setupDefaultURI(conf); // sync cannot be specified along with format or finalize validateStartupOptions(startInfo); // We need to check the zookeeper so that the node starting as active // is the one registered with the zookeeper // and if the node is starting as standby there has to be a master // already so that the node doesn't move the log and the image InetSocketAddress defaultAddr = NameNode.getClientProtocolAddress(startupConf); String fsname = defaultAddr.getHostName() + ":" + 
defaultAddr.getPort(); InetSocketAddress actualAddr = NameNode.getClientProtocolAddress(conf); String actualName = actualAddr.getHostName() + ":" + actualAddr.getPort(); AvatarZooKeeperClient zk = new AvatarZooKeeperClient(conf, null); boolean zkRegistryMatch = true; boolean primaryPresent = false; String errorMsg = null; try { Stat stat = new Stat(); String zkRegistry = zk.getPrimaryAvatarAddress(fsname, stat, false); if (zkRegistry == null) { // The registry is empty. Usually this means failover is in progress // we need to manually fix it before starting primary errorMsg = "A zNode that indicates the primary is empty. " + "AvatarNode can only be started as primary if it " + "is registered as primary with ZooKeeper"; zkRegistryMatch = false; } else { primaryPresent = true; if (!zkRegistry.equalsIgnoreCase(actualName)) { zkRegistryMatch = false; errorMsg = "Registration information in ZooKeeper doesn't " + "match the address of this node. AvatarNode can " + "only be started as primary if it is registered as " + "primary with ZooKeeper. zkRegistry = " + zkRegistry + ", actual name = " + actualName; } } } catch (Exception e) { LOG.error("Got Exception reading primary node registration " + "from ZooKeeper. Aborting the start", e); zkRegistryMatch = false; } finally { try { zk.shutdown(); } catch (InterruptedException e) { LOG.error("Error shutting down ZooKeeper client", e); } } if (!zkRegistryMatch && !startInfo.isStandby) { LOG.error(errorMsg); throw new IOException("Cannot start this AvatarNode as Primary."); } if (!primaryPresent && startInfo.isStandby) { throw new IOException("Cannot start Standby since the " + "primary is unknown"); } // If sync is requested, then we copy only the fsimage // (and not the transaction logs) from the other node. // If we are NODEONE, then modify the configuration to // set fs.name.dir, fs.default.name and dfs.http.address. // conf = copyFsImage(conf, startInfo); // namenode options. 
switch (startOpt) { case FORMAT: boolean aborted = format(conf, true); System.exit(aborted ? 1 : 0); case FORMATFORCE: aborted = format(conf, false); return null; case FINALIZE: aborted = finalize(conf, true); System.exit(aborted ? 1 : 0); default: } // We need to put the Namenode into safemode as soon as it starts up. // There is a race condition, where before the Standby AvatarNode can put // the NameNode into safemode, the NameNode might leave safemode. This could // occur in the case of a start where the FSImage and FSEdits are empty // and hence the NameNode doesn't wait at all in safemode. if (startInfo.isStandby) { conf.setBoolean("dfs.startup.safemode.manual", true); } return new AvatarNode(startupConf, conf, startInfo, runInfo); } private boolean zkIsEmpty() throws Exception { InetSocketAddress defaultAddr = NameNode.getClientProtocolAddress(startupConf); String fsname = defaultAddr.getHostName() + ":" + defaultAddr.getPort(); AvatarZooKeeperClient zk = new AvatarZooKeeperClient(this.confg, null); try { Stat stat = new Stat(); String zkRegistry = zk.getPrimaryAvatarAddress(fsname, stat, false); return zkRegistry == null; } catch (Exception e) { LOG.error("Got Exception reading primary node registration " + "from ZooKeeper.", e); throw e; } finally { try { zk.shutdown(); } catch (InterruptedException e) { LOG.error("Error shutting down ZooKeeper client", e); } } } /** * Return the configuration that should be used by this instance of AvatarNode * Copy fsimages from the remote shared device. 
*/ static Configuration copyFsImage(Configuration conf, StartupInfo startInfo) throws IOException { String img0 = conf.get("dfs.name.dir.shared0"); String img1 = conf.get("dfs.name.dir.shared1"); String edit0 = conf.get("dfs.name.edits.dir.shared0"); String edit1 = conf.get("dfs.name.edits.dir.shared1"); Collection<String> namedirs = conf.getStringCollection("dfs.name.dir"); Collection<String> editsdir = conf.getStringCollection("dfs.name.edits.dir"); String msg = ""; if (img0 == null || img0.isEmpty()) { msg += "No values specified in dfs.name.dir.share0"; } if (img1 == null || img1.isEmpty()) { msg += " No values specified in dfs.name.dir.share1"; } if (edit0 == null || edit0.isEmpty()) { msg += " No values specified in dfs.name.edits.dir.share0"; } if (edit1 == null || edit1.isEmpty()) { msg += " No values specified in dfs.name.edits.dir.share1"; } if (msg.length() != 0) { LOG.info(msg); throw new IOException(msg); } // verify that the shared dirctories are not specified as dfs.name.dir for (String str : namedirs) { if (str.equalsIgnoreCase(img0)) { msg = "The name specified in dfs.name.dir.shared0 " + img0 + " is already part of dfs.name.dir "; } if (str.equalsIgnoreCase(img1)) { msg += " The name specified in dfs.name.dir.shared1 " + img1 + " is already part of dfs.name.dir "; } } if (msg.length() != 0) { LOG.info(msg); throw new IOException(msg); } // verify that the shared edits directories are not specified as dfs.name.edits.dir for (String str : editsdir) { if (str.equalsIgnoreCase(edit0)) { msg = "The name specified in dfs.name.edits.dir.shared0 " + img0 + " is already part of dfs.name.dir "; } if (str.equalsIgnoreCase(edit1)) { msg += " The name specified in dfs.name.edits.dir.shared1 " + img1 + " is already part of dfs.name.dir "; } } if (msg.length() != 0) { LOG.info(msg); throw new IOException(msg); } File primary = new File(img0); File standby = new File(img1); String mdate = dateForm.format(new Date(now())); FileSystem localFs = 
FileSystem.getLocal(conf).getRaw(); File src = null; File dest = null; File srcedit = null; File destedit = null; // // if we are instance one then copy from primary to secondary // otherwise copy from secondary to primary. // if (startInfo.instance == InstanceId.NODEONE) { src = primary; dest = standby; srcedit = new File(edit0); destedit = new File(edit1); } else if (startInfo.instance == InstanceId.NODEZERO) { dest = primary; src = standby; destedit = new File(edit0); srcedit = new File(edit1); } // copy fsimage directory if needed if (src.exists() && startInfo.isStandby) { if (dest.exists()) { File tmp = new File (dest + File.pathSeparator + mdate); if (!dest.renameTo(tmp)) { throw new IOException("Unable to rename " + dest + " to " + tmp); } cleanupBackup(conf, dest); LOG.info("Moved aside " + dest + " as " + tmp); } if (!FileUtil.copy(localFs, new Path(src.toString()), localFs, new Path(dest.toString()), false, conf)) { msg = "Error copying " + src + " to " + dest; LOG.error(msg); throw new IOException(msg); } LOG.info("Copied " + src + " into " + dest); // Remove the lock file from the newly synced directory File lockfile = new File(dest, STORAGE_FILE_LOCK); lockfile.delete(); // Remove fsimage.ckpt if it exists. 
File ckptfile = new File(dest.toString() + IMAGENEW); ckptfile.delete(); // Now, copy from the now-updated shared directory to all other // local dirs specified in fs.name.dir src = dest; if (!namedirs.isEmpty()) { for (String str : namedirs) { dest = new File(str); if (dest.exists()) { File tmp = new File (dest + File.pathSeparator + mdate); if (!dest.renameTo(tmp)) { throw new IOException("Unable to rename " + dest + " to " + tmp); } cleanupBackup(conf, dest); LOG.info("Moved aside " + dest + " as " + tmp); } if (!FileUtil.copy(localFs, new Path(src.toString()), localFs, new Path(dest.toString()), false, conf)) { msg = "Error copying " + src + " to " + dest; LOG.error(msg); throw new IOException(msg); } LOG.info("Copied " + src + " into " + dest); } } } // copy edits directory if needed if (srcedit.exists() && startInfo.isStandby) { if (destedit.exists()) { File tmp = new File (destedit + File.pathSeparator + mdate); if (!destedit.renameTo(tmp)) { throw new IOException("Unable to rename " + destedit + " to " + tmp); } cleanupBackup(conf, destedit); LOG.info("Moved aside " + destedit + " as " + tmp); } if (!FileUtil.copy(localFs, new Path(srcedit.toString()), localFs, new Path(destedit.toString()), false, conf)) { msg = "Error copying " + srcedit + " to " + destedit; LOG.error(msg); throw new IOException(msg); } LOG.info("Copied " + srcedit + " into " + destedit); // Remove the lock file from the newly synced directory File lockfile = new File(destedit, STORAGE_FILE_LOCK); if (lockfile.exists() && lockfile.delete() == false) { throw new IOException("Unable to delete lock file " + lockfile); } // Remove edits and edits.new. Create empty edits file. 
File efile = new File(destedit.toString() + EDITSFILE); if (efile.exists() && efile.delete() == false) { throw new IOException("Unable to delete edits file " + efile); } efile = new File(destedit + EDITSNEW); efile.delete(); createEditsFile(destedit.toString()); // Now, copy from the now-updated shared directory to all other // local dirs specified in fs.name.edits.dir srcedit = destedit; if (!editsdir.isEmpty()) { for (String str : editsdir) { destedit = new File(str); if (destedit.exists()) { File tmp = new File (destedit + File.pathSeparator + mdate); if (!destedit.renameTo(tmp)) { throw new IOException("Unable to rename " + destedit + " to " + tmp); } cleanupBackup(conf, destedit); LOG.info("Moved aside " + destedit + " as " + tmp); } if (!FileUtil.copy(localFs, new Path(srcedit.toString()), localFs, new Path(destedit.toString()), false, conf)) { msg = "Error copying " + srcedit + " to " + destedit; LOG.error(msg); throw new IOException(msg); } LOG.info("Copied " + srcedit + " into " + destedit); } } } // allocate a new configuration and update fs.name.dir approprately // The shared device should be the first in the list. Configuration newconf = new Configuration(conf); StringBuffer buf = new StringBuffer(); if (startInfo.instance == InstanceId.NODEONE) { buf.append(img1); } else if (startInfo.instance == InstanceId.NODEZERO) { buf.append(img0); } for (String str : namedirs) { buf.append(","); buf.append(str); } newconf.set("dfs.name.dir", buf.toString()); buf = null; // update fs.name.edits.dir approprately in the new configuration // The shared device should be the first in the list. 
StringBuffer buf1 = new StringBuffer(); if (startInfo.instance == InstanceId.NODEONE) { buf1.append(edit1); } else if (startInfo.instance == InstanceId.NODEZERO) { buf1.append(edit0); } for (String str : editsdir) { buf1.append(","); buf1.append(str); } newconf.set("dfs.name.edits.dir", buf1.toString()); return newconf; } static void cleanupBackup(Configuration conf, File origin) { File root = origin.getParentFile(); final String originName = origin.getName(); String[] backups = root.list(new FilenameFilter() { @Override public boolean accept(File dir, String name) { if (!name.startsWith(originName) || name.equals(originName)) return false; try { dateForm.parse(name.substring(name.indexOf(File.pathSeparator) + 1)); } catch (ParseException pex) { return false; } return true; } }); Arrays.sort(backups, new Comparator<String>() { @Override public int compare(String back1, String back2) { try { Date date1 = dateForm.parse(back1.substring(back1 .indexOf(File.pathSeparator) + 1)); Date date2 = dateForm.parse(back2.substring(back2 .indexOf(File.pathSeparator) + 1)); // Sorting in reverse order, from later dates to earlier return date2.compareTo(date1); } catch (ParseException pex) { return 0; } } }); int copiesToKeep = conf.getInt("standby.image.copies.tokeep", 0); int daysToKeep = conf.getInt("standby.image.days.tokeep", 0); if (copiesToKeep == 0 && daysToKeep == 0) { // Do not delete anything in this case return; } Date now = new Date(now()); int copies = 0; for (String backup : backups) { copies++; Date backupDate = null; try { backupDate = dateForm.parse(backup.substring(backup .indexOf(File.pathSeparator) + 1)); } catch (ParseException pex) { // This should not happen because of the // way we construct the list } long backupAge = now.getTime() - backupDate.getTime(); if (copies > copiesToKeep && backupAge > daysToKeep * 24 * 60 * 60 * 1000) { // This backup is both old and we have enough of newer backups stored - // delete try { FileUtil.fullyDelete(new File(root, 
backup)); LOG.info("Deleted backup " + new File(root, backup)); } catch (IOException iex) { LOG.error("Error deleting backup " + new File(root, backup), iex); } } } } public static Configuration updateAddressConf(Configuration conf, InstanceId instance) { Configuration newconf = new Configuration(conf); // if we are starting as the other namenode, then change the // default URL to make the namenode attach to the appropriate URL if (instance == InstanceId.NODEZERO) { String fs = conf.get("dfs.http.address0"); if (fs != null) { newconf.set("dfs.http.address", fs); } fs = conf.get("dfs.namenode.dn-address0"); if (fs != null) { newconf.set("dfs.namenode.dn-address", fs); } fs = conf.get(AvatarNode.DFS_NAMENODE_RPC_ADDRESS0_KEY); if (fs != null) { newconf.set(AvatarNode.DFS_NAMENODE_RPC_ADDRESS_KEY, fs); newconf.set("fs.default.name0", fs); conf.set("fs.default.name0", fs); } fs = conf.get("fs.default.name0"); if (fs != null) { newconf.set("fs.default.name", fs); } } if (instance == InstanceId.NODEONE) { String fs = conf.get("dfs.http.address1"); if (fs != null) { newconf.set("dfs.http.address", fs); } fs = conf.get("dfs.namenode.dn-address1"); if (fs != null) { newconf.set("dfs.namenode.dn-address", fs); } fs = conf.get(AvatarNode.DFS_NAMENODE_RPC_ADDRESS1_KEY); if (fs != null) { newconf.set(AvatarNode.DFS_NAMENODE_RPC_ADDRESS_KEY, fs); newconf.set("fs.default.name1", fs); conf.set("fs.default.name1", fs); } fs = conf.get("fs.default.name1"); if (fs != null) { newconf.set("fs.default.name", fs); } } return newconf; } /** * Returns the address of the remote namenode */ InetSocketAddress getRemoteNamenodeAddress(Configuration conf) throws IOException { String fs = null; if (instance == InstanceId.NODEZERO) { fs = conf.get(AvatarNode.DFS_NAMENODE_RPC_ADDRESS1_KEY); if (fs == null) fs = conf.get("fs.default.name1"); } else if (instance == InstanceId.NODEONE) { fs = conf.get(AvatarNode.DFS_NAMENODE_RPC_ADDRESS0_KEY); if (fs == null) fs = conf.get("fs.default.name0"); } else 
{ throw new IOException("Unknown instance " + instance); } if (fs != null) { conf = new Configuration(conf); conf.set("fs.default.name", fs); } return NameNode.getAddress(conf); } /** * Returns the name of the http server of the local namenode */ String getRemoteNamenodeHttpName(Configuration conf) throws IOException { if (instance == InstanceId.NODEZERO) { return conf.get("dfs.http.address1"); } else if (instance == InstanceId.NODEONE) { return conf.get("dfs.http.address0"); } else { throw new IOException("Unknown instance " + instance); } } /** * Create an empty edits log */ static void createEditsFile(String editDir) throws IOException { File editfile = new File(editDir + EDITSFILE); FileOutputStream fp = new FileOutputStream(editfile); DataOutputBuffer buf = new DataOutputBuffer(1024); buf.writeInt(FSConstants.LAYOUT_VERSION); buf.writeTo(fp); buf.close(); fp.close(); } /** * Return the edits file of the remote NameNode */ File getRemoteEditsFile(Configuration conf) throws IOException { String edit = null; if (instance == InstanceId.NODEZERO) { edit = conf.get("dfs.name.edits.dir.shared1"); } else if (instance == InstanceId.NODEONE) { edit = conf.get("dfs.name.edits.dir.shared0"); } else { LOG.info("Instance is invalid. " + instance); throw new IOException("Instance is invalid. " + instance); } return new File(edit + EDITSFILE); } /** * Return the edits.new file of the remote NameNode */ File getRemoteEditsFileNew(Configuration conf) throws IOException { String edit = null; if (instance == InstanceId.NODEZERO) { edit = conf.get("dfs.name.edits.dir.shared1"); } else if (instance == InstanceId.NODEONE) { edit = conf.get("dfs.name.edits.dir.shared0"); } else { LOG.info("Instance is invalid. " + instance); throw new IOException("Instance is invalid. 
" + instance); } return new File(edit + EDITSNEW); } /** * Return the fstime file of the remote NameNode */ File getRemoteTimeFile(Configuration conf) throws IOException { String edit = null; if (instance == InstanceId.NODEZERO) { edit = conf.get("dfs.name.edits.dir.shared1"); } else if (instance == InstanceId.NODEONE) { edit = conf.get("dfs.name.edits.dir.shared0"); } else { LOG.info("Instance is invalid. " + instance); throw new IOException("Instance is invalid. " + instance); } return new File(edit + TIMEFILE); } /** * Reads the timestamp of the last checkpoint from the remote fstime file. */ long readRemoteFstime(Configuration conf) throws IOException { String edit = null; if (instance == InstanceId.NODEZERO) { edit = conf.get("dfs.name.edits.dir.shared1"); } else if (instance == InstanceId.NODEONE) { edit = conf.get("dfs.name.edits.dir.shared0"); } else { LOG.info("Instance is invalid. " + instance); throw new IOException("Instance is invalid. " + instance); } return readFstime(edit); } long readFstime(String location) throws IOException { File timeFile = new File(location + TIMEFILE); long timeStamp = 0L; DataInputStream in = null; try { in = new DataInputStream(new FileInputStream(timeFile)); timeStamp = in.readLong(); } catch (IOException e) { if (!timeFile.exists()) { String msg = "Error reading checkpoint time file " + timeFile + " file does not exist."; LOG.error(msg); throw new IOException(msg + e); } else if (!timeFile.canRead()) { String msg = "Error reading checkpoint time file " + timeFile + " cannot read file of size " + timeFile.length() + " last modified " + dateForm.format(new Date(timeFile.lastModified())); LOG.error(msg); throw new IOException(msg + e); } else { String msg = "Error reading checkpoint time file " + timeFile; LOG.error(msg); throw new IOException(msg + e); } } finally { if (in != null) { in.close(); } } return timeStamp; } long readLocalFstime(Configuration conf) throws IOException { String edits = null; if (instance == 
InstanceId.NODEZERO) { edits = conf.get("dfs.name.edits.dir.shared0"); } else { edits = conf.get("dfs.name.edits.dir.shared1"); } long editsTime = readFstime(edits); String image = null; if (instance == InstanceId.NODEZERO) { image = conf.get("dfs.name.dir.shared0"); } else { image = conf.get("dfs.name.dir.shared1"); } if (editsTime == readFstime(image)) { return editsTime; } throw new IOException("The checkpoint time of the local fsimage does not" + " match the time of the local edits"); } /** * Returns the starting checkpoint time of this AvatarNode */ long getStartCheckpointTime() { return startCheckpointTime; } /** * Sets the starting checkpoint time of this AvatarNode */ void setStartCheckpointTime(Configuration conf) throws IOException { startCheckpointTime = readRemoteFstime(conf); } void setStartCheckpointTime(long time) { startCheckpointTime = time; } /** * Indicates that the AvatarNode shoudl restart */ void doRestart() { runInfo.doRestart = true; } /** * Returns true if both edits and edits.new for the * remote namenode exists. */ boolean twoEditsFile(Configuration conf) throws IOException{ File f1 = getRemoteEditsFile(conf); File f2 = getRemoteEditsFileNew(conf); return f1.exists() && f2.exists(); } /** * Returns the size of the edits file for the remote * namenode. */ long editSize(Configuration conf) throws IOException{ return getRemoteEditsFile(conf).length(); } /** * Current system time. * @return current time in msec. */ static long now() { return System.currentTimeMillis(); } /** * Verify that configured directories exist, then * Interactively confirm that formatting is desired * for each existing directory and format them. 
* * @param conf * @param isConfirmationNeeded * @return true if formatting was aborted, false otherwise * @throws IOException */ private static boolean format(Configuration conf, boolean isConfirmationNeeded ) throws IOException { boolean allowFormat = conf.getBoolean("dfs.namenode.support.allowformat", true); if (!allowFormat) { throw new IOException("The option dfs.namenode.support.allowformat is " + "set to false for this filesystem, so it " + "cannot be formatted. You will need to set " + "dfs.namenode.support.allowformat parameter " + "to true in order to format this filesystem"); } Collection<File> dirsToFormat = FSNamesystem.getNamespaceDirs(conf); Collection<File> editDirsToFormat = FSNamesystem.getNamespaceEditsDirs(conf); for(Iterator<File> it = dirsToFormat.iterator(); it.hasNext();) { File curDir = it.next(); if (!curDir.exists()) continue; if (isConfirmationNeeded) { System.err.print("Re-format filesystem in " + curDir +" ? (Y or N) "); if (!(System.in.read() == 'Y')) { System.err.println("Format aborted in "+ curDir); return true; } while(System.in.read() != '\n'); // discard the enter-key } } FSNamesystem nsys = new FSNamesystem(new FSImage(dirsToFormat, editDirsToFormat), conf); nsys.dir.fsImage.format(); return false; } private static boolean finalize(Configuration conf, boolean isConfirmationNeeded ) throws IOException { Collection<File> dirsToFormat = FSNamesystem.getNamespaceDirs(conf); Collection<File> editDirsToFormat = FSNamesystem.getNamespaceEditsDirs(conf); FSNamesystem nsys = new FSNamesystem(new FSImage(dirsToFormat, editDirsToFormat), conf); System.err.print( "\"finalize\" will remove the previous state of the files system.\n" + "Recent upgrade will become permanent.\n" + "Rollback option will not be available anymore.\n"); if (isConfirmationNeeded) { System.err.print("Finalize filesystem state ? 
(Y or N) "); if (!(System.in.read() == 'Y')) { System.err.println("Finalize aborted."); return true; } while(System.in.read() != '\n'); // discard the enter-key } nsys.dir.fsImage.finalizeUpgrade(); return false; } public static class RunInfo { volatile boolean doRestart; volatile boolean shutdown; volatile boolean isRunning; public RunInfo(boolean doRestart, boolean shutdown, boolean isRunning) { this.doRestart = doRestart; this.shutdown = shutdown; this.isRunning = isRunning; } public RunInfo() { this.doRestart = false; this.shutdown = false; this.isRunning = true; } } public InetSocketAddress getNameNodeAddress() { return serverAddress; } /** */ public static void main(String argv[]) throws Exception { Exception exception = null; AvatarNode avatarnode = null; RunInfo runInfo = new RunInfo(); do { runInfo.doRestart = false; runInfo.isRunning = true; exception = null; try { StringUtils.startupShutdownMessage(AvatarNode.class, argv, LOG); avatarnode = createAvatarNode(argv, null, runInfo); if (avatarnode != null) { avatarnode.waitForRestart(); } } catch (Throwable e) { LOG.error(StringUtils.stringifyException(e)); if (runInfo.doRestart) { LOG.error("AvatarNode restarting..."); } else { exception = new Exception(StringUtils.stringifyException(e)); } } } while (runInfo.doRestart == true); if (runInfo.shutdown) { avatarnode.stopRPC(); } if (exception != null) { LOG.fatal("Exception running avatarnode. Shutting down", exception); Runtime.getRuntime().exit(1); } } }