/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import java.io.IOException;
import java.io.BufferedOutputStream;
import java.io.BufferedInputStream;
import java.io.DataOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileInputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.net.SocketTimeoutException;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import javax.management.NotCompliantMBeanException;
import javax.management.StandardMBean;
import org.apache.hadoop.ipc.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.ReconfigurationException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.metrics.util.MBeanUtil;
import org.apache.hadoop.hdfs.AvatarFailoverSnapshot;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.FastProtocolHDFS;
import org.apache.hadoop.hdfs.FastWritableHDFS;
import org.apache.hadoop.hdfs.FileStatusExtended;
import org.apache.hadoop.hdfs.OpenFilesInfo;
import org.apache.hadoop.hdfs.protocol.DataTransferProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.util.FlushableLogger;
import org.apache.hadoop.util.InjectionHandler;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.hdfs.protocol.AvatarProtocol;
import org.apache.hadoop.hdfs.protocol.AvatarConstants.Avatar;
import org.apache.hadoop.hdfs.protocol.AvatarConstants.StartupOption;
import org.apache.hadoop.hdfs.protocol.AvatarConstants.InstanceId;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.protocol.AvatarDatanodeCommand;
import org.apache.hadoop.hdfs.server.protocol.BlockFlags;
import org.apache.hadoop.hdfs.server.protocol.BlockCommand;
import org.apache.hadoop.hdfs.server.protocol.BlockReport;
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.IncrementalBlockReport;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.ReceivedBlockInfo;
import org.apache.hadoop.hdfs.server.common.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.Transition;
import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.datanode.DatanodeProtocols;
import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
import org.apache.hadoop.hdfs.server.namenode.ClusterJspHelper.NameNodeKey;
import org.apache.hadoop.hdfs.server.namenode.JournalStream.JournalType;
import org.apache.hadoop.hdfs.server.namenode.metrics.AvatarNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.metrics.AvatarNodeStatusMBean;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.hdfs.util.LightWeightBitSet;
/**
* This is an implementation of the AvatarNode, a hot
* standby for the NameNode.
*
* The AvatarNode has two avatars.. the Standby avatar and the Active
* avatar.
*
* In the Standby avatar, the AvatarNode is consuming transaction logs
* generated by the primary (via a transaction log stored in a shared device).
* Typically, the primary Namenode is writing transactions to a NFS filesystem
* and the Standby is reading the log from the same NFS filesystem. The
* Standby is also making periodic checkpoints to the primary namenode.
*
* A manual command can switch the AvatarNode from the Standby avatar
* to the Active avatar. In the Active avatar, the AvatarNode performs precisely
* the same functionality as a real usual Namenode. The switching from
* Standby avatar to the Active avatar is fast and can typically occur
* within seconds.
*
* Typically, an administrator will require two shared mount points for
* transaction logs. It has to be set in fs.name.dir.shared0 and
* fs.name.dir.shared1 (similarly for edits). Then the administrator starts
* the AvatarNode on two different machines as follows:
*
* bin/hadoop org.apache.hadoop.hdfs.server.namenode.AvatarNode -zero -active
* bin/hadoop org.apache.hadoop.hdfs.server.namenode.AvatarNode -one -standby
* The first AvatarNode uses fs.name.dir.shared0 while the second
* AvatarNode uses fs.name.dir.shared1 to write its transaction logs.
* Also, at startup, the first instance is the primary Namenode and the
* second instance is the Standby
*
* After a while, the administrator decides to change the avatar of the
* second instance to Active. In this case, he/she has to first ensure that the
* first instance is really dead. This code does not handle the
* split-brain scenario where there are two active namenodes in one cluster.
*
*/
public class AvatarNode extends NameNode
implements AvatarProtocol, AvatarNodeStatusMBean {
static {
  // Layer avatar-specific configuration resources on top of the
  // standard Hadoop defaults before any Configuration is instantiated.
  Configuration.addDefaultResource("avatar-default.xml");
  Configuration.addDefaultResource("avatar-site.xml");
}
public static final Log LOG = LogFactory.getLog(AvatarNode.class.getName());
// immediate flush logger
private static final Log FLOG = FlushableLogger.getLogger(LOG);
// how often the Cleaner thread purges invalidate/replication queues (ms)
private static final int INVALIDATES_CLEANUP_INTERVAL = 60 * 1000;
// name of the test-framework snapshot written to the shared edits dir
public static final String FAILOVER_SNAPSHOT_FILE = "failover_snapshot_file";
// The instanceId is assigned at startup time and does not change for
// the lifetime of the Node. The administrator has to name each instance
// of the AvatarNode with a different instanceId. The node number is used
// by the AvatarNode to determine which shared devices it should use to
// checkpoint the image.
//
private InstanceId instance;
// The txid the fsimage was sync-ed from the remote AvatarNode
volatile private long startCheckpointTxId;
private Server server; /** RPC server */
private InetSocketAddress serverAddress; /** RPC server address */
private volatile Avatar currentAvatar; // the current incarnation of this node
private Standby standby; // the standby object
private Configuration confg; // config for the standby namenode
private Configuration startupConf; // config for the namenode
private Thread standbyThread; // the standby daemon thread
private Cleaner cleaner; // The thread cleaning up invalidates and mis-replicated blocks
private Thread cleanerThread; // thread running the Cleaner (standby only)
private RunInfo runInfo; // shared run/shutdown flags
private long sessionId; // zookeeper session id of this failover session
private StandbySafeMode standbySafeMode; // safe-mode impl used while standby
private volatile boolean isInitialized = false; // set at end of constructor
protected final boolean enableTestFramework; // failover test framework switch
protected final boolean enableTestFrameworkFsck; // fsck comparison switch
private String failoverFsck = ""; // fsck output captured on this node
private String oldPrimaryFsck = ""; // fsck output read from old primary's snapshot
private volatile FailoverState failoverState = FailoverState.BEFORE_FAILOVER;
private final AvatarNodeMetrics metrics; // wrapper around namenode metrics
/**
 * Phases of the failover protocol. The string form is the name reported
 * in logs and status output.
 */
public enum FailoverState {
  BEFORE_FAILOVER("BeforeFailover"),
  START_FAILOVER("StartFailover"),
  FAILED_FAILOVER("FailedFailover"),
  AWAIT_FAILOVER("AwaitFailover"),
  PERFORM_FAILOVER("PerformFailover");

  /** Display name used by {@link #toString()}. */
  private final String name;

  private FailoverState(String arg) {
    this.name = arg;
  }

  @Override
  public String toString() {
    return name;
  }
}
/**
 * The startup Conf is the original configuration of the AvatarNode. It is used by the
 * secondary namenode to talk to the primary namenode.
 * The conf is the modified configuration that is used by the standby namenode
 *
 * @param startupConf original configuration, used to reach the primary
 * @param conf modified configuration for the encapsulated namenode
 * @param startInfo startup options (instance id, avatar, service name)
 * @param runInfo shared run/shutdown state
 * @param sessionId zookeeper session id for this failover session
 * @param nameNodeAddr RPC address of the primary (required when standby)
 * @param primaryNamenode RPC proxy to the primary (required when standby)
 * @throws IOException if standby is requested without a primary connection,
 *         or the configured safe mode implementation is wrong for standby
 */
AvatarNode(Configuration startupConf, Configuration conf,
    StartupInfo startInfo, RunInfo runInfo, long sessionId,
    InetSocketAddress nameNodeAddr, NamenodeProtocol primaryNamenode)
    throws IOException {
  super(conf);
  // wrap namenode metrics
  this.metrics = new AvatarNodeMetrics(super.getNameNodeMetrics());
  // check if we talk to primary
  if (startInfo.isStandby
      && (nameNodeAddr == null || primaryNamenode == null)) {
    throw new IOException("RPC to primary namenode not initialized");
  }
  this.sessionId = sessionId;
  this.runInfo = runInfo;
  this.instance = startInfo.instance;
  // a non-zero sample percent turns the failover test framework on
  this.enableTestFramework =
      (conf.getFloat("dfs.avatarnode.failover.sample.percent", 0.0f) != 0.0f);
  this.enableTestFrameworkFsck =
      (conf.getBoolean("dfs.avatarnode.failover.fsck", false));
  // if we are starting as the standby then
  // record the fstime of the checkpoint that we are about to sync from
  if (startInfo.isStandby) {
    // Set the checkpoint time to the fstime of the image and edits
    // that were copied
    setStartCheckpointTxId(namesystem.getFSImage().storage
        .getMostRecentCheckpointTxId());
  }
  initialize(conf);
  currentAvatar = startInfo.isStandby ? Avatar.STANDBY : Avatar.ACTIVE;
  this.startupConf = startupConf;
  this.confg = conf;
  this.nameserviceId = startInfo.serviceName;
  if (currentAvatar == Avatar.STANDBY) {
    // Verify we have the correct safemode.
    SafeModeInfo safeMode = super.namesystem.getSafeModeInstance();
    if (safeMode == null || !(safeMode instanceof StandbySafeMode)) {
      throw new IOException("Invalid safe mode for Standby Avatar : "
          + safeMode + " Standby Avatar should be using "
          + StandbySafeMode.class + " as its dfs.safemode.impl");
    }
    standbySafeMode = (StandbySafeMode) safeMode;
    // Standby has a different property for the max buffered transactions
    // to replay the log faster
    int maxStandbyBufferedTransactions =
        confg.getInt("dfs.max.standby.buffered.transactions",
            HdfsConstants.DEFAULT_MAX_BUFFERED_TRANSACTIONS);
    FSEditLog.setMaxBufferedTransactions(maxStandbyBufferedTransactions);
    // Create a standby object which does the actual work of
    // processing transactions from the primary and checkpointing
    standby = new Standby(this, startupConf, confg, nameNodeAddr, primaryNamenode);
    standbyThread = new Thread(standby);
    standbyThread.setName("Standby");
    standbyThread.start();
    cleaner = new Cleaner();
    cleanerThread = new Thread(cleaner);
    cleanerThread.start();
  }
  isInitialized = true;
}
/**
 * Records the fsck output captured on this node during failover, used
 * later to compare against the old primary's fsck output.
 */
protected void setFailoverFsck(String fsck) {
  failoverFsck = fsck;
}
/**
 * Wait for the StandbyNode to exit. If it does, then stop the underlying namenode.
 * When a failover is in progress this also waits for the failover state
 * machine to settle before deciding whether to shut down or keep serving.
 */
public void waitForRestart() {
  if (standbyThread != null) {
    try {
      // if this is the standby avatarnode, then wait for the Standby to exit
      standbyThread.join();
    } catch (InterruptedException ie) {
      //eat it up
    }
    standbyThread = null;
    LOG.info("waitForRestart: Standby thread exited.");
    InjectionHandler.processEvent(InjectionEvent.AVATARNODE_WAIT_FOR_RESTART);
    // block while a failover is between quiesce and performFailover
    while (failoverState == FailoverState.START_FAILOVER
        || failoverState == FailoverState.AWAIT_FAILOVER) {
      LOG.info("Current state : " + failoverState
          + ". Waiting for failover ....");
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ie) {
        throw new RuntimeException("waitForRestart() interrupted");
      }
    }
    // if we are still in standbymode, that means we need to restart from
    // scratch.
    if (getAvatar() == Avatar.STANDBY) {
      runInfo.isRunning = false;
      LOG.info("waitForRestart Stopping encapsulated namenode.");
      super.stop(); // terminate encapsulated namenode
      super.join(); // wait for encapsulated namenode to exit
      shutdownStandby();
      LOG.info("waitForRestart exiting");
      return;
    }
  }
  super.join(); // wait for encapsulated namenode
}
/**
 * Registers this node's {@link AvatarNodeStatusMBean} interface with JMX.
 * Registration failures are logged, never propagated.
 */
public void registerMBean() {
  try {
    final StandardMBean bean =
        new StandardMBean(this, AvatarNodeStatusMBean.class);
    MBeanUtil.registerMBean("AvatarNode", "AvatarNodeState", bean);
  } catch (NotCompliantMBeanException mex) {
    LOG.error("Error registering mbean with JMX", mex);
  }
}
/** Returns the avatar-specific metrics wrapper for this node. */
public AvatarNodeMetrics getAvatarNodeMetrics() {
  return this.metrics;
}
/** Returns the instance id (zero/one) of this AvatarNode as a string. */
@Override
public String getInstance() {
  return instance.toString();
}
/** Returns the current avatar (Active/Standby) as a string for JMX. */
@Override
public String getState() {
  return currentAvatar.toString();
}
/** Returns how many bytes the standby lags behind, or 0 when active. */
@Override
public long getLagBytes() {
  final Standby s = standby;
  return (s != null) ? s.getLagBytes() : 0;
}
/** Returns the original (startup) configuration of this AvatarNode. */
public Configuration getStartupConf() {
  return startupConf;
}
/**
 * Initialize AvatarNode: start the avatar-specific RPC server and
 * register the status MBean.
 * @param conf the configuration
 * @throws IOException if the RPC server cannot be created
 */
private void initialize(Configuration conf) throws IOException {
  InetSocketAddress socAddr = AvatarNode.getAddress(conf);
  int handlerCount = conf.getInt("hdfs.avatarnode.handler.count", 3);
  // create rpc server
  // no point to deserialize job file in Avatar node.
  this.server = RPC.getServer(this, socAddr.getAddress().getHostAddress(), socAddr.getPort(),
      handlerCount, false, conf, false);
  // The rpc-server port can be ephemeral... ensure we have the
  // correct info
  this.serverAddress = this.server.getListenerAddress();
  LOG.info("AvatarNode up at: " + this.serverAddress);
  this.registerMBean();
  this.server.start();
}
/**
 * If the specified protocol is AvatarProtocol, then return the
 * AvatarProtocol version id, otherwise delegate to the underlying
 * namenode.
 */
public long getProtocolVersion(String protocol,
    long clientVersion) throws IOException {
  return protocol.equals(AvatarProtocol.class.getName())
      ? AvatarProtocol.versionID
      : super.getProtocolVersion(protocol, clientVersion);
}
//
// methods to support Avatar Protocol
//

/**
 * Returns the current avatar of this node, under the node lock.
 */
public synchronized Avatar getAvatar() {
  return this.currentAvatar;
}
/**
 * Returns the current avatar without taking the node lock
 * (plain read of the volatile field).
 */
public Avatar reportAvatar() {
  return this.currentAvatar;
}
/**
 * Failover precondition checks run on both avatars: caller privileges,
 * finalized upgrade (active) or healthy checkpointing (standby),
 * completed initialization, and available edit streams.
 * @return true when this node may take part in a failover
 * @throws IOException when any precondition fails
 */
public boolean isInitialized() throws IOException {
  // check permissions, if this fails, the failover will not
  // progress, and this will prevent shutting down the primary avatar
  super.namesystem.checkSuperuserPrivilege();
  if (currentAvatar == Avatar.ACTIVE) {
    // check for upgrades
    if (!getFSImage().isUpgradeFinalized()) {
      throw new IOException(
          "Failover: Upgrade must be finalized before failover");
    }
  } else /* STANDBY */ {
    // check if there are problems with checkpointing
    if (standby.getNumCheckpointFailures() > 0) {
      throw new IOException("Failover: Standby has problems with checkpointing");
    }
  }
  // for both check if the node is initialized
  if (!isInitialized) {
    throw new IOException("Failover: Standby is not initialized");
  }
  // for both check active edit streams
  verifyEditStreams();
  return true;
}
/**
 * Heartbeat expiry checking is unconditional on the standby; the active
 * avatar defers to the namenode's own decision.
 */
protected boolean shouldCheckHeartbeat() {
  // super is consulted only when we are the active avatar
  return currentAvatar != Avatar.ACTIVE || super.shouldCheckHeartbeat();
}
/**
 * Background thread performing the actual shutdown sequence of either
 * avatar: quiesce the standby (if any), stop RPC, optionally capture
 * test-framework fsck output, verify edit streams, stop the
 * encapsulated namenode, and finally persist failover data (snapshot
 * file for the standby path, last txid in zookeeper for the active).
 */
private static class ShutdownAvatarThread extends Thread {
  // the node being shut down
  private final AvatarNode node;

  public ShutdownAvatarThread(AvatarNode node) {
    this.node = node;
  }

  public void run() {
    try {
      node.runInfo.shutdown = true;
      LOG.info("Failover: Shutdown thread for " + node.currentAvatar
          + " starting...");
      if (node.currentAvatar == Avatar.STANDBY) {
        // make sure that all transactions are consumed
        try {
          // do not recover the unclosed segment here.
          node.standby.quiesce(FSEditLogLoader.TXID_IGNORE, false);
        } catch (Throwable e) {
          // best-effort: a failed quiesce must not block the shutdown
          LOG.warn("Failover: standby error ", e);
        }
      }
      // Need to stop RPC threads before capturing any final data about the
      // primary avatar.
      node.stopRPC(false);
      String fsck = "";
      try {
        if (node.enableTestFramework
            && node.enableTestFrameworkFsck) {
          LOG.info("Failover: Test framework - running fsck");
          fsck = node.runFailoverFsck();
          LOG.info("Failover: Test framework - fsck done");
        }
      } catch (IOException e) { /*ignore*/ }
      // check if the shared journal is still available
      node.verifyEditStreams();
      // stop the node (namesystem, fsimage, editlog, etc.)
      node.stop();
      node.join(); // wait for encapsulated namenode to exit
      long totalBlocks = node.namesystem.getBlocksTotal();
      if (InjectionHandler.falseCondition(InjectionEvent.AVATARNODE_SHUTDOWN,
          totalBlocks)) {
        // simulate crash
        return;
      }
      if (node.currentAvatar == Avatar.STANDBY) {
        node.shutdownStandby();
      } else if (node.currentAvatar == Avatar.ACTIVE) {
        // If we are the primary we need to sync our last transaction id to
        // zookeeper.
        node.writeFailoverTestData(fsck);
        AvatarNodeZkUtil.writeLastTxidToZookeeper(
            node.getLastWrittenTxId(),
            totalBlocks,
            node.namesystem.getFilesAndDirectoriesTotal(),
            node.sessionId,
            node.startupConf,
            node.confg);
      }
      InjectionHandler.processEvent(InjectionEvent.AVATARNODE_SHUTDOWN_COMPLETE);
    } catch (Exception e) {
      LOG.error("Failover: shutdownAvatar() failed", e);
    } finally {
      LOG.info("Failover: Shutdown thread for " + node.currentAvatar
          + " DONE.");
    }
  }
}
/**
 * Verifies that the shared journal is active, or that the number
 * of active journals equals the number of configured journals
 * (both checks can be overridden by the injection test hooks).
 * @throws IOException when neither condition holds
 */
private void verifyEditStreams() throws IOException {
  // we check if the shared stream is still available
  if (getFSImage().getEditLog().isSharedJournalAvailable()
      && InjectionHandler
          .trueCondition(InjectionEvent.AVATARNODE_CHECKEDITSTREAMS)) {
    return;
  }
  // for sanity check if the number of available journals
  // is equal to the number of configured ones
  int expectedEditStreams = NNStorageConfiguration.getNamespaceEditsDirs(
      confg).size();
  int actualEditStreams = this.namesystem.getFSImage().getEditLog()
      .getNumberOfAvailableJournals();
  if (expectedEditStreams == actualEditStreams
      && InjectionHandler
          .trueCondition(InjectionEvent.AVATARNODE_CHECKEDITSTREAMS)) {
    return;
  }
  String msg = "Failover: Cannot proceed - shared journal is not available. "
      + "Number of required edit streams: " + expectedEditStreams
      + " current number: " + actualEditStreams;
  LOG.fatal(msg);
  throw new IOException(msg);
}
/**
 * Shuts down the avatar node.
 * The work is always delegated to a ShutdownAvatarThread (hence the
 * "Asynchronous" log line); the flag only controls whether this call
 * waits for that thread to finish.
 * @param synchronous - should the function wait for the shutdown to complete
 * @throws IOException if the caller lacks privileges, the edit streams
 *         are unavailable, or the wait is interrupted
 */
public synchronized void shutdown(boolean synchronous) throws IOException {
  LOG.info("Failover: Asynchronous shutdown for: " + currentAvatar);
  // check permissions before any other actions
  super.namesystem.checkSuperuserPrivilege();
  if (runInfo.shutdown) {
    LOG.info("Failover: Node already shut down");
    return;
  }
  // check edit streams
  // if this fails, we still have a chance to fix it
  // and shutdown again
  verifyEditStreams();
  runInfo.shutdown = true;
  Thread shutdownThread = new ShutdownAvatarThread(this);
  shutdownThread.setName("ShutDown thread for : " + serverAddress);
  shutdownThread.setDaemon(false);
  shutdownThread.start();
  if (synchronous) {
    LOG.info("Failover: Waiting for shutdown to complete");
    try {
      shutdownThread.join();
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }
}
/** Asynchronous shutdown entry point of the AvatarProtocol. */
@Override
public void shutdownAvatar() throws IOException {
  shutdown(false);
}
/**
 * Used only for testing.
 * @return the Standby worker object
 * @throws IOException when this node is not the standby avatar
 */
public Standby getStandby() throws IOException {
  if (currentAvatar == Avatar.STANDBY) {
    return standby;
  }
  throw new IOException("This is not the standby avatar");
}
/**
 * Returns the zookeeper session id of this failover session.
 * @throws IOException when this node is not the active avatar
 */
public long getSessionId() throws IOException {
  if (currentAvatar == Avatar.ACTIVE) {
    return this.sessionId;
  }
  throw new IOException("This is not the primary avatar");
}
/**
 * Used only for testing.
 * Consumes outstanding transactions up to the given transaction id.
 * @throws IOException when this node is not the standby avatar
 */
public void quiesceStandby(long txId) throws IOException {
  if (currentAvatar == Avatar.STANDBY) {
    standby.quiesce(txId);
    return;
  }
  throw new IOException("This is not the standby avatar");
}
/**
 * Stops the standby-side machinery: the Standby worker, the avatar RPC
 * server, and the Cleaner thread.
 */
public void shutdownStandby() {
  standby.shutdown();
  if (server != null) { // shutdown the AvatarNode
    LOG.info("Stopping avatarnode rpcserver.");
    server.stop();
    try {
      server.join();
    } catch (InterruptedException ie) {
      //eat it up
    }
  }
  if (cleaner != null) {
    // Shut down the cleaner thread as it will keep
    // the process from shutting down
    cleaner.stop();
    cleanerThread.interrupt();
    try {
      cleanerThread.join();
    } catch (InterruptedException iex) {
      // preserve the interrupt for our caller
      Thread.currentThread().interrupt();
    }
  }
}
/**
 * Stops all RPC threads and ensures that all RPC handlers have exited.
 * Stops all communication to the namenode.
 * Order matters: the avatar server is signalled first (without waiting),
 * then the namenode servers are stopped, and only then do we wait for
 * the avatar server to drain.
 * @param interruptClientHandlers whether to interrupt in-flight handlers
 * @throws IOException if interrupted while waiting
 */
protected void stopRPC(boolean interruptClientHandlers) throws IOException {
  try {
    // stop avatardatanode server
    stopRPCInternal(server, "avatardatanode", interruptClientHandlers);
    // stop namenode rpc (client, datanode)
    super.stopRPC(interruptClientHandlers);
    // wait for avatardatanode rpc
    stopWaitRPCInternal(server, "avatardatanode");
  } catch (InterruptedException ex) {
    throw new IOException("stopRPC() interrupted", ex);
  }
}
/**
 * Compares this node's last transaction id, total block count and total
 * inode count against the values the old primary recorded in zookeeper.
 * @throws StandbyStateException (an IOException) on any mismatch
 */
private void verifyTransactionIds(ZookeeperTxId zkTxId) throws IOException {
  // TODO for unit test it can happen that rollEditLog happens after
  // obtaining the txid so we might have a difference of 2 !!!
  long zkLastTxId = zkTxId.getTransactionId();
  long zkTotalBlocks = zkTxId.getTotalBlocks();
  long zkTotalInodes = zkTxId.getTotalInodes();
  // local values
  long lastTxId = super.getLastWrittenTxId();
  long totalBlocks = super.namesystem.getBlocksTotal();
  long totalINodes = super.namesystem.getFilesAndDirectoriesTotal();
  // Verify transacation ids.
  if (lastTxId < 0 || zkLastTxId < 0) {
    throw new StandbyStateException(
        "Invalid transacation ids, txid in NameNode : " + lastTxId
            + " txid in Zookeeper : " + zkLastTxId);
  } else if (lastTxId != zkLastTxId) {
    throw new StandbyStateException("The transacation id in the namenode : "
        + lastTxId + " does not match the transaction id in zookeeper : "
        + zkLastTxId
        + formatErrorMessage(lastTxId, zkLastTxId, "transactions"));
  } else if (zkTotalBlocks != totalBlocks) {
    throw new StandbyStateException("Total blocks in ZK : " + zkTotalBlocks
        + " don't match up with total blocks on Standby : " + totalBlocks
        + formatErrorMessage(totalBlocks, zkTotalBlocks, "blocks"));
  } else if (zkTotalInodes != totalINodes) {
    throw new StandbyStateException("Total inodes in ZK : " + zkTotalInodes
        + " don't match up with total inodes on Standby : " + totalINodes
        + formatErrorMessage(totalINodes, zkTotalInodes, "inodes"));
  }
}
/**
 * Builds the " Standby has N more/fewer <what>." suffix used in
 * verification error messages. Only called when the counts differ.
 */
private String formatErrorMessage(long localNumber, long remoteNumber,
    String what) {
  final long diff = localNumber - remoteNumber;
  final String direction = (diff > 0) ? "more " : "fewer ";
  return " Standby has " + Math.abs(diff) + " " + direction + what + ".";
}
/** Returns the failover snapshot file located under the given directory. */
private static File buildSnapshotFilePath(String pathDir) {
  final String child = Path.SEPARATOR + FAILOVER_SNAPSHOT_FILE;
  return new File(pathDir, child);
}
/**
 * Locates the failover snapshot file in the shared edits directory
 * (remote or local), falling back to the configured test data dir.
 * @param conf configuration consulted only for the test-data fallback
 * @param remote true for the remote node's shared edits URI
 * @return the snapshot file, or null when no location is available
 */
private File getSnapshotFile(Configuration conf, boolean remote)
    throws IOException {
  // NOTE(review): the shared edits URIs are resolved from the confg
  // field rather than the conf parameter — confirm this is intended
  URI sharedEditsDirectory = remote ? getRemoteSharedEditsURI(confg)
      : getLocalSharedEditsURI(confg);
  if (!isFile(sharedEditsDirectory)) {
    String failoverDataDir = conf
        .get("dfs.avatarnode.failover.test.data.dir");
    if (failoverDataDir != null) {
      return buildSnapshotFilePath(failoverDataDir);
    }
    return null;
  }
  return buildSnapshotFilePath(sharedEditsDirectory.getPath());
}
/**
 * Test framework: persists a snapshot of open files, a random sample of
 * file statuses and (optionally) fsck output so that the new primary
 * can verify its state after failover. No-op when the framework is
 * disabled or no snapshot location can be determined.
 * @param fsck fsck output of this (old primary) node, may be empty
 */
private void writeFailoverTestData(String fsck) throws IOException {
  if (!enableTestFramework) {
    LOG.info("Failover: Test framework - disabled");
    return;
  }
  File snapshotFile = getSnapshotFile(confg, true);
  if (snapshotFile == null)
    return;
  float samplePercent = confg.getFloat(
      "dfs.avatarnode.failover.sample.percent", 0.05f);
  LOG.info("Failover: Test framework - using " + (100.0*samplePercent)
      + " % sample size");
  List<FileStatusExtended> stat = super.getRandomFilesSample(samplePercent);
  AvatarFailoverSnapshot snapshot = new AvatarFailoverSnapshot(
      super.namesystem.getOpenFiles(), stat);
  DataOutputStream out = new DataOutputStream(
      new BufferedOutputStream(new FileOutputStream(snapshotFile)));
  try {
    snapshot.write(out);
    out.writeBoolean(enableTestFrameworkFsck);
    if (enableTestFrameworkFsck) {
      Text.writeString(out, fsck);
    }
  } finally {
    // always release the file handle, even if the write fails
    out.close();
  }
  LOG.info("Failover: Test framework - saved snapshot file : " + snapshotFile);
}
/**
 * Test framework: verifies that the given file status from the snapshot
 * matches this node's view of the same path.
 * @throws IOException when the file is missing or the statuses differ
 */
private void verifySnapshotSampledFile(FileStatusExtended file)
    throws IOException {
  FileStatusExtended stat = super.namesystem.getFileInfoExtended(file
      .getPath().toString());
  // Fix: a missing file used to surface as a NullPointerException from
  // stat.equals() below; fail with a descriptive IOException instead.
  if (stat == null) {
    throw new IOException("Information for file : " + file.getPath()
        + " is present in the snapshot file, but the file does not exist"
        + " in the namespace");
  }
  if (!stat.equals(file)) {
    throw new IOException("Information for file : " + file.getPath()
        + " does not match with information on snapshot file, expected : "
        + file + ", actual : " + stat);
  }
}
/**
 * Test framework: checks the generation stamp recorded in the snapshot
 * and then verifies every open file against this node's state.
 */
private void verifyOpenFiles(OpenFilesInfo openFilesInfo) throws IOException {
  if (openFilesInfo.getGenStamp() != super.namesystem.getGenerationStamp()) {
    throw new IOException(
        "GS on snapshot file doesn't match with GS on node : "
            + openFilesInfo.getGenStamp() + ", "
            + super.namesystem.getGenerationStamp());
  }
  for (FileStatusExtended stat : openFilesInfo.getOpenFiles()) {
    verifySnapshotSampledFile(stat);
  }
}
/**
 * Test framework: reads the snapshot written by the old primary and
 * verifies open files and sampled closed files against this node.
 * Stores the old primary's fsck output for the later comparison in
 * performFailover(). No-op when the framework is disabled or the
 * snapshot file cannot be located.
 */
private void verifyFailoverTestData() throws IOException {
  if (!enableTestFramework) {
    LOG.info("Failover: Test framework - disabled");
    return;
  }
  String fsck = "";
  LOG.info("Failover: Test framework - verification - starting...");
  AvatarFailoverSnapshot snapshot = new AvatarFailoverSnapshot();
  File snapshotFile = getSnapshotFile(confg, false);
  if (snapshotFile == null)
    return;
  DataInputStream in = new DataInputStream(
      new BufferedInputStream(new FileInputStream(snapshotFile)));
  try {
    snapshot.readFields(in);
    if (in.readBoolean()) {
      LOG.info("Failover: Test framework - found fsck data");
      fsck = Text.readString(in);
    }
  } finally {
    in.close();
  }
  LOG.info("Failover: Test framework - verifying open files: found "
      + snapshot.getOpenFilesInfo().getOpenFiles().size()
      + " files in the test snapshot");
  verifyOpenFiles(snapshot.getOpenFilesInfo());
  LOG.info("Failover: Test framework - verifying closed files: found "
      + snapshot.getSampledFiles().size()
      + " files in the test snapshot");
  for (FileStatusExtended stat : snapshot.getSampledFiles()) {
    verifySnapshotSampledFile(stat);
  }
  LOG.info("Failover: Test framework - verification - succeeded");
  this.oldPrimaryFsck = fsck;
}
/**
 * Runs a full-namespace fsck and returns its textual report; used by
 * the failover test framework to compare old and new primary state.
 */
protected String runFailoverFsck() throws IOException {
  final Map<String, String[]> params = new HashMap<String, String[]>();
  params.put("path", new String[] {"/"});
  // run fsck, collecting the report into an in-memory writer
  final StringWriter report = new StringWriter();
  final NamenodeFsck fscker = new NamenodeFsck(confg, this,
      params, new PrintWriter(report));
  fscker.fsck();
  return report.toString();
}
/**
 * Clean failover-outstanding datanodes.
 * Used for fast failover, once the overreplication processing starts.
 */
void clearOutstandingNodes() {
  final StandbySafeMode safeMode = standbySafeMode;
  if (safeMode != null) {
    safeMode.removeOutStandingDatanodes(false);
  }
  standbySafeMode = null;
}
/**
 * Second phase of failover on the standby: stops the cleaner, restores
 * primary-mode edit buffering, purges stale queues, re-opens the edit
 * log, exits safe mode via the failover path, records the new primary
 * in zookeeper and finally flips the avatar to ACTIVE.
 * @throws IOException if the caller lacks privileges or this node is
 *         not the standby
 */
@Override
public synchronized void performFailover() throws IOException {
  // check permissions before any other actions
  super.namesystem.checkSuperuserPrivilege();
  failoverState = FailoverState.PERFORM_FAILOVER;
  enforceStandby("Cannot perform failover on active");
  // stop the periodic queue cleaner; queues must be preserved from now on
  cleaner.stop();
  cleanerThread.interrupt();
  try {
    cleanerThread.join();
  } catch (InterruptedException iex) {
    Thread.currentThread().interrupt();
  }
  // change the value to the one for the primary
  int maxStandbyBufferedTransactions = confg.getInt(
      "dfs.max.buffered.transactions",
      HdfsConstants.DEFAULT_MAX_BUFFERED_TRANSACTIONS);
  FSEditLog.setMaxBufferedTransactions(maxStandbyBufferedTransactions);
  // Clear up deletion and replication queues.
  clearInvalidateAndReplicationQueues();
  // if the log was closed by ingestion, re-open it
  if (!getFSImage().getEditLog().isOpen())
    getFSImage().getEditLog().open();
  LOG.info("Failover: Triggering safemode failover");
  standbySafeMode.triggerFailover();
  sessionId = AvatarNodeZkUtil.writeToZooKeeperAfterFailover(startupConf,
      confg);
  LOG.info("Failover: Changed avatar from " + currentAvatar + " to "
      + Avatar.ACTIVE);
  // test framework: compare the fsck captured on the old primary with
  // the one captured on this node
  if (enableTestFramework && enableTestFrameworkFsck) {
    if (!failoverFsck.equals(oldPrimaryFsck)) {
      LOG.warn("Failover: FSCK on old primary and new primary do not match");
      LOG.info("----- FSCK ----- OLD BEGIN");
      LOG.info("Failover: Old primary fsck: \n " + oldPrimaryFsck + "\n");
      LOG.info("----- FSCK ----- NEW BEGIN");
      LOG.info("Failover: New primary fsck: \n " + failoverFsck + "\n");
      LOG.info("----- FSCK ----- END");
    } else {
      LOG.info("Failover: Verified fsck.");
    }
  }
  currentAvatar = Avatar.ACTIVE;
  // from now on use the regular primary safe mode implementation
  confg.setClass("dfs.safemode.impl", NameNodeSafeModeInfo.class,
      SafeModeInfo.class);
}
/**
 * First phase of failover on the standby: consume outstanding
 * transactions up to the last transaction id the primary recorded in
 * zookeeper (unless noverification is set) and verify the result.
 * Leaves failoverState at AWAIT_FAILOVER on success (or on a
 * StandbyStateException), FAILED_FAILOVER on any other failure.
 * @param noverification skip the zookeeper txid check and verification
 * @throws IOException on verification failure or missing zookeeper data
 */
@Override
public synchronized void quiesceForFailover(boolean noverification)
    throws IOException {
  // check permissions before any other actions
  super.namesystem.checkSuperuserPrivilege();
  failoverState = FailoverState.START_FAILOVER;
  enforceStandby("Cannot quiesce primary");
  try {
    // Check to see if the primary is somehow checkpointing itself. If so,
    // then
    // refuse to switch to active mode. This check is not foolproof but is a
    // defensive mechanism to prevent administrator errors.
    // Fix: pass the modified configuration (confg) as the second argument,
    // consistent with every other AvatarNodeZkUtil call in this class;
    // the original passed startupConf twice.
    ZookeeperTxId zkTxId = AvatarNodeZkUtil.checkZooKeeperBeforeFailover(
        startupConf, confg, noverification);
    if (!noverification) {
      if (zkTxId == null) {
        throw new IOException(
            "Could not receive last transaction id from zookeeper");
      }
      standby.quiesce(zkTxId.getTransactionId());
    } else {
      standby.quiesce(FSEditLogLoader.TXID_IGNORE);
    }
    if (!noverification) {
      verifyTransactionIds(zkTxId);
      verifyFailoverTestData();
    }
    failoverState = FailoverState.AWAIT_FAILOVER;
  } catch (StandbyStateException se) {
    // state mismatches still allow the failover to be attempted
    failoverState = FailoverState.AWAIT_FAILOVER;
    throw se;
  } finally {
    if (failoverState != FailoverState.AWAIT_FAILOVER) {
      failoverState = FailoverState.FAILED_FAILOVER;
    }
  }
}
/**
 * Switches this node's avatar (standby to active) without forcing.
 * @deprecated use {@link #quiesceForFailover(boolean)} followed by
 *             {@link #performFailover()} instead
 */
@Override
public synchronized void setAvatar(Avatar avatar) throws IOException {
  setAvatar(avatar, false);
}
/**
 * Switches this node's avatar by quiescing and then performing the
 * failover. No-op when already in the requested avatar.
 * @param avatar the target avatar
 * @param force skip zookeeper txid verification while quiescing
 * @deprecated use {@link #quiesceForFailover(boolean)} followed by
 *             {@link #performFailover()} instead
 */
@Override
public synchronized void setAvatar(Avatar avatar, boolean force)
    throws IOException {
  try {
    if (avatar == currentAvatar) {
      LOG.info("Failover: Trying to change avatar to " + avatar +
          " but am already in that state.");
      return;
    }
    quiesceForFailover(force);
    performFailover();
  } catch (IOException e) {
    LOG.fatal("Exception when setting avatar", e);
    throw e;
  }
}
/*
* As the AvatarNode is running in Standby mode it fills up
* invalidates queues for each datanode with blocks it
* assumes have to be deleted. This information is not
* entirely accurate and fills up memory as well as leads
* to dataloss since those queues are flushed to the datanodes
* on failover and valid blocks may be deleted.
*
* To help prevent filling up the memory we clear these queues
* periodically. And we do a final cleanup just before switching
* to primary.
*
* Also, we make sure that the replication queues are cleaned
* periodically. They are never processed at standby, and grow
* indefinitely.
*/
/**
 * Periodic task that purges the invalidate and replication queues while
 * this node runs as the standby (they are inaccurate there and would
 * grow without bound — see the comment block preceding this class in
 * the original source file's history of the standby design).
 */
private class Cleaner implements Runnable {
  // cleared by stop(); volatile so the worker thread observes it promptly
  volatile boolean running = true;

  @Override
  public void run() {
    LOG.info("Starting Standby Cleaner thread");
    while (true) {
      if (!running) {
        return;
      }
      clearInvalidateAndReplicationQueues();
      try {
        Thread.sleep(INVALIDATES_CLEANUP_INTERVAL);
      } catch (InterruptedException iex) {
        if (!running) {
          return;
        }
        // not asked to stop: restore the interrupt flag and continue
        Thread.currentThread().interrupt();
      }
    }
  }

  public void stop() {
    running = false;
  }
}
/**
 * Clears the per-datanode invalidate queues and the replication queues
 * accumulated while in standby. Only legal while in safe mode; any
 * failure is logged and counted in metrics but never propagated, so
 * the Cleaner keeps running.
 */
private void clearInvalidateAndReplicationQueues() {
  try {
    LOG.info("Standby Cleaner: cleaning queues");
    // queues may only be purged under safe mode, otherwise valid
    // deletions could be lost
    if(!namesystem.isInSafeMode()) {
      throw new IOException("Avatar is not in safemode");
    }
    DatanodeInfo[] nodes;
    super.namesystem.readLock();
    try {
      nodes = super.namesystem.getDatanodes(DatanodeReportType.ALL);
    } finally {
      super.namesystem.readUnlock();
    }
    super.namesystem.writeLock();
    try {
      for (DatanodeInfo node : nodes) {
        super.namesystem.removeFromInvalidates(node.getStorageID());
      }
    } finally {
      super.namesystem.writeUnlock();
    }
    super.namesystem.clearReplicationQueues();
  } catch (Exception e) {
    metrics.numCleanerThreadExceptions.inc();
    LOG.error("Standby Cleaner : exception when cleaning " +
        "replication queues", e);
  }
}
/**
 * Decides whether datanode traffic (heartbeats, block reports) should be
 * answered with a back-off. True only on the standby, when the standby
 * ingestion is missing or has fallen behind (or a test injection forces
 * the condition). The decision is also mirrored into metrics.
 */
private boolean ignoreDatanodes() {
  boolean ignore = false;
  if (currentAvatar == Avatar.STANDBY) {
    // Keep the original short-circuit order: only consult the standby /
    // injection handler when actually in standby mode.
    ignore = standby == null
        || standby.fellBehind()
        || InjectionHandler
            .falseCondition(InjectionEvent.STANDBY_FELL_BEHIND);
  }
  metrics.ignoreDataNodes.set(ignore ? 1 : 0);
  return ignore;
}
/**
 * Datanode RPC: the datanode acknowledges that it has cleared its
 * primary-namenode state. Forwarded to the standby safemode tracker when
 * one exists.
 */
@Override
public void primaryCleared(DatanodeRegistration registration) {
  LOG.info("Received primaryCleared() from : " + registration);
  if (standbySafeMode == null) {
    return;
  }
  standbySafeMode.reportPrimaryCleared(registration);
}
/**
 * Datanode registration. Delegates to the NameNode implementation and, when
 * running with standby safemode, additionally records the registration so
 * safemode can track which datanodes have checked in.
 */
@Override
public DatanodeRegistration register(DatanodeRegistration nodeReg)
    throws IOException {
  DatanodeRegistration result = super.register(nodeReg);
  if (standbySafeMode != null) {
    standbySafeMode.reportRegister(nodeReg);
  }
  return result;
}
/**
 * Heartbeat handler. Delegates to the NameNode heartbeat processing and
 * then appends at most one avatar-specific command, in priority order:
 * CLEARPRIMARY (standby safemode wants the datanode to drop primary
 * state), BACKOFF (standby ingestion fell behind), or PREPAREFAILOVER.
 */
public DatanodeCommand[] sendHeartbeatNew(DatanodeRegistration registration,
    long capacity,
    long dfsUsed, long remaining,
    long namespaceUsed,
    int xmitsInProgress,
    int xceiverCount) throws IOException {
  DatanodeCommand[] cmds = super.sendHeartbeat(registration, capacity,
      dfsUsed, remaining, namespaceUsed, xmitsInProgress, xceiverCount);
  boolean clearPrimary = standbySafeMode != null
      && standbySafeMode.reportHeartBeat(registration);
  if (clearPrimary) {
    LOG.info("Sending Clear Primary command to : " + registration);
    cmds = addCommand(cmds, AvatarDatanodeCommand.CLEARPRIMARY);
  } else if (ignoreDatanodes()) {
    cmds = addCommand(cmds, AvatarDatanodeCommand.BACKOFF);
  } else if (standbySafeMode != null
      && standbySafeMode.getPrepareFailover()) {
    cmds = addCommand(cmds, AvatarDatanodeCommand.PREPAREFAILOVER);
  }
  return cmds;
}
/**
 * Returns a new array consisting of {@code cmds} (may be null, treated as
 * empty) with {@code toAdd} appended at the end. Never mutates the input.
 */
private DatanodeCommand[] addCommand(DatanodeCommand[] cmds,
    DatanodeCommand toAdd) {
  if (cmds == null) {
    return new DatanodeCommand[] { toAdd };
  }
  DatanodeCommand[] extended = new DatanodeCommand[cmds.length + 1];
  System.arraycopy(cmds, 0, extended, 0, cmds.length);
  extended[cmds.length] = toAdd;
  return extended;
}
/**
 * Determines whether or not the datanode should retry blocks if they are
 * not present in the blocks map. Retrying only makes sense on the standby,
 * which may simply not have consumed the corresponding edits yet.
 */
@Override
public boolean shouldRetryAbsentBlocks() {
  return currentAvatar == Avatar.STANDBY;
}
/**
 * Determines whether or not the given block should be retried by the
 * datanode if it is not present in the blocksMap.
 *
 * On the standby, a block that is unknown (or known with an older
 * generation stamp) may simply mean the corresponding file/block creation
 * edit has not been ingested yet, so the datanode should retry later.
 */
@Override
public boolean shouldRetryAbsentBlock(Block block, Block storedBlock) {
  if (currentAvatar != Avatar.STANDBY) {
    return false;
  }
  // Only retry when the standby may not yet have consumed the edit that
  // created this block: either blocks are not persisted in the edit log,
  // or the block's GS is at least as new as the namespace GS.
  boolean maybeUnconsumed = !namesystem.getPersistBlocks()
      || block.getGenerationStamp() >= namesystem.getGenerationStamp();
  if (!maybeUnconsumed) {
    return false;
  }
  // Retry if the block is unknown, or known only with an older GS.
  return storedBlock == null
      || block.getGenerationStamp() > storedBlock.getGenerationStamp();
}
/**
 * Full block report handler. The primary processes reports normally; the
 * standby processes with retries and answers only with DNA_RETRY (never
 * DNA_FINALIZE). When the standby has fallen behind, the datanode is told
 * to back off instead.
 */
public DatanodeCommand blockReportNew(DatanodeRegistration nodeReg, BlockReport rep) throws IOException {
  if (runInfo.shutdown || !runInfo.isRunning) {
    // Namenode shutting down: nothing to tell the datanode.
    return null;
  }
  if (ignoreDatanodes()) {
    // Do not process block reports yet as the ingest thread is catching up
    LOG.info("Standby fell behind. Telling " + nodeReg.toString() +
        " to back off");
    return AvatarDatanodeCommand.BACKOFF;
  }
  if (currentAvatar != Avatar.STANDBY) {
    // only the primary can send DNA_FINALIZE
    return super.blockReport(nodeReg, rep);
  }
  // standby should send only DNA_RETRY
  Collection<Block> failed = super.blockReportWithRetries(nodeReg, rep);
  Block[] retryBlocks = failed.toArray(new Block[failed.size()]);
  return new BlockCommand(DatanodeProtocols.DNA_RETRY, retryBlocks);
}
/**
 * Records incremental-block-report counters: how many blocks were reported
 * and how many the datanode was asked to retry. No-op when metrics are
 * not initialized.
 */
private void updateIBRMetrics(int reported, int retried) {
  if (metrics == null) {
    return;
  }
  metrics.numReportedBlocks.inc(reported);
  metrics.numRetryBlocks.inc(retried);
}
/**
 * Incremental block report handler (array form).
 *
 * On the standby, "received" requests for blocks that are not yet known to
 * the blocks map (edits possibly not ingested yet — see
 * {@link #shouldRetryAbsentBlock}) are removed from the array in place
 * (nulled out) and returned to the datanode so it retries them later.
 * Any later request for the same block id is also deferred, to preserve
 * per-block ordering. The primary processes everything directly.
 *
 * @return the blocks the datanode must re-report later (empty array when
 *         the namenode is not running or when acting as primary)
 */
public Block[] blockReceivedAndDeletedNew(DatanodeRegistration nodeReg,
    Block blocksReceivedAndDeleted[]) throws IOException {
  if (runInfo.shutdown || !runInfo.isRunning) {
    // Do not attempt to process blocks when
    // the namenode is not running
    return new ReceivedBlockInfo[0];
  }
  if (ignoreDatanodes()) {
    // Standby ingestion fell behind: defer the whole report.
    LOG.info("Standby fell behind. Telling " + nodeReg.toString() +
        " to retry incremental block report of " + blocksReceivedAndDeleted.length
        + " blocks later.");
    metrics.numIgnoredDatanodes.inc();
    return blocksReceivedAndDeleted;
  }
  List<Block> failed = new ArrayList<Block>();
  HashSet<Long> failedIds;
  if (currentAvatar == Avatar.STANDBY) {
    failedIds = new HashSet<Long>();
    namesystem.writeLock();
    try {
      for (int index = 0; index < blocksReceivedAndDeleted.length; index++) {
        Block blockRD = blocksReceivedAndDeleted[index];
        if (failedIds.contains(blockRD.getBlockId())) {
          // check if there was no other blocking failed request
          blocksReceivedAndDeleted[index] = null;
          failed.add(blockRD);
          continue;
        }
        BlockInfo storedBlock = namesystem.blocksMap.getStoredBlock(blockRD);
        if (!DFSUtil.isDeleted(blockRD)
            && shouldRetryAbsentBlock(blockRD, storedBlock)) {
          // If this block does not belong to anyfile and its GS
          // is no less than the avatar node's GS,
          // AvatarNode may not consume the file/block creation edit log yet,
          // so adding it to the failed list.
          // - do not process any requestes for blocks with the same block id
          // (also add them to the failed list.
          // - do not block other requests
          blocksReceivedAndDeleted[index] = null;
          failed.add(blockRD);
          failedIds.add(blockRD.getBlockId());
        }
      }
    } finally {
      namesystem.writeUnlock();
      // Logging is done outside the lock (but inside finally, so it always
      // accompanies the decision made above).
      if (!failed.isEmpty()) {
        LOG.info("*BLOCK* NameNode.blockReceivedAndDeleted: "
            + "from " + nodeReg.getName() + " has to retry "
            + failed.size() + " blocks.");
      }
      for (Block blockRD : failed) {
        LOG.info("blockReceivedDeleted " + (DFSUtil.isDeleted(blockRD) ? "DELETED" : "RECEIVED")
            + " request received for "
            + blockRD + " on " + nodeReg.getName() + " size "
            + blockRD.getNumBytes()
            + " But it does not belong to any file." + " Retry later.");
      }
    }
  }
  // Process the surviving (non-nulled) entries like a regular namenode.
  super.blockReceivedAndDeleted(nodeReg, blocksReceivedAndDeleted);
  updateIBRMetrics(blocksReceivedAndDeleted.length, failed.size());
  return failed.toArray(new Block[failed.size()]);
}
/**
 * Incremental block report handler (compact {@code IncrementalBlockReport}
 * form). Mirrors the array-based overload above, but instead of nulling
 * array entries it marks deferred blocks with {@code BlockFlags.IGNORE}
 * in the report and records their positions in a bit set, which is
 * returned so the datanode knows which entries to retry.
 *
 * @return bit set (as long[]) of report positions to retry; null when the
 *         node is the primary and nothing was deferred
 */
public long[] blockReceivedAndDeletedNew(DatanodeRegistration nodeReg,
    IncrementalBlockReport receivedAndDeletedBlocks) throws IOException {
  InjectionHandler
      .processEvent(InjectionEvent.AVATARNODE_BLOCKRECEIVED_AND_DELETED_NEW);
  long[] failedMap = null;
  if (runInfo.shutdown || !runInfo.isRunning) {
    // Do not attempt to process blocks when
    // the namenode is not running
    if (currentAvatar == Avatar.STANDBY) {
      return new long[0];
    } else {
      return null;
    }
  }
  HashSet<Long> failedIds;
  if (currentAvatar == Avatar.STANDBY) {
    int noAck = receivedAndDeletedBlocks.getLength();
    // retry all block if the standby is behind consuming edits
    if (ignoreDatanodes()) {
      LOG.info("Standby fell behind. Telling " + nodeReg.toString() +
          " to retry incremental block report of " + noAck
          + " blocks later.");
      failedMap = LightWeightBitSet.getBitSet(noAck);
      for (int i = 0; i < noAck; i++)
        LightWeightBitSet.set(failedMap, i);
      metrics.numIgnoredDatanodes.inc();
      return failedMap;
    }
    // Reusable scratch block: getNext() fills it in for each entry.
    Block blockRD = new Block();
    failedIds = new HashSet<Long>();
    failedMap = LightWeightBitSet.getBitSet(noAck);
    namesystem.writeLock();
    try {
      receivedAndDeletedBlocks.resetIterator();
      for (int currentBlock = 0; currentBlock < noAck; currentBlock++) {
        receivedAndDeletedBlocks.getNext(blockRD);
        if (failedIds.contains(blockRD.getBlockId())) {
          // check if there was no other blocking failed request
          blockRD.setNumBytes(BlockFlags.IGNORE);
          receivedAndDeletedBlocks.setBlock(blockRD, currentBlock);
          LightWeightBitSet.set(failedMap, currentBlock);
          continue;
        }
        BlockInfo storedBlock = namesystem.blocksMap.getStoredBlock(blockRD);
        if ((!DFSUtil.isDeleted(blockRD)
            && shouldRetryAbsentBlock(blockRD, storedBlock))) {
          // If this block does not belong to anyfile and its GS
          // is no less than the avatar node's GS,
          // AvatarNode may not consume the file/block creation edit log yet,
          // so adding it to the failed list. Also, if the stored block GS is
          // less than the reported GS then we need to retry the block.
          // - do not process any requestes for blocks with the same block id
          // (also add them to the failed list.
          // - do not block other requests
          blockRD.setNumBytes(BlockFlags.IGNORE);
          receivedAndDeletedBlocks.setBlock(blockRD, currentBlock);
          LightWeightBitSet.set(failedMap, currentBlock);
          failedIds.add(blockRD.getBlockId());
        }
      }
    } finally {
      namesystem.writeUnlock();
      // Log the deferral decisions after releasing the lock.
      if (failedMap != null && LightWeightBitSet.cardinality(failedMap) != 0) {
        LOG.info("*BLOCK* NameNode.blockReceivedAndDeleted: "
            + "from " + nodeReg.getName() + " has to retry "
            + LightWeightBitSet.cardinality(failedMap) + " blocks.");
      }
      // Second pass over the report purely for logging the deferred entries.
      receivedAndDeletedBlocks.resetIterator();
      for (int currentBlock = 0; currentBlock < noAck; currentBlock++) {
        receivedAndDeletedBlocks.getNext(blockRD);
        if (!LightWeightBitSet.get(failedMap, currentBlock))
          continue;
        LOG.info("blockReceivedDeleted " + (DFSUtil.isDeleted(blockRD) ? "DELETED" : "RECEIVED")
            + " request received for "
            + blockRD + " on " + nodeReg.getName() + " size "
            + blockRD.getNumBytes()
            + " But it does not belong to any file." + " Retry later.");
      }
    }
  }
  // Entries flagged IGNORE above are skipped by the regular processing.
  super.blockReceivedAndDeleted(nodeReg, receivedAndDeletedBlocks);
  updateIBRMetrics(receivedAndDeletedBlocks.getLength(),
      failedMap != null ? LightWeightBitSet.cardinality(failedMap) : 0);
  return failedMap;
}
/**
 * Rolls the edit log on behalf of the checkpointing standby. Allowed only
 * on the active avatar, and only when the caller is the configured
 * checkpointer.
 */
public CheckpointSignature rollEditLog() throws IOException {
  final String err = "Cannot roll edit log on standby";
  enforceActive(err);
  verifyCheckpointerAddress();
  return super.rollEditLog();
}
/**
 * Rolls the edit log in response to a manual dfsadmin request. Allowed
 * only on the active avatar; unlike {@code rollEditLog()}, the caller's
 * address is not verified since this is an administrative call.
 */
@Override
public void rollEditLogAdmin() throws IOException {
  final String err = "Cannot roll edit log on standby";
  enforceActive(err);
  super.rollEditLog();
}
/**
 * Rolls the fsimage after a checkpoint upload. Allowed only on the active
 * avatar, and only from the configured checkpointer address.
 */
public void rollFsImage(CheckpointSignature newImageSignature) throws IOException {
  final String err = "Cannot roll image on standby";
  enforceActive(err);
  verifyCheckpointerAddress();
  super.rollFsImage(newImageSignature);
}
/** Saves the namespace with default options (no force, compressed). */
@Override
public void saveNamespace() throws IOException {
  final boolean force = false;
  final boolean uncompressed = false;
  this.saveNamespace(force, uncompressed);
}
/**
 * Saves the namespace. On the active avatar this is the regular save; on
 * the standby it is delegated to the checkpointing machinery.
 */
@Override
public void saveNamespace(boolean force, boolean uncompressed)
    throws IOException {
  // A manually triggered save supersedes any earlier cancellation request.
  namesystem.clearCancelSaveNamespace();
  if (currentAvatar != Avatar.ACTIVE) {
    // STANDBY: trigger a checkpoint instead. "force" is unnecessary here
    // because the standby is always in safemode.
    // NOTE(review): assumes 'standby' is non-null whenever we are not
    // active — confirm against the initialization/failover order.
    standby.triggerCheckpoint(uncompressed);
  } else {
    // ACTIVE: regular saving process.
    super.saveNamespace(force, uncompressed);
  }
}
/**
 * Safemode control. All standard actions are handled by the NameNode;
 * the avatar-specific SAFEMODE_PREP_FAILOVER action additionally stops
 * checkpointing on the standby and tells standby safemode to prepare for
 * failover.
 *
 * @return whether the namesystem is in safemode after the action
 */
@Override
public boolean setSafeMode(SafeModeAction action) throws IOException {
  if (action != SafeModeAction.SAFEMODE_PREP_FAILOVER) {
    return super.setSafeMode(action);
  }
  // Failover preparation: only the standby has work to do; for the active
  // avatar, for now, we do not take any actions.
  if (currentAvatar != Avatar.ACTIVE) {
    if (standby != null) {
      standby.disableCheckpoint();
      namesystem.cancelSaveNamespace("Prepare for failover");
    }
    if (standbySafeMode != null) {
      // inform the safemode to prepare failover
      standbySafeMode.setPrepareFailover(true);
    }
  }
  return namesystem.isInSafeMode();
}
/**
 * Fails with an IOException carrying {@code msg} when this node is the
 * standby avatar; no-op otherwise.
 */
void enforceActive(String msg) throws IOException {
  if (currentAvatar == Avatar.STANDBY) {
    throw new IOException(msg);
  }
}
/**
 * Fails with an IOException carrying {@code msg} unless this node is the
 * standby avatar.
 */
private void enforceStandby(String msg) throws IOException {
  if (currentAvatar != Avatar.STANDBY) {
    throw new IOException(msg);
  }
}
/**
 * Handshake used by the standby to register with this primary.
 * Verifies that we are active and that the caller is the configured
 * checkpointer.
 *
 * @return the primary's data transfer protocol version
 */
@Override
public int register() throws IOException {
  final String err = "Standby can only register with active namenode";
  enforceActive(err);
  verifyCheckpointerAddress();
  return DataTransferProtocol.DATA_TRANSFER_VERSION;
}
/**
 * Verifies that the calling checkpointer is the configured remote
 * namenode for this instance; throws otherwise.
 */
private void verifyCheckpointerAddress() throws IOException {
  InetSocketAddress remote = getRemoteNamenodeAddress(getConf(), instance);
  validateCheckpointerAddress(remote.getAddress());
}
/**
 * Returns the hostname:port for the AvatarNode. The default port for the
 * AvatarNode is the client RPC port of the underlying namenode plus one,
 * unless overridden by {@code dfs.avatarnode.port}.
 */
public static InetSocketAddress getAddress(Configuration conf) {
  InetSocketAddress clientAddr = NameNode.getClientProtocolAddress(conf);
  int defaultPort = clientAddr.getPort() + 1;
  int port = conf.getInt(AvatarNode.DFS_AVATARNODE_PORT_KEY, defaultPort);
  return new InetSocketAddress(clientAddr.getAddress(), port);
}
/**
 * Prints the command-line usage message to stderr.
 */
private static void printUsage() {
  StartupOption[] options = {
      StartupOption.STANDBY, StartupOption.NODEZERO, StartupOption.NODEONE,
      StartupOption.FORMAT, StartupOption.UPGRADE, StartupOption.ROLLBACK,
      StartupOption.FINALIZE, StartupOption.IMPORT };
  StringBuilder usage = new StringBuilder("Usage: java AvatarNode ");
  for (int i = 0; i < options.length; i++) {
    if (i > 0) {
      usage.append(" | ");
    }
    usage.append('[').append(options[i].getName()).append(']');
  }
  System.err.println(usage.toString());
}
/**
 * Validates the combination of command line arguments: a standby cannot be
 * started with format, finalize, rollback or upgrade.
 */
static void validateStartupOptions(StartupInfo startInfo) throws IOException {
  if (!startInfo.isStandby) {
    return;
  }
  StartupOption opt = startInfo.startOpt;
  if (opt == StartupOption.FORMAT
      || opt == StartupOption.FINALIZE
      || opt == StartupOption.ROLLBACK
      || opt == StartupOption.UPGRADE) {
    throw new IOException("Standby avatar node cannot be started with " +
        startInfo.startOpt + " option.");
  }
}
/**
 * Immutable-by-convention holder for the parsed command line: the namenode
 * startup option, which of the two avatar instances this is, whether it
 * starts as standby, the (optional) federation service name, and whether
 * startup was forced.
 */
static class StartupInfo {
  StartupOption startOpt;   // namenode startup mode (REGULAR, FORMAT, ...)
  InstanceId instance;      // NODEZERO or NODEONE
  boolean isStandby;        // true when starting in standby mode
  String serviceName;       // federation nameservice id, or null
  boolean forceStartup;     // true when -force was given
  public StartupInfo(StartupOption startOpt, InstanceId instance,
      boolean isStandby, String serviceName, boolean forceStartup) {
    this.startOpt = startOpt;
    this.instance = instance;
    this.isStandby = isStandby;
    this.serviceName = serviceName;
    this.forceStartup = forceStartup;
  }
}
/**
 * Parses the command line into a {@link StartupInfo}.
 *
 * @return the parsed startup info, or null when an argument is
 *         unrecognized or a required value (for -service) is missing
 */
private static StartupInfo parseArguments(String args[]) {
  InstanceId instanceId = InstanceId.NODEZERO;
  StartupOption option = StartupOption.REGULAR;
  boolean standby = false;
  String service = null;
  boolean force = false;
  int argCount = (args == null) ? 0 : args.length;
  for (int i = 0; i < argCount; i++) {
    String arg = args[i];
    if (StartupOption.SERVICE.getName().equalsIgnoreCase(arg)) {
      // -service consumes the following argument as the service name.
      if (++i >= argCount) {
        return null;
      }
      service = args[i];
    } else if (StartupOption.STANDBY.getName().equalsIgnoreCase(arg)) {
      standby = true;
    } else if (StartupOption.NODEZERO.getName().equalsIgnoreCase(arg)) {
      instanceId = InstanceId.NODEZERO;
    } else if (StartupOption.NODEONE.getName().equalsIgnoreCase(arg)) {
      instanceId = InstanceId.NODEONE;
    } else if (StartupOption.FORMAT.getName().equalsIgnoreCase(arg)) {
      option = StartupOption.FORMAT;
    } else if (StartupOption.FORMATFORCE.getName().equalsIgnoreCase(arg)) {
      option = StartupOption.FORMATFORCE;
    } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(arg)) {
      option = StartupOption.REGULAR;
    } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(arg)) {
      option = StartupOption.UPGRADE;
    } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(arg)) {
      option = StartupOption.ROLLBACK;
    } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(arg)) {
      option = StartupOption.FINALIZE;
    } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(arg)) {
      option = StartupOption.IMPORT;
    } else if (StartupOption.FORCE.getName().equalsIgnoreCase(arg)) {
      force = true;
    } else {
      // Unknown argument: signal a usage error.
      return null;
    }
  }
  return new StartupInfo(option, instanceId, standby, service, force);
}
/**
 * Records the startup command in the configuration so the underlying
 * NameNode startup path can read it.
 */
private static void setStartupOption(Configuration conf, StartupOption opt) {
  final String key = "dfs.namenode.startup";
  conf.set(key, opt.toString());
}
/**
 * Convenience overload: creates an AvatarNode with a fresh RunInfo.
 */
public static AvatarNode createAvatarNode(String argv[],
    Configuration conf) throws IOException {
  RunInfo freshRunInfo = new RunInfo();
  return createAvatarNode(argv, conf, freshRunInfo);
}
/**
 * HDFS federation configuration that is specific to a name service.
 * These keys are suffixed with the nameserviceId in the configuration,
 * for example "dfs.namenode.rpc-address.nameservice1".
 *
 * Following are nameservice specific keys.
 */
// Port of the AvatarNode RPC server (defaults to client RPC port + 1).
final private static String DFS_AVATARNODE_PORT_KEY = "dfs.avatarnode.port";
// Shared (remote) fsimage directories for instance zero / one.
final private static String DFS_SHARED_NAME_DIR0_KEY = "dfs.name.dir.shared0";
final private static String DFS_SHARED_NAME_DIR1_KEY = "dfs.name.dir.shared1";
// Shared (remote) edits directories for instance zero / one.
final public static String DFS_SHARED_EDITS_DIR0_KEY =
    "dfs.name.edits.dir.shared0";
final public static String DFS_SHARED_EDITS_DIR1_KEY =
    "dfs.name.edits.dir.shared1";
// Suffixes that distinguish the two avatar instances in config keys.
final private static String ZERO = "0";
final private static String ONE = "1";
// Per-instance client RPC address keys (base key + instance suffix).
final public static String DFS_NAMENODE_RPC_ADDRESS0_KEY =
    DFS_NAMENODE_RPC_ADDRESS_KEY+ZERO;
final public static String DFS_NAMENODE_RPC_ADDRESS1_KEY =
    DFS_NAMENODE_RPC_ADDRESS_KEY+ONE;
// All avatar-specific keys that get a nameservice suffix under federation;
// see initializeGenericKeys().
public static final String[] AVATARSERVICE_SPECIFIC_KEYS = {
    DFS_AVATARNODE_PORT_KEY,
    DFS_NAMENODE_RPC_ADDRESS0_KEY,
    DFS_NAMENODE_RPC_ADDRESS1_KEY,
    DATANODE_PROTOCOL_ADDRESS+ZERO,
    DATANODE_PROTOCOL_ADDRESS+ONE,
    DFS_NAMENODE_HTTP_ADDRESS_KEY+ZERO,
    DFS_NAMENODE_HTTP_ADDRESS_KEY+ONE,
    DFS_SHARED_NAME_DIR0_KEY,
    DFS_SHARED_NAME_DIR1_KEY,
    DFS_SHARED_EDITS_DIR0_KEY,
    DFS_SHARED_EDITS_DIR1_KEY,
};
/**
 * In federation, configuration is set for a set of avatarnodes, namenodes
 * etc., which are grouped under a logical nameservice ID. The keys
 * specific to them carry the configured nameserviceId as a suffix.
 *
 * This method copies the value from the specific key of the form
 * key.nameserviceId to the plain key, so the rest of the code can read
 * only the generic version — for backward compatibility and simpler code.
 *
 * @param conf configuration to read the suffixed keys from and write the
 *             generic keys to; note the conf object is modified
 * @param serviceKey the nameservice id; null/empty means non-federated
 * @see DFSUtil#setGenericConf(Configuration, String, String...)
 */
public static void initializeGenericKeys(Configuration conf, String serviceKey) {
  if (serviceKey == null || serviceKey.isEmpty()) {
    // Non-federated setup: nothing to translate.
    return;
  }
  NameNode.initializeGenericKeys(conf, serviceKey);
  DFSUtil.setGenericConf(conf, serviceKey, AVATARSERVICE_SPECIFIC_KEYS);
  // adjust meta directory names for this service
  adjustMetaDirectoryNames(conf, serviceKey);
}
/** Append service name to each avatar meta directory name
 *
 * @param conf configuration of NameNode
 * @param serviceKey the non-empty name of the name node service
 */
public static void adjustMetaDirectoryNames(Configuration conf, String serviceKey) {
  // Same adjustment for every shared image/edits directory key.
  String[] sharedDirKeys = {
      DFS_SHARED_NAME_DIR0_KEY, DFS_SHARED_NAME_DIR1_KEY,
      DFS_SHARED_EDITS_DIR0_KEY, DFS_SHARED_EDITS_DIR1_KEY };
  for (String dirKey : sharedDirKeys) {
    adjustMetaDirectoryName(conf, dirKey, serviceKey);
  }
}
/**
 * Online reconfiguration hook. Each avatar instance is allowed to update
 * the RPC address of its peer instance (node zero may change node one's
 * address and vice versa); everything else is delegated to the NameNode.
 */
@Override
public void reconfigurePropertyImpl(String property, String newVal) throws ReconfigurationException {
  String peerRpcKey = "";
  switch (instance) {
  case NODEZERO:
    peerRpcKey = DFS_NAMENODE_RPC_ADDRESS1_KEY;
    break;
  case NODEONE:
    peerRpcKey = DFS_NAMENODE_RPC_ADDRESS0_KEY;
    break;
  }
  if (property.equals(peerRpcKey)) {
    getConf().set(property, newVal);
    return;
  }
  super.reconfigurePropertyImpl(property, newVal);
}
/**
 * Lists the properties that may be reconfigured at runtime: the NameNode's
 * set plus the RPC address key of this instance's peer.
 */
@Override
public List<String> getReconfigurableProperties() {
  List<String> props = super.getReconfigurableProperties();
  switch (instance) {
  case NODEZERO:
    props.add(DFS_NAMENODE_RPC_ADDRESS1_KEY);
    break;
  case NODEONE:
    props.add(DFS_NAMENODE_RPC_ADDRESS0_KEY);
    break;
  }
  return props;
}
/**
 * Tries to bind to the address registered in ZooKeeper; this will always
 * fail (with a BindException, an IOException subclass) if the primary is
 * alive, whether on this machine or a remote one.
 *
 * @param zkRegistry "host:port" string read from ZooKeeper
 * @throws IOException if the bind fails, i.e. the primary appears alive
 * @throws IllegalArgumentException if the registry string is malformed
 */
private static void isPrimaryAlive(String zkRegistry) throws IOException {
  String parts[] = zkRegistry.split(":");
  if (parts.length != 2) {
    throw new IllegalArgumentException("Invalid Address : " + zkRegistry);
  }
  String host = parts[0];
  int port = Integer.parseInt(parts[1]);
  InetSocketAddress clientSocket = new InetSocketAddress(host, port);
  ServerSocket socket = new ServerSocket();
  try {
    socket.bind(clientSocket);
  } finally {
    // Always release the probe socket, even when bind() throws —
    // the original leaked the ServerSocket on the failure path.
    socket.close();
  }
}
/**
 * Logs {@code message} and aborts startup by throwing it as an
 * IOException. Logging first ensures the reason is captured even if the
 * exception is later swallowed.
 */
private static void failStartup(String message) throws IOException {
  LOG.error(message);
  throw new IOException(message);
}
/**
 * Reads the directory list under {@code dirKey} and substitutes the
 * instance wildcard ('%') with the given instance name ("zero"/"one").
 */
private static String getWildcardDir(String instance, Configuration conf,
    String dirKey) {
  String configured = conf.get(dirKey);
  return configured.replaceAll(
      FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD, instance);
}
/**
 * When avatarone and avatarzero both write to the filer in the case where
 * the filer is not the shared directory (when we use QJM), the two nodes
 * must write to different directories. For this purpose dfs.name.dir and
 * dfs.name.edits.dir may be configured with a wildcard, e.g.:
 *
 * &lt;property&gt;
 *   &lt;name&gt;dfs.name.dir&lt;/name&gt;
 *   &lt;value&gt;/hadoop/&lt;cluster&gt;/,/mnt/fsimage/&lt;cluster&gt;/%&lt;/value&gt;
 * &lt;/property&gt;
 *
 * Depending on the instance, '%' is replaced here with "zero" or "one" so
 * the two nodes end up with distinct locations.
 */
private static void processNameDirectories(Configuration conf,
    InstanceId instanceId) {
  if (instanceId != InstanceId.NODEONE && instanceId != InstanceId.NODEZERO) {
    return;
  }
  String instance = (instanceId == InstanceId.NODEZERO) ? "zero" : "one";
  // Edits directory.
  conf.set(FSConstants.DFS_NAMENODE_EDITS_DIR_KEY,
      getWildcardDir(instance, conf, FSConstants.DFS_NAMENODE_EDITS_DIR_KEY));
  // Image directory.
  conf.set(FSConstants.DFS_NAMENODE_NAME_DIR_KEY,
      getWildcardDir(instance, conf, FSConstants.DFS_NAMENODE_NAME_DIR_KEY));
}
/**
 * Parses the command line, validates the node's role against ZooKeeper,
 * performs any requested maintenance action (format/finalize), prepares
 * storage and finally constructs the AvatarNode.
 *
 * @param argv command line arguments
 * @param conf configuration (a fresh one is created when null)
 * @param runInfo shared run-state flags
 * @return the started AvatarNode, or null on usage errors / maintenance
 *         commands that do not start a node
 * @throws IOException when validation against ZooKeeper or the primary
 *         fails, or storage setup fails
 */
public static AvatarNode createAvatarNode(String argv[],
    Configuration conf,
    RunInfo runInfo) throws IOException {
  if (conf == null) {
    conf = new Configuration();
  }
  Configuration startupConf = conf; // save configuration at startup
  StartupInfo startInfo = parseArguments(argv);
  if (startInfo == null) {
    // BUGFIX: parseArguments returns null on unrecognized arguments; the
    // original dereferenced startInfo before its null check, so a bad
    // command line caused an NPE instead of the usage message.
    printUsage();
    return null;
  }
  StartupOption startOpt = startInfo.startOpt;
  if (startInfo.instance != null) {
    processNameDirectories(conf, startInfo.instance);
    processNameDirectories(startupConf, startInfo.instance);
  }
  if (!validateServiceName(conf, startInfo.serviceName)) {
    return null;
  }
  initializeGenericKeys(conf, startInfo.serviceName);
  setStartupOption(conf, startOpt);
  conf = updateAddressConf(conf, startInfo.instance);
  NameNode.setupDefaultURI(conf);
  // sync cannot be specified along with format or finalize
  validateStartupOptions(startInfo);
  // We need to check the zookeeper so that the node starting as active
  // is the one registered with the zookeeper
  // and if the node is starting as standby there has to be a master
  // already so that the node doesn't move the log and the image
  String fsname = startupConf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY);
  String actualName = conf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY);
  String errorMsg = null;
  String zkRegistry = AvatarNodeZkUtil.getPrimaryRegistration(startupConf, startupConf, fsname);
  if (zkRegistry == null) {
    // The registry is empty. Usually this means failover is in progress
    // we need to manually fix it before starting primary
    if (!startInfo.forceStartup) {
      errorMsg = "A zNode that indicates the primary is empty. "
          + "AvatarNode can only be started as primary if it "
          + "is registered as primary with ZooKeeper";
      failStartup(errorMsg);
    }
  } else {
    if (!zkRegistry.equalsIgnoreCase(actualName) && !startInfo.isStandby) {
      errorMsg = "Registration information in ZooKeeper doesn't "
          + "match the address of this node. AvatarNode can "
          + "only be started as primary if it is registered as "
          + "primary with ZooKeeper. zkRegistry = " + zkRegistry
          + ", actual name = " + actualName;
      failStartup(errorMsg);
    }
  }
  if (!startInfo.isStandby && !startInfo.forceStartup) {
    // Bind to the registered address: fails if a live primary holds it.
    isPrimaryAlive(zkRegistry);
  }
  long ssid = 0;
  // We are the primary avatar, write session Id to ZK.
  if (!startInfo.isStandby) {
    ssid = AvatarNodeZkUtil.writeSessionIdToZK(startupConf);
  }
  // setup rpc proxy if we are starting as standby
  NamenodeProtocol primaryNamenode = null;
  InetSocketAddress nameNodeAddr = null;
  if (startInfo.isStandby) {
    nameNodeAddr = getRemoteNamenodeAddress(conf, startInfo.instance);
    FLOG.info("Connecting to the primary namenode: " + nameNodeAddr);
    primaryNamenode = (NamenodeProtocol) RPC.waitForProxy(
        NamenodeProtocol.class, NamenodeProtocol.versionID, nameNodeAddr,
        conf);
    // make sure we can talk to this primary
    int primaryDataTransferVersion = primaryNamenode.register();
    // check if we have the same data transfer version as primary
    InjectionHandler.processEventIO(
        InjectionEvent.AVATARNODE_RECEIVED_DATA_TRANSFER_VERSION, primaryDataTransferVersion);
    int standbyDataTransferVersion = DataTransferProtocol.DATA_TRANSFER_VERSION;
    if (standbyDataTransferVersion != primaryDataTransferVersion && !startInfo.forceStartup) {
      throw new IncorrectVersionException(
          primaryDataTransferVersion, "data transfer", standbyDataTransferVersion);
    }
  }
  conf = setupAvatarNodeStorage(conf, startInfo, primaryNamenode);
  // namenode maintenance options; FORMAT and FINALIZE exit the JVM, so
  // the apparent fall-throughs below are unreachable.
  switch (startOpt) {
  case FORMAT:
    boolean aborted = format(conf, false, true);
    System.exit(aborted ? 1 : 0);
  case FORMATFORCE:
    aborted = format(conf, true, false);
    return null;
  case FINALIZE:
    aborted = finalize(conf, true);
    System.exit(aborted ? 1 : 0);
  default:
  }
  // We need to put the Namenode into safemode as soon as it starts up.
  // There is a race condition, where before the Standby AvatarNode can put
  // the NameNode into safemode, the NameNode might leave safemode. This could
  // occur in the case of a start where the FSImage and FSEdits are empty
  // and hence the NameNode doesn't wait at all in safemode.
  if (startInfo.isStandby) {
    conf.setClass("dfs.safemode.impl", StandbySafeMode.class,
        SafeModeInfo.class);
  }
  // set persisting blocks to be true
  conf.setBoolean("dfs.persist.blocks", true);
  return new AvatarNode(startupConf, conf,
      startInfo, runInfo, ssid, nameNodeAddr, primaryNamenode);
}
/**
 * Returns true when the URI refers to local file storage (scheme "file").
 *
 * Hardened against URIs without a scheme (relative paths), which made the
 * original NPE on {@code uri.getScheme()}. Scheme comparison is
 * case-insensitive, as URI schemes are per RFC 3986.
 */
private static boolean isFile(URI uri) throws IOException {
  String scheme = uri.getScheme();
  return scheme != null
      && scheme.equalsIgnoreCase(JournalType.FILE.name());
}
/**
 * Return the configuration that should be used by this instance of
 * AvatarNode, with dfs.name.dir / dfs.name.edits.dir rearranged so the
 * shared device comes first. For a standby start, additionally formats a
 * temporary storage and downloads the latest checkpoint image from the
 * primary into all storage directories.
 */
static Configuration setupAvatarNodeStorage(Configuration conf,
    StartupInfo startInfo, NamenodeProtocol primaryNamenode)
    throws IOException {
  // shared locations for image and edits
  URI img0 = NNStorageConfiguration.getURIKey(conf, "dfs.name.dir.shared0");
  URI img1 = NNStorageConfiguration.getURIKey(conf, "dfs.name.dir.shared1");
  URI edit0 = NNStorageConfiguration.getURIKey(conf, "dfs.name.edits.dir.shared0");
  URI edit1 = NNStorageConfiguration.getURIKey(conf, "dfs.name.edits.dir.shared1");
  // local locations for image and edits
  Collection<URI> namedirs = NNStorageConfiguration.getNamespaceDirs(conf, null);
  Collection<URI> editsdir = NNStorageConfiguration.getNamespaceEditsDirs(conf, null);
  // validate correctness of the configuration
  AvatarStorageSetup.validate(conf,
      namedirs, editsdir,
      img0, img1, edit0, edit1);
  FileSystem localFs = FileSystem.getLocal(conf).getRaw();
  URI ownSharedImage = null;
  URI ownSharedEdits = null;
  // Pick this instance's own shared image/edits locations.
  if (startInfo.instance == InstanceId.NODEONE) {
    ownSharedImage = img1;
    ownSharedEdits = edit1;
  } else if (startInfo.instance == InstanceId.NODEZERO) {
    ownSharedImage = img0;
    ownSharedEdits = edit0;
  }
  // allocate a new configuration and update fs.name.dir appropriately
  // The shared device should be the first in the list.
  Configuration newconf = new Configuration(conf);
  AvatarStorageSetup.updateConf(startInfo, newconf, namedirs, img0, img1,
      "dfs.name.dir");
  // update fs.name.edits.dir appropriately in the new configuration
  // The shared device should be the first in the list.
  AvatarStorageSetup.updateConf(startInfo, newconf, editsdir, edit0,
      edit1, "dfs.name.edits.dir");
  // copy fsimage directory if needed
  if (startInfo.isStandby) {
    // do not open edit log at startup
    newconf.setBoolean("dfs.namenode.openlog", false);
    // connect to primary
    String fsName = getRemoteNamenodeHttpName(conf, startInfo.instance);
    FSImage tempImage = new FSImage(newconf,
        NNStorageConfiguration.getNamespaceDirs(newconf),
        NNStorageConfiguration.getNamespaceEditsDirs(newconf), null);
    // will block until Primary has left the safemode
    CheckpointSignature cs = getCheckpointSignature(primaryNamenode);
    long lastCheckpointTxId = cs.mostRecentCheckpointTxId;
    if (cs.layoutVersion != FSConstants.LAYOUT_VERSION) {
      throw new IOException("Upgrade for standby is not supported");
    }
    // Back up any existing file-based shared image/edits before format.
    if (isFile(ownSharedImage)) {
      File destFile = new File (ownSharedImage.getPath());
      NNStorageDirectoryRetentionManager.backupFiles(localFs, destFile, conf);
    }
    if (isFile(ownSharedEdits)) {
      File destFile = new File (ownSharedEdits.getPath());
      NNStorageDirectoryRetentionManager.backupFiles(localFs, destFile, newconf);
    }
    // setup storage
    NNStorage tempStorage = tempImage.storage;
    tempStorage.format();
    tempStorage.setStorageInfo(cs);
    tempStorage.writeAll();
    tempImage.editLog.transitionNonFileJournals(tempStorage, false,
        Transition.FORMAT, null);
    tempImage.transitionNonFileImages(tempStorage, false, Transition.FORMAT);
    // we need to become the active writer to upload image successfully to
    // non-file images storage
    tempImage.editLog.recoverUnclosedStreams();
    // Download the image to all storage directories
    FLOG.info("Downloading image to all storage directories.");
    MD5Hash digest = downloadImageToStorage(fsName, lastCheckpointTxId, tempImage);
    // NOTE(review): badSds is never populated before being reported below —
    // confirm whether directory errors were meant to be collected here.
    List<StorageDirectory> badSds = new ArrayList<StorageDirectory>();
    tempStorage.checkpointUploadDone(lastCheckpointTxId, digest);
    FLOG.info("Downloading image to all storage directories. DONE");
    tempImage.saveDigestAndRenameCheckpointImage(lastCheckpointTxId, digest);
    tempStorage.reportErrorsOnDirectories(badSds, tempImage);
    tempStorage.close();
    tempImage.close();
  }
  return newconf;
}
/**
 * Downloads the checkpoint image with txid {@code lastCheckpointTxId}
 * from the primary (at {@code fsName}) into all storage directories of
 * {@code tempImage}, retrying up to three times on socket timeouts with a
 * one-second pause between attempts.
 *
 * @return the MD5 digest of the downloaded image
 * @throws IOException the last timeout when all attempts fail, or any
 *         non-timeout download error immediately
 */
private static MD5Hash downloadImageToStorage(String fsName,
    long lastCheckpointTxId, FSImage tempImage) throws IOException {
  final int maxAttempts = 3;
  IOException lastTimeout = null;
  for (int attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      // disable throttling for image download
      return TransferFsImage.downloadImageToStorage(fsName,
          lastCheckpointTxId, tempImage, true, true);
    } catch (SocketTimeoutException ex) {
      lastTimeout = ex;
      // Fixed log-message typo: "socked" -> "socket".
      LOG.info("Downloading image - socket timeout exception. Will retry...");
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ie) {
        // Give up retrying, but preserve the interrupt status.
        Thread.currentThread().interrupt();
        break;
      }
    }
  }
  throw lastTimeout;
}
/**
 * Fetches the checkpoint signature from the primary, retrying forever
 * (with a one-second pause) while the primary is unavailable — the
 * standby cannot initialize without it.
 *
 * @throws IOException when interrupted while waiting between retries
 */
private static CheckpointSignature getCheckpointSignature(
    NamenodeProtocol primaryNamenode) throws IOException {
  while (true) {
    try {
      return primaryNamenode.getCheckpointSignature();
    } catch (RemoteException e) {
      LOG.info("Active namenode is not available. Standby cannot initialize" , e);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ex) {
        // Restore the interrupt status for callers before aborting.
        Thread.currentThread().interrupt();
        throw new IOException(ex);
      }
    }
  }
}
/**
 * Returns a copy of {@code conf} in which the generic address keys
 * (dfs.http.address, dfs.namenode.dn-address, the RPC address key and
 * fs.default.name) are overridden by this instance's suffixed variants
 * ("...0" for NODEZERO, "...1" for NODEONE), so the namenode attaches to
 * the appropriate addresses.
 *
 * Note: as in the original, the resolved RPC address is also written back
 * to the suffixed fs.default.name key of BOTH the copy and the input conf.
 */
public static Configuration updateAddressConf(Configuration conf, InstanceId instance) {
  Configuration newconf = new Configuration(conf);
  // The two instances differ only in the key suffix; the per-instance
  // branches of the original were otherwise identical.
  if (instance == InstanceId.NODEZERO) {
    applyInstanceAddressOverrides(conf, newconf, ZERO);
  } else if (instance == InstanceId.NODEONE) {
    applyInstanceAddressOverrides(conf, newconf, ONE);
  }
  return newconf;
}

/**
 * Copies the suffixed address keys for one instance onto the generic keys
 * of {@code newconf}. {@code suffix} is "0" or "1".
 */
private static void applyInstanceAddressOverrides(Configuration conf,
    Configuration newconf, String suffix) {
  String fs = conf.get("dfs.http.address" + suffix);
  if (fs != null) {
    newconf.set("dfs.http.address", fs);
  }
  fs = conf.get("dfs.namenode.dn-address" + suffix);
  if (fs != null) {
    newconf.set("dfs.namenode.dn-address", fs);
  }
  // DFS_NAMENODE_RPC_ADDRESS0/1_KEY are defined as the base key + suffix.
  fs = conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY + suffix);
  if (fs != null) {
    newconf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, fs);
    newconf.set("fs.default.name" + suffix, fs);
    conf.set("fs.default.name" + suffix, fs);
  }
  fs = conf.get("fs.default.name" + suffix);
  if (fs != null) {
    newconf.set("fs.default.name", fs);
  }
}
/**
 * Returns the RPC address of the remote (peer) namenode: node zero asks
 * for node one's address and vice versa. Falls back to the suffixed
 * fs.default.name key when the suffixed RPC address key is unset.
 */
static InetSocketAddress getRemoteNamenodeAddress(Configuration conf,
    InstanceId instance)
    throws IOException {
  String fs;
  if (instance == InstanceId.NODEZERO) {
    fs = conf.get(DFS_NAMENODE_RPC_ADDRESS1_KEY);
    if (fs == null) {
      fs = conf.get("fs.default.name1");
    }
  } else if (instance == InstanceId.NODEONE) {
    fs = conf.get(DFS_NAMENODE_RPC_ADDRESS0_KEY);
    if (fs == null) {
      fs = conf.get("fs.default.name0");
    }
  } else {
    throw new IOException("Unknown instance " + instance);
  }
  if (fs != null) {
    // Resolve against a copy so the caller's conf is untouched.
    Configuration remoteConf = new Configuration(conf);
    remoteConf.set(FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY, fs);
    conf = remoteConf;
  }
  return NameNode.getClientProtocolAddress(conf);
}
/**
 * Returns the http server address of the remote (other) namenode: for
 * instance zero this is dfs.http.address1, for instance one it is
 * dfs.http.address0.
 *
 * @param conf     configuration to read the http address from
 * @param instance the local instance id
 * @return the remote namenode's configured http address, or null if unset
 * @throws IOException if the instance id is not recognized
 */
static String getRemoteNamenodeHttpName(Configuration conf,
                                        InstanceId instance)
  throws IOException {
  final String httpKey;
  if (instance == InstanceId.NODEZERO) {
    httpKey = "dfs.http.address1";
  } else if (instance == InstanceId.NODEONE) {
    httpKey = "dfs.http.address0";
  } else {
    throw new IOException("Unknown instance " + instance);
  }
  return conf.get(httpKey);
}
/**
 * Returns the shared edits directory URI of the remote (other) NameNode.
 *
 * @param conf configuration to resolve the shared edits key from
 * @return URI of the remote instance's shared edits directory
 * @throws IOException if the instance id is invalid
 */
URI getRemoteSharedEditsURI(Configuration conf) throws IOException {
return getSharedEditsURI(conf, true);
}
/**
 * Returns the shared edits directory URI of the local NameNode.
 *
 * @param conf configuration to resolve the shared edits key from
 * @return URI of this instance's shared edits directory
 * @throws IOException if the instance id is invalid
 */
URI getLocalSharedEditsURI(Configuration conf) throws IOException {
return getSharedEditsURI(conf, false);
}
/**
 * Resolves the shared edits directory URI for either the local or the
 * remote instance. Instance zero's own shared dir is
 * dfs.name.edits.dir.shared0 and its remote's is ...shared1; for instance
 * one the mapping is reversed.
 *
 * @param conf   configuration to resolve the key from
 * @param remote true for the other instance's shared dir, false for ours
 * @return the resolved shared edits URI
 * @throws IOException if this node's instance id is invalid
 */
private URI getSharedEditsURI(Configuration conf, boolean remote)
  throws IOException {
  final String sharedEditsKey;
  if (instance == InstanceId.NODEZERO) {
    sharedEditsKey = remote ? "dfs.name.edits.dir.shared1"
                            : "dfs.name.edits.dir.shared0";
  } else if (instance == InstanceId.NODEONE) {
    sharedEditsKey = remote ? "dfs.name.edits.dir.shared0"
                            : "dfs.name.edits.dir.shared1";
  } else {
    LOG.info("Instance is invalid. " + instance);
    throw new IOException("Instance is invalid. " + instance);
  }
  return NNStorageConfiguration.getURIKey(conf, sharedEditsKey);
}
/**
 * Returns the transaction id at which the current checkpoint started.
 * (Note: this is a txid, not a wall-clock time.)
 */
long getStartCheckpointTxId() {
return startCheckpointTxId;
}
/**
 * Records the transaction id at which the current checkpoint started.
 *
 * @param txid starting transaction id of the checkpoint
 */
void setStartCheckpointTxId(long txid) {
startCheckpointTxId = txid;
}
/**
 * Indicates that the AvatarNode should restart
 */
void doRestart() {
runInfo.doRestart = true;
}
/**
 * Current system time.
 * @return current time in msec.
 */
static long now() {
return System.currentTimeMillis();
}
/**
 * Collects avatar-specific status entries for reporting: the last applied
 * transaction id for both roles, plus either standby ingest/failover state
 * or active checkpoint state depending on the current avatar. On any error
 * the map collected so far is returned (partial information).
 *
 * @return map of status keys to their string values; possibly partial
 */
protected Map<NameNodeKey, String> getNameNodeSpecificKeys(){
  Map<NameNodeKey, String> map = new HashMap<NameNodeKey, String>();
  try{
    map.put(new NameNodeKey("Last applied transaction id", NameNodeKey.BOTH),
        toStr(getFSImage().getEditLog().getLastWrittenTxId()));
    if (currentAvatar == Avatar.STANDBY) {
      // standby may be null during startup; report empty values then
      map.put(new NameNodeKey("Standby: ignore datanodes",
          NameNodeKey.STANDBY), toStr(this.ignoreDatanodes()));
      map.put(new NameNodeKey("Standby: ingest state", NameNodeKey.STANDBY),
          toStr((standby == null) ? "" : standby.currentIngestState));
      map.put(new NameNodeKey("Standby: ingest fell behind", NameNodeKey.STANDBY),
          toStr((standby == null) ? "" : standby.fellBehind()));
      map.put(new NameNodeKey("Standby: ingest lag bytes", NameNodeKey.STANDBY),
          toStr((standby == null) ? 0L : standby.getLagBytes()));
      map.put(new NameNodeKey("Standby: checkpoint status", NameNodeKey.STANDBY),
          toStr((standby == null) ? "" : standby.getCheckpointStatus()));
      map.put(new NameNodeKey("Standby: failover in progress",
          NameNodeKey.STANDBY), toStr(standbySafeMode.failoverInProgress()));
      if (standbySafeMode.failoverInProgress()) {
        map.put(new NameNodeKey("Standby: failover outstanding heartbeats",
            NameNodeKey.STANDBY), toStr(standbySafeMode
            .getOutStandingHeartbeats().size()));
        map.put(new NameNodeKey("Standby: failover outstanding reports",
            NameNodeKey.STANDBY), toStr(standbySafeMode
            .getOutStandingReports().size()));
      }
    } else {
      map.put(new NameNodeKey("Last checkpoint txid", NameNodeKey.ACTIVE),
          toStr(this.getFSImage().storage.getMostRecentCheckpointTxId()));
      map.put(new NameNodeKey("Last checkpoint time", NameNodeKey.ACTIVE),
          this.getFSImage().storage.getMostRecentCheckpointTime());
    }
  } catch (Exception e) {
    // send partial information; pass the throwable so the stack trace is
    // preserved instead of flattening it with e.toString()
    LOG.error("Could not collect namenode specific keys", e);
  }
  return map;
}
/**
 * Returns true when this node is currently acting as the primary (ACTIVE
 * avatar), false when it is the standby.
 */
protected boolean getIsPrimary() {
return currentAvatar == Avatar.ACTIVE;
}
/**
 * String conversion helper for the status maps. Uses String.valueOf so a
 * null argument yields "null" instead of a NullPointerException (the
 * previous o.toString() would throw on null).
 *
 * @param o value to render; may be null
 * @return string form of {@code o}
 */
private String toStr(Object o){
  return String.valueOf(o);
}
/**
 * Mutable run-state flags shared between the AvatarNode and the
 * controlling main() loop. Fields are volatile because they are written
 * and read by different threads.
 */
public static class RunInfo {
  volatile boolean doRestart;  // a restart of the node has been requested
  volatile boolean shutdown;   // a full shutdown has been requested
  volatile boolean isRunning;  // the node is currently running

  /**
   * Creates run state with explicit initial flag values.
   */
  public RunInfo(boolean doRestart, boolean shutdown, boolean isRunning) {
    this.doRestart = doRestart;
    this.shutdown = shutdown;
    this.isRunning = isRunning;
  }

  /**
   * Creates the default run state: running, no restart or shutdown pending.
   */
  public RunInfo() {
    this(false, false, true);
  }
}
/**
 * Returns the socket address this namenode's RPC server is bound to.
 */
public InetSocketAddress getNameNodeAddress() {
return serverAddress;
}
/**
 * Returns the standby safe mode handle for this node (may be null when
 * not applicable — not established by this accessor).
 */
public StandbySafeMode getStandbySafeMode() {
return this.standbySafeMode;
}
/**
 * Returns which avatar instance (NODEZERO or NODEONE) this node is.
 */
public InstanceId getInstanceId() {
return this.instance;
}
/**
 * Command-line entry point. Creates the AvatarNode and waits for it to
 * finish; loops to recreate it as long as a restart is requested. On a
 * requested shutdown the RPC server is stopped; on an unrecoverable
 * exception the process exits with status 1.
 */
public static void main(String argv[]) throws Exception {
  org.apache.hadoop.hdfs.DnsMonitorSecurityManager.setTheManager();
  Exception exception = null;
  AvatarNode avatarnode = null;
  RunInfo runInfo = new RunInfo();
  do {
    // reset per-iteration state so each (re)start begins cleanly
    runInfo.doRestart = false;
    runInfo.isRunning = true;
    exception = null;
    try {
      StringUtils.startupShutdownMessage(AvatarNode.class, argv, LOG);
      FastWritableHDFS.init();
      FastProtocolHDFS.init();
      avatarnode = createAvatarNode(argv, null, runInfo);
      if (avatarnode != null) {
        avatarnode.waitForRestart();
      }
    } catch (Throwable e) {
      LOG.error(StringUtils.stringifyException(e));
      if (runInfo.doRestart) {
        LOG.error("AvatarNode restarting...");
      } else {
        exception = new Exception(StringUtils.stringifyException(e));
      }
    }
  } while (runInfo.doRestart);
  // avatarnode can still be null if createAvatarNode threw before
  // assignment; guard to avoid an NPE masking the real failure
  if (runInfo.shutdown && avatarnode != null) {
    avatarnode.stopRPC(true);
  }
  if (exception != null) {
    LOG.fatal("Exception running avatarnode. Shutting down", exception);
    Runtime.getRuntime().exit(1);
  }
}
/**
 * Returns true once this node's initialization has completed.
 */
public boolean isInitDone() {
return isInitialized;
}
}