/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import javax.security.auth.login.LoginException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hdfs.protocol.AvatarConstants.InstanceId;
import org.apache.hadoop.hdfs.protocol.AvatarConstants.StartupOption;
import org.apache.hadoop.hdfs.protocol.AvatarProtocol;
import org.apache.hadoop.hdfs.protocol.AvatarConstants.Avatar;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction;
import org.apache.hadoop.hdfs.server.namenode.AvatarNode;
import org.apache.hadoop.hdfs.server.namenode.AvatarNodeZkUtil;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.StandbyStateException;
import org.apache.hadoop.hdfs.server.namenode.ZookeeperTxId;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryProxy;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.data.Stat;
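/**
 * Command-line tool for administering AvatarNodes: showing or switching the
 * avatar of an instance, coordinating failover between the two instances, and
 * issuing safemode, metasave, isInitialized and saveNamespace requests.
 */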
public class AvatarShell extends Configured implements Tool {
/** Logger for the shell; registered under the name "org.apache.hadoop.AvatarShell". */
public static final Log LOG = LogFactory
.getLog("org.apache.hadoop.AvatarShell");
// AvatarShell deals with hdfs configuration so need to add these
static {
Configuration.addDefaultResource("hdfs-default.xml");
Configuration.addDefaultResource("hdfs-site.xml");
Configuration.addDefaultResource("avatar-default.xml");
Configuration.addDefaultResource("avatar-site.xml");
}
/** Retry-wrapped proxy used for AvatarProtocol calls; assigned by initAvatarRPC. */
public AvatarProtocol avatarnode;
/** Raw RPC proxy that avatarnode wraps; this is the proxy stopped by close(). */
AvatarProtocol rpcAvatarnode;
/** Login identity obtained in initAvatarRPC and passed to the RPC proxy. */
private UnixUserGroupInformation ugi;
/** Cleared by close() so the proxy is stopped at most once. */
volatile boolean clientRunning = true;
/** Working configuration; run() may replace it with an instance-specific one. */
private Configuration conf;
// We need to keep the default configuration around with
// Avatar specific fields unmodified
private Configuration originalConf;
// used when waiting for last txid from primary,
// by default polling zk every second
// (value is in milliseconds; it is passed straight to Thread.sleep)
public static long retrySleep = 1 * 1000;
/**
 * Creates an AvatarShell backed by a freshly constructed
 * {@link Configuration}.
 * <p>
 * No connection is opened here; the RPC proxy is created later by
 * {@link #initAvatarRPC(String)}.
 *
 * @throws IOException
 *           declared in the signature; nothing visible in this constructor
 *           throws it
 */
public AvatarShell() throws IOException {
this(new Configuration());
}
/**
 * Creates an AvatarShell that operates on the supplied configuration.
 * <p>
 * The same object is stored both as the working configuration and as the
 * "original" configuration. No connection is opened here; see
 * {@link #initAvatarRPC(String)}.
 *
 * @param conf
 *          The Hadoop configuration
 */
public AvatarShell(Configuration conf) {
  super(conf);
  this.originalConf = conf;
  this.conf = conf;
}
/**
 * Logs in and creates the RPC proxies used to talk to an AvatarNode.
 *
 * @param address
 *          host:port of the node to contact, or {@code null} to use the
 *          address derived from the working configuration
 * @throws IOException
 *           if the login fails (the LoginException is attached as the cause)
 *           or the proxy cannot be created
 */
public void initAvatarRPC(String address) throws IOException {
  final InetSocketAddress target = (address == null)
      ? AvatarNode.getAddress(conf)
      : NetUtils.createSocketAddr(address);

  try {
    this.ugi = UnixUserGroupInformation.login(conf, true);
  } catch (LoginException loginFailure) {
    IOException wrapped = new IOException();
    wrapped.initCause(loginFailure);
    throw wrapped;
  }

  // Keep the raw proxy so close() can stop it; callers use the wrapped one.
  this.rpcAvatarnode = createRPCAvatarnode(target, conf, ugi);
  this.avatarnode = createAvatarnode(this.rpcAvatarnode);
}
/**
 * Creates a retry-wrapped AvatarProtocol proxy for the address that
 * {@code AvatarNode.getAddress(conf)} yields.
 *
 * @param conf the configuration to read the address from
 * @return the proxy
 * @throws IOException if the proxy cannot be created
 */
public static AvatarProtocol createAvatarnode(Configuration conf)
    throws IOException {
  final InetSocketAddress configuredAddr = AvatarNode.getAddress(conf);
  return createAvatarnode(configuredAddr, conf);
}
/**
 * Logs in and creates a retry-wrapped AvatarProtocol proxy for the given
 * address.
 *
 * @param avatarNodeAddr address of the AvatarNode to contact
 * @param conf configuration used for login and for the RPC proxy
 * @return the proxy
 * @throws IOException
 *           if the login fails (the LoginException is attached as the cause)
 *           or the proxy cannot be created
 */
public static AvatarProtocol createAvatarnode(
    InetSocketAddress avatarNodeAddr, Configuration conf) throws IOException {
  final UnixUserGroupInformation user;
  try {
    user = UnixUserGroupInformation.login(conf, true);
  } catch (LoginException loginFailure) {
    IOException wrapped = new IOException();
    wrapped.initCause(loginFailure);
    throw wrapped;
  }
  final AvatarProtocol rawProxy = createRPCAvatarnode(avatarNodeAddr, conf, user);
  return createAvatarnode(rawProxy);
}
/**
 * Creates the raw (not retry-wrapped) RPC proxy for AvatarProtocol.
 *
 * @param avatarNodeAddr address of the AvatarNode to contact
 * @param conf configuration passed to the RPC layer and used to pick the
 *          socket factory
 * @param ugi identity passed to the RPC layer
 * @return the RPC proxy
 * @throws IOException if the proxy cannot be obtained
 */
private static AvatarProtocol createRPCAvatarnode(
InetSocketAddress avatarNodeAddr, Configuration conf,
UnixUserGroupInformation ugi) throws IOException {
// Logged at INFO so that every connection attempt is visible.
LOG.info("AvatarShell connecting to " + avatarNodeAddr);
return (AvatarProtocol) RPC.getProxy(AvatarProtocol.class,
AvatarProtocol.versionID, avatarNodeAddr, ugi, conf, NetUtils
.getSocketFactory(conf, AvatarProtocol.class));
}
/**
 * Wraps a raw AvatarProtocol proxy in a {@link RetryProxy}.
 * <p>
 * A policy is registered only for the method name "create". It is built
 * from TRY_ONCE_THEN_FAIL defaults, with an (empty) per-remote-exception
 * map attached for RemoteException.
 *
 * @param rpcAvatarnode the raw RPC proxy to wrap
 * @return the wrapping proxy
 * @throws IOException declared in the signature
 */
private static AvatarProtocol createAvatarnode(AvatarProtocol rpcAvatarnode)
    throws IOException {
  // The remote-exception map is intentionally left without entries.
  final Map<Class<? extends Exception>, RetryPolicy> perRemoteException =
      new HashMap<Class<? extends Exception>, RetryPolicy>();
  final RetryPolicy remotePolicy = RetryPolicies.retryByRemoteException(
      RetryPolicies.TRY_ONCE_THEN_FAIL, perRemoteException);

  final Map<Class<? extends Exception>, RetryPolicy> perException =
      new HashMap<Class<? extends Exception>, RetryPolicy>();
  perException.put(RemoteException.class, remotePolicy);
  final RetryPolicy createPolicy = RetryPolicies.retryByException(
      RetryPolicies.TRY_ONCE_THEN_FAIL, perException);

  final Map<String, RetryPolicy> perMethod = new HashMap<String, RetryPolicy>();
  perMethod.put("create", createPolicy);

  return (AvatarProtocol) RetryProxy.create(AvatarProtocol.class,
      rpcAvatarnode, perMethod);
}
/**
 * Stops the raw RPC proxy to the avatarNode. Only the first call has any
 * effect; later calls return immediately.
 *
 * @throws IOException declared in the signature
 */
public synchronized void close() throws IOException {
  if (!clientRunning) {
    return;
  }
  clientRunning = false;
  RPC.stopProxy(rpcAvatarnode);
}
/**
 * Prints the command synopsis to standard error, followed by a blank line
 * and the generic ToolRunner options.
 */
static void printUsage() {
  final String[] usageLines = {
    "Usage: java AvatarShell",
    " [-waittxid] [-service serviceName]",
    " [-failover] [-service serviceName]",
    " [-prepfailover] [-service serviceName]",
    " [-{zero|one} -showAvatar] [-service serviceName]",
    " [-{zero|one} -setAvatar primary [force]] [-service serviceName]",
    " [-{zero|one} -shutdownAvatar] [-service serviceName]",
    " [-{zero|one} -safemode enter|leave|get|wait|initqueues] [-service serviceName]",
    " [-{zero|one} -metasave filename] [-service serviceName]",
    " [-{zero|one} -isInitialized] [-service serviceName]",
    " [-{zero|one} -saveNamespace [force] [uncompressed]] [-service serviceName]"
  };
  for (int i = 0; i < usageLines.length; i++) {
    System.err.println(usageLines[i]);
  }
  System.err.println();
  ToolRunner.printGenericCommandUsage(System.err);
}
/**
 * Tells whether the namenode RPC address in {@code conf} is the one
 * registered in ZooKeeper.
 *
 * @param conf configuration of one instance (its RPC address key is read)
 * @param zkRegistration the address registered in ZooKeeper
 * @return {@code true} if the configured address equals the registration;
 *         {@code false} otherwise, including when the address key is not
 *         set in {@code conf}
 */
private boolean isPrimary(Configuration conf, String zkRegistration) {
  String actualName = conf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY);
  // An unset key means this instance cannot be the registered primary;
  // previously this case failed with a NullPointerException.
  return actualName != null && actualName.equals(zkRegistration);
}
/**
 * Upper bound, in milliseconds, on how long waitForLastTxIdNode polls
 * before giving up. Protected so that subclasses can override it.
 *
 * @return ten minutes expressed in milliseconds
 */
protected long getMaxWaitTimeForWaitTxid() {
  return TimeUnit.MINUTES.toMillis(10);
}
/**
 * Waits till the last txid node appears in Zookeeper, such that it matches
 * the ssid node.
 * <p>
 * Polls every {@code retrySleep} milliseconds until the session id stored in
 * the ssid node equals the session id in the last-txid node. Failures while
 * reading either node are logged and retried.
 *
 * @param zk client used to read the ssid and last-txid nodes
 * @param conf configuration whose namenode RPC address key identifies the
 *          primary
 * @throws IOException
 *           if no matching node is seen within
 *           {@link #getMaxWaitTimeForWaitTxid()} milliseconds
 * @throws InterruptedException
 *           if the thread is interrupted while reading or while sleeping
 *           between attempts (covered by the declared {@code Exception})
 */
private void waitForLastTxIdNode(AvatarZooKeeperClient zk, Configuration conf)
    throws Exception {
  // Gather session id and transaction id data.
  String address = conf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY);
  long maxWaitTime = this.getMaxWaitTimeForWaitTxid();
  long start = System.currentTimeMillis();
  while (true) {
    if (System.currentTimeMillis() - start > maxWaitTime) {
      throw new IOException("No valid last txid znode found");
    }
    try {
      long sessionId = zk.getPrimarySsId(address, false);
      ZookeeperTxId zkTxId = zk.getPrimaryLastTxId(address, false);
      if (sessionId == zkTxId.getSessionId()) {
        return;
      }
      LOG.warn("Session Id in the ssid node : " + sessionId
          + " does not match the session Id in the txid node : "
          + zkTxId.getSessionId() + " retrying...");
    } catch (Throwable e) {
      // An interrupt is a request to stop waiting; retrying would ignore it
      // for up to the full timeout.
      if (e instanceof InterruptedException) {
        throw (InterruptedException) e;
      }
      LOG.warn("Caught exception : " + e + " retrying ...");
    }
    // Sleep outside the try block so that an interrupt here propagates to
    // the caller instead of being caught and retried.
    Thread.sleep(retrySleep);
  }
}
/**
 * Builds an argument vector for a nested shell invocation.
 *
 * @param serviceName service to target, or {@code null} for none
 * @param args the command arguments, in order
 * @return a new array holding {@code args}, followed by
 *         {@code "-service", serviceName} when a service name is given
 */
private String[] getAvatarCommand(String serviceName, String... args) {
  final boolean hasService = serviceName != null;
  final String[] command = new String[args.length + (hasService ? 2 : 0)];
  System.arraycopy(args, 0, command, 0, args.length);
  if (hasService) {
    command[args.length] = "-service";
    command[args.length + 1] = serviceName;
  }
  return command;
}
/**
 * Drives a failover (or only its preparation) by running a sequence of
 * shell commands against the -zero and -one instances.
 * <p>
 * The current primary is the instance whose RPC address matches the
 * registration read from ZooKeeper. Both instances must pass
 * -isInitialized before anything else is attempted.
 *
 * @param serviceName service to target, or null; appended as -service to
 *          every nested command
 * @param prepareOnly if true, only send "prepfailover" and "initqueues"
 *          safemode actions to the standby; if false, shut down the primary,
 *          wait for the last txid node and promote the standby
 * @return 0 when the preparation succeeds; for a full failover, the exit
 *         code of the final -setAvatar command
 * @throws Exception if ZooKeeper holds no registration, if neither instance
 *           is the registered primary, or if a nested command fails
 */
private int failover(String serviceName, boolean prepareOnly) throws Exception {
AvatarZooKeeperClient zk = new AvatarZooKeeperClient(conf, null);
String prefix = "Failover" + (prepareOnly ? " (prepare):" : ":");
System.out.println(prefix + " START");
LOG.info(prefix + " START");
long start = System.currentTimeMillis();
try {
// Look up which address is currently registered as primary.
String defaultName = conf.get(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY);
String registration = zk.getPrimaryAvatarAddress(defaultName, new Stat(),
false);
if (registration == null) {
throw new IOException("No node found in zookeeper");
}
// Derive per-instance configurations to compare against the registration.
Configuration zeroConf = AvatarNode.updateAddressConf(conf,
InstanceId.NODEZERO);
Configuration oneConf = AvatarNode.updateAddressConf(conf,
InstanceId.NODEONE);
boolean onePrimary = isPrimary(oneConf, registration);
boolean zeroPrimary = isPrimary(zeroConf, registration);
if (!onePrimary && !zeroPrimary) {
throw new IOException(
"None of the -zero or -one instances are the primary in zk, zk registration : "
+ registration);
}
// Nested commands run in a separate shell built from the original configuration.
AvatarShell shell = new AvatarShell(originalConf);
String[] cmd = null;
// perform pre-failover health check
cmd = getAvatarCommand(serviceName, "-zero", "-isInitialized");
runCommand(shell, cmd, "-zero is not initialized");
cmd = getAvatarCommand(serviceName, "-one", "-isInitialized");
runCommand(shell, cmd, "-one is not initialized");
// zeroPrimary alone decides the roles; at least one flag is true here.
String primary = zeroPrimary ? "-zero" : "-one";
String standby = zeroPrimary ? "-one" : "-zero";
if (prepareOnly) {
// instruct standby that we are about to failover
cmd = getAvatarCommand(serviceName, standby, "-safemode", "prepfailover");
runCommand(shell, cmd, standby + " prepare failover failed");
// initialize replication queues on standby
cmd = getAvatarCommand(serviceName, standby, "-safemode", "initqueues");
runCommand(shell, cmd, standby
+ " standby replication queues initialization failed");
return 0;
} else {
// perform actual failover
cmd = getAvatarCommand(serviceName, primary, "-shutdownAvatar");
runCommand(shell, cmd, primary + " shutdownAvatar failed");
// Do not promote the standby until the last txid node is in place.
waitForLastTxIdNode(zk, originalConf);
cmd = getAvatarCommand(serviceName, standby, "-setAvatar", "primary");
return shell.run(cmd);
}
} finally {
// Always release the ZooKeeper client and report the elapsed time,
// whether the failover succeeded or not.
zk.shutdown();
long stop = System.currentTimeMillis();
String msg = prefix + " DONE - Time taken: " + ((stop - start) / 1000.0f)
+ " sec.";
System.out.println(msg);
LOG.info(msg);
}
}
/**
 * Runs a command in the given shell and turns a non-zero exit code into an
 * exception.
 *
 * @param shell the shell to run the command in
 * @param cmd the argument vector to run
 * @param failureMessage message of the IOException thrown on failure
 * @throws Exception an IOException carrying {@code failureMessage} when the
 *           exit code is not 0, or whatever {@code shell.run} throws
 */
private void runCommand(AvatarShell shell, String[] cmd, String failureMessage)
    throws Exception {
  final int exitCode = shell.run(cmd);
  if (exitCode == 0) {
    return;
  }
  throw new IOException(failureMessage);
}
/**
 * Validates the service name and, when it is valid, applies it to the
 * working configuration through {@code AvatarNode.initializeGenericKeys}.
 *
 * @param serviceName the service name, or {@code null} when none was given
 * @param failOnError whether an invalid name raises an exception instead of
 *          returning {@code false}
 * @return {@code true} when no name was given or the name is valid;
 *         {@code false} when the name is invalid and {@code failOnError} is
 *         not set
 * @throws IOException when the name is invalid and {@code failOnError} is set
 */
private boolean processServiceName(String serviceName, boolean failOnError)
    throws IOException {
  if (serviceName == null) {
    // nothing to validate
    return true;
  }
  if (AvatarNode.validateServiceName(conf, serviceName)) {
    // remove the service name suffix
    AvatarNode.initializeGenericKeys(conf, serviceName);
    return true;
  }
  if (failOnError) {
    throw new IOException("Wrong service name");
  }
  return false;
}
/**
 * Prints a one-line description of the error to standard error.
 * <p>
 * Uses the localized message when there is one. Exceptions without a
 * message (a bare NullPointerException, for instance) are printed through
 * {@code toString()} so the user sees the exception type rather than the
 * literal text "null".
 *
 * @param e the error to report
 */
private void printError(Throwable e) {
  String message = e.getLocalizedMessage();
  System.err.println(message != null ? message : e.toString());
}
/**
 * Parses the arguments and executes the requested command.
 * <p>
 * Three families of commands are handled, in this order: -waittxid,
 * -failover / -prepfailover, and the direct commands that are sent to a
 * single node selected with -zero, -one or an explicit address.
 *
 * @param argv the command line arguments
 * @return 0 on success, non-zero on failure. Usage errors and caught
 *         exceptions yield -1; a failed metasave yields 1.
 * @throws Exception for failures raised outside the try blocks below, for
 *           example while setting up the RPC connection
 */
public int run(String argv[]) throws Exception {
if (argv.length < 1) {
printUsage();
return -1;
}
AvatarShellCommand cmd = AvatarShellCommand.parseCommand(argv);
if (cmd == null) {
printUsage();
return -1;
}
int exitCode = 0;
// When nameservices are configured, a command must name a service or
// an explicit address.
if (conf.get(FSConstants.DFS_FEDERATION_NAMESERVICES) != null
&& (!cmd.isServiceCommand) && (!cmd.isAddressCommand)) {
printServiceErrorMessage("AvatarShell", conf);
return -1;
}
String serviceName = null;
if (cmd.isServiceCommand) {
serviceName = cmd.serviceArgs[0];
}
// commands without -{zero|one} prefix
if (cmd.isWaitTxIdCommand) {
AvatarZooKeeperClient zk = new AvatarZooKeeperClient(conf, null);
try {
processServiceName(serviceName, true);
waitForLastTxIdNode(zk, originalConf);
} catch (Exception e) {
exitCode = -1;
printError(e);
} finally {
zk.shutdown();
}
if (exitCode == 0) {
LOG.info("Primary shutdown was successful!");
}
return exitCode;
}
if (cmd.isFailoverCommand || cmd.isPrepfailoverCommand) {
boolean prep = cmd.isPrepfailoverCommand;
try {
processServiceName(serviceName, true);
exitCode = failover(serviceName, prep);
} catch (Exception e) {
exitCode = -1;
printError(e);
}
String prefix = prep ? "Prep" : "";
if (exitCode == 0) {
LOG.info(prefix + "Failover was successful!");
if (prep) {
System.out
.println("WARNING: Standby is in pre-failover state! If the failover "
+ "is not performed, the standby needs to be restarted to "
+ "continue checkpointing.");
}
} else {
LOG.error(prefix + "Failover failed!");
if (prep) {
System.out
.println("WARNING: Standby is in bad state! Restart the standby node!");
}
}
return exitCode;
}
/////////////////////// direct commands (-zero -one -address)
String address = cmd.isAddressCommand ? cmd.addressArgs[0] : null;
String instance = cmd.isZeroCommand ? StartupOption.NODEZERO.getName()
: (cmd.isOneCommand ? StartupOption.NODEONE.getName() : null);
// Unlike the branches above, an invalid service name is reported
// through the return value rather than an exception.
if (!processServiceName(serviceName, false)) {
return -1;
}
// remove 0/1 suffix
if (instance != null) {
// The working configuration is replaced by one derived from the
// original configuration for the selected instance.
if ((conf = AvatarZKShell.updateConf(instance, originalConf)) == null) {
printUsage();
return -1;
}
}
// Not inside the try below: a failure here propagates to the caller.
initAvatarRPC(address);
try {
if (cmd.isShowAvatarCommand) {
exitCode = showAvatar();
} else if (cmd.isSetAvatarCommand) {
// The target role is always "primary"; only the "force" flag is
// taken from the arguments.
exitCode = setAvatar("primary", contains(cmd.setAvatarArgs, "force"), serviceName, instance);
} else if (cmd.isIsInitializedCommand) {
exitCode = isInitialized();
} else if (cmd.isMetasaveCommand) {
exitCode = metasave(cmd.metasageArgs[0]);
} else if (cmd.isSaveNamespaceCommand) {
exitCode = saveNamespace(cmd.getSaveNamespaceArgs());
} else if (cmd.isShutdownAvatarCommand) {
// Returns nothing: exitCode stays 0 unless an exception is thrown.
shutdownAvatar(serviceName);
} else if (cmd.isSafemodeCommand) {
// Returns nothing: exitCode stays 0 unless an exception is thrown.
processSafeMode(cmd.safemodeArgs[0]);
} else {
exitCode = -1;
System.err.println("Unknown command");
printUsage();
}
} catch (IllegalArgumentException arge) {
exitCode = -1;
arge.printStackTrace();
printError(arge);
printUsage();
} catch (RemoteException e) {
//
// This is a error returned by avatarnode server. Print
// out the first line of the error mesage, ignore the stack trace.
exitCode = -1;
try {
String[] content;
content = e.getLocalizedMessage().split("\n");
System.err.println(content[0]);
} catch (Exception ex) {
System.err.println(ex.getLocalizedMessage());
}
} catch (IOException e) {
//
// IO exception encountered locally.
//
exitCode = -1;
printError(e);
} catch (Throwable re) {
// Last resort: any other failure is reported by message only.
exitCode = -1;
printError(re);
} finally {
}
if (exitCode == 0) {
LOG.info("Command was successful!");
}
return exitCode;
}
/**
 * Case-insensitive membership test.
 *
 * @param args the values to search; may be {@code null}
 * @param arg the value to look for
 * @return {@code true} if some element of {@code args} equals {@code arg}
 *         ignoring case; {@code false} otherwise or when {@code args} is
 *         {@code null}
 */
private boolean contains(String[] args, String arg) {
  if (args != null) {
    for (int i = 0; i < args.length; i++) {
      if (args[i].equalsIgnoreCase(arg)) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Asks the connected node for its avatar and prints it to standard output,
 * together with the address taken from the working configuration.
 *
 * @return always 0
 * @throws IOException if the remote call fails
 */
private int showAvatar()
    throws IOException {
  final Avatar reported = avatarnode.reportAvatar();
  final StringBuilder line = new StringBuilder();
  line.append("The current avatar of ")
      .append(AvatarNode.getAddress(conf))
      .append(" is ")
      .append(reported);
  System.out.println(line.toString());
  return 0;
}
/**
 * Asks the connected node whether it is initialized and logs the answer.
 *
 * @return 0 if the node reports it is initialized, -1 otherwise
 * @throws IOException if the remote call fails
 */
private int isInitialized()
    throws IOException {
  if (avatarnode.isInitialized()) {
    LOG.info("Standby has been successfully initialized");
    return 0;
  }
  LOG.error("Standby has not been initialized yet");
  return -1;
}
/**
 * Asks the connected node to perform a metaSave into the given file.
 * Failures are logged rather than propagated.
 *
 * @param filename file name passed to the node
 * @return 0 on success, 1 if the call threw an exception
 * @throws IOException declared in the signature
 */
private int metasave(String filename)
    throws IOException {
  int exitCode = 0;
  try {
    avatarnode.metaSave(filename);
  } catch (Exception failure) {
    LOG.error("Exception when saving metadata", failure);
    exitCode = 1;
  }
  return exitCode;
}
/**
 * Asks the connected node to save its namespace.
 *
 * @param args options; each must be exactly "force" or "uncompressed"
 * @return 0 after the remote call succeeds; -1 (after printing usage, and
 *         without contacting the node) if an unknown option is present
 * @throws IOException if the remote call fails
 */
public int saveNamespace(List<String> args) throws IOException {
  boolean force = false;
  boolean uncompressed = false;
  for (String option : args) {
    final boolean isForce = option.equals("force");
    if (!isForce && !option.equals("uncompressed")) {
      printUsage();
      return -1;
    }
    if (isForce) {
      force = true;
    } else {
      uncompressed = true;
    }
  }
  avatarnode.saveNamespace(force, uncompressed);
  return 0;
}
/**
 * Decides what to do with a RemoteException raised while quiescing the
 * standby for failover.
 * <p>
 * Anything other than a StandbyStateException is rethrown unchanged. For a
 * StandbyStateException the operator is asked on standard input whether to
 * force the failover: "Y" returns normally, "N" rethrows the exception, and
 * any other answer repeats the question.
 * <p>
 * If standard input reaches end-of-stream before an answer is given (a
 * non-interactive run, for instance) the exception is rethrown. The
 * dangerous operation is never forced without explicit confirmation, and
 * the prompt does not spin forever.
 *
 * @param re the exception received from the node
 * @throws IOException {@code re} itself, when the failover must not proceed;
 *           or an IOException raised while reading standard input
 */
public static void handleRemoteException(RemoteException re) throws IOException {
  IOException ie = re.unwrapRemoteException();
  if (!(ie instanceof StandbyStateException)) {
    throw re;
  }
  BufferedReader in = new BufferedReader(new InputStreamReader(
      System.in));
  while (true) {
    System.out.println("The Standby's state is incorrect : " + ie
        + "\n. You can still force a failover after some manual "
        + "verification. This is an EXTEREMELY DANGEROUS operation if "
        + "you don't know what you are doing. Do you wish to "
        + "continue with forcing the failover ? (Y/N)");
    String input = in.readLine();
    if (input == null) {
      // readLine() returns null at end-of-stream and will keep doing so;
      // treat the missing answer as a refusal.
      throw re;
    }
    if (input.equalsIgnoreCase("Y")) {
      return;
    }
    if (input.equalsIgnoreCase("N")) {
      throw re;
    }
  }
}
/**
 * Switches the connected node to the requested avatar. Only the switch to
 * the ACTIVE avatar is supported.
 * <p>
 * If the node already has that avatar a message is printed and nothing else
 * happens. Otherwise the node is quiesced, the failover is performed and the
 * ZooKeeper information is updated.
 *
 * @param role the requested avatar, compared case-insensitively with the
 *          string forms of the Avatar constants
 * @param noverification passed to {@code quiesceForFailover}
 * @param serviceName service name forwarded to the ZooKeeper update
 * @param instance instance name forwarded to the ZooKeeper update
 * @return always 0
 * @throws IOException if {@code role} is the STANDBY avatar or is unknown,
 *           or if any remote step fails
 */
public int setAvatar(String role, boolean noverification, String serviceName, String instance)
    throws IOException {
  final boolean toActive = Avatar.ACTIVE.toString().equalsIgnoreCase(role);
  if (!toActive) {
    if (Avatar.STANDBY.toString().equalsIgnoreCase(role)) {
      throw new IOException("setAvatar Command only works to switch avatar" +
          " from Standby to Primary");
    }
    throw new IOException("Unknown avatar type " + role);
  }
  final Avatar dest = Avatar.ACTIVE;

  final Avatar current = avatarnode.getAvatar();
  if (current == dest) {
    System.out.println("This instance is already in " + current + " avatar.");
    return 0;
  }

  try {
    avatarnode.quiesceForFailover(noverification);
  } catch (RemoteException re) {
    // Returns normally only if the failover may still go ahead.
    handleRemoteException(re);
  }
  avatarnode.performFailover();
  updateZooKeeper(serviceName , instance);
  return 0;
}
/**
 * Clears the ZooKeeper information for the connected node and then asks
 * the node to shut down.
 *
 * @param serviceName service name forwarded to the ZooKeeper cleanup
 * @throws IOException if the node is not in the ACTIVE avatar (raised by
 *           clearZooKeeper) or if a remote call fails
 */
public void shutdownAvatar(String serviceName) throws IOException {
// ZooKeeper is cleared first; the shutdown request is only sent if that succeeds.
clearZooKeeper(serviceName);
avatarnode.shutdownAvatar();
}
/**
 * Sends a safemode action to the connected node and prints the resulting
 * safemode state.
 * <p>
 * Recognized actions are "leave", "get", "enter", "initqueues",
 * "prepfailover" and "wait". "wait" polls with the GET action every five
 * seconds until the node reports safemode is off. An unrecognized action
 * prints an error and the usage text, and returns without contacting the
 * node.
 *
 * @param safeModeAction the action name
 * @throws IOException if a remote call fails, or if the thread is
 *           interrupted while waiting. In the latter case the interrupt
 *           flag is restored and the InterruptedException is attached as
 *           the cause.
 */
public void processSafeMode(String safeModeAction) throws IOException {
  SafeModeAction action = null;
  boolean waitExitSafe = false;
  if (safeModeAction.equals("leave")) {
    action = SafeModeAction.SAFEMODE_LEAVE;
  } else if (safeModeAction.equals("get")) {
    action = SafeModeAction.SAFEMODE_GET;
  } else if (safeModeAction.equals("enter")) {
    action = SafeModeAction.SAFEMODE_ENTER;
  } else if (safeModeAction.equals("initqueues")) {
    action = SafeModeAction.SAFEMODE_INITQUEUES;
  } else if (safeModeAction.equals("prepfailover")) {
    action = SafeModeAction.SAFEMODE_PREP_FAILOVER;
  } else if (safeModeAction.equals("wait")) {
    action = SafeModeAction.SAFEMODE_GET;
    waitExitSafe = true;
  }
  if (action == null) {
    System.err.println("Invalid safemode action : " + safeModeAction);
    printUsage();
    return;
  }
  boolean inSafeMode = avatarnode.setSafeMode(action);
  //
  // If we are waiting for safemode to exit, then poll and
  // sleep till we are out of safemode.
  //
  while (inSafeMode && waitExitSafe) {
    System.out.println("Safe mode is " + (inSafeMode ? "ON" : "OFF")
        + ". Waiting for safemode to be OFF.");
    try {
      Thread.sleep(5000);
    } catch (java.lang.InterruptedException e) {
      // Restore the flag so code further up the stack can still observe
      // the interrupt, and keep the original exception as the cause.
      Thread.currentThread().interrupt();
      throw (IOException) new IOException("Wait Interrupted").initCause(e);
    }
    inSafeMode = avatarnode.setSafeMode(action);
  }
  System.out.println("Safe mode is " + (inSafeMode ? "ON" : "OFF"));
}
/**
 * Clears the ZooKeeper information for the connected node, which must
 * currently be in the ACTIVE avatar.
 *
 * @param serviceName service name forwarded to AvatarNodeZkUtil
 * @throws IOException if the node is not in the ACTIVE avatar, or if the
 *           remote call or the cleanup fails
 */
public void clearZooKeeper(String serviceName) throws IOException {
  if (avatarnode.getAvatar() != Avatar.ACTIVE) {
    throw new IOException(
        "Cannot clear zookeeper because the node  provided is not Primary");
  }
  AvatarNodeZkUtil.clearZookeeper(originalConf, conf, serviceName);
}
/**
 * Updates the information in ZooKeeper for the connected node, which must
 * currently be in the ACTIVE avatar. The work itself is delegated to
 * {@code AvatarNodeZkUtil.updateZooKeeper}.
 * <p>
 * Notes carried over from the original description of that update:
 * <ul>
 * <li>It is applied to every address of the NameNode this shell is run for
 * (fs.default.name, dfs.namenode.dn-address, dfs.namenode.http.address), if
 * they are present.</li>
 * <li>It also creates information for aliases, taken from the lists of
 * strings in fs.default.name.aliases, dfs.namenode.dn-address.aliases and
 * dfs.namenode.http.address.aliases.</li>
 * <li>Each address is transformed to the address of the zNode to be created
 * by substituting all . and : characters with /. A slash is also added in
 * front to make it a valid zNode address, so dfs.domain.com:9000 becomes
 * /dfs/domain/com/9000.</li>
 * <li>If any part of the path does not exist it is created
 * automatically.</li>
 * </ul>
 *
 * @param serviceName service name forwarded to AvatarNodeZkUtil
 * @param instance instance name forwarded to AvatarNodeZkUtil
 * @throws IOException if the node is not in the ACTIVE avatar, or if the
 *           remote call or the update fails
 */
public void updateZooKeeper(String serviceName, String instance) throws IOException {
  if (avatarnode.getAvatar() != Avatar.ACTIVE) {
    throw new IOException(
        "Cannot update ZooKeeper information to point to the AvatarNode in Standby mode");
  }
  AvatarNodeZkUtil.updateZooKeeper(originalConf, conf, true, serviceName, instance);
}
/**
 * Prints to standard error that the command needs a service argument, and
 * lists the nameservices found in the configuration.
 *
 * @param command name of the command, used as the start of the message
 * @param conf configuration the nameservices value is read from
 */
public static void printServiceErrorMessage(String command, Configuration conf) {
  final String nameservices = conf.get(FSConstants.DFS_FEDERATION_NAMESERVICES);
  final StringBuilder message = new StringBuilder();
  message.append(command);
  message.append(" must specify a service to operate on when ");
  message.append("dfs.federation.nameservices is set in the cluster config\n");
  message.append("Nameservices available: ");
  message.append(nameservices);
  System.err.println(message.toString());
}
/**
 * Checks if the service argument is specified in the command arguments.
 * <p>
 * The check only applies when nameservices are configured. In that case a
 * missing "-service" argument is reported on standard error.
 *
 * @param command name of the command, used in the error message
 * @param conf configuration the nameservices value is read from
 * @param argv the command line arguments
 * @return {@code true} if no nameservices are configured or "-service" is
 *         among the arguments; {@code false} otherwise
 */
public static boolean isServiceSpecified(String command, Configuration conf,
    String[] argv) {
  if (conf.get(FSConstants.DFS_FEDERATION_NAMESERVICES) == null) {
    // no nameservices configured, nothing to require
    return true;
  }
  for (String arg : argv) {
    if (arg.equals("-service")) {
      // found service specs
      return true;
    }
  }
  // no service specs
  printServiceErrorMessage(command, conf);
  return false;
}
/**
 * A {@link Watcher} whose callback does nothing, for callers that must
 * supply a watcher but have no use for the events.
 */
public static class DummyWatcher implements Watcher {
/**
 * Ignores the event.
 *
 * @param event the event delivered by ZooKeeper; unused
 */
@Override
public void process(WatchedEvent event) {
// This is a dummy watcher since we are only doing creates and deletes
}
}
/**
 * Command line entry point: builds an AvatarShell, runs it through
 * ToolRunner with the given arguments, closes it, and exits the JVM with
 * the command's exit code.
 * <p>
 * If the shell cannot be constructed, a message is printed and the JVM
 * exits with -1.
 *
 * @param argv the command line arguments
 * @throws Exception whatever the command or close() throws
 */
public static void main(String argv[]) throws Exception {
  DnsMonitorSecurityManager.setTheManager();

  AvatarShell shell = null;
  try {
    shell = new AvatarShell();
  } catch (RPC.VersionMismatch mismatch) {
    System.err.println("Version Mismatch between client and server"
        + "... command aborted.");
    System.exit(-1);
  } catch (IOException constructionFailure) {
    System.err.println("Bad connection to AvatarNode. command aborted.");
    System.exit(-1);
  }

  int exitCode = -1;
  try {
    exitCode = ToolRunner.run(shell, argv);
  } finally {
    // Release the RPC proxy even when the command throws.
    shell.close();
  }
  System.exit(exitCode);
}
}