/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.corona;
import java.io.IOException;
import java.net.InetSocketAddress;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.mapred.ClusterManagerSafeModeProtocol;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TFramedTransport;
import org.apache.thrift.transport.TSocket;
/**
 * A tool to perform administrative actions on a corona cluster.
 *
 * <p>The first command line argument selects the action (see
 * {@link #run(String[])}); most actions are forwarded to the Cluster Manager
 * over a framed Thrift transport whose address is read from the
 * {@link CoronaConf}.
 */
public class CoronaAdmin extends Configured implements Tool {
  static {
    // Make the MapReduce default and site resources part of every
    // Configuration created by this tool.
    Configuration.addDefaultResource("mapred-default.xml");
    Configuration.addDefaultResource("mapred-site.xml");
  }

  /**
   * Displays format of commands.
   * @param cmd The command that is being executed.
   */
  private static void printUsage(String cmd) {
    if ("-refreshNodes".equals(cmd)) {
      System.err.println("Usage: java CoronaAdmin [-refreshNodes]");
    } else {
      System.err.println("Usage: java CoronaAdmin");
      System.err.println(" [-refreshNodes]");
      System.err.println(" [-setSafeMode]");
      System.err.println(" [-unsetSafeMode]");
      System.err.println(" [-persistState]");
      System.err.println(" [-forceSetSafeModeOnPJT]");
      System.err.println(" [-forceUnsetSafeModeOnPJT]");
      System.err.println(" [-restartTaskTracker [batchSize]]");
      System.err.println(" [-forceRestartTaskTracker [batchSize]]");
      System.err.println(" [-help [cmd]]");
      System.err.println();
      ToolRunner.printGenericCommandUsage(System.err);
    }
  }

  /**
   * Builds (but does not open) a framed Thrift transport pointing at the
   * Cluster Manager address found in the given configuration.
   *
   * @param conf The corona configuration to read the address from.
   * @return An unopened transport; the caller must open and close it.
   */
  private static TFramedTransport createTransport(CoronaConf conf) {
    InetSocketAddress address =
        NetUtils.createSocketAddr(conf.getClusterManagerAddress());
    return new TFramedTransport(
        new TSocket(address.getHostName(), address.getPort()));
  }

  /**
   * Returns the command name used as a prefix in error messages: the command
   * without its leading dash, or the command unchanged if it has none.
   *
   * @param cmd The raw command as typed by the user (may be empty).
   * @return The command name for display.
   */
  private static String commandName(String cmd) {
    return cmd.startsWith("-") ? cmd.substring(1) : cmd;
  }

  /**
   * Reads the optional batch size argument of the restart commands.
   *
   * @param args The command line arguments.
   * @param index The position at which the batch size would appear.
   * @return The parsed batch size, or 0 when the argument is absent.
   * @throws NumberFormatException if the argument is present but is not an
   *         integer.
   */
  private static int parseBatchSize(String[] args, int index) {
    return (index < args.length) ? Integer.parseInt(args[index]) : 0;
  }

  /**
   * Command to ask the Cluster Manager to reread the hosts and excluded hosts
   * file.
   *
   * @exception IOException if the Thrift call fails.
   * @return 0 if the request was accepted, -1 if the Cluster Manager rejected
   *         it because it is in Safe Mode.
   */
  private int refreshNodes() throws IOException {
    // Get the current configuration
    CoronaConf conf = new CoronaConf(getConf());
    TFramedTransport transport = createTransport(conf);
    ClusterManagerService.Client client = new ClusterManagerService.Client(
        new TBinaryProtocol(transport));
    try {
      transport.open();
      client.refreshNodes();
      return 0;
    } catch (SafeModeException e) {
      System.err.println("ClusterManager is in Safe Mode");
      return -1;
    } catch (TException e) {
      throw new IOException(e);
    } finally {
      // Always release the socket, whether or not the call succeeded.
      transport.close();
    }
  }

  /**
   * Command to ask the Cluster Manager to restart all the task tracker
   *
   * @param forceFlag if CM shall ignore all previous restart requests
   * @param batchSize the restart batch size to request; when it is not
   *        positive the value of {@code conf.getCoronaNodeRestartBatch()} is
   *        used instead
   * @exception IOException if the Thrift call fails.
   * @return 0 if the request was accepted, -1 if the Cluster Manager rejected
   *         it because it is in Safe Mode.
   */
  private int restartTaskTracker(boolean forceFlag, int batchSize)
      throws IOException {
    // Get the current configuration
    CoronaConf conf = new CoronaConf(getConf());
    TFramedTransport transport = createTransport(conf);
    ClusterManagerService.Client client = new ClusterManagerService.Client(
        new TBinaryProtocol(transport));
    int restartBatch = (batchSize > 0) ? batchSize :
        conf.getCoronaNodeRestartBatch();
    try {
      transport.open();
      RestartNodesArgs restartNodeArgs = new RestartNodesArgs(
          forceFlag, restartBatch);
      client.restartNodes(restartNodeArgs);
      return 0;
    } catch (SafeModeException e) {
      System.err.println("ClusterManager is in Safe Mode");
      return -1;
    } catch (TException e) {
      throw new IOException(e);
    } finally {
      // Always release the socket, whether or not the call succeeded.
      transport.close();
    }
  }

  /**
   * Turns on the Safe Mode if safeMode is true. Turns off the Safe Mode if
   * safeMode is false.
   * @param safeMode Is true if we want the Safe Mode to be on. false
   * otherwise.
   * @return 0 if successful, -1 if the Cluster Manager reported that the flag
   *         could not be set.
   * @throws IOException if the Thrift call fails.
   */
  private int setSafeMode(boolean safeMode) throws IOException {
    // Get the current configuration
    CoronaConf conf = new CoronaConf(getConf());
    TFramedTransport transport = createTransport(conf);
    ClusterManagerService.Client client = new ClusterManagerService.Client(
        new TBinaryProtocol(transport));
    try {
      transport.open();
      if (client.setSafeMode(safeMode)) {
        System.out.println("The safeMode is: " +
            (safeMode ? "ON" : "OFF"));
        return 0;
      }
      System.err.println("Could not set the safeMode flag");
      return -1;
    } catch (TException e) {
      throw new IOException(e);
    } finally {
      // Always release the socket, whether or not the call succeeded.
      transport.close();
    }
  }

  /**
   * Persists the state of the ClusterManager
   * @return 0 if successful, -1 if the Cluster Manager reported a failure.
   * @throws IOException if the Thrift call fails.
   */
  private int persistState() throws IOException {
    // Get the current configuration
    CoronaConf conf = new CoronaConf(getConf());
    TFramedTransport transport = createTransport(conf);
    ClusterManagerService.Client client = new ClusterManagerService.Client(
        new TBinaryProtocol(transport));
    try {
      transport.open();
      if (!client.persistState()) {
        System.err.println("Persisting Cluster Manager state failed. ");
        return -1;
      }
      return 0;
    } catch (TException e) {
      throw new IOException(e);
    } finally {
      // Always release the socket, whether or not the call succeeded.
      transport.close();
    }
  }

  /**
   * Forcefully set the Safe Mode on the PJT
   * @param safeMode The value of the Safe Mode flag to send to the PJT.
   * @return 0 if successful, -1 if the call to the PJT failed.
   * @throws IOException declared for callers; failures talking to the PJT are
   *         reported on stderr rather than propagated.
   */
  private int forceSetSafeModeOnPJT(boolean safeMode) throws IOException {
    CoronaConf conf = new CoronaConf(getConf());
    try {
      ClusterManagerAvailabilityChecker.getPJTClient(conf).
          setClusterManagerSafeModeFlag(safeMode);
      return 0;
    } catch (IOException e) {
      System.err.println("Could not set the Safe Mode flag on the PJT: " + e);
    } catch (TException e) {
      System.err.println("Could not set the Safe Mode flag on the PJT: " + e);
    }
    return -1;
  }

  /**
   * Dispatches on the first argument and executes the matching admin command.
   *
   * @param args The command line arguments; {@code args[0]} is the command
   *        and {@code args[1]}, when present, is its optional argument.
   * @return 0 if the command succeeded, -1 on a usage error or when the
   *         command reported a failure.
   * @throws Exception if a command fails with an error that is not handled
   *         here (for example an {@link IOException} wrapping a Thrift
   *         failure).
   */
  @Override
  public int run(String[] args) throws Exception {
    if (args.length < 1) {
      printUsage("");
      return -1;
    }

    int i = 0;
    String cmd = args[i++];
    int exitCode = 0;
    try {
      if ("-refreshNodes".equals(cmd)) {
        exitCode = refreshNodes();
      } else if ("-help".equals(cmd)) {
        // The command to describe is optional; fall back to the full usage.
        printUsage(i < args.length ? args[i] : "");
      } else if ("-setSafeMode".equals(cmd)) {
        exitCode = setSafeMode(true);
      } else if ("-unsetSafeMode".equals(cmd)) {
        exitCode = setSafeMode(false);
      } else if ("-persistState".equals(cmd)) {
        exitCode = persistState();
      } else if ("-forceSetSafeModeOnPJT".equals(cmd)) {
        exitCode = forceSetSafeModeOnPJT(true);
      } else if ("-forceUnsetSafeModeOnPJT".equals(cmd)) {
        exitCode = forceSetSafeModeOnPJT(false);
      } else if ("-restartTaskTracker".equals(cmd)) {
        exitCode = restartTaskTracker(false, parseBatchSize(args, i));
      } else if ("-forceRestartTaskTracker".equals(cmd)) {
        exitCode = restartTaskTracker(true, parseBatchSize(args, i));
      } else {
        exitCode = -1;
        System.err.println(commandName(cmd) + ": Unknown command");
        printUsage("");
      }
    } catch (NumberFormatException e) {
      // Must precede IllegalArgumentException, which it extends.
      exitCode = -1;
      System.err.println(commandName(cmd) + ": " + e);
      printUsage(cmd);
    } catch (IllegalArgumentException arge) {
      exitCode = -1;
      System.err.println(commandName(cmd) + ": " + arge);
      printUsage(cmd);
    } catch (RemoteException e) {
      exitCode = -1;
      // Only the first line of the remote message is shown.
      String message = e.getLocalizedMessage();
      if (message == null) {
        message = "";
      }
      int newline = message.indexOf('\n');
      String firstLine =
          (newline < 0) ? message : message.substring(0, newline);
      System.err.println(commandName(cmd) + ": " + firstLine);
    }
    return exitCode;
  }

  /**
   * Entry point for the tool.
   *
   * @param args
   *          The command line arguments.
   * @throws Exception if {@link ToolRunner#run(Tool, String[])} throws.
   */
  public static void main(String[] args) throws Exception {
    int result = ToolRunner.run(new CoronaAdmin(), args);
    System.exit(result);
  }
}