/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.HashSet;
import java.util.Comparator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.net.NodeBase;
import org.apache.hadoop.security.UserGroupInformation;
public class DFSUtil {
/**
 * Comparator for ordering {@link DatanodeInfo} entries by decommissioned
 * state. Two nodes with the same state compare as equal; a decommissioned
 * node compares greater than one that is not, so sorting with this
 * comparator moves decommissioned nodes to the end.
 */
public static final Comparator<DatanodeInfo> DECOM_COMPARATOR =
    new Comparator<DatanodeInfo>() {
      @Override
      public int compare(DatanodeInfo left, DatanodeInfo right) {
        final boolean leftDecommissioned = left.isDecommissioned();
        final boolean rightDecommissioned = right.isDecommissioned();
        if (leftDecommissioned == rightDecommissioned) {
          return 0;
        }
        // States differ: the decommissioned side sorts last.
        return leftDecommissioned ? 1 : -1;
      }
    };
/**
 * Joins path components into a single path string. Each component is decoded
 * as UTF-8 and consecutive components are separated by
 * {@link Path#SEPARATOR_CHAR}.
 *
 * @param pathComponents the path components, in order
 * @return the empty string when there are no components;
 *         {@link Path#SEPARATOR} when the only component is {@code null} or
 *         empty (the form {@link #bytes2byteArray(byte[], int, byte)} returns
 *         for an empty or separator-only input); otherwise the joined path.
 *         {@code null} is returned only if UTF-8 is unsupported and
 *         assertions are disabled.
 */
public static String byteArray2String(byte[][] pathComponents) {
  if (pathComponents.length == 0) {
    return "";
  }
  // bytes2byteArray yields a single null component for the root path, so the
  // null check must come before the length is read to avoid an NPE.
  if (pathComponents.length == 1
      && (pathComponents[0] == null || pathComponents[0].length == 0)) {
    return Path.SEPARATOR;
  }
  try {
    StringBuilder result = new StringBuilder();
    for (int i = 0; i < pathComponents.length; i++) {
      result.append(new String(pathComponents[i], "UTF-8"));
      // Separator goes between components only, never after the last one.
      if (i < pathComponents.length - 1) {
        result.append(Path.SEPARATOR_CHAR);
      }
    }
    return result.toString();
  } catch (UnsupportedEncodingException ex) {
    assert false : "UTF8 encoding is not supported ";
  }
  return null;
}
/**
 * Splits a whole byte array into components on a separator byte. This is a
 * convenience form of {@link #bytes2byteArray(byte[], int, byte)} that
 * considers every byte of the array.
 *
 * @param bytes
 *          the array of bytes to split
 * @param separator
 *          the delimiting byte
 * @return the components, as produced by the three-argument overload
 */
public static byte[][] bytes2byteArray(byte[] bytes, byte separator) {
  final int wholeLength = bytes.length;
  return bytes2byteArray(bytes, wholeLength, separator);
}
/**
 * Decodes a byte array into a string using the UTF8 charset.
 *
 * @param bytes the encoded bytes
 * @return the decoded string; {@code null} only if the charset is
 *         unsupported and assertions are disabled
 */
public static String bytes2String(byte[] bytes) {
  String decoded = null;
  try {
    decoded = new String(bytes, "UTF8");
  } catch (UnsupportedEncodingException e) {
    assert false : "UTF8 encoding is not supported ";
  }
  return decoded;
}
/**
 * Encodes a string into a byte array using the UTF8 charset.
 *
 * @param str the string to encode
 * @return the encoded bytes; {@code null} only if the charset is
 *         unsupported and assertions are disabled
 */
public static byte[] string2Bytes(String str) {
  byte[] encoded = null;
  try {
    encoded = str.getBytes("UTF8");
  } catch (UnsupportedEncodingException e) {
    assert false : "UTF8 encoding is not supported ";
  }
  return encoded;
}
/**
 * Splits the first {@code len} bytes of {@code bytes} into components on a
 * separator byte.
 * <p>
 * Trailing separators are ignored and never produce empty components. A
 * leading separator, or two adjacent separators in the middle, does produce
 * an empty component. When {@code len} is zero, or the examined bytes consist
 * only of separators, the result is an array holding a single {@code null}.
 *
 * @param bytes
 *          the byte array to split
 * @param len
 *          the number of leading bytes of {@code bytes} to examine
 * @param separator
 *          the delimiting byte
 * @return the components in order of appearance
 */
public static byte[][] bytes2byteArray(byte[] bytes, int len, byte separator) {
  assert len <= bytes.length;
  if (len == 0) {
    return new byte[][] { null };
  }
  // Count every separator within the examined range.
  int separatorCount = 0;
  for (int pos = 0; pos < len; pos++) {
    if (bytes[pos] == separator) {
      separatorCount++;
    }
  }
  // Separators at the very end do not open a new component; discount them.
  for (int tail = len - 1; tail > -1 && bytes[tail] == separator; tail--) {
    separatorCount--;
  }
  // Nothing but separators: report the single-null form.
  if (separatorCount == 0 && bytes[0] == separator) {
    return new byte[][] { null };
  }
  final int componentCount = separatorCount + 1;
  final byte[][] components = new byte[componentCount][];
  int begin = 0;
  for (int c = 0; c < componentCount; c++) {
    // Advance to the next separator (or the end of the examined range).
    int end = begin;
    while (end < len && bytes[end] != separator) {
      end++;
    }
    final int size = end - begin;
    components[c] = new byte[size];
    System.arraycopy(bytes, begin, components[c], 0, size);
    // Skip over the separator itself.
    begin = end + 1;
  }
  return components;
}
/**
 * Converts a {@link LocatedBlocks} into an array of {@link BlockLocation},
 * one entry per located block. For each block the datanode names, host names
 * and {@link NodeBase} string forms (built from the name and network
 * location) are collected, together with the block's start offset, size and
 * corrupt flag.
 *
 * @param blocks a LocatedBlocks, may be {@code null}
 * @return an array of BlockLocations; empty when {@code blocks} is
 *         {@code null} or reports no located blocks
 */
public static BlockLocation[] locatedBlocks2Locations(LocatedBlocks blocks) {
  if (blocks == null) {
    return new BlockLocation[0];
  }
  final int blockCount = blocks.locatedBlockCount();
  final BlockLocation[] converted = new BlockLocation[blockCount];
  if (blockCount == 0) {
    return converted;
  }
  int slot = 0;
  for (LocatedBlock located : blocks.getLocatedBlocks()) {
    assert slot < blockCount : "Incorrect index";
    final DatanodeInfo[] replicas = located.getLocations();
    final int replicaCount = replicas.length;
    final String[] hosts = new String[replicaCount];
    final String[] names = new String[replicaCount];
    final String[] racks = new String[replicaCount];
    for (int r = 0; r < replicaCount; r++) {
      final DatanodeInfo replica = replicas[r];
      hosts[r] = replica.getHostName();
      names[r] = replica.getName();
      // The rack entry is the NodeBase string for this name and location.
      final NodeBase topologyNode =
          new NodeBase(names[r], replica.getNetworkLocation());
      racks[r] = topologyNode.toString();
    }
    converted[slot] = new BlockLocation(names, hosts, racks,
        located.getStartOffset(), located.getBlockSize(), located.isCorrupt());
    slot++;
  }
  return converted;
}
/**
 * Lists corrupt files starting from the path {@code "/"}.
 *
 * @param dfs the file system to query
 * @return paths of all corrupt files in dfs
 * @throws IOException if the two-argument overload fails
 */
public static String[] getCorruptFiles(DistributedFileSystem dfs)
    throws IOException {
  final String[] corrupt = getCorruptFiles(dfs, "/");
  return corrupt;
}
/**
 * Lists corrupt files under a path. The paths reported by
 * {@code dfs.listCorruptFileBlocks} are reduced to their URI path component
 * and de-duplicated; the order of the returned array is that of a
 * {@link HashSet} and is therefore unspecified.
 *
 * @param dfs the file system to query
 * @param path the path under which to look
 * @return the distinct corrupt file paths in dfs under {@code path}
 * @throws IOException if listing or iterating the corrupt file blocks fails
 */
public static String[] getCorruptFiles(DistributedFileSystem dfs, String path)
    throws IOException {
  final Set<String> distinctPaths = new HashSet<String>();
  final RemoteIterator<Path> corruptIter =
      dfs.listCorruptFileBlocks(new Path(path));
  while (corruptIter.hasNext()) {
    final Path corrupt = corruptIter.next();
    distinctPaths.add(corrupt.toUri().getPath());
  }
  final String[] asArray = new String[distinctPaths.size()];
  return distinctPaths.toArray(asArray);
}
/**
 * Sentinel byte count, {@code Long.MAX_VALUE - 1}, used to flag a block as
 * deleted. It is written by {@code markAsDeleted} and compared against by
 * {@code isDeleted}.
 */
public static final long DELETED = Long.MAX_VALUE - 1;
/**
 * Tells whether a block has been flagged as deleted.
 *
 * @param block the block to inspect
 * @return {@code true} when the block's byte count equals {@link #DELETED}
 */
public static boolean isDeleted(Block block) {
  final long numBytes = block.getNumBytes();
  return DELETED == numBytes;
}
/**
 * Flags a block as deleted by overwriting its byte count with
 * {@link #DELETED}.
 *
 * @param block the block to flag; its byte count is modified
 */
public static void markAsDeleted(Block block) {
  final long deletedMarker = DELETED;
  block.setNumBytes(deletedMarker);
}
/**
 * Reads the nameservice Ids from the configuration, taken from the
 * {@code FSConstants.DFS_FEDERATION_NAMESERVICES} key.
 *
 * @param conf configuration
 * @return collection of nameservice Ids, as returned by
 *         {@code conf.getStringCollection}
 */
public static Collection<String> getNameServiceIds(Configuration conf) {
  final Collection<String> ids =
      conf.getStringCollection(FSConstants.DFS_FEDERATION_NAMESERVICES);
  return ids;
}
/**
 * Looks up the first configured value among a list of keys, tried in the
 * order given. When {@code keySuffix} is non-null, each key is looked up as
 * {@code key + "." + keySuffix}.
 *
 * @param defaultValue value to return when none of the keys is configured
 * @param keySuffix suffix appended (after a dot) to every key, or
 *        {@code null} to use the keys unchanged
 * @param conf Configuration
 * @param keys list of keys in the order of preference
 * @return the first non-null value found, otherwise {@code defaultValue}
 */
private static String getConfValue(String defaultValue, String keySuffix,
    Configuration conf, String... keys) {
  for (String key : keys) {
    final String lookupKey =
        (keySuffix == null) ? key : key + "." + keySuffix;
    final String found = conf.get(lookupKey);
    if (found != null) {
      return found;
    }
  }
  return defaultValue;
}
/**
 * Resolves socket addresses for a set of keys, using the nameservice Ids
 * found in the configuration. Delegates to the overload that takes an
 * explicit collection of service ids.
 *
 * @param conf configuration
 * @param defaultAddress default address passed on to the delegate
 * @param keys Set of keys to look for in the order of preference
 * @return list of InetSocketAddress as returned by the delegate
 */
public static List<InetSocketAddress> getAddresses(Configuration conf,
    String defaultAddress, String... keys) {
  final Collection<String> configuredIds = getNameServiceIds(conf);
  return getAddresses(conf, configuredIds, defaultAddress, keys);
}
/**
 * Applies the service id given on the command line (if any) to the
 * configuration. When a service id is present it is validated with
 * {@code NameNode.validateServiceName}, the service-specific values of
 * {@code NameNode.NAMESERVICE_SPECIFIC_KEYS} are copied into their generic
 * keys, and {@code NameNode.setupDefaultURI} is invoked.
 *
 * @param argv argument list
 * @param conf configuration; modified when a service id is supplied
 * @return argument list without service name argument
 * @throws IllegalArgumentException if the service id fails validation
 */
public static String[] setGenericConf(String[] argv, Configuration conf) {
  // One-element holder through which getServiceName reports the id.
  final String[] idHolder = new String[1];
  idHolder[0] = "";
  final String[] remainingArgs = getServiceName(argv, idHolder);
  if (idHolder[0].equals("")) {
    return remainingArgs;
  }
  if (!NameNode.validateServiceName(conf, idHolder[0])) {
    throw new IllegalArgumentException("Service Id doesn't match the config");
  }
  setGenericConf(conf, idHolder[0], NameNode.NAMESERVICE_SPECIFIC_KEYS);
  NameNode.setupDefaultURI(conf);
  return remainingArgs;
}
/**
 * Extracts {@code -service <id>} pairs from an argument list. Every
 * occurrence of {@code -service} and the argument following it is removed;
 * the following argument is stored in {@code serviceId[0]} (the last
 * occurrence wins). All other arguments are kept in their original order.
 *
 * @param argv argument list
 * @param serviceId one-element output holder; element 0 receives the service
 *        id when one is given in argv and is left untouched otherwise
 * @return argument list without service name argument
 * @throws IllegalArgumentException if {@code -service} is the last argument
 */
public static String[] getServiceName(String[] argv, String[] serviceId)
    throws IllegalArgumentException {
  final List<String> kept = new ArrayList<String>();
  int pos = 0;
  while (pos < argv.length) {
    final String arg = argv[pos];
    if (!"-service".equals(arg)) {
      kept.add(arg);
      pos++;
      continue;
    }
    if (pos + 1 == argv.length) {
      throw new IllegalArgumentException("Doesn't have service id");
    }
    // Consume both the flag and its value.
    serviceId[0] = argv[pos + 1];
    pos += 2;
  }
  return kept.toArray(new String[kept.size()]);
}
/**
 * Returns the socket addresses configured for a given set of services.
 * <p>
 * If the configuration declares no nameservice Ids (single-namenode setup),
 * the un-suffixed {@code keys} are looked up, falling back to
 * {@code defaultAddress}, and a one-element list is returned. Otherwise each
 * requested service id is looked up as {@code key.serviceId}, with no
 * default.
 *
 * @param conf configuration
 * @param serviceIds service ids to resolve; when {@code null} or empty, all
 *        nameservice Ids declared in the configuration are used
 * @param defaultAddress default address, used only in the single-namenode
 *        case
 * @param keys set of keys, in order of preference
 * @return list of InetSocketAddress, one per resolved service, or
 *         {@code null} if any required address is not configured
 */
public static List<InetSocketAddress> getAddresses(Configuration conf,
    Collection<String> serviceIds, String defaultAddress, String... keys) {
  Collection<String> configuredIds = getNameServiceIds(conf);
  List<InetSocketAddress> isas = new ArrayList<InetSocketAddress>();
  // Configuration with a single namenode
  if (configuredIds == null || configuredIds.isEmpty()) {
    String address = getConfValue(defaultAddress, null, conf, keys);
    if (address == null) {
      return null;
    }
    isas.add(NetUtils.createSocketAddr(address));
    return isas;
  }
  // Honour the caller's selection of services. Previously this argument was
  // ignored and every configured nameservice was always returned; keep that
  // as the fallback when the caller supplies nothing.
  Collection<String> requestedIds =
      (serviceIds == null || serviceIds.isEmpty()) ? configuredIds : serviceIds;
  for (String nameserviceId : requestedIds) {
    String address = getConfValue(null, nameserviceId, conf, keys);
    if (address == null) {
      return null;
    }
    isas.add(NetUtils.createSocketAddr(address));
  }
  return isas;
}
/**
 * Returns list of InetSocketAddresses corresponding to namenodes from the
 * configuration, for use by clients that need the namenode addresses to
 * talk to.
 *
 * The addresses are read from
 * {@code FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY}. The host:port string of
 * {@code NameNode.getAddress(conf)} serves as the fall-back default when it
 * can be determined.
 *
 * @param conf configuration
 * @return list of InetSocketAddress
 * @throws IOException if no namenode address could be resolved
 */
public static List<InetSocketAddress> getClientRpcAddresses(
    Configuration conf) throws IOException {
  // Use default address as fall back; stays null when it cannot be derived.
  String fallbackAddress = null;
  try {
    fallbackAddress = NameNode.getHostPortString(NameNode.getAddress(conf));
  } catch (IllegalArgumentException ignored) {
    // No usable default address: proceed without one.
  }
  final List<InetSocketAddress> resolved = getAddresses(conf, fallbackAddress,
      FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY);
  if (resolved != null) {
    return resolved;
  }
  throw new IOException("Incorrect configuration: namenode address "
      + FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY
      + " is not configured.");
}
/**
 * Returns list of InetSocketAddresses corresponding to namenodes from the
 * configuration, for use by datanodes that need the namenode addresses to
 * talk to.
 *
 * {@code NameNode.DATANODE_PROTOCOL_ADDRESS} is preferred; if it is not
 * configured, {@code FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY} is used. The
 * host:port string of {@code NameNode.getAddress(conf)} serves as the
 * fall-back default when it can be determined.
 *
 * @param conf configuration
 * @return list of InetSocketAddress
 * @throws IOException if no namenode address could be resolved
 */
public static List<InetSocketAddress> getNNServiceRpcAddresses(
    Configuration conf) throws IOException {
  // Use default address as fall back; stays null when it cannot be derived.
  String fallbackAddress = null;
  try {
    fallbackAddress = NameNode.getHostPortString(NameNode.getAddress(conf));
  } catch (IllegalArgumentException ignored) {
    // No usable default address: proceed without one.
  }
  final List<InetSocketAddress> resolved = getAddresses(conf, fallbackAddress,
      NameNode.DATANODE_PROTOCOL_ADDRESS,
      FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY);
  if (resolved != null) {
    return resolved;
  }
  throw new IOException("Incorrect configuration: namenode address "
      + NameNode.DATANODE_PROTOCOL_ADDRESS + " or "
      + FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY
      + " is not configured.");
}
/**
 * Finds the nameservice ID whose configured address matches the given
 * InetSocketAddress, by a reverse lookup over every configured nameservice
 * and every supplied key; the first match wins.
 * If null is returned, client should try {@link #isDefaultNamenodeAddress}
 * to check pre-Federated configurations.
 * Since the process of resolving URIs to Addresses is slightly expensive,
 * this utility method should not be used in performance-critical routines.
 *
 * @param conf - configuration
 * @param address - InetSocketAddress for configured communication with NN.
 *        Configured addresses are typically given as URIs, but we may have
 *        to compare against a URI typed in by a human, or the server name
 *        may be aliased, so unambiguous InetSocketAddresses are compared
 *        instead of URI substrings.
 * @param keys - list of configured communication parameters that should
 *        be checked for matches. For example, to compare against RPC
 *        addresses, provide the list DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
 *        DFS_NAMENODE_RPC_ADDRESS_KEY. Use the generic parameter keys,
 *        not the NameServiceId-suffixed keys.
 * @return nameserviceId, or null if no nameservices are configured or no
 *         match is found
 */
public static String getNameServiceIdFromAddress(Configuration conf,
    InetSocketAddress address, String... keys) {
  final Collection<String> configuredIds = getNameServiceIds(conf);
  // Configuration with a single namenode and no nameserviceId:
  // client should try isDefaultNamenodeAddress instead.
  if (configuredIds == null || configuredIds.isEmpty()) {
    return null;
  }
  for (String candidateId : configuredIds) {
    for (String genericKey : keys) {
      final String configured =
          conf.get(getNameServiceIdKey(genericKey, candidateId));
      if (configured == null) {
        continue;
      }
      final InetSocketAddress candidate =
          NetUtils.createSocketAddr(configured);
      if (address.equals(candidate)) {
        return candidateId;
      }
    }
  }
  // Didn't find a match; client should try isDefaultNamenodeAddress instead.
  return null;
}
/**
 * Returns the server http address from the configuration.
 * <p>
 * When a namenode address is given and it maps to a nameservice ID (matched
 * on {@code FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY}), the
 * nameservice-specific value of
 * {@code FSConstants.DFS_NAMENODE_HTTP_ADDRESS_KEY} is returned if set.
 * Otherwise the value of {@code "dfs.http.address"} is returned, defaulting
 * to the result of {@code NetUtils.getServerAddress}.
 *
 * @param namenode - namenode address, may be {@code null}
 * @param conf - configuration
 * @return server http address
 */
public static String getInfoServer(
    InetSocketAddress namenode, Configuration conf) {
  final String defaultHttpAddress =
      NetUtils.getServerAddress(conf, "dfs.info.bindAddress",
          "dfs.info.port", "dfs.http.address");
  if (namenode != null) {
    // if non-default namenode, try reverse look up
    // the nameServiceID if it is available
    final String nameServiceId = DFSUtil.getNameServiceIdFromAddress(
        conf, namenode,
        FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY);
    if (nameServiceId != null) {
      final String federatedAddress = conf.get(DFSUtil.getNameServiceIdKey(
          FSConstants.DFS_NAMENODE_HTTP_ADDRESS_KEY, nameServiceId));
      if (federatedAddress != null) {
        return federatedAddress;
      }
    }
  }
  // Use non-federation style configuration
  return conf.get("dfs.http.address", defaultHttpAddress);
}
/**
 * Determines whether the given InetSocketAddress is a configured
 * communication channel for the "default" namenode, by comparing it with
 * the address configured under each of the supplied generic keys.
 * Since the process of resolving URIs to Addresses is slightly expensive,
 * this utility method should not be used in performance-critical routines.
 *
 * @param conf - configuration
 * @param address - InetSocketAddress for configured communication with NN.
 *        Configured addresses are typically given as URIs, but we may have
 *        to compare against a URI typed in by a human, or the server name
 *        may be aliased, so unambiguous InetSocketAddresses are compared
 *        instead of URI substrings.
 * @param keys - list of configured communication parameters that should
 *        be checked for matches. For example, to compare against RPC
 *        addresses, provide the list DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
 *        DFS_NAMENODE_RPC_ADDRESS_KEY
 * @return - true if the address matches the value of any of the keys
 */
public static boolean isDefaultNamenodeAddress(Configuration conf,
    InetSocketAddress address, String... keys) {
  for (String genericKey : keys) {
    final String configured = conf.get(genericKey);
    if (configured == null) {
      continue;
    }
    final InetSocketAddress candidate = NetUtils.createSocketAddr(configured);
    if (address.equals(candidate)) {
      return true;
    }
  }
  return false;
}
/**
 * Builds the nameservice-specific form of a generic configuration key by
 * appending a dot and the nameservice Id.
 *
 * @param key the generic key
 * @param nameserviceId the nameservice Id to append
 * @return key specific to a nameserviceId from a generic key
 */
public static String getNameServiceIdKey(String key, String nameserviceId) {
  final StringBuilder specificKey = new StringBuilder();
  specificKey.append(key).append(".").append(nameserviceId);
  return specificKey.toString();
}
/**
 * Copies node specific settings into their generic configuration keys. For
 * each key, the value of "key.nameserviceId" is looked up and, if found,
 * set under the generic key in the conf. Keys without a specific value are
 * left untouched. Note that this only modifies the runtime conf.
 *
 * @param conf
 *          Configuration object to lookup specific key and to set the value
 *          to the key passed. Note the conf object is modified.
 * @param nameserviceId
 *          nameservice Id to construct the node specific key.
 * @param keys
 *          The keys for which node specific values are looked up
 */
public static void setGenericConf(Configuration conf,
    String nameserviceId, String... keys) {
  for (String genericKey : keys) {
    final String specificKey = getNameServiceIdKey(genericKey, nameserviceId);
    final String specificValue = conf.get(specificKey);
    if (specificValue == null) {
      continue;
    }
    conf.set(genericKey, specificValue);
  }
}
/**
 * Parses an address string into an {@link InetSocketAddress}. The text up
 * to the first colon is the host and the remainder is parsed as the port;
 * without a colon the whole string is the host and the port is 0.
 *
 * @param address address of format host:port
 * @return InetSocketAddress for the address
 * @throws NumberFormatException if the text after the first colon is not a
 *         valid integer
 */
public static InetSocketAddress getSocketAddress(String address) {
  final int colonAt = address.indexOf(':');
  if (colonAt >= 0) {
    final String host = address.substring(0, colonAt);
    final int port = Integer.parseInt(address.substring(colonAt + 1));
    return new InetSocketAddress(host, port);
  }
  return new InetSocketAddress(address, 0);
}
}