/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.OutputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.common.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.common.UpgradeManager;
import org.apache.hadoop.hdfs.server.common.Util;
import org.apache.hadoop.hdfs.server.namenode.JournalStream.JournalType;
import org.apache.hadoop.hdfs.server.namenode.ValidateNamespaceDirPolicy.NNStorageLocation;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.util.AtomicFileOutputStream;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
/**
* NNStorage is responsible for management of the StorageDirectories used by
* the NameNode.
*/
public class NNStorage extends Storage implements Closeable, StorageErrorReporter {
public static final Log LOG = LogFactory.getLog(NNStorage.class.getName());
public static final String MESSAGE_DIGEST_PROPERTY = "imageMD5Digest";
public static final String LOCAL_URI_SCHEME = "file";
/**
 * Namenode storage directory, which stores additional information
 * about mount point, if the directory is remote, shared, etc.
 */
public static enum StorageLocationType {
  // Assigned from NNStorageLocation.type (see NNStorageDirectory ctor).
  // LOCAL is also the default assumed by getType() for plain
  // StorageDirectory instances that carry no location info.
  LOCAL,
  REMOTE,
  SHARED
}
/**
 * A StorageDirectory that additionally remembers where it physically
 * resides (local, remote or shared). The location may be unknown, in
 * which case {@link #type} is null.
 */
public class NNStorageDirectory extends StorageDirectory {
  /** Physical location of this directory, or null if not known. */
  final StorageLocationType type;

  public NNStorageDirectory(File dir, StorageDirType dirType,
      NNStorageLocation location) {
    super(dir, dirType, true);
    this.type = (location == null) ? null : location.type;
  }
}
/**
 * The filenames used for storing the images and edit logs.
 */
public enum NameNodeFile {
  IMAGE ("fsimage"),
  TIME ("fstime"), // from "old" pre-HDFS-1073 format
  SEEN_TXID ("seen_txid"),
  EDITS ("edits"),
  IMAGE_NEW ("fsimage.ckpt"),
  EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
  EDITS_INPROGRESS ("edits_inprogress");

  /** Base file name used on disk for this kind of file. */
  private final String fileName;

  private NameNodeFile(String name) {
    this.fileName = name;
  }

  /** @return the on-disk base name for this kind of file. */
  public String getName() {
    return fileName;
  }
}
/**
 * Implementation of StorageDirType specific to namenode storage.
 * A storage directory can hold only the fsimage (IMAGE), only the
 * edit log (EDITS), or both (IMAGE_AND_EDITS).
 */
public static enum NameNodeDirType implements StorageDirType {
  UNDEFINED,
  IMAGE,
  EDITS,
  IMAGE_AND_EDITS;

  public StorageDirType getStorageDirType() {
    return this;
  }

  /**
   * @return true if this directory type can serve requests for
   * {@code type}; IMAGE_AND_EDITS acts as both IMAGE and EDITS.
   */
  public boolean isOfType(StorageDirType type) {
    return this == type
        || (this == IMAGE_AND_EDITS && (type == IMAGE || type == EDITS));
  }
}
private UpgradeManager upgradeManager = null;
private Object restorationLock = new Object();
private boolean disablePreUpgradableLayoutCheck = false;
/**
* TxId of the last transaction that was included in the most
* recent fsimage file. This does not include any transactions
* that have since been written to the edit log.
*/
private long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;
// used for webui
private long mostRecentCheckpointTime = 0;
private final Map<Long, MD5Hash> checkpointImageDigests = new HashMap<Long,MD5Hash>();
/**
* list of failed (and thus removed) storages
*/
final protected List<StorageDirectory> removedStorageDirs
= Collections.synchronizedList(new ArrayList<StorageDirectory>());
/**
* Properties from old layout versions that may be needed
* during upgrade only.
*/
private HashMap<String, String> deprecatedProperties;
private final Configuration conf;
final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
/**
 * Construct the NNStorage.
 * @param conf Namenode configuration.
 * @param imageDirs Directories the image can be stored in.
 * @param editsDirs Directories the editlog can be stored in.
 * @param locationMap location descriptors keyed by directory URI; may be null.
 * @throws IOException if any directories are inaccessible.
 */
public NNStorage(Configuration conf, Collection<URI> imageDirs,
    Collection<URI> editsDirs, Map<URI, NNStorageLocation> locationMap)
    throws IOException {
  super(NodeType.NAME_NODE);
  storageDirs = Collections.synchronizedList(new ArrayList<StorageDirectory>());
  // this may modify the editsDirs, so copy before passing in
  setStorageDirectories(imageDirs,
      new ArrayList<URI>(editsDirs), locationMap);
  this.conf = conf;
}
/** @return the live (non-failed) storage directories managed by this NNStorage. */
public Collection<StorageDirectory> getStorageDirs() {
  return storageDirs;
}
/**
 * Record the MD5 digest of a checkpoint image that finished uploading.
 * @throws IOException if a different digest was already recorded for this txid.
 */
void checkpointUploadDone(long txid, MD5Hash checkpointImageMd5)
    throws IOException {
  setCheckpointImageDigest(txid, checkpointImageMd5);
}
/**
 * Construct an NNStorage from bare storage info, without scanning or
 * registering any storage directories. For testing only.
 * @param storageInfo layout version / namespace id / cTime to adopt
 * @throws IOException propagated from the superclass constructor
 */
public NNStorage(StorageInfo storageInfo) throws IOException {
  super(NodeType.NAME_NODE, storageInfo);
  this.conf = new Configuration();
}
@Override // Storage
public boolean isConversionNeeded(StorageDirectory sd) throws IOException {
  // BackupImage disables this check (see setDisablePreUpgradableLayoutCheck).
  if (disablePreUpgradableLayoutCheck) {
    return false;
  }
  // Pre-upgrade layouts kept the fsimage under an "image" subdirectory;
  // if that directory is absent there is nothing to convert.
  File oldImageDir = new File(sd.getRoot(), "image");
  if (!oldImageDir.exists()) {
    return false;
  }
  // check the layout version inside the image file
  File oldF = new File(oldImageDir, "fsimage");
  // NOTE(review): "rws" opens read-write (creating the file if absent);
  // "r" looks sufficient for this read-only probe — confirm before changing.
  RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
  try {
    oldFile.seek(0);
    // First int of the old image file is its layout version.
    int oldVersion = oldFile.readInt();
    oldFile.close();
    oldFile = null;
    if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION)
      return false;
  } finally {
    // no-op when the explicit close() above succeeded (oldFile nulled out)
    IOUtils.cleanup(LOG, oldFile);
  }
  return true;
}
@Override // Closeable
public void close() throws IOException {
  // Release every directory lock, then forget the directories.
  unlockAll();
  storageDirs.clear();
}
/**
 * See if any of removed storages is "writable" again, and can be returned
 * into service. A directory is restored by re-acquiring its lock and
 * moving it back from {@link #removedStorageDirs} to the active list.
 */
void attemptRestoreRemovedStorage() {
  // if directory is "alive" - copy the images there...
  if (removedStorageDirs.size() == 0)
    return; //nothing to restore
  /* We don't want more than one thread trying to restore at a time */
  synchronized (this.restorationLock) {
    LOG.info("attemptRestoreRemovedStorage: check removed(failed) "+
        "storage. removedStorages size = " + removedStorageDirs.size());
    for(Iterator<StorageDirectory> it
        = this.removedStorageDirs.iterator(); it.hasNext();) {
      StorageDirectory sd = it.next();
      File root = sd.getRoot();
      LOG.info("attemptRestoreRemovedStorage: currently disabled dir "
          + root.getAbsolutePath()
          + "; type="
          + sd.getStorageDirType()
          + ";canwrite=" + root.canWrite());
      try {
        if(root.exists() && root.canWrite()) {
          LOG.info("attemptRestoreRemovedStorage: restoring dir "
              + sd.getRoot().getAbsolutePath());
          // Re-acquire the lock BEFORE re-adding the directory. The
          // original order (add, remove, then lock) could leave an
          // unlocked directory in active service if lock() threw.
          sd.lock();
          this.addStorageDir(sd); // restore
          it.remove();
        }
      } catch(IOException e) {
        LOG.warn("attemptRestoreRemovedStorage: failed to restore "
            + sd.getRoot().getAbsolutePath(), e);
      }
    }
  }
}
/**
 * @return A list of storage directories which are in the errored state.
 * The returned list is the live internal (synchronized) list, not a copy.
 */
public List<StorageDirectory> getRemovedStorageDirs() {
  return this.removedStorageDirs;
}
/**
 * Convenience overload of {@link #setStorageDirectories(Collection, Collection, Map)}
 * with no location descriptors.
 */
public synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
    Collection<URI> fsEditsDirs) throws IOException {
  setStorageDirectories(fsNameDirs, fsEditsDirs, null);
}
/**
 * Set the storage directories which will be used. This should only ever be
 * called from inside NNStorage. However, it needs to remain package private
 * for testing, as StorageDirectories need to be reinitialised after using
 * Mockito.spy() on this class, as Mockito doesn't work well with inner
 * classes, such as StorageDirectory in this case.
 *
 * Synchronized due to initialization of storageDirs and removedStorageDirs.
 *
 * @param fsNameDirs Locations to store images. Must have a scheme.
 * @param fsEditsDirs Locations to store edit logs; entries that duplicate a
 *        name dir are removed from this collection as a side effect.
 * @param locationMap location descriptors keyed by URI; may be null.
 * @throws IOException if any URI has no scheme
 */
public synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
    Collection<URI> fsEditsDirs,
    Map<URI, NNStorageLocation> locationMap)
    throws IOException {
  this.storageDirs.clear();
  this.removedStorageDirs.clear();
  // Lowercase name of the only journal scheme backed by a StorageDirectory.
  String fileScheme = JournalType.FILE.name().toLowerCase();
  for (URI dirName : fsNameDirs) {
    // Fail fast with a clear error on a scheme-less URI. Previously only
    // edits dirs were checked, so a bad name dir caused an NPE below.
    checkSchemeConsistency(dirName);
    boolean isAlsoEdits = false;
    for (URI editsDirName : fsEditsDirs) {
      if (editsDirName.compareTo(dirName) == 0) {
        isAlsoEdits = true;
        // Safe during iteration: we break immediately after removing.
        fsEditsDirs.remove(editsDirName);
        break;
      }
    }
    NameNodeDirType dirType = (isAlsoEdits) ?
        NameNodeDirType.IMAGE_AND_EDITS :
        NameNodeDirType.IMAGE;
    // Add to the list of storage directories, only if the
    // URI is of type file://
    if (dirName.getScheme().equals(fileScheme)) {
      this.addStorageDir(new NNStorageDirectory(new File(dirName.getPath()),
          dirType, locationMap == null ? null : locationMap.get(dirName)));
    }
  }
  // Add edits dirs if they are different from name dirs
  for (URI dirName : fsEditsDirs) {
    checkSchemeConsistency(dirName);
    // Add to the list of storage directories, only if the
    // URI is of type file://
    if (dirName.getScheme().equals(fileScheme)) {
      this.addStorageDir(new NNStorageDirectory(new File(dirName.getPath()),
          NameNodeDirType.EDITS,
          locationMap == null ? null : locationMap.get(dirName)));
    }
  }
}
/**
 * Return the storage directory corresponding to the passed URI.
 * @param uri URI of a storage directory
 * @return The matching storage directory or null if none found
 */
StorageDirectory getStorageDirectory(URI uri) {
  try {
    URI target = Util.fileAsURI(new File(uri));
    Iterator<StorageDirectory> it = dirIterator();
    while (it.hasNext()) {
      StorageDirectory sd = it.next();
      if (Util.fileAsURI(sd.getRoot()).equals(target)) {
        return sd;
      }
    }
  } catch (IOException ioe) {
    LOG.warn("Error converting file to URI", ioe);
  }
  return null;
}
/**
 * Checks the consistency of a URI, in particular that its scheme
 * is specified.
 * @param u URI whose consistency is being checked.
 * @throws IOException if the URI has no scheme
 */
private static void checkSchemeConsistency(URI u) throws IOException {
  if (u.getScheme() == null) {
    throw new IOException("Undefined scheme for " + u);
  }
}
/**
 * Retrieve current directories of type IMAGE
 * (includes IMAGE_AND_EDITS directories, per NameNodeDirType.isOfType).
 * @return Collection of Files representing image directory roots
 * @throws IOException in case of URI processing error
 */
Collection<File> getImageDirectories() throws IOException {
  return getDirectories(NameNodeDirType.IMAGE);
}
/**
 * Retrieve current directories of type EDITS
 * (includes IMAGE_AND_EDITS directories, per NameNodeDirType.isOfType).
 * @return Collection of Files representing edits directory roots
 * @throws IOException in case of URI processing error
 */
Collection<File> getEditsDirectories() throws IOException {
  return getDirectories(NameNodeDirType.EDITS);
}
/**
 * Return number of storage directories of the given type.
 * @param dirType directory type; null counts all directories
 * @return number of storage directories of type dirType
 */
int getNumStorageDirs(NameNodeDirType dirType) {
  if (dirType == null) {
    return getNumStorageDirs();
  }
  int count = 0;
  for (Iterator<StorageDirectory> it = dirIterator(dirType); it.hasNext(); ) {
    it.next();
    count++;
  }
  return count;
}
/**
 * Return the list of locations being used for a specific purpose,
 * i.e. image or edit log storage.
 *
 * @param dirType Purpose of locations requested; null returns all.
 * @return roots of the matching storage directories
 * @throws IOException declared for interface compatibility
 */
Collection<File> getDirectories(NameNodeDirType dirType)
    throws IOException {
  ArrayList<File> roots = new ArrayList<File>();
  Iterator<StorageDirectory> it =
      (dirType == null) ? dirIterator() : dirIterator(dirType);
  while (it.hasNext()) {
    roots.add(it.next().getRoot());
  }
  return roots;
}
/**
 * Determine the last transaction ID noted in this storage directory.
 * This txid is stored in a special seen_txid file since it might not
 * correspond to the latest image or edit log. For example, an image-only
 * directory will have this txid incremented when edits logs roll, even
 * though the edits logs are in a different directory.
 *
 * @param sd StorageDirectory to check
 * @return If file exists and can be read, last recorded txid. If not, 0L.
 * @throws IOException On errors processing file pointed to by sd, including
 *         an empty or non-numeric seen_txid file (previously this surfaced
 *         as an unchecked NumberFormatException).
 */
static long readTransactionIdFile(StorageDirectory sd) throws IOException {
  File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
  long txid = 0L;
  if (txidFile.exists() && txidFile.canRead()) {
    BufferedReader br = new BufferedReader(new FileReader(txidFile));
    try {
      String line = br.readLine();
      if (line == null) {
        // Empty file: Long.valueOf(null) used to throw an unhelpful
        // NumberFormatException here; report it as real corruption.
        throw new IOException("Corrupt seen_txid file (empty): " + txidFile);
      }
      txid = Long.parseLong(line.trim());
      br.close();
      br = null;
    } catch (NumberFormatException nfe) {
      throw new IOException("Corrupt seen_txid file: " + txidFile, nfe);
    } finally {
      // no-op when the explicit close() above succeeded (br nulled out)
      IOUtils.cleanup(LOG, br);
    }
  }
  return txid;
}
/**
 * Write the last seen transaction ID into the seen_txid file of the
 * given storage directory.
 *
 * @param sd storage directory to write into
 * @param txid transaction ID to record; must be >= -1
 * @throws IOException if txid is invalid or the write fails
 */
void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException {
  if (txid < -1) {
    // -1 is valid when formatting
    throw new IOException("Bad txid: " + txid);
  }
  File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
  // AtomicFileOutputStream presumably swaps the file in only on a clean
  // close, so a crash mid-write cannot truncate seen_txid — TODO confirm.
  OutputStream fos = new AtomicFileOutputStream(txIdFile);
  try {
    fos.write(String.valueOf(txid).getBytes());
    fos.write('\n');
    fos.close();
    fos = null;
  } finally {
    // no-op when the explicit close() above succeeded (fos nulled out)
    IOUtils.cleanup(LOG, fos);
  }
}
/**
 * Set the transaction ID of the last checkpoint. Only moves forward:
 * a txid at or below the current value is ignored. Also stamps the
 * checkpoint time (used by the web UI).
 */
synchronized void setMostRecentCheckpointTxId(long txid) {
  if(txid > mostRecentCheckpointTxId) {
    this.mostRecentCheckpointTxId = txid;
    this.mostRecentCheckpointTime = FSNamesystem.now();
  }
}
/**
 * Return the transaction ID of the last checkpoint
 * (INVALID_TXID if none has been recorded yet).
 */
public long getMostRecentCheckpointTxId() {
  return mostRecentCheckpointTxId;
}
/**
 * Return the time of last successful checkpoint, formatted for display
 * (epoch start if no checkpoint has been recorded yet).
 */
public String getMostRecentCheckpointTime() {
  return new Date(mostRecentCheckpointTime).toString();
}
/**
 * Write a small file in all available storage directories that
 * indicates that the namespace has reached some given transaction ID.
 *
 * This is used when the image is loaded to avoid accidental rollbacks
 * in the case where an edit log is fully deleted but there is no
 * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
 * @param txid the txid that has been reached
 * @param image image whose managers are rechecked when directories fail;
 *        may be null
 * @throws IOException if no usable storage directories remain
 */
public void writeTransactionIdFileToStorage(long txid, FSImage image)
    throws IOException {
  // Write txid marker in all storage directories
  List<StorageDirectory> badSDs = new ArrayList<StorageDirectory>();
  for (StorageDirectory sd : storageDirs) {
    try {
      writeTransactionIdFile(sd, txid);
    } catch(IOException e) {
      // Close any edits stream associated with this dir and remove directory
      LOG.warn("writeTransactionIdToStorage failed on " + sd,
          e);
      badSDs.add(sd);
    }
  }
  // Fault any failed directories; this already notifies the image's
  // managers, so the dead "if (image != null) {}" block that used to
  // follow has been removed.
  reportErrorsOnDirectories(badSDs, image);
}
/**
 * Return the names of the checkpoint image files (fsimage.ckpt_<txid>)
 * across all image directories.
 *
 * @return List of filenames to save checkpoints to.
 */
public File[] getFsImageNameCheckpoint(long txid) {
  ArrayList<File> files = new ArrayList<File>();
  Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
  while (it.hasNext()) {
    files.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid));
  }
  return files.toArray(new File[files.size()]);
}
/**
 * Return the name of an existing, readable image file for the given txid,
 * preferring directories of the given location type. If no preferred
 * directory holds the image, any readable copy is returned.
 *
 * @return The name of the image file, or null if none is readable.
 */
public File getFsImageName(StorageLocationType type, long txid) {
  File fallback = null;
  for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
      it.hasNext(); ) {
    StorageDirectory sd = it.next();
    File fsImage = getStorageFile(sd, NameNodeFile.IMAGE, txid);
    if (!sd.getRoot().canRead() || !fsImage.exists()) {
      continue;
    }
    if (isPreferred(type, sd)) {
      return fsImage;
    }
    fallback = fsImage;
  }
  return fallback;
}
/**
 * Return all existing, readable images for the given txid, together with
 * their location types (local, shared, remote).
 */
public Map<File, StorageLocationType> getImages(long txid) {
  Map<File, StorageLocationType> result =
      new HashMap<File, StorageLocationType>();
  Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
  while (it.hasNext()) {
    StorageDirectory sd = it.next();
    File fsImage = getStorageFile(sd, NameNodeFile.IMAGE, txid);
    if (sd.getRoot().canRead() && fsImage.exists()) {
      result.put(fsImage, getType(sd));
    }
  }
  return result;
}
/**
 * Format all available storage directories.
 * Assigns a fresh namespace ID and resets cTime before formatting
 * each directory in turn.
 */
public void format() throws IOException {
  this.layoutVersion = FSConstants.LAYOUT_VERSION;
  this.namespaceID = newNamespaceID();
  this.cTime = 0L;
  for (Iterator<StorageDirectory> it =
      dirIterator(); it.hasNext();) {
    StorageDirectory sd = it.next();
    format(sd);
  }
}
/** Create new dfs name directory. Caution: this destroys all files
 * in this filesystem.
 * Clears the directory, writes the VERSION file, and seeds seen_txid
 * with -1 (the only txid valid at format time). */
private void format(StorageDirectory sd) throws IOException {
  sd.clearDirectory(); // create current dir
  sd.write();
  writeTransactionIdFile(sd, -1);
  LOG.info("Storage directory " + sd.getRoot()
      + " has been successfully formatted.");
}
/**
 * Generate new namespaceID.
 *
 * namespaceID is a persistent attribute of the namespace.
 * It is generated when the namenode is formatted and remains the same
 * during the life cycle of the namenode.
 * When a datanodes register they receive it as the registrationID,
 * which is checked every time the datanode is communicating with the
 * namenode. Datanodes that do not 'know' the namespaceID are rejected.
 *
 * @return new namespaceID (a non-zero 31-bit value)
 */
static int newNamespaceID() {
  Random r = new Random(FSNamesystem.now());
  int newID;
  do {
    newID = r.nextInt(0x7FFFFFFF); // use 31 bits only
  } while (newID == 0);            // 0 is reserved / invalid
  return newID;
}
@Override // Storage
protected void getFields(Properties props, StorageDirectory sd)
    throws IOException {
  // Read the common fields (layoutVersion, namespaceID, cTime, ...).
  super.getFields(props, sd);
  // layoutVersion == 0 means the VERSION file carried no version at all.
  if (layoutVersion == 0)
    throw new IOException("NameNode directory " + sd.getRoot()
        + " is not formatted.");
  String sDUS, sDUV;
  sDUS = props.getProperty("distributedUpgradeState");
  sDUV = props.getProperty("distributedUpgradeVersion");
  // Missing properties default to: no upgrade in progress, current layout.
  setDistributedUpgradeState(
      sDUS == null ? false : Boolean.parseBoolean(sDUS),
      sDUV == null ? getLayoutVersion() : Integer.parseInt(sDUV));
  // Stash properties (e.g. the image MD5) only needed while upgrading.
  setDeprecatedPropertiesForUpgrade(props);
}
/**
 * Return a property that was stored in an earlier version of HDFS.
 *
 * This should only be used during upgrades.
 * @param prop property name, e.g. {@link #MESSAGE_DIGEST_PROPERTY}
 * @return the stored value, or null if the property was not present
 */
String getDeprecatedProperty(String prop) {
  assert getLayoutVersion() > FSConstants.LAYOUT_VERSION :
    "getDeprecatedProperty should only be done when loading " +
    "storage from past versions during upgrade.";
  return deprecatedProperties.get(prop);
}
/**
 * Write version file into the storage directory.
 *
 * The version file should always be written last.
 * Missing or corrupted version file indicates that
 * the checkpoint is not valid.
 *
 * @param sd storage directory
 * @throws IOException
 */
@Override // Storage
protected void setFields(Properties props, StorageDirectory sd)
    throws IOException {
  super.setFields(props, sd);
  boolean uState = getDistributedUpgradeState();
  int uVersion = getDistributedUpgradeVersion();
  // Only persist upgrade state while an upgrade is actually in flight;
  // a finished upgrade (version == current layout) writes nothing.
  if (uState && uVersion != getLayoutVersion()) {
    props.setProperty("distributedUpgradeState", Boolean.toString(uState));
    props
        .setProperty("distributedUpgradeVersion", Integer.toString(uVersion));
  }
}
/**
 * Pull any properties out of the VERSION file that are from older
 * versions of HDFS and only necessary during upgrade.
 * Currently this is only the pre-HDFS-1073 image MD5 digest.
 */
private void setDeprecatedPropertiesForUpgrade(Properties props) {
  deprecatedProperties = new HashMap<String, String>();
  String md5 = props.getProperty(MESSAGE_DIGEST_PROPERTY);
  if (md5 != null) {
    deprecatedProperties.put(MESSAGE_DIGEST_PROPERTY, md5);
  }
}
////////////////////////////////////////////////////////////////////////
// names and files for images checkpoint images, edits, etc
////////////////////////////////////////////////////////////////////////
/**
 * @return the file for the given kind and txid inside sd's current dir,
 * with the txid zero-padded to 19 digits (e.g. fsimage_000...042).
 */
static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) {
  String name = String.format("%s_%019d", type.getName(), imageTxId);
  return new File(sd.getCurrentDir(), name);
}
/**
 * Get a storage file for one of the files that doesn't need a txid associated
 * (e.g version, seen_txid)
 */
static File getStorageFile(StorageDirectory sd, NameNodeFile type) {
  return new File(sd.getCurrentDir(), type.getName());
}
/** @return checkpoint image file name, e.g. "fsimage.ckpt_<19-digit txid>". */
public static String getCheckpointImageFileName(long txid) {
  return String.format("%s_%019d",
      NameNodeFile.IMAGE_NEW.getName(), txid);
}
/** @return the checkpoint image file for txid inside sd's current dir. */
public static File getCheckpointImageFile(StorageDirectory sd, long txid) {
  return new File(sd.getCurrentDir(),
      getCheckpointImageFileName(txid));
}
/** @return image file name, e.g. "fsimage_<19-digit txid>". */
public static String getImageFileName(long txid) {
  return String.format("%s_%019d",
      NameNodeFile.IMAGE.getName(), txid);
}
/** @return the image file for txid inside sd's current dir. */
public static File getImageFile(StorageDirectory sd, long txid) {
  return new File(sd.getCurrentDir(),
      getImageFileName(txid));
}
/** @return in-progress edits file name, e.g. "edits_inprogress_<19-digit txid>". */
public static String getInProgressEditsFileName(long startTxId) {
  return String.format("%s_%019d", NameNodeFile.EDITS_INPROGRESS.getName(),
      startTxId);
}
/** @return the in-progress edits file starting at startTxId in sd's current dir. */
static File getInProgressEditsFile(StorageDirectory sd, long startTxId) {
  return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId));
}
/** @return the finalized edits file spanning [startTxId, endTxId] in sd's current dir. */
static File getFinalizedEditsFile(StorageDirectory sd,
    long startTxId, long endTxId) {
  return new File(sd.getCurrentDir(),
      getFinalizedEditsFileName(startTxId, endTxId));
}
/** @return finalized edits file name, e.g. "edits_<start 19-digit>-<end 19-digit>". */
public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
  return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
      startTxId, endTxId);
}
////////////////////////////////////////////////////////////////////////
/**
 * Return the first readable finalized edits file for the given txid range.
 * @throws IOException if no edits directory holds a readable copy
 */
File findFinalizedEditsFile(long startTxId, long endTxId)
    throws IOException {
  String name = getFinalizedEditsFileName(startTxId, endTxId);
  File found = findFile(NameNodeDirType.EDITS, name);
  if (found == null) {
    throw new IOException(
        "No edits file for txid " + startTxId + "-" + endTxId + " exists!");
  }
  return found;
}
/**
 * Return the first readable in-progress edits file for the given start txid.
 * @throws IOException if no edits directory holds a readable copy
 */
File findInProgressEditsFile(long startTxId)
    throws IOException {
  String name = getInProgressEditsFileName(startTxId);
  File found = findFile(NameNodeDirType.EDITS, name);
  if (found == null) {
    throw new IOException(
        "No edits file for txid " + startTxId + "-in progress");
  }
  return found;
}
/**
 * Return the first readable image file for the given txid, or null
 * if no such image can be found
 */
File findImageFile(long txid) throws IOException {
  return findFile(NameNodeDirType.IMAGE,
      getImageFileName(txid));
}
/**
 * Return the first readable storage file of the given name
 * across any of the 'current' directories in SDs of the
 * given type, or null if no such file exists.
 */
private File findFile(NameNodeDirType dirType, String name) {
  for (StorageDirectory sd : dirIterable(dirType)) {
    if (!sd.getCurrentDir().canRead()) {
      continue;
    }
    File candidate = new File(sd.getCurrentDir(), name);
    if (candidate.exists()) {
      return candidate;
    }
  }
  return null;
}
/**
 * Checks if we have information about this directory
 * that it is preferred.
 * @param type preferred type
 * @param sd storage directory
 */
static boolean isPreferred(StorageLocationType type, StorageDirectory sd) {
  if (!(sd instanceof NNStorageDirectory)) {
    // Directories without location info are always preferred.
    return true;
  }
  return ((NNStorageDirectory) sd).type == type;
}
/**
 * Get the location type of the given directory;
 * directories without location info are reported as LOCAL.
 */
static StorageLocationType getType(StorageDirectory sd) {
  if (!(sd instanceof NNStorageDirectory)) {
    // by default all are local
    return StorageLocationType.LOCAL;
  }
  return ((NNStorageDirectory) sd).type;
}
/**
 * @return A list of the given File in every available storage directory,
 * regardless of whether it might exist.
 */
File[] getFiles(NameNodeDirType dirType, String fileName) {
  ArrayList<File> files = new ArrayList<File>();
  Iterator<StorageDirectory> it =
      (dirType == null) ? dirIterator() : dirIterator(dirType);
  while (it.hasNext()) {
    files.add(new File(it.next().getCurrentDir(), fileName));
  }
  return files.toArray(new File[files.size()]);
}
/**
 * Set the upgrade manager for use in a distributed upgrade.
 * @param um The upgrade manager (null disables upgrade tracking)
 */
void setUpgradeManager(UpgradeManager um) {
  upgradeManager = um;
}
/**
 * @return The current distributed upgrade state; false when no upgrade
 * manager is configured.
 */
boolean getDistributedUpgradeState() {
  return upgradeManager != null && upgradeManager.getUpgradeState();
}
/**
 * @return The current upgrade version; 0 when no upgrade manager
 * is configured.
 */
int getDistributedUpgradeVersion() {
  if (upgradeManager == null) {
    return 0;
  }
  return upgradeManager.getUpgradeVersion();
}
/**
 * Set the upgrade state and version.
 * No-op when no upgrade manager is configured.
 * @param uState the new state.
 * @param uVersion the new version.
 */
private void setDistributedUpgradeState(boolean uState, int uVersion) {
  if (upgradeManager != null) {
    upgradeManager.setUpgradeState(uState, uVersion);
  }
}
/**
 * Verify that the distributed upgrade state is valid.
 * @param startOpt the option the namenode was started with.
 * @throws IOException if an upgrade is pending or required but the
 *         namenode was not started with -upgrade.
 */
void verifyDistributedUpgradeProgress(StartupOption startOpt
    ) throws IOException {
  // Rollback and import discard/replace state, so no check is needed.
  if(startOpt == StartupOption.ROLLBACK || startOpt == StartupOption.IMPORT)
    return;
  assert upgradeManager != null : "FSNameSystem.upgradeManager is null.";
  if(startOpt != StartupOption.UPGRADE) {
    // A previously started upgrade must be finished before normal startup.
    if(upgradeManager.getUpgradeState())
      throw new IOException(
          "\n   Previous distributed upgrade was not completed. "
          + "\n   Please restart NameNode with -upgrade option.");
    // A registered upgrade for this version transition forces -upgrade.
    if(upgradeManager.getDistributedUpgrades() != null)
      throw new IOException("\n   Distributed upgrade for NameNode version "
          + upgradeManager.getUpgradeVersion()
          + " to current LV " + layoutVersion
          + " is required.\n   Please restart NameNode"
          + " with -upgrade option.");
  }
}
/**
 * Initialize a distributed upgrade.
 * No-op when the upgrade manager reports nothing to initialize;
 * otherwise persists the new upgrade state to all directories.
 */
void initializeDistributedUpgrade() throws IOException {
  if(! upgradeManager.initializeUpgrade())
    return;
  // write new upgrade state into disk
  writeAll();
  LOG.info("\n   Distributed upgrade for NameNode version "
      + upgradeManager.getUpgradeVersion() + " to current LV "
      + layoutVersion + " is initialized.");
}
/**
 * Disable the check for pre-upgradable layouts. Needed for BackupImage.
 * @param val Whether to disable the preupgradeable layout check.
 */
void setDisablePreUpgradableLayoutCheck(boolean val) {
  disablePreUpgradableLayoutCheck = val;
}
/**
 * Marks a list of directories as having experienced an error.
 *
 * @param sds A list of storage directories to mark as errored.
 * @param image image whose managers are rechecked; may be null.
 * @throws IOException if no usable storage directories remain afterwards
 */
synchronized void reportErrorsOnDirectories(List<StorageDirectory> sds,
    FSImage image) throws IOException {
  for (StorageDirectory sd : sds) {
    reportErrorsOnDirectory(sd, image);
  }
  // check image managers (this will update image metrics)
  if (image != null) {
    image.checkImageManagers();
  }
  // only check if something was wrong
  if(!sds.isEmpty()) {
    if (this.getNumStorageDirs() == 0)
      throw new IOException("No more storage directories left");
    // check image directories, edits are checked withing FSEditLog.checkJournals
    if (getNumStorageDirs(NameNodeDirType.IMAGE) == 0)
      throw new IOException("No more image storage directories left");
  }
}
/**
 * Reports that a directory has experienced an error.
 * Notifies listeners that the directory is no longer
 * available.
 *
 * @param sd A storage directory to mark as errored.
 * @param image image to notify of the failure; may be null.
 */
synchronized void reportErrorsOnDirectory(StorageDirectory sd,
    FSImage image) {
  String lsd = listStorageDirectories();
  LOG.info("reportErrorsOnDirectory: Current list of storage dirs:" + lsd);
  LOG.error("reportErrorsOnDirectory: Error reported on storage directory "
      + sd.getRoot());
  // Only move the dir to the removed list if it was still active.
  if (this.storageDirs.remove(sd)) {
    try {
      // Release the lock so a later attemptRestoreRemovedStorage()
      // can re-lock the directory when it becomes healthy again.
      sd.unlock();
    } catch (Exception e) {
      LOG.warn(
          "reportErrorsOnDirectory: Unable to unlock bad storage directory: "
          + sd.getRoot().getPath(), e);
    }
    this.removedStorageDirs.add(sd);
  }
  if (image != null) {
    image.reportErrorsOnImageManager(sd);
  }
  lsd = listStorageDirectories();
  LOG.info("reportErrorsOnDirectory: Current list of storage dirs:" + lsd);
}
/**
 * Report that an IOE has occurred on some file which may
 * or may not be within one of the NN image storage directories.
 * If the file lies under a managed directory, that directory is faulted.
 */
public void reportErrorOnFile(File f) {
  // We use getAbsolutePath here instead of getCanonicalPath since we know
  // that there is some IO problem on that drive.
  // getCanonicalPath may need to call stat() or readlink() and it's likely
  // those calls would fail due to the same underlying IO problem.
  String absPath = f.getAbsolutePath();
  for (StorageDirectory sd : storageDirs) {
    String dirPath = sd.getRoot().getAbsolutePath();
    // NOTE(review): hard-codes '/' rather than File.separator — fine on
    // POSIX hosts, would not match on Windows; confirm before changing.
    if (!dirPath.endsWith("/")) {
      dirPath += "/";
    }
    // Appending the separator avoids a false prefix match, e.g.
    // /data1x must not match directory /data1.
    if (absPath.startsWith(dirPath)) {
      reportErrorsOnDirectory(sd, null);
      return;
    }
  }
}
/**
 * Iterate over all current storage directories, inspecting them
 * with the given inspector.
 */
void inspectStorageDirs(FSImageStorageInspector inspector)
    throws IOException {
  // Let the inspector examine each directory to locate the newest
  // image/edits pair.
  Iterator<StorageDirectory> it = dirIterator();
  while (it.hasNext()) {
    inspector.inspectDirectory(it.next());
  }
}
/**
 * Iterate over all of the storage dirs, reading their contents to determine
 * their layout versions. Returns an FSImageStorageInspector which has
 * inspected each directory.
 *
 * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
 * @throws IOException if no valid storage dirs are found
 */
FSImageStorageInspector readAndInspectDirs()
    throws IOException {
  int minLayoutVersion = Integer.MAX_VALUE; // the newest
  int maxLayoutVersion = Integer.MIN_VALUE; // the oldest
  // First determine what range of layout versions we're going to inspect
  for (Iterator<StorageDirectory> it = dirIterator();
      it.hasNext();) {
    StorageDirectory sd = it.next();
    if (!sd.getVersionFile().exists()) {
      FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
      continue;
    }
    sd.read(); // sets layoutVersion
    minLayoutVersion = Math.min(minLayoutVersion, getLayoutVersion());
    maxLayoutVersion = Math.max(maxLayoutVersion, getLayoutVersion());
  }
  // min > max only when every directory was skipped above.
  if (minLayoutVersion > maxLayoutVersion) {
    throw new IOException("No storage directories contained VERSION information");
  }
  assert minLayoutVersion <= maxLayoutVersion;
  // If we have any storage directories with the new layout version
  // (ie edits_<txnid>) then use the new inspector, which will ignore
  // the old format dirs.
  FSImageStorageInspector inspector;
  if (LayoutVersion.supports(Feature.TXID_BASED_LAYOUT, minLayoutVersion)) {
    inspector = new FSImageTransactionalStorageInspector();
    if (!LayoutVersion.supports(Feature.TXID_BASED_LAYOUT, maxLayoutVersion)) {
      FSImage.LOG.warn("Ignoring one or more storage directories with old layouts");
    }
  } else {
    inspector = new FSImagePreTransactionalStorageInspector(conf);
  }
  inspectStorageDirs(inspector);
  return inspector;
}
/**
 * Make the pre-upgrade "image" layout unusable so that old NameNode
 * versions refuse to start against an upgraded directory: (re)create
 * image/fsimage and fill it with corrupted marker data.
 */
@Override
protected void corruptPreUpgradeStorage(File rootDir) throws IOException {
  File oldImageDir = new File(rootDir, "image");
  if (!oldImageDir.exists() && !oldImageDir.mkdir()) {
    throw new IOException("Cannot create directory " + oldImageDir);
  }
  // recreate old image file to let pre-upgrade versions fail
  File oldImage = new File(oldImageDir, "fsimage");
  if (!oldImage.exists() && !oldImage.createNewFile()) {
    throw new IOException("Cannot create file " + oldImage);
  }
  RandomAccessFile oldFile = new RandomAccessFile(oldImage, "rws");
  try {
    // write new version into old image file
    writeCorruptedData(oldFile);
  } finally {
    oldFile.close();
  }
}
synchronized void setCheckpointImageDigest(long txid, MD5Hash imageDigest)
    throws IOException {
  // First digest recorded for a txid wins; a later call for the same txid
  // is only legal if it supplies the identical digest.
  if (!checkpointImageDigests.containsKey(txid)) {
    checkpointImageDigests.put(txid, imageDigest);
    return;
  }
  MD5Hash existing = checkpointImageDigests.get(txid);
  if (!existing.equals(imageDigest)) {
    throw new IOException(
        "Trying to set checkpoint image digest for txid: " + txid + "="
        + imageDigest + " existing " + existing);
  }
}
synchronized void clearCheckpointImageDigest(long txid)
    throws IOException {
  // Drop the digest recorded for this checkpoint txid, if any;
  // a no-op when nothing was recorded.
  checkpointImageDigests.remove(txid);
}
synchronized MD5Hash getCheckpointImageDigest(long txid) throws IOException {
  // Single map lookup instead of containsKey() + get().
  // NOTE(review): assumes no null digest is ever stored for a txid
  // (setCheckpointImageDigest dereferences the stored value, so a null
  // entry would already break that path) — confirm against callers.
  MD5Hash digest = checkpointImageDigests.get(txid);
  if (digest == null) {
    throw new IOException("Trying to get checkpoint image digest for txid: "
        + txid + " but it's not stored");
  }
  return digest;
}
synchronized void purgeOldStorage(long minImageTxId) {
  // clear image digests: discard every recorded checkpoint digest whose
  // txid precedes the oldest image being retained. Iterator.remove() on
  // the keySet view removes the mapping from the backing map.
  Iterator<Long> it = checkpointImageDigests.keySet().iterator();
  while (it.hasNext()) {
    if (it.next() < minImageTxId) {
      it.remove();
    }
  }
}
public static boolean recoverDirectory(StorageDirectory sd,
    StartupOption startOpt, StorageState curState, boolean checkImport)
    throws IOException {
  boolean isFormatted = false;

  // sd is locked but not opened
  if (curState == StorageState.NON_EXISTENT) {
    // name-node fails if any of the configured storage dirs are missing
    throw new InconsistentFSStateException(sd.getRoot(),
        "storage directory does not exist or is not accessible.");
  } else if (curState != StorageState.NOT_FORMATTED
      && curState != StorageState.NORMAL) {
    // any other state: recovery is possible
    sd.doRecover(curState);
  }

  if (curState != StorageState.NOT_FORMATTED
      && startOpt != StartupOption.ROLLBACK) {
    // read and verify consistency with other directories
    sd.read();
    isFormatted = true;
  }

  if (checkImport && startOpt == StartupOption.IMPORT && isFormatted) {
    // import of a checkpoint is allowed only into empty image directories
    throw new IOException("Cannot import image from a checkpoint. "
        + " NameNode already contains an image in " + sd.getRoot());
  }
  return isFormatted;
}
public static void finalize(StorageDirectory sd, int layoutVersion,
    long cTime) throws IOException {
  File prevDir = sd.getPreviousDir();

  // No previous state retained: nothing to finalize for this directory.
  if (!prevDir.exists()) { // already discarded
    LOG.info("Directory " + prevDir + " does not exist.");
    LOG.info("Finalize upgrade for " + sd.getRoot() + " is not required.");
    return;
  }

  LOG.info("Finalizing upgrade for storage directory "
      + sd.getRoot()
      + "."
      + (layoutVersion == 0 ? "" : "\n cur LV = " + layoutVersion
      + "; cur CTime = " + cTime));
  assert sd.getCurrentDir().exists() : "Current directory must exist.";

  // rename previous to tmp and remove: renaming first makes the discard
  // of the previous state recognizable if interrupted mid-way.
  File tmpDir = sd.getFinalizedTmp();
  NNStorage.rename(prevDir, tmpDir);
  NNStorage.deleteDir(tmpDir);
  LOG.info("Finalize upgrade for " + sd.getRoot() + " is complete.");
}
public static boolean canRollBack(StorageDirectory sd, Storage storage)
    throws IOException {
  File previousDir = sd.getPreviousDir();

  if (!previousDir.exists()) { // use current directory then
    LOG.info("Storage directory " + sd.getRoot()
        + " does not contain previous fs state.");
    // read and verify consistency with other directories
    sd.read();
    return false;
  }

  // read and verify consistency of the prev dir
  sd.read(sd.getPreviousVersionFile());

  // Rollback is only permitted when this software's storage version
  // matches the version recorded in the previous state.
  if (storage.getLayoutVersion() != FSConstants.LAYOUT_VERSION) {
    throw new IOException("Cannot rollback to storage version "
        + storage.getLayoutVersion()
        + " using this version of the NameNode, which uses storage version "
        + FSConstants.LAYOUT_VERSION + ". "
        + "Please use the previous version of HDFS to perform the rollback.");
  }
  return true;
}
public static void doRollBack(StorageDirectory sd, Storage storage)
    throws IOException {
  File previousDir = sd.getPreviousDir();
  if (!previousDir.exists()) {
    // Nothing to roll back in this directory.
    return;
  }

  LOG.info("Rolling back storage directory " + sd.getRoot()
      + ".\n new LV = " + storage.getLayoutVersion() + "; new CTime = "
      + storage.getCTime());

  File removedTmp = sd.getRemovedTmp();
  assert !removedTmp.exists() : "removed.tmp directory must not exist.";

  File currentDir = sd.getCurrentDir();
  assert currentDir.exists() : "Current directory must exist.";

  // Three-step swap: current -> removed.tmp, previous -> current,
  // then delete removed.tmp.
  NNStorage.rename(currentDir, removedTmp);
  NNStorage.rename(previousDir, currentDir);
  NNStorage.deleteDir(removedTmp);
  LOG.info("Rollback of " + sd.getRoot() + " is complete.");
}
/** @return the {@link Configuration} held by this storage object. */
Configuration getConf() {
return conf;
}
}