/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.namenode; import java.io.BufferedReader; import java.io.Closeable; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.RandomAccessFile; import java.io.OutputStream; import java.net.URI; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Random; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.common.HdfsConstants; import org.apache.hadoop.hdfs.protocol.FSConstants; import org.apache.hadoop.hdfs.protocol.LayoutVersion; import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; import org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType; import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException; import org.apache.hadoop.hdfs.server.common.Storage; import 
org.apache.hadoop.hdfs.server.common.StorageErrorReporter; import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.common.UpgradeManager; import org.apache.hadoop.hdfs.server.common.Util; import org.apache.hadoop.hdfs.server.namenode.JournalStream.JournalType; import org.apache.hadoop.hdfs.server.namenode.ValidateNamespaceDirPolicy.NNStorageLocation; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.util.AtomicFileOutputStream; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.MD5Hash; /** * NNStorage is responsible for management of the StorageDirectories used by * the NameNode. */ public class NNStorage extends Storage implements Closeable, StorageErrorReporter { public static final Log LOG = LogFactory.getLog(NNStorage.class.getName()); public static final String MESSAGE_DIGEST_PROPERTY = "imageMD5Digest"; public static final String LOCAL_URI_SCHEME = "file"; /** * Namenode storage directory, which stores additional information * about mount point, if the directory is remote, shared, etc. 
*/ public static enum StorageLocationType { LOCAL, REMOTE, SHARED } public class NNStorageDirectory extends StorageDirectory { final StorageLocationType type; public NNStorageDirectory(File dir, StorageDirType dirType, NNStorageLocation location) { super(dir, dirType, true); if (location == null) { type = null; return; } type = location.type; } } // // The filenames used for storing the images // public enum NameNodeFile { IMAGE ("fsimage"), TIME ("fstime"), // from "old" pre-HDFS-1073 format SEEN_TXID ("seen_txid"), EDITS ("edits"), IMAGE_NEW ("fsimage.ckpt"), EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format EDITS_INPROGRESS ("edits_inprogress"); private String fileName = null; private NameNodeFile(String name) { this.fileName = name; } public String getName() { return fileName; } } /** * Implementation of StorageDirType specific to namenode storage * A Storage directory could be of type IMAGE which stores only fsimage, * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which * stores both fsimage and edits. */ public static enum NameNodeDirType implements StorageDirType { UNDEFINED, IMAGE, EDITS, IMAGE_AND_EDITS; public StorageDirType getStorageDirType() { return this; } public boolean isOfType(StorageDirType type) { if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS)) return true; return this == type; } } private UpgradeManager upgradeManager = null; private Object restorationLock = new Object(); private boolean disablePreUpgradableLayoutCheck = false; /** * TxId of the last transaction that was included in the most * recent fsimage file. This does not include any transactions * that have since been written to the edit log. 
*/ private long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID; // used for webui private long mostRecentCheckpointTime = 0; private final Map<Long, MD5Hash> checkpointImageDigests = new HashMap<Long,MD5Hash>(); /** * list of failed (and thus removed) storages */ final protected List<StorageDirectory> removedStorageDirs = Collections.synchronizedList(new ArrayList<StorageDirectory>()); /** * Properties from old layout versions that may be needed * during upgrade only. */ private HashMap<String, String> deprecatedProperties; private final Configuration conf; final NameNodeMetrics metrics = NameNode.getNameNodeMetrics(); /** * Construct the NNStorage. * @param conf Namenode configuration. * @param imageDirs Directories the image can be stored in. * @param editsDirs Directories the editlog can be stored in. * @throws IOException if any directories are inaccessible. */ public NNStorage(Configuration conf, Collection<URI> imageDirs, Collection<URI> editsDirs, Map<URI, NNStorageLocation> locationMap) throws IOException { super(NodeType.NAME_NODE); storageDirs = Collections.synchronizedList(new ArrayList<StorageDirectory>()); // this may modify the editsDirs, so copy before passing in setStorageDirectories(imageDirs, new ArrayList<URI>(editsDirs), locationMap); this.conf = conf; } public Collection<StorageDirectory> getStorageDirs() { return storageDirs; } void checkpointUploadDone(long txid, MD5Hash checkpointImageMd5) throws IOException { setCheckpointImageDigest(txid, checkpointImageMd5); } /** * For testing * @param storageInfo * @throws IOException */ public NNStorage(StorageInfo storageInfo) throws IOException { super(NodeType.NAME_NODE, storageInfo); this.conf = new Configuration(); } @Override // Storage public boolean isConversionNeeded(StorageDirectory sd) throws IOException { if (disablePreUpgradableLayoutCheck) { return false; } File oldImageDir = new File(sd.getRoot(), "image"); if (!oldImageDir.exists()) { return false; } // check the layout version 
inside the image file File oldF = new File(oldImageDir, "fsimage"); RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws"); try { oldFile.seek(0); int oldVersion = oldFile.readInt(); oldFile.close(); oldFile = null; if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION) return false; } finally { IOUtils.cleanup(LOG, oldFile); } return true; } @Override // Closeable public void close() throws IOException { unlockAll(); storageDirs.clear(); } /** * See if any of removed storages is "writable" again, and can be returned * into service. */ void attemptRestoreRemovedStorage() { // if directory is "alive" - copy the images there... if(removedStorageDirs.size() == 0) return; //nothing to restore /* We don't want more than one thread trying to restore at a time */ synchronized (this.restorationLock) { LOG.info("attemptRestoreRemovedStorage: check removed(failed) "+ "storage. removedStorages size = " + removedStorageDirs.size()); for(Iterator<StorageDirectory> it = this.removedStorageDirs.iterator(); it.hasNext();) { StorageDirectory sd = it.next(); File root = sd.getRoot(); LOG.info("attemptRestoreRemovedStorage: currently disabled dir " + root.getAbsolutePath() + "; type=" + sd.getStorageDirType() + ";canwrite=" + root.canWrite()); try { if(root.exists() && root.canWrite()) { LOG.info("attemptRestoreRemovedStorage: restoring dir " + sd.getRoot().getAbsolutePath()); this.addStorageDir(sd); // restore it.remove(); sd.lock(); } } catch(IOException e) { LOG.warn("attemptRestoreRemovedStorage: failed to restore " + sd.getRoot().getAbsolutePath(), e); } } } } /** * @return A list of storage directories which are in the errored state. */ public List<StorageDirectory> getRemovedStorageDirs() { return this.removedStorageDirs; } public synchronized void setStorageDirectories(Collection<URI> fsNameDirs, Collection<URI> fsEditsDirs) throws IOException { setStorageDirectories(fsNameDirs, fsEditsDirs, null); } /** * Set the storage directories which will be used. 
This should only ever be * called from inside NNStorage. However, it needs to remain package private * for testing, as StorageDirectories need to be reinitialised after using * Mockito.spy() on this class, as Mockito doesn't work well with inner * classes, such as StorageDirectory in this case. * * Synchronized due to initialization of storageDirs and removedStorageDirs. * * @param fsNameDirs Locations to store images. * @param fsEditsDirs Locations to store edit logs. * @param locationMap location descriptors * @throws IOException */ public synchronized void setStorageDirectories(Collection<URI> fsNameDirs, Collection<URI> fsEditsDirs, Map<URI, NNStorageLocation> locationMap) throws IOException { this.storageDirs.clear(); this.removedStorageDirs.clear(); for (URI dirName : fsNameDirs) { boolean isAlsoEdits = false; for (URI editsDirName : fsEditsDirs) { if (editsDirName.compareTo(dirName) == 0) { isAlsoEdits = true; fsEditsDirs.remove(editsDirName); break; } } NameNodeDirType dirType = (isAlsoEdits) ? NameNodeDirType.IMAGE_AND_EDITS : NameNodeDirType.IMAGE; // Add to the list of storage directories, only if the // URI is of type file:// if (dirName.getScheme().compareTo(JournalType.FILE.name().toLowerCase()) == 0) { this.addStorageDir(new NNStorageDirectory(new File(dirName.getPath()), dirType, locationMap == null ? null : locationMap.get(dirName))); } } // Add edits dirs if they are different from name dirs for (URI dirName : fsEditsDirs) { checkSchemeConsistency(dirName); // Add to the list of storage directories, only if the // URI is of type file:// if (dirName.getScheme().compareTo(JournalType.FILE.name().toLowerCase()) == 0) this.addStorageDir(new NNStorageDirectory(new File(dirName.getPath()), NameNodeDirType.EDITS, locationMap == null ? 
null : locationMap.get(dirName))); } } /** * Return the storage directory corresponding to the passed URI * @param uri URI of a storage directory * @return The matching storage directory or null if none found */ StorageDirectory getStorageDirectory(URI uri) { try { uri = Util.fileAsURI(new File(uri)); Iterator<StorageDirectory> it = dirIterator(); for (; it.hasNext(); ) { StorageDirectory sd = it.next(); if (Util.fileAsURI(sd.getRoot()).equals(uri)) { return sd; } } } catch (IOException ioe) { LOG.warn("Error converting file to URI", ioe); } return null; } /** * Checks the consistency of a URI, in particular if the scheme * is specified * @param u URI whose consistency is being checked. */ private static void checkSchemeConsistency(URI u) throws IOException { String scheme = u.getScheme(); // the URI should have a proper scheme if(scheme == null) { throw new IOException("Undefined scheme for " + u); } } /** * Retrieve current directories of type IMAGE * @return Collection of URI representing image directories * @throws IOException in case of URI processing error */ Collection<File> getImageDirectories() throws IOException { return getDirectories(NameNodeDirType.IMAGE); } /** * Retrieve current directories of type EDITS * @return Collection of URI representing edits directories * @throws IOException in case of URI processing error */ Collection<File> getEditsDirectories() throws IOException { return getDirectories(NameNodeDirType.EDITS); } /** * Return number of storage directories of the given type. * @param dirType directory type * @return number of storage directories of type dirType */ int getNumStorageDirs(NameNodeDirType dirType) { if(dirType == null) return getNumStorageDirs(); Iterator<StorageDirectory> it = dirIterator(dirType); int numDirs = 0; for(; it.hasNext(); it.next()) numDirs++; return numDirs; } /** * Return the list of locations being used for a specific purpose. * i.e. Image or edit log storage. * * @param dirType Purpose of locations requested. 
* @throws IOException */ Collection<File> getDirectories(NameNodeDirType dirType) throws IOException { ArrayList<File> list = new ArrayList<File>(); Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() : dirIterator(dirType); for ( ;it.hasNext(); ) { StorageDirectory sd = it.next(); list.add(sd.getRoot()); } return list; } /** * Determine the last transaction ID noted in this storage directory. * This txid is stored in a special seen_txid file since it might not * correspond to the latest image or edit log. For example, an image-only * directory will have this txid incremented when edits logs roll, even * though the edits logs are in a different directory. * * @param sd StorageDirectory to check * @return If file exists and can be read, last recorded txid. If not, 0L. * @throws IOException On errors processing file pointed to by sd */ static long readTransactionIdFile(StorageDirectory sd) throws IOException { File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID); long txid = 0L; if (txidFile.exists() && txidFile.canRead()) { BufferedReader br = new BufferedReader(new FileReader(txidFile)); try { txid = Long.valueOf(br.readLine()); br.close(); br = null; } finally { IOUtils.cleanup(LOG, br); } } return txid; } /** * Write last checkpoint time into a separate file. 
* * @param sd * @throws IOException */ void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException { if (txid < -1) { // -1 is valid when formatting throw new IOException("Bad txid: " + txid); } File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID); OutputStream fos = new AtomicFileOutputStream(txIdFile); try { fos.write(String.valueOf(txid).getBytes()); fos.write('\n'); fos.close(); fos = null; } finally { IOUtils.cleanup(LOG, fos); } } /** * Set the transaction ID of the last checkpoint */ synchronized void setMostRecentCheckpointTxId(long txid) { if(txid > mostRecentCheckpointTxId) { this.mostRecentCheckpointTxId = txid; this.mostRecentCheckpointTime = FSNamesystem.now(); } } /** * Return the transaction ID of the last checkpoint. */ public long getMostRecentCheckpointTxId() { return mostRecentCheckpointTxId; } /** * Return the time of last successful checkpoint */ public String getMostRecentCheckpointTime() { return new Date(mostRecentCheckpointTime).toString(); } /** * Write a small file in all available storage directories that * indicates that the namespace has reached some given transaction ID. * * This is used when the image is loaded to avoid accidental rollbacks * in the case where an edit log is fully deleted but there is no * checkpoint. 
See TestNameEditsConfigs.testNameEditsConfigsFailure() * @param txid the txid that has been reached */ public void writeTransactionIdFileToStorage(long txid, FSImage image) throws IOException { // Write txid marker in all storage directories List<StorageDirectory> badSDs = new ArrayList<StorageDirectory>(); for (StorageDirectory sd : storageDirs) { try { writeTransactionIdFile(sd, txid); } catch(IOException e) { // Close any edits stream associated with this dir and remove directory LOG.warn("writeTransactionIdToStorage failed on " + sd, e); badSDs.add(sd); } } reportErrorsOnDirectories(badSDs, image); if (image != null) { } } /** * Return the name of the image file that is uploaded by periodic * checkpointing * * @return List of filenames to save checkpoints to. */ public File[] getFsImageNameCheckpoint(long txid) { ArrayList<File> list = new ArrayList<File>(); for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE); it.hasNext();) { list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid)); } return list.toArray(new File[list.size()]); } /** * Return the name of the image file, preferring * "type" images. Otherwise, return any image. * * @return The name of the image file. */ public File getFsImageName(StorageLocationType type, long txid) { File lastCandidate = null; for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE); it.hasNext();) { StorageDirectory sd = it.next(); File fsImage = getStorageFile(sd, NameNodeFile.IMAGE, txid); if(sd.getRoot().canRead() && fsImage.exists()) { if (isPreferred(type, sd)) { return fsImage; } lastCandidate = fsImage; } } return lastCandidate; } /** * Return all images for given txid, together with their types * (local, shared, remote). 
*/ public Map<File, StorageLocationType> getImages(long txid) { Map<File, StorageLocationType> map = new HashMap<File, StorageLocationType>(); for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE); it.hasNext();) { StorageDirectory sd = it.next(); File fsImage = getStorageFile(sd, NameNodeFile.IMAGE, txid); if(sd.getRoot().canRead() && fsImage.exists()) { map.put(fsImage, getType(sd)); } } return map; } /** * Format all available storage directories. */ public void format() throws IOException { this.layoutVersion = FSConstants.LAYOUT_VERSION; this.namespaceID = newNamespaceID(); this.cTime = 0L; for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) { StorageDirectory sd = it.next(); format(sd); } } /** Create new dfs name directory. Caution: this destroys all files * in this filesystem. */ private void format(StorageDirectory sd) throws IOException { sd.clearDirectory(); // create current dir sd.write(); writeTransactionIdFile(sd, -1); LOG.info("Storage directory " + sd.getRoot() + " has been successfully formatted."); } /** * Generate new namespaceID. * * namespaceID is a persistent attribute of the namespace. * It is generated when the namenode is formatted and remains the same * during the life cycle of the namenode. * When a datanodes register they receive it as the registrationID, * which is checked every time the datanode is communicating with the * namenode. Datanodes that do not 'know' the namespaceID are rejected. 
* * @return new namespaceID */ static int newNamespaceID() { Random r = new Random(); r.setSeed(FSNamesystem.now()); int newID = 0; while(newID == 0) newID = r.nextInt(0x7FFFFFFF); // use 31 bits only return newID; } @Override // Storage protected void getFields(Properties props, StorageDirectory sd) throws IOException { super.getFields(props, sd); if (layoutVersion == 0) throw new IOException("NameNode directory " + sd.getRoot() + " is not formatted."); String sDUS, sDUV; sDUS = props.getProperty("distributedUpgradeState"); sDUV = props.getProperty("distributedUpgradeVersion"); setDistributedUpgradeState( sDUS == null ? false : Boolean.parseBoolean(sDUS), sDUV == null ? getLayoutVersion() : Integer.parseInt(sDUV)); setDeprecatedPropertiesForUpgrade(props); } /** * Return a property that was stored in an earlier version of HDFS. * * This should only be used during upgrades. */ String getDeprecatedProperty(String prop) { assert getLayoutVersion() > FSConstants.LAYOUT_VERSION : "getDeprecatedProperty should only be done when loading " + "storage from past versions during upgrade."; return deprecatedProperties.get(prop); } /** * Write version file into the storage directory. * * The version file should always be written last. * Missing or corrupted version file indicates that * the checkpoint is not valid. * * @param sd storage directory * @throws IOException */ @Override // Storage protected void setFields(Properties props, StorageDirectory sd) throws IOException { super.setFields(props, sd); boolean uState = getDistributedUpgradeState(); int uVersion = getDistributedUpgradeVersion(); if (uState && uVersion != getLayoutVersion()) { props.setProperty("distributedUpgradeState", Boolean.toString(uState)); props .setProperty("distributedUpgradeVersion", Integer.toString(uVersion)); } } /** * Pull any properties out of the VERSION file that are from older * versions of HDFS and only necessary during upgrade. 
*/ private void setDeprecatedPropertiesForUpgrade(Properties props) { deprecatedProperties = new HashMap<String, String>(); String md5 = props.getProperty(MESSAGE_DIGEST_PROPERTY); if (md5 != null) { deprecatedProperties.put(MESSAGE_DIGEST_PROPERTY, md5); } } //////////////////////////////////////////////////////////////////////// // names and files for images checkpoint images, edits, etc //////////////////////////////////////////////////////////////////////// static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) { return new File(sd.getCurrentDir(), String.format("%s_%019d", type.getName(), imageTxId)); } /** * Get a storage file for one of the files that doesn't need a txid associated * (e.g version, seen_txid) */ static File getStorageFile(StorageDirectory sd, NameNodeFile type) { return new File(sd.getCurrentDir(), type.getName()); } public static String getCheckpointImageFileName(long txid) { return String.format("%s_%019d", NameNodeFile.IMAGE_NEW.getName(), txid); } public static File getCheckpointImageFile(StorageDirectory sd, long txid) { return new File(sd.getCurrentDir(), getCheckpointImageFileName(txid)); } public static String getImageFileName(long txid) { return String.format("%s_%019d", NameNodeFile.IMAGE.getName(), txid); } public static File getImageFile(StorageDirectory sd, long txid) { return new File(sd.getCurrentDir(), getImageFileName(txid)); } public static String getInProgressEditsFileName(long startTxId) { return String.format("%s_%019d", NameNodeFile.EDITS_INPROGRESS.getName(), startTxId); } static File getInProgressEditsFile(StorageDirectory sd, long startTxId) { return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId)); } static File getFinalizedEditsFile(StorageDirectory sd, long startTxId, long endTxId) { return new File(sd.getCurrentDir(), getFinalizedEditsFileName(startTxId, endTxId)); } public static String getFinalizedEditsFileName(long startTxId, long endTxId) { return 
String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(), startTxId, endTxId); } //////////////////////////////////////////////////////////////////////// /** * Return the first readable finalized edits file for the given txid. */ File findFinalizedEditsFile(long startTxId, long endTxId) throws IOException { File ret = findFile(NameNodeDirType.EDITS, getFinalizedEditsFileName(startTxId, endTxId)); if (ret == null) { throw new IOException( "No edits file for txid " + startTxId + "-" + endTxId + " exists!"); } return ret; } /** * Return the first readable inprogress edits file for the given txid. */ File findInProgressEditsFile(long startTxId) throws IOException { File ret = findFile(NameNodeDirType.EDITS, getInProgressEditsFileName(startTxId)); if (ret == null) { throw new IOException( "No edits file for txid " + startTxId + "-in progress"); } return ret; } /** * Return the first readable image file for the given txid, or null * if no such image can be found */ File findImageFile(long txid) throws IOException { return findFile(NameNodeDirType.IMAGE, getImageFileName(txid)); } /** * Return the first readable storage file of the given name * across any of the 'current' directories in SDs of the * given type, or null if no such file exists. */ private File findFile(NameNodeDirType dirType, String name) { for (StorageDirectory sd : dirIterable(dirType)) { File candidate = new File(sd.getCurrentDir(), name); if (sd.getCurrentDir().canRead() && candidate.exists()) { return candidate; } } return null; } /** * Checks if we have information about this directory * that it is preferred. * @param type preferred type * @param sd storage directory */ static boolean isPreferred(StorageLocationType type, StorageDirectory sd) { if ((sd instanceof NNStorageDirectory)) { return ((NNStorageDirectory) sd).type == type; } // by default all are preferred return true; } /** * Get the type of given directory. 
*/ static StorageLocationType getType(StorageDirectory sd) { if ((sd instanceof NNStorageDirectory)) { return ((NNStorageDirectory) sd).type; } // by default all are local return StorageLocationType.LOCAL; } /** * @return A list of the given File in every available storage directory, * regardless of whether it might exist. */ File[] getFiles(NameNodeDirType dirType, String fileName) { ArrayList<File> list = new ArrayList<File>(); Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() : dirIterator(dirType); for ( ;it.hasNext(); ) { list.add(new File(it.next().getCurrentDir(), fileName)); } return list.toArray(new File[list.size()]); } /** * Set the upgrade manager for use in a distributed upgrade. * @param um The upgrade manager */ void setUpgradeManager(UpgradeManager um) { upgradeManager = um; } /** * @return The current distribued upgrade state. */ boolean getDistributedUpgradeState() { return upgradeManager == null ? false : upgradeManager.getUpgradeState(); } /** * @return The current upgrade version. */ int getDistributedUpgradeVersion() { return upgradeManager == null ? 0 : upgradeManager.getUpgradeVersion(); } /** * Set the upgrade state and version. * @param uState the new state. * @param uVersion the new version. */ private void setDistributedUpgradeState(boolean uState, int uVersion) { if (upgradeManager != null) { upgradeManager.setUpgradeState(uState, uVersion); } } /** * Verify that the distributed upgrade state is valid. * @param startOpt the option the namenode was started with. */ void verifyDistributedUpgradeProgress(StartupOption startOpt ) throws IOException { if(startOpt == StartupOption.ROLLBACK || startOpt == StartupOption.IMPORT) return; assert upgradeManager != null : "FSNameSystem.upgradeManager is null."; if(startOpt != StartupOption.UPGRADE) { if(upgradeManager.getUpgradeState()) throw new IOException( "\n Previous distributed upgrade was not completed. 
" + "\n Please restart NameNode with -upgrade option."); if(upgradeManager.getDistributedUpgrades() != null) throw new IOException("\n Distributed upgrade for NameNode version " + upgradeManager.getUpgradeVersion() + " to current LV " + layoutVersion + " is required.\n Please restart NameNode" + " with -upgrade option."); } } /** * Initialize a distributed upgrade. */ void initializeDistributedUpgrade() throws IOException { if(! upgradeManager.initializeUpgrade()) return; // write new upgrade state into disk writeAll(); LOG.info("\n Distributed upgrade for NameNode version " + upgradeManager.getUpgradeVersion() + " to current LV " + layoutVersion + " is initialized."); } /** * Disable the check for pre-upgradable layouts. Needed for BackupImage. * @param val Whether to disable the preupgradeable layout check. */ void setDisablePreUpgradableLayoutCheck(boolean val) { disablePreUpgradableLayoutCheck = val; } /** * Marks a list of directories as having experienced an error. * * @param sds A list of storage directories to mark as errored. * @throws IOException */ synchronized void reportErrorsOnDirectories(List<StorageDirectory> sds, FSImage image) throws IOException { for (StorageDirectory sd : sds) { reportErrorsOnDirectory(sd, image); } // check image managers (this will update image metrics) if (image != null) { image.checkImageManagers(); } // only check if something was wrong if(!sds.isEmpty()) { if (this.getNumStorageDirs() == 0) throw new IOException("No more storage directories left"); // check image directories, edits are checked withing FSEditLog.checkJournals if (getNumStorageDirs(NameNodeDirType.IMAGE) == 0) throw new IOException("No more image storage directories left"); } } /** * Reports that a directory has experienced an error. * Notifies listeners that the directory is no longer * available. * * @param sd A storage directory to mark as errored. 
* @throws IOException */ synchronized void reportErrorsOnDirectory(StorageDirectory sd, FSImage image) { String lsd = listStorageDirectories(); LOG.info("reportErrorsOnDirectory: Current list of storage dirs:" + lsd); LOG.error("reportErrorsOnDirectory: Error reported on storage directory " + sd.getRoot()); if (this.storageDirs.remove(sd)) { try { sd.unlock(); } catch (Exception e) { LOG.warn( "reportErrorsOnDirectory: Unable to unlock bad storage directory: " + sd.getRoot().getPath(), e); } this.removedStorageDirs.add(sd); } if (image != null) { image.reportErrorsOnImageManager(sd); } lsd = listStorageDirectories(); LOG.info("reportErrorsOnDirectory: Current list of storage dirs:" + lsd); } /** * Report that an IOE has occurred on some file which may * or may not be within one of the NN image storage directories. */ public void reportErrorOnFile(File f) { // We use getAbsolutePath here instead of getCanonicalPath since we know // that there is some IO problem on that drive. // getCanonicalPath may need to call stat() or readlink() and it's likely // those calls would fail due to the same underlying IO problem. String absPath = f.getAbsolutePath(); for (StorageDirectory sd : storageDirs) { String dirPath = sd.getRoot().getAbsolutePath(); if (!dirPath.endsWith("/")) { dirPath += "/"; } if (absPath.startsWith(dirPath)) { reportErrorsOnDirectory(sd, null); return; } } } /** * Iterate over all current storage directories, inspecting them * with the given inspector. */ void inspectStorageDirs(FSImageStorageInspector inspector) throws IOException { // Process each of the storage directories to find the pair of // newest image file and edit file for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) { StorageDirectory sd = it.next(); inspector.inspectDirectory(sd); } } /** * Iterate over all of the storage dirs, reading their contents to determine * their layout versions. Returns an FSImageStorageInspector which has * inspected each directory. 
*
   * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
   * @return the inspector that was run over the storage directories
   * @throws IOException if no valid storage dirs are found
   */
  FSImageStorageInspector readAndInspectDirs() throws IOException {
    // The numerically smallest layout version seen is treated as the newest
    // and the largest as the oldest.
    int newestLayout = Integer.MAX_VALUE;
    int oldestLayout = Integer.MIN_VALUE;

    // Pass 1: read every directory that has a VERSION file and track the
    // range of layout versions encountered.
    Iterator<StorageDirectory> dirs = dirIterator();
    while (dirs.hasNext()) {
      StorageDirectory dir = dirs.next();
      if (dir.getVersionFile().exists()) {
        dir.read(); // sets layoutVersion
        newestLayout = Math.min(newestLayout, getLayoutVersion());
        oldestLayout = Math.max(oldestLayout, getLayoutVersion());
      } else {
        FSImage.LOG.warn("Storage directory " + dir
            + " contains no VERSION file. Skipping...");
      }
    }

    // The bounds can only still be inverted if no directory was read at all.
    if (newestLayout > oldestLayout) {
      throw new IOException(
          "No storage directories contained VERSION information");
    }
    assert newestLayout <= oldestLayout;

    // Pass 2: pick the inspector. As soon as one directory uses the txid-based
    // layout (ie edits_<txnid>) the transactional inspector is used, and it
    // ignores directories that are still in the old format.
    final FSImageStorageInspector inspector;
    if (!LayoutVersion.supports(Feature.TXID_BASED_LAYOUT, newestLayout)) {
      inspector = new FSImagePreTransactionalStorageInspector(conf);
    } else {
      inspector = new FSImageTransactionalStorageInspector();
      if (!LayoutVersion.supports(Feature.TXID_BASED_LAYOUT, oldestLayout)) {
        FSImage.LOG.warn(
            "Ignoring one or more storage directories with old layouts");
      }
    }

    inspectStorageDirs(inspector);
    return inspector;
  }

  /**
   * Makes sure <code>image/fsimage</code> exists under the given root
   * (creating the directory and the file when they are missing) and hands the
   * file, opened in "rws" mode, to <code>writeCorruptedData</code>. The old
   * image file is recreated so that pre-upgrade versions fail on it.
   *
   * @param rootDir root of the storage directory
   * @throws IOException if the directory or file cannot be created, or if
   *         opening, writing or closing the file fails
   */
  @Override
  protected void corruptPreUpgradeStorage(File rootDir) throws IOException {
    File legacyDir = new File(rootDir, "image");
    if (!legacyDir.exists() && !legacyDir.mkdir()) {
      throw new IOException("Cannot create directory " + legacyDir);
    }

    File legacyImage = new File(legacyDir, "fsimage");
    if (!legacyImage.exists() && !legacyImage.createNewFile()) {
      throw new IOException("Cannot create file " + legacyImage);
    }

    // Overwrite the old image with the new version marker; always close.
    RandomAccessFile raf = new RandomAccessFile(legacyImage, "rws");
    try {
      writeCorruptedData(raf);
    } finally {
      raf.close();
    }
  }

  /**
   * Records the digest of the checkpoint image for the given transaction id.
   * Setting the same digest again is a no-op; setting a different one fails.
   *
   * @param txid transaction id of the checkpoint image
   * @param imageDigest digest to record
   * @throws IOException if a different digest is already recorded for txid
   */
  synchronized void setCheckpointImageDigest(long txid, MD5Hash imageDigest)
      throws IOException {
    if (!checkpointImageDigests.containsKey(txid)) {
      checkpointImageDigests.put(txid, imageDigest);
      return;
    }

    MD5Hash recorded = checkpointImageDigests.get(txid);
    if (recorded.equals(imageDigest)) {
      return;
    }
    throw new IOException(
        "Trying to set checkpoint image digest for txid: " + txid + "="
        + imageDigest + " existing " + recorded);
  }

  /**
   * Forgets the digest recorded for the given transaction id, if any.
   *
   * @param txid transaction id whose digest should be dropped
   */
  synchronized void clearCheckpointImageDigest(long txid)
      throws IOException {
    checkpointImageDigests.remove(txid);
  }

  /**
   * Looks up the digest recorded for the given transaction id.
   *
   * @param txid transaction id of the checkpoint image
   * @return the recorded digest
   * @throws IOException if no digest is recorded for txid
   */
  synchronized MD5Hash getCheckpointImageDigest(long txid) throws IOException {
    if (!checkpointImageDigests.containsKey(txid)) {
      throw new IOException("Trying to get checkpoint image digest for txid: "
          + txid + " but it's not stored");
    }
    return checkpointImageDigests.get(txid);
  }

  /**
   * Drops every recorded image digest whose transaction id is strictly
   * smaller than the given one.
   *
   * @param minImageTxId smallest transaction id whose digest is kept
   */
  synchronized void purgeOldStorage(long minImageTxId) {
    Iterator<Map.Entry<Long, MD5Hash>> digests =
        checkpointImageDigests.entrySet().iterator();
    while (digests.hasNext()) {
      Map.Entry<Long, MD5Hash> digest = digests.next();
      if (digest.getKey() < minImageTxId) {
        digests.remove();
      }
    }
  }

  /**
   * Brings a storage directory into a usable state given its analyzed state.
   * The directory is expected to be locked but not yet opened.
   *
   * @param sd storage directory to recover
   * @param startOpt startup option the NameNode was started with
   * @param curState state the directory was found in
   * @param checkImport whether to reject importing into a formatted directory
   * @return true if the directory was read, i.e. it is not in the
   *         NOT_FORMATTED state and the startup option is not ROLLBACK
   * @throws IOException if the directory does not exist, if recovery or
   *         reading fails, or if an import into a formatted directory was
   *         requested while checkImport is set
   */
  public static boolean recoverDirectory(StorageDirectory sd,
      StartupOption startOpt, StorageState curState, boolean checkImport)
      throws IOException {
    switch (curState) {
    case NON_EXISTENT:
      // name-node fails if any of the configured storage dirs are missing
      throw new InconsistentFSStateException(sd.getRoot(),
          "storage directory does not exist or is not accessible.");
    case NOT_FORMATTED:
    case NORMAL:
      // nothing to recover
      break;
    default:
      // any other state is recoverable
      sd.doRecover(curState);
    }

    final boolean formatted = curState != StorageState.NOT_FORMATTED
        && startOpt != StartupOption.ROLLBACK;
    if (formatted) {
      // read and verify consistency with other directories
      sd.read();
    }

    // import of a checkpoint is allowed only into empty image directories
    if (formatted && checkImport && startOpt == StartupOption.IMPORT) {
      throw new IOException("Cannot import image from a checkpoint. "
          + " NameNode already contains an image in " + sd.getRoot());
    }
    return formatted;
  }

  /**
   * Finalizes an upgrade of the given storage directory by renaming its
   * previous directory to the finalized-tmp location and deleting it. Does
   * nothing except log when the previous directory is already gone.
   *
   * @param sd storage directory to finalize
   * @param layoutVersion current layout version, only used for logging
   *        (omitted from the log message when 0)
   * @param cTime current creation time, only used for logging
   * @throws IOException if the rename or delete fails
   */
  public static void finalize(StorageDirectory sd, int layoutVersion,
      long cTime) throws IOException {
    File previous = sd.getPreviousDir();
    if (!previous.exists()) {
      // already discarded
      LOG.info("Directory " + previous + " does not exist.");
      LOG.info("Finalize upgrade for " + sd.getRoot() + " is not required.");
      return;
    }

    String versionDetails = "";
    if (layoutVersion != 0) {
      versionDetails =
          "\n cur LV = " + layoutVersion + "; cur CTime = " + cTime;
    }
    LOG.info("Finalizing upgrade for storage directory " + sd.getRoot() + "."
        + versionDetails);

    assert sd.getCurrentDir().exists() : "Current directory must exist.";

    // previous -> finalized.tmp, then remove it
    final File finalizedTmp = sd.getFinalizedTmp();
    NNStorage.rename(previous, finalizedTmp);
    NNStorage.deleteDir(finalizedTmp);
    LOG.info("Finalize upgrade for " + sd.getRoot() + " is complete.");
  }

  /**
   * Checks whether the given storage directory holds a previous state that
   * this NameNode version can roll back to.
   *
   * @param sd storage directory to check
   * @param storage storage whose layout version is compared against
   *        {@link FSConstants#LAYOUT_VERSION} after the previous VERSION file
   *        has been read
   * @return false if there is no previous directory, true otherwise
   * @throws IOException if reading fails or the layout version does not match
   */
  public static boolean canRollBack(StorageDirectory sd, Storage storage)
      throws IOException {
    File previous = sd.getPreviousDir();
    if (previous.exists()) {
      // read and verify consistency of the prev dir
      sd.read(sd.getPreviousVersionFile());

      if (storage.getLayoutVersion() != FSConstants.LAYOUT_VERSION) {
        throw new IOException("Cannot rollback to storage version "
            + storage.getLayoutVersion()
            + " using this version of the NameNode, which uses storage version "
            + FSConstants.LAYOUT_VERSION
            + ". Please use the previous version of HDFS to perform the rollback.");
      }
      return true;
    }

    // no previous state: fall back to the current directory
    LOG.info("Storage directory " + sd.getRoot()
        + " does not contain previous fs state.");
    // read and verify consistency with other directories
    sd.read();
    return false;
  }

  /**
   * Rolls the given storage directory back to its previous state: current is
   * moved aside to removed.tmp, previous becomes current, and removed.tmp is
   * deleted. Returns silently when there is no previous directory.
   *
   * @param sd storage directory to roll back
   * @param storage storage whose layout version and cTime are logged
   * @throws IOException if a rename or the delete fails
   */
  public static void doRollBack(StorageDirectory sd, Storage storage)
      throws IOException {
    File previous = sd.getPreviousDir();
    if (!previous.exists()) {
      return;
    }

    LOG.info("Rolling back storage directory " + sd.getRoot()
        + ".\n new LV = " + storage.getLayoutVersion()
        + "; new CTime = " + storage.getCTime());

    File removedTmp = sd.getRemovedTmp();
    assert !removedTmp.exists() : "removed.tmp directory must not exist.";

    File current = sd.getCurrentDir();
    assert current.exists() : "Current directory must exist.";

    NNStorage.rename(current, removedTmp);   // current  -> removed.tmp
    NNStorage.rename(previous, current);     // previous -> current
    NNStorage.deleteDir(removedTmp);         // drop the old current
    LOG.info("Rollback of " + sd.getRoot() + " is complete.");
  }

  /**
   * @return the configuration held by this storage
   */
  Configuration getConf() {
    return conf;
  }
}