/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.datanode; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FilenameFilter; import java.io.IOException; import java.io.InterruptedIOException; import java.io.RandomAccessFile; import java.nio.channels.FileLock; import java.util.Collection; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.HardLink; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.FSConstants; import org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType; import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.Storage.StorageState; import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.DiskChecker; import org.apache.hadoop.io.IOUtils; /** * Data storage information file. * <p> * @see Storage */ public class DataStorage extends Storage { // Constants final static String BLOCK_SUBDIR_PREFIX = "subdir"; final static String BLOCK_FILE_PREFIX = "blk_"; final static String COPY_FILE_PREFIX = "dncp_"; final static String STORAGE_DIR_DETACHED = "detach"; public final static String STORAGE_DIR_TMP = "tmp"; private final static String STORAGE_ID = "storageID"; private String storageID; // flag to ensure initialzing storage occurs only once private boolean initialized = false; // NameSpaceStorage is map of <Name Space Id, NameSpaceStorage> private Map<Integer, NameSpaceSliceStorage> nsStorageMap = new HashMap<Integer, NameSpaceSliceStorage>(); private final DataNode datanode; // Map of top level directory to layout version. Map<File, Integer> layoutMap = new HashMap<File, Integer>(); DataStorage(DataNode datanode) { super(NodeType.DATA_NODE); storageID = ""; this.datanode = datanode; } public DataStorage(StorageInfo storageInfo, String strgID, DataNode datanode) { super(NodeType.DATA_NODE, storageInfo); this.storageID = strgID; this.datanode = datanode; } public NameSpaceSliceStorage getNStorage(int namespaceId) { return nsStorageMap.get(namespaceId); } public String getStorageID() { return storageID; } void setStorageID(String newStorageID) { this.storageID = newStorageID; } synchronized void createStorageID(int datanodePort) { if (storageID != null && !storageID.isEmpty()) { return; } storageID = DataNode.createNewStorageId(datanodePort); } ArrayList<StorageDirectory> analyzeStorageDirs(NamespaceInfo nsInfo, Collection<File> dataDirs, StartupOption startOpt ) throws IOException { if (storageID == null) this.storageID = ""; if (storageDirs == null) { this.storageDirs = new ArrayList<StorageDirectory>(dataDirs.size()); } else { ((ArrayList<StorageDirectory>) storageDirs) .ensureCapacity(storageDirs.size() + dataDirs.size()); } ArrayList<StorageDirectory> newDirs = new ArrayList<StorageDirectory>( dataDirs.size()); ArrayList<StorageState> dataDirStates = new ArrayList<StorageState>(dataDirs.size()); for(Iterator<File> it = dataDirs.iterator(); it.hasNext();) { File dataDir = it.next(); StorageDirectory sd = new StorageDirectory(dataDir); StorageState curState; try { curState = sd.analyzeStorage(startOpt); // sd is locked but not opened switch(curState) { case NORMAL: break; case NON_EXISTENT: // ignore this storage LOG.info("Storage directory " + dataDir + " does not exist."); it.remove(); continue; case NOT_FORMATTED: // format LOG.info("Storage directory " + dataDir + " is not formatted."); if (!sd.isEmpty()) { LOG.error("Storage directory " + dataDir + " is not empty, and will not be formatted! Exiting."); throw new IOException( "Storage directory " + dataDir + " is not empty!"); } LOG.info("Formatting ..."); format(sd, nsInfo); break; default: // recovery part is common sd.doRecover(curState); } } catch (IOException ioe) { try { sd.unlock(); } catch (IOException e) { LOG.warn("Exception when unlocking storage directory", e); } LOG.warn("Ignoring storage directory " + dataDir, ioe); //continue with other good dirs continue; } // add to the storage list addStorageDir(sd); newDirs.add(sd); dataDirStates.add(curState); } if (dataDirs.size() == 0) // none of the data dirs exist throw new IOException( "All specified directories are not accessible or do not exist."); return newDirs; } /** * Analyze storage directories. * Recover from previous transitions if required. * Perform fs state transition if necessary depending on the namespace info. * Read storage info. * * @param nsInfo namespace information * @param dataDirs array of data storage directories * @param startOpt startup option * @throws IOException */ synchronized void recoverTransitionRead(DataNode datanode, NamespaceInfo nsInfo, Collection<File> dataDirs, StartupOption startOpt ) throws IOException { if (initialized) { // DN storage has been initialized, no need to do anything return; } if (FSConstants.LAYOUT_VERSION != nsInfo.getLayoutVersion()) { throw new IOException( "Data-node and name-node layout versions must be the same. Namenode LV: " + nsInfo.getLayoutVersion() + ", current LV: " + FSConstants.LAYOUT_VERSION); } // 1. For each data directory calculate its state and // check whether all is consistent before transitioning. // Format and recover. analyzeStorageDirs(nsInfo, dataDirs, startOpt); // 2. Do transitions // Each storage directory is treated individually. // During startup some of them can upgrade or rollback // while others could be uptodate for the regular startup. doTransition(storageDirs, nsInfo, startOpt); // 3. make sure we have storage id set - if not - generate new one createStorageID(datanode.getPort()); // 4. Update all storages. Some of them might have just been formatted. this.writeAll(); this.initialized = true; } /** * merge the data directory from srcDataDirs to dstDataDirs * @return true if merge succeeds; false if no merge happens */ boolean doMerge(String[] srcDataDirs, Collection<File> dstDataDirs, int namespaceId, NamespaceInfo nsInfo, StartupOption startOpt) throws IOException { HashMap<File, File> dirsToMerge = new HashMap<File, File>(); int i = 0; for (Iterator<File> it = dstDataDirs.iterator(); it.hasNext(); i++) { File dstDataDir = it.next(); if (dstDataDir.exists()) { continue; } File srcDataDir = NameSpaceSliceStorage.getNsRoot( namespaceId, new File(srcDataDirs[i], STORAGE_DIR_CURRENT)); if (!srcDataDir.exists() || !srcDataDir.isDirectory()) { LOG.info("Source data directory " + srcDataDir + " doesn't exist."); continue; } dirsToMerge.put(srcDataDir, dstDataDir); } if (dirsToMerge.size() == 0) //No merge is needed return false; if (dirsToMerge.size() != dstDataDirs.size()) { // Last merge succeeds partially throw new IOException("Merge fail: not all directories are merged successfully."); } MergeThread[] mergeThreads = new MergeThread[dirsToMerge.size()]; // start to merge i = 0; for (Map.Entry<File, File> entry: dirsToMerge.entrySet()) { MergeThread thread = new MergeThread(entry.getKey(), entry.getValue(), nsInfo); thread.start(); mergeThreads[i] = thread; i++; } // wait for merge to be done for (MergeThread thread : mergeThreads) { try { thread.join(); } catch (InterruptedException e) { throw (InterruptedIOException)new InterruptedIOException().initCause(e); } } // check for errors for (MergeThread thread : mergeThreads) { if (thread.error != null) throw new IOException(thread.error); } return true; } /** * recoverTransitionRead for a specific Name Space * * @param datanode DataNode * @param namespaceId name space Id * @param nsInfo Namespace info of namenode corresponding to the Name Space * @param dataDirs Storage directories * @param startOpt startup option * @throws IOException on error */ void recoverTransitionRead(DataNode datanode, int namespaceId, NamespaceInfo nsInfo, Collection<File> dataDirs, StartupOption startOpt, String nameserviceId) throws IOException { // First ensure datanode level format/snapshot/rollback is completed // recoverTransitionRead(datanode, nsInfo, dataDirs, startOpt); // Create list of storage directories for the Name Space Collection<File> nsDataDirs = new ArrayList<File>(); for(Iterator<File> it = dataDirs.iterator(); it.hasNext();) { File dnRoot = it.next(); File nsRoot = NameSpaceSliceStorage.getNsRoot( namespaceId, new File(dnRoot, STORAGE_DIR_CURRENT)); nsDataDirs.add(nsRoot); } boolean merged = false; String[] mergeDataDirs = nameserviceId == null? null: datanode.getConf().getStrings("dfs.merge.data.dir." + nameserviceId); if (startOpt.equals(StartupOption.REGULAR) && mergeDataDirs != null && mergeDataDirs.length > 0) { assert mergeDataDirs.length == dataDirs.size(); merged = doMerge(mergeDataDirs, nsDataDirs, namespaceId, nsInfo, startOpt); } if (!merged) { // mkdir for the list of NameSpaceStorage makeNameSpaceDataDir(nsDataDirs); } NameSpaceSliceStorage nsStorage = new NameSpaceSliceStorage( namespaceId, this.getCTime(), layoutMap); nsStorage.recoverTransitionRead(datanode, nsInfo, nsDataDirs, startOpt); addNameSpaceStorage(namespaceId, nsStorage); } /** * Create physical directory for Name Spaces on the data node * * @param dataDirs * List of data directories * @throws IOException on errors */ public static void makeNameSpaceDataDir(Collection<File> dataDirs) throws IOException { for (File data : dataDirs) { try { DiskChecker.checkDir(data); } catch ( IOException e ) { LOG.warn("Invalid directory in: " + data.getCanonicalPath() + ": " + e.getMessage()); } } } synchronized Collection<StorageDirectory> recoverTransitionAdditionalRead(NamespaceInfo nsInfo, Collection<File> dataDirs, StartupOption startOpt ) throws IOException{ assert FSConstants.LAYOUT_VERSION == nsInfo.getLayoutVersion() : "Data-node and name-node layout versions must be the same."; // 1. For each data directory calculate its state and // check whether all is consistent before transitioning. // Format and recover. ArrayList<StorageDirectory> newDirs = analyzeStorageDirs(nsInfo, dataDirs, startOpt); // 2. Do transitions // Each storage directory is treated individually. // During startup some of them can upgrade or rollback // while others could be uptodate for the regular startup. doTransition(newDirs, nsInfo, startOpt); assert this.getLayoutVersion() == nsInfo.getLayoutVersion() : "Data-node and name-node layout versions must be the same."; assert this.getCTime() == nsInfo.getCTime() : "Data-node and name-node CTimes must be the same."; // 3. Update all storages. Some of them might have just been formatted. if (this.layoutVersion == 0) { layoutVersion = FSConstants.LAYOUT_VERSION; } for (StorageDirectory sd : newDirs) { sd.write(); } return newDirs; } void format(StorageDirectory sd, NamespaceInfo nsInfo) throws IOException { sd.clearDirectory(); // create directory this.layoutVersion = FSConstants.LAYOUT_VERSION; this.namespaceID = nsInfo.getNamespaceID(); // mother namespaceid this.cTime = 0; // store storageID as it currently is sd.write(); } protected void setFields(Properties props, StorageDirectory sd ) throws IOException { props.setProperty(STORAGE_TYPE, storageType.toString()); props.setProperty(LAYOUT_VERSION, String.valueOf(layoutVersion)); props.setProperty(STORAGE_ID, getStorageID()); // Set NamespaceID in version before federation if (layoutVersion > FSConstants.FEDERATION_VERSION) { props.setProperty(NAMESPACE_ID, String.valueOf(namespaceID)); props.setProperty(CHECK_TIME, String.valueOf(cTime)); } } protected void getFields(Properties props, StorageDirectory sd ) throws IOException { setLayoutVersion(props, sd); setStorageType(props, sd); // Read NamespaceID in version before federation if (layoutVersion > FSConstants.FEDERATION_VERSION) { setNamespaceID(props, sd); setcTime(props, sd); } String ssid = props.getProperty(STORAGE_ID); if (ssid == null || !("".equals(storageID) || "".equals(ssid) || storageID.equals(ssid))) throw new InconsistentFSStateException(sd.getRoot(), "has incompatible storage Id."); if ("".equals(storageID)) // update id only if it was empty storageID = ssid; } public boolean isConversionNeeded(StorageDirectory sd) throws IOException { File oldF = new File(sd.getRoot(), "storage"); if (!oldF.exists()) return false; // check the layout version inside the storage file // Lock and Read old storage file RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws"); FileLock oldLock = oldFile.getChannel().tryLock(); try { oldFile.seek(0); int oldVersion = oldFile.readInt(); if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION) return false; } finally { oldLock.release(); oldFile.close(); } return true; } private boolean isNsLevelUpgraded(int namespaceId, StorageDirectory sd) { File nsRoot = NameSpaceSliceStorage.getNsRoot(namespaceId, sd.getCurrentDir()); return new File(nsRoot, STORAGE_DIR_PREVIOUS).exists(); } /** * Analyze which and whether a transition of the fs state is required and * perform it if necessary. * * Rollback if (previousLV >= LAYOUT_VERSION && previousLV > * FEDERATION_VERSION) * Upgrade if this.LV > LAYOUT_VERSION && this.LV > FEDERATION_VERSION * Regular startup if this.LV = LAYOUT_VERSION && this.cTime = namenode.cTime * * @param nsInfo * namespace info * @param startOpt * startup option * @throws IOException */ private void doTransition(List<StorageDirectory> sds, NamespaceInfo nsInfo, StartupOption startOpt ) throws IOException { if (startOpt == StartupOption.ROLLBACK) doRollback(nsInfo); // rollback if applicable int numOfDirs = sds.size(); List<StorageDirectory> dirsToUpgrade = new ArrayList<StorageDirectory>(numOfDirs); List<StorageInfo> dirsInfo = new ArrayList<StorageInfo>(numOfDirs); for (StorageDirectory sd : sds) { sd.read(); layoutMap.put(sd.getRoot(), this.layoutVersion); checkVersionUpgradable(this.layoutVersion); assert this.layoutVersion >= FSConstants.LAYOUT_VERSION : "Future version is not allowed"; boolean federationSupported = this.layoutVersion <= FSConstants.FEDERATION_VERSION; // For pre-federation version - validate the namespaceID if (!federationSupported && getNamespaceID() != nsInfo.getNamespaceID()) { sd.unlock(); throw new IOException( "Incompatible namespaceIDs in " + sd.getRoot().getCanonicalPath() + ": namenode namespaceID = " + nsInfo.getNamespaceID() + "; datanode namespaceID = " + getNamespaceID()); } if (this.layoutVersion == FSConstants.LAYOUT_VERSION && this.cTime == nsInfo.getCTime()) continue; // regular startup // verify necessity of a distributed upgrade verifyDistributedUpgradeProgress(nsInfo); // do a global upgrade iff layout version changes and current layout is // older than FEDERATION. if (this.layoutVersion > FSConstants.LAYOUT_VERSION && this.layoutVersion > FSConstants.FEDERATION_VERSION) { if (isNsLevelUpgraded(getNamespaceID(), sd)) { throw new IOException("Ns level directory already upgraded for : " + sd.getRoot() + " ignoring upgrade"); } dirsToUpgrade.add(sd); // upgrade dirsInfo.add(new StorageInfo(this)); continue; } if (this.cTime >= nsInfo.getCTime()) { // layoutVersion == LAYOUT_VERSION && this.cTime > nsInfo.cTime // must shutdown sd.unlock(); throw new IOException("Datanode state: LV = " + this.getLayoutVersion() + " CTime = " + this.getCTime() + " is newer than the namespace state: LV = " + nsInfo.getLayoutVersion() + " CTime = " + nsInfo.getCTime()); } } // Now do upgrade if dirsToUpgrade is not empty if (!dirsToUpgrade.isEmpty()) { doUpgrade(dirsToUpgrade, dirsInfo, nsInfo); } } /** * A thread that merges a data storage directory from * srcDataDir to dstDataDir */ static class MergeThread extends Thread { private File srcNSDir; private File dstNSDir; private NamespaceInfo nsInfo; volatile Throwable error = null; private static final String STORAGE_DIR_MERGE_TMP = "merge.tmp"; MergeThread(File srcNSDir, File dstNSDir, NamespaceInfo nsInfo) { this.srcNSDir = srcNSDir; this.dstNSDir = dstNSDir; this.nsInfo = nsInfo; this.setName("Merging " + srcNSDir + " to " + dstNSDir); } /* check if the directory is merged */ private boolean isMerged() { return dstNSDir.exists(); } public void run() { try { if (isMerged()) { return; } LOG.info("Merging storage directory " + srcNSDir + " to " + dstNSDir); File mergeTmpDir = new File(dstNSDir.getParent(), STORAGE_DIR_MERGE_TMP); NameSpaceSliceStorage nsStorage = new NameSpaceSliceStorage( nsInfo.getNamespaceID(), nsInfo.getCTime()); nsStorage.format(mergeTmpDir, nsInfo); assert srcNSDir.exists() : "Source directory must exist."; File mergeTmpNSDir = nsStorage.getNsRoot(mergeTmpDir); File srcCurNsDir = new File(srcNSDir, STORAGE_DIR_CURRENT); File mergeTmpCurNSDir = new File(mergeTmpNSDir, STORAGE_DIR_CURRENT); // hardlink all blocks HardLink hardLink = new HardLink(); linkBlocks(new File(srcCurNsDir, STORAGE_DIR_FINALIZED), new File(mergeTmpCurNSDir, STORAGE_DIR_FINALIZED), nsInfo.getLayoutVersion(), hardLink, true); linkBlocks(new File(srcCurNsDir, STORAGE_DIR_RBW), new File(mergeTmpCurNSDir, STORAGE_DIR_RBW), nsInfo.getLayoutVersion(), hardLink, true); // finally rename the tmp dir to dst dir if (!mergeTmpNSDir.renameTo(dstNSDir)) { throw new IOException("Cannot rename tmp directory " + mergeTmpNSDir + " to dst directory " + dstNSDir); } } catch (Throwable t) { error = t; } } } /** * A thread that upgrades a data storage directory */ static class UpgradeThread extends Thread { private StorageDirectory sd; private StorageInfo si; private NamespaceInfo nsInfo; volatile Throwable error = null; private File topCurDir; private File[] namespaceDirs; UpgradeThread(StorageDirectory sd, StorageInfo si, NamespaceInfo nsInfo) { this.sd = sd; this.si = si; this.nsInfo = nsInfo; this.topCurDir = sd.getCurrentDir(); this.namespaceDirs = topCurDir.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String file) { return file.startsWith(NameSpaceSliceStorage.NS_DIR_PREFIX); } }); this.setName("Upgrading " + sd.getRoot()); } /** check if any of the namespace directory has a snapshot */ private boolean isNamespaceUpgraded() { for (File namespaceDir : namespaceDirs) { if (new File(namespaceDir, STORAGE_DIR_PREVIOUS).exists()) { return true; } } return false; } public void run() { try { if (isNamespaceUpgraded()) { /// disallow coexistence of global and per namespace snapshots throw new IOException( "Local snapshot exists. Please either finalize or rollback first!"); } LOG.info("Upgrading storage directory " + sd.getRoot() + ".\n old LV = " + si.getLayoutVersion() + "; old CTime = " + si.getCTime() + ".\n new LV = " + nsInfo.getLayoutVersion() + "; new CTime = " + nsInfo.getCTime()); File curDir = sd.getCurrentDir(); File prevDir = sd.getPreviousDir(); // remove prev dir if it exists if (prevDir.exists()) { deleteDir(prevDir); } assert curDir.exists() : "Current directory must exist."; File tmpDir = sd.getPreviousTmp(); assert !tmpDir.exists() : "previous.tmp directory must not exist."; // rename current to tmp rename(curDir, tmpDir); // hardlink blocks upgrade(si.getLayoutVersion(), nsInfo.getLayoutVersion(), tmpDir, curDir); } catch (Throwable t) { error = t; } } private void upgrade(int oldLayoutVersion, int curLayoutVersion, File tmpDir, File curDir) throws IOException { HardLink hardLink = new HardLink(); if (oldLayoutVersion <= FSConstants.FEDERATION_VERSION) { // upgrade from a federation version to a newer federation version // link top directory linkBlocks(new File(tmpDir, STORAGE_DIR_FINALIZED), new File(curDir, STORAGE_DIR_FINALIZED), curLayoutVersion, hardLink, true); linkBlocks(new File(tmpDir, STORAGE_DIR_RBW), new File(curDir, STORAGE_DIR_RBW), curLayoutVersion, hardLink, true); // link all namespace directories for (File namespaceDir : namespaceDirs) { File tmpNamespaceCurDir = new File( new File(tmpDir, namespaceDir.getName()), STORAGE_DIR_CURRENT); File namespaceDirCur = new File(namespaceDir, STORAGE_DIR_CURRENT); linkBlocks(new File(tmpNamespaceCurDir, STORAGE_DIR_FINALIZED), new File(namespaceDirCur, STORAGE_DIR_FINALIZED), curLayoutVersion, hardLink, true); linkBlocks(new File(tmpNamespaceCurDir, STORAGE_DIR_RBW), new File(namespaceDirCur, STORAGE_DIR_RBW), curLayoutVersion, hardLink, true); //link Version file linkBlocks(new File(tmpNamespaceCurDir, STORAGE_FILE_VERSION), new File(namespaceDirCur, STORAGE_FILE_VERSION), curLayoutVersion, hardLink, true); } } else if (oldLayoutVersion <= FSConstants.RBW_LAYOUT_VERSION) { // upgrade from RBW layout version to Federation. // This is the directory data/current/NS-/ File curNsDir = NameSpaceSliceStorage.getNsRoot( nsInfo.getNamespaceID(), curDir); NameSpaceSliceStorage nsStorage = new NameSpaceSliceStorage( nsInfo.getNamespaceID(), nsInfo.getCTime()); nsStorage.format(curDir, nsInfo); // Move all blocks to this namespace directory // This is the directory data/current/NS-/current. File nsCurDir = new File(curNsDir, STORAGE_DIR_CURRENT); File curNsDirFinalized = new File(nsCurDir, STORAGE_DIR_FINALIZED); File curNsDirRbw = new File(nsCurDir, STORAGE_DIR_RBW); linkBlocks(new File(tmpDir, STORAGE_DIR_FINALIZED), curNsDirFinalized, curLayoutVersion, hardLink, false); linkBlocks(new File(tmpDir, STORAGE_DIR_RBW), curNsDirRbw, curLayoutVersion, hardLink, false); } else { // upgrade pre-rbw version to federation version // create the directory for the namespace File curNsDir = NameSpaceSliceStorage.getNsRoot( nsInfo.getNamespaceID(), curDir); NameSpaceSliceStorage nsStorage = new NameSpaceSliceStorage( nsInfo.getNamespaceID(), nsInfo.getCTime()); nsStorage.format(curDir, nsInfo); // Move all blocks to this namespace directory File nsCurDir = new File(curNsDir, STORAGE_DIR_CURRENT); // Move finalized blocks File nsDirFinalized = new File(nsCurDir, STORAGE_DIR_FINALIZED); linkBlocks(tmpDir, nsDirFinalized, curLayoutVersion, hardLink, true); // Move rbw blocks File nsDirRbw = new File(nsCurDir, STORAGE_DIR_RBW); File oldDirRbw = new File(tmpDir.getParentFile(), OLD_STORAGE_DIR_RBW); linkBlocks(oldDirRbw, nsDirRbw, curLayoutVersion, hardLink, true); } LOG.info("Completed upgrading storage directory " + sd.getRoot() + " " + hardLink.linkStats.report()); } } /** * Move current storage into a backup directory, * and hardlink all its blocks into the new current directory. */ private void doUpgrade(List<StorageDirectory> sds, List<StorageInfo> sdsInfo, final NamespaceInfo nsInfo ) throws IOException { assert sds.size() == sdsInfo.size(); UpgradeThread[] upgradeThreads = new UpgradeThread[sds.size()]; // start to upgrade for (int i=0; i<upgradeThreads.length; i++) { final StorageDirectory sd = sds.get(i); final StorageInfo si = sdsInfo.get(i); UpgradeThread thread = new UpgradeThread(sd, si, nsInfo); thread.start(); upgradeThreads[i] = thread; } // wait for upgrade to be done for (UpgradeThread thread : upgradeThreads) { try { thread.join(); } catch (InterruptedException e) { throw (InterruptedIOException)new InterruptedIOException().initCause(e); } } // check for errors for (UpgradeThread thread : upgradeThreads) { if (thread.error != null) throw new IOException(thread.error); } // write version file this.layoutVersion = FSConstants.LAYOUT_VERSION; assert this.namespaceID == nsInfo.getNamespaceID() : "Data-node and name-node layout versions must be the same."; this.cTime = nsInfo.getCTime(); for (StorageDirectory sd :sds) { sd.write(); File prevDir = sd.getPreviousDir(); File tmpDir = sd.getPreviousTmp(); // rename tmp to previous rename(tmpDir, prevDir); LOG.info("Upgrade of " + sd.getRoot()+ " is complete."); } } private void doRollback(NamespaceInfo nsInfo) throws IOException { int numDirs = getNumStorageDirs(); RollbackThread[] rollbackThreads = new RollbackThread[numDirs]; // start to rollback for (int i=0; i<numDirs; i++) { final StorageDirectory sd = this.getStorageDir(i); RollbackThread thread = new RollbackThread(sd, nsInfo, new DataStorage( datanode)); thread.start(); rollbackThreads[i] = thread; } // wait for rollback to be done for (RollbackThread thread : rollbackThreads) { try { thread.join(); } catch (InterruptedException e) { return; } } // check for errors for (RollbackThread thread : rollbackThreads) { if (thread.error != null) throw new IOException(thread.error); } } static class RollbackThread extends Thread { private StorageDirectory sd; private NamespaceInfo nsInfo; volatile Throwable error; private Storage prevInfo; RollbackThread(StorageDirectory sd, NamespaceInfo nsInfo, Storage prevInfo) { this.sd = sd; this.nsInfo = nsInfo; this.setName("Rolling back " + sd.getRoot()); this.prevInfo = prevInfo; } private boolean canRollBack(boolean globalRollback) { if (globalRollback) { return (prevInfo.getLayoutVersion() >= FSConstants.LAYOUT_VERSION); } else { return ((prevInfo.getLayoutVersion() >= FSConstants.LAYOUT_VERSION || prevInfo.getCTime() <= nsInfo.getCTime())); } } public void run() { try { File prevDir = sd.getPreviousDir(); // regular startup if previous dir does not exist if (!prevDir.exists()) { return; } StorageDirectory prevSD = prevInfo.new StorageDirectory(sd.getRoot()); prevSD.read(prevSD.getPreviousVersionFile()); boolean globalRollback = prevInfo instanceof DataStorage; if (!canRollBack(globalRollback)) throw new InconsistentFSStateException(prevSD.getRoot(), "Cannot rollback to a newer state.\nDatanode previous state: LV = " + prevInfo.getLayoutVersion() + " CTime = " + prevInfo.getCTime() + " is newer than the namespace state: LV = " + nsInfo.getLayoutVersion() + " CTime = " + nsInfo.getCTime()); LOG.info("Rolling back storage directory " + sd.getRoot() + ".\n target LV = " + nsInfo.getLayoutVersion() + "; target CTime = " + nsInfo.getCTime()); File tmpDir = sd.getRemovedTmp(); assert !tmpDir.exists() : "removed.tmp directory must not exist."; // rename current to tmp File curDir = sd.getCurrentDir(); assert curDir.exists() : "Current directory must exist."; rename(curDir, tmpDir); // rename previous to current rename(prevDir, curDir); // delete tmp dir deleteDir(tmpDir); LOG.info("Rollback of " + sd.getRoot() + " is complete."); } catch (Throwable t) { error = t; } } } void doFinalize(StorageDirectory sd) throws IOException { File prevDir = sd.getPreviousDir(); if (!prevDir.exists()) return; // already discarded final String dataDirPath = sd.getRoot().getCanonicalPath(); LOG.info("Finalizing upgrade for storage directory " + dataDirPath + ".\n cur LV = " + this.getLayoutVersion() + "; cur CTime = " + this.getCTime()); assert sd.getCurrentDir().exists() : "Current directory must exist."; final File tmpDir = sd.getFinalizedTmp(); // rename previous to tmp rename(prevDir, tmpDir); // delete tmp dir in a separate thread new Daemon(new Runnable() { public void run() { try { deleteDir(tmpDir); } catch(IOException ex) { LOG.error("Finalize upgrade for " + dataDirPath + " failed.", ex); } LOG.info("Finalize upgrade for " + dataDirPath + " is complete."); } public String toString() { return "Finalize " + dataDirPath; } }).start(); } void finalizeUpgrade() throws IOException { for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) { doFinalize(it.next()); } } void finalizedUpgrade(int namespaceId) throws IOException { // To handle finalizing a snapshot taken at datanode level while // upgrading to federation, if datanode level snapshot previous exists, // then finalize it. Else finalize the corresponding BP. for (StorageDirectory sd : storageDirs) { File prevDir = sd.getPreviousDir(); File curDir = sd.getCurrentDir(); NameSpaceSliceStorage nsStorage = nsStorageMap.get(namespaceId); File nsRoot = nsStorage.getNsRoot(namespaceId, curDir); StorageDirectory nsSd = new StorageDirectory(nsRoot); if (prevDir.exists() && nsSd.getPreviousDir().exists()) { throw new IOException("Top level and NS level previous directories" + " cannot co-exist"); } if (prevDir.exists()) { // data node level storage finalize doFinalize(sd); } else { // Name Space storage finalize using specific namespaceId nsStorage.doFinalize(curDir); } } } static void linkBlocks(File from, File to, int oldLV, HardLink hl, boolean createTo) throws IOException { if (!from.exists()) { LOG.warn(from + " does not exist"); return; } if (!from.isDirectory()) { if (from.getName().startsWith(COPY_FILE_PREFIX) || from.getName().equals(Storage.STORAGE_FILE_VERSION)) { FileInputStream in = new FileInputStream(from); FileOutputStream out = new FileOutputStream(to); try { IOUtils.copyBytes(in, out, 16*1024, true); hl.linkStats.countPhysicalFileCopies++; } finally { IOUtils.closeStream(in); IOUtils.closeStream(out); } } else { //check if we are upgrading from pre-generation stamp version. if (oldLV >= PRE_GENERATIONSTAMP_LAYOUT_VERSION) { // Link to the new file name. to = new File(convertMetatadataFileName(to.getAbsolutePath())); } HardLink.createHardLink(from, to); hl.linkStats.countSingleLinks++; } return; } // from is a directory hl.linkStats.countDirs++; if (createTo && !to.exists() && !to.mkdirs()) throw new IOException("Cannot create directory " + to); //If upgrading from old stuff, need to munge the filenames. That has to //be done one file at a time, so hardlink them one at a time (slow). if (oldLV >= PRE_GENERATIONSTAMP_LAYOUT_VERSION) { String[] blockNames = from.list(new java.io.FilenameFilter() { public boolean accept(File dir, String name) { return name.startsWith(BLOCK_SUBDIR_PREFIX) || name.startsWith(BLOCK_FILE_PREFIX) || name.startsWith(COPY_FILE_PREFIX); } }); if (blockNames.length == 0) { hl.linkStats.countEmptyDirs++; } else { for(int i = 0; i < blockNames.length; i++) linkBlocks(new File(from, blockNames[i]), new File(to, blockNames[i]), oldLV, hl, true); } } else { //If upgrading from a relatively new version, we only need to create //links with the same filename. This can be done in bulk (much faster). String[] blockNames = from.list(new java.io.FilenameFilter() { public boolean accept(File dir, String name) { return name.startsWith(BLOCK_FILE_PREFIX); } }); if (blockNames.length > 0) { HardLink.createHardLinkMult(from, blockNames, to); hl.linkStats.countMultLinks++; hl.linkStats.countFilesMultLinks += blockNames.length; } else { hl.linkStats.countEmptyDirs++; } //now take care of the rest of the files and subdirectories String[] otherNames = from.list(new java.io.FilenameFilter() { public boolean accept(File dir, String name) { return name.startsWith(BLOCK_SUBDIR_PREFIX) || name.startsWith(COPY_FILE_PREFIX); } }); for(int i = 0; i < otherNames.length; i++) linkBlocks(new File(from, otherNames[i]), new File(to, otherNames[i]), oldLV, hl, true); } } protected void corruptPreUpgradeStorage(File rootDir) throws IOException { File oldF = new File(rootDir, "storage"); if (oldF.exists()) return; // recreate old storage file to let pre-upgrade versions fail if (!oldF.createNewFile()) throw new IOException("Cannot create file " + oldF); RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws"); // write new version into old storage file try { writeCorruptedData(oldFile); } finally { oldFile.close(); } } private void verifyDistributedUpgradeProgress( NamespaceInfo nsInfo ) throws IOException { UpgradeManagerDatanode um = datanode.getUpgradeManager(nsInfo .getNamespaceID()); assert um != null : "DataNode.upgradeManager is null."; um.setUpgradeState(false, getLayoutVersion()); um.initializeUpgrade(nsInfo); } private static final Pattern PRE_GENSTAMP_META_FILE_PATTERN = Pattern.compile("(.*blk_[-]*\\d+)\\.meta$"); /** * This is invoked on target file names when upgrading from pre generation * stamp version (version -13) to correct the metatadata file name. * @param oldFileName * @return the new metadata file name with the default generation stamp. */ private static String convertMetatadataFileName(String oldFileName) { Matcher matcher = PRE_GENSTAMP_META_FILE_PATTERN.matcher(oldFileName); if (matcher.matches()) { //return the current metadata file name return BlockWithChecksumFileWriter.getMetaFileName(matcher.group(1), Block.GRANDFATHER_GENERATION_STAMP); } return oldFileName; } /** * Add nsStorage into nsStorageMap */ private void addNameSpaceStorage(int nsID, NameSpaceSliceStorage nsStorage) throws IOException { if (!this.nsStorageMap.containsKey(nsID)) { this.nsStorageMap.put(nsID, nsStorage); } } synchronized void removeNamespaceStorage(int nsId) { nsStorageMap.remove(nsId); } /** * Get the data directory name that stores the namespace's blocks * @param namespaceId namespace id * @return the name of the last component of * the given namespace's data directory */ String getNameSpaceDataDir(int namespaceId) { return NameSpaceSliceStorage.getNamespaceDataDirName(namespaceId); } }