/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyDefault; import org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicy.NotEnoughReplicasException; import org.apache.hadoop.hdfs.util.InjectionEvent; import org.apache.hadoop.net.DNSToSwitchMapping; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.net.Node; import org.apache.hadoop.raid.DirectoryStripeReader.BlockInfo; import org.apache.hadoop.raid.RaidNode; import org.apache.hadoop.raid.Codec; import org.apache.hadoop.util.HostsFileReader; import org.apache.hadoop.util.InjectionHandler; import org.apache.hadoop.util.StringUtils; /** * This BlockPlacementPolicy uses a simple heuristic, random placement of * the replicas of a newly-created block, for the purpose of spreading out the * group of blocks which used by RAID for recovering each other. * This is important for the availability of the blocks. * * Replication of an existing block continues to use the default placement * policy. * * This simple block placement policy does not guarantee that * blocks on the RAID stripe are on different nodes. However, BlockMonitor * will periodically scans the raided files and will fix the placement * if it detects violation. * * This class can be used by multiple threads. It has to be thread safe. */ public class BlockPlacementPolicyRaid extends BlockPlacementPolicyDefault { public static final Log LOG = LogFactory.getLog(BlockPlacementPolicyRaid.class); Configuration conf; private FSNamesystem namesystem = null; private CachedLocatedBlocks cachedLocatedBlocks; private CachedFullPathNames cachedFullPathNames; private long minFileSize = RaidNode.MINIMUM_RAIDABLE_FILESIZE; /** {@inheritDoc} */ @Override public void initialize(Configuration conf, FSClusterStats stats, NetworkTopology clusterMap, HostsFileReader hostsReader, DNSToSwitchMapping dnsToSwitchMapping, FSNamesystem namesystem) { super.initialize(conf, stats, clusterMap, hostsReader, dnsToSwitchMapping, namesystem); this.conf = conf; this.minFileSize = conf.getLong(RaidNode.MINIMUM_RAIDABLE_FILESIZE_KEY, RaidNode.MINIMUM_RAIDABLE_FILESIZE); this.namesystem = namesystem; this.cachedLocatedBlocks = new CachedLocatedBlocks(conf); this.cachedFullPathNames = new CachedFullPathNames(conf); } @Override public DatanodeDescriptor[] chooseTarget(String srcPath, int numOfReplicas, DatanodeDescriptor writer, List<DatanodeDescriptor> chosenNodes, long blocksize) { return chooseTarget(srcPath, numOfReplicas, writer, chosenNodes, null, blocksize); } @Override protected void place3rdReplicaForInClusterWriter( HashMap<Node, Node> excludedNodes, long blocksize, int maxNodesPerRack,List<DatanodeDescriptor> results ) throws NotEnoughReplicasException { if (results.size() > 2) { return; } HashSet<String> excludedRacks = new HashSet<String>(); for (DatanodeDescriptor node : results) { String rack = node.getNetworkLocation(); excludedRacks.add(rack); } do { String remoteRack = clusterMap.chooseRack(excludedRacks); if (remoteRack == null) { // no more remote rack available // choose a node on the rack where the first replica is located chooseLocalRack( results.get(0), excludedNodes, blocksize, maxNodesPerRack, results); return; } // a remote rack is chosen try { excludedRacks.add(remoteRack); chooseRandom(1, remoteRack, excludedNodes, blocksize, maxNodesPerRack, results); return; } catch (NotEnoughReplicasException ne) { // try again until all remote tracks are exhausted } } while (true); } @Override public DatanodeDescriptor[] chooseTarget(String srcPath, int numOfReplicas, DatanodeDescriptor writer, List<DatanodeDescriptor> chosenNodes, List<Node> exlcNodes, long blocksize) { try { FileInfo info = getFileInfo(null, srcPath); if (LOG.isDebugEnabled()) { LOG.debug("FileType:" + srcPath + " " + info.type.name()); } if (info.type == FileType.NOT_RAID) { return super.chooseTarget( srcPath, numOfReplicas, writer, chosenNodes, exlcNodes, blocksize); } ArrayList<DatanodeDescriptor> results = new ArrayList<DatanodeDescriptor>(); HashMap<Node, Node> excludedNodes = new HashMap<Node, Node>(); if (exlcNodes != null) { for (Node node: exlcNodes) { excludedNodes.put(node, node); } } for (Node node:chosenNodes) { excludedNodes.put(node, node); } chooseRandom(numOfReplicas, Path.SEPARATOR, excludedNodes, blocksize, 1, results); return results.toArray(new DatanodeDescriptor[results.size()]); } catch (Exception e) { FSNamesystem.LOG.debug( "Error happend when choosing datanode to write:" + StringUtils.stringifyException(e)); return super.chooseTarget(srcPath, numOfReplicas, writer, chosenNodes, blocksize); } } /** {@inheritDoc} */ @Override public DatanodeDescriptor chooseReplicaToDelete(FSInodeInfo inode, Block block, short replicationFactor, Collection<DatanodeDescriptor> first, Collection<DatanodeDescriptor> second) { DatanodeDescriptor chosenNode = null; try { String path = getFullPathName(inode); FileInfo info = getFileInfo(inode, path); if (info.type == FileType.NOT_RAID) { return super.chooseReplicaToDelete( inode, block, replicationFactor, first, second); } List<LocatedBlock> companionBlocks = getCompanionBlocks(path, info, block, inode); if (companionBlocks == null || companionBlocks.size() == 0) { // Use the default method if it is not a valid raided or parity file return super.chooseReplicaToDelete( inode, block, replicationFactor, first, second); } // Delete from the first collection first // This ensures the number of unique rack of this block is not reduced Collection<DatanodeDescriptor> all = new HashSet<DatanodeDescriptor>(); all.addAll(first); all.addAll(second); chosenNode = chooseReplicaToDelete(companionBlocks, all); if (chosenNode != null) { return chosenNode; } return super.chooseReplicaToDelete( inode, block, replicationFactor, first, second); } catch (Exception e) { LOG.debug("Failed to choose the correct replica to delete", e); return super.chooseReplicaToDelete( inode, block, replicationFactor, first, second); } } private DatanodeDescriptor chooseReplicaToDelete( Collection<LocatedBlock> companionBlocks, Collection<DatanodeDescriptor> dataNodes) throws IOException { if (dataNodes.isEmpty()) { return null; } // Count the number of replicas on each node and rack final Map<String, Integer>[] companionBlockCounts = countCompanionBlocks(companionBlocks); final Map<String, Integer> nodeCompanionBlockCount = companionBlockCounts[0]; final Map<String, Integer> rackCompanionBlockCount = companionBlockCounts[1]; NodeComparator comparator = new NodeComparator(nodeCompanionBlockCount, rackCompanionBlockCount); return Collections.max(dataNodes, comparator); } /** * Count how many companion blocks are on each datanode or the each rack * @param companionBlocks a collection of all the companion blocks * @param result the map from node name to the number of companion blocks * [0] for datanodes [1] for racks */ @SuppressWarnings("unchecked") static Map<String, Integer>[] countCompanionBlocks( Collection<LocatedBlock> companionBlocks) { Map<String, Integer>[] result = new HashMap[2]; result[0] = new HashMap<String, Integer>(); result[1] = new HashMap<String, Integer>(); for (LocatedBlock block : companionBlocks) { for (DatanodeInfo d : block.getLocations()) { // count the companion blocks on the datanodes String name = d.getName(); Integer currentCount = result[0].get(name); result[0].put(name, currentCount == null ? 1 : currentCount + 1); // count the companion blocks on the racks of datanodes name = d.getParent().getName(); currentCount = result[1].get(name); result[1].put(name, currentCount == null ? 1 : currentCount + 1); } } return result; } /** * Compares the datanodes based on the number of companion blocks on the same * node and rack. If even, compare the remaining space on the datanodes. */ class NodeComparator implements Comparator<DatanodeDescriptor> { private Map<String, Integer> nodeBlockCount; private Map<String, Integer> rackBlockCount; private NodeComparator(Map<String, Integer> nodeBlockCount, Map<String, Integer> rackBlockCount) { this.nodeBlockCount = nodeBlockCount; this.rackBlockCount = rackBlockCount; } @Override public int compare(DatanodeDescriptor d1, DatanodeDescriptor d2) { int res = compareBlockCount(d1, d2, nodeBlockCount); if (res != 0) { return res; } res = compareBlockCount(d1.getParent(), d2.getParent(), rackBlockCount); if (res != 0) { return res; } if (d1.getRemaining() > d2.getRemaining()) { return -1; } if (d1.getRemaining() < d2.getRemaining()) { return 1; } return 0; } private int compareBlockCount(Node node1, Node node2, Map<String, Integer> blockCount) { Integer count1 = blockCount.get(node1.getName()); Integer count2 = blockCount.get(node2.getName()); count1 = count1 == null ? 0 : count1; count2 = count2 == null ? 0 : count2; if (count1 > count2) { return 1; } if (count1 < count2) { return -1; } return 0; } } /** * Obtain the companion blocks of the give block * Companion blocks are defined as the blocks that can help recover each * others by using raid decoder. * @param path The path of the file contains the block * @param info The info of this file * @param block The given block * null if it is the block which is currently being written to * @param inode the inode of the path file * @return the block locations of companion blocks */ List<LocatedBlock> getCompanionBlocks(String path, FileInfo info, Block block, FSInodeInfo inode) throws IOException { Codec codec = info.codec; switch (info.type) { case NOT_RAID: return Collections.emptyList(); case HAR_TEMP_PARITY: return getCompanionBlocksForHarParityBlock( path, codec.parityLength, block, inode); case TEMP_PARITY: NameWithINode ni = getSourceFile(path, codec.tmpParityDirectory); return getCompanionBlocksForParityBlock( ni.name, path, codec.parityLength, codec.stripeLength, block, codec.isDirRaid, ni.inode, inode); case PARITY: ni = getSourceFile(path, codec.parityDirectory); return getCompanionBlocksForParityBlock( ni.name, path, codec.parityLength, codec.stripeLength, block, codec.isDirRaid, ni.inode, inode); case SOURCE: return getCompanionBlocksForSourceBlock( path, info.parityName, codec.parityLength, codec.stripeLength, block, codec.isDirRaid, inode, info.parityInode); } return Collections.emptyList(); } private List<LocatedBlock> getCompanionBlocksForHarParityBlock( String parity, int parityLength, Block block, FSInodeInfo inode) throws IOException { int blockIndex = getBlockIndex(parity, block, inode, true); List<LocatedBlock> parityBlocks = getLocatedBlocks(parity, inode); // consider only parity file in this case because source file block // location is not easy to obtain List<LocatedBlock> result = new ArrayList<LocatedBlock>(); int start = Math.max(0, blockIndex - parityLength + 1); int end = Math.min(parityBlocks.size(), blockIndex + parityLength); result.addAll(parityBlocks.subList(start, end)); return result; } private void addCompanionParityBlocks(String parity, INodeFile pinode, int stripeIndex, int parityLength, List<LocatedBlock> blocks) throws IOException { if (pinode == null) return; long parityStartOffset = stripeIndex * parityLength * pinode.getPreferredBlockSize(); long parityFileSize = namesystem.dir.getFileSize(pinode); // for parity, always consider the neighbor blocks as companion blocks if (parityStartOffset < parityFileSize) { blocks.addAll(getLocatedBlocks(pinode, parityStartOffset, parityLength * pinode.getPreferredBlockSize())); } } String getFullPathName(FSInodeInfo inode) throws IOException { String path = cachedFullPathNames.get(inode); if (path != null) { InjectionHandler .processEvent(InjectionEvent.BLOCKPLACEMENTPOLICYRAID_CACHED_PATH); return path; } byte[][] names = null; namesystem.readLock(); try { names = FSDirectory.getINodeByteArray((INode)inode); } finally { namesystem.readUnlock(); } path = FSDirectory.getFullPathName(names); cachedFullPathNames.put(inode, path); return path; } List<LocatedBlock> getLocatedBlocks(String file, FSInodeInfo f) throws IOException { List<LocatedBlock> blocks = cachedLocatedBlocks.get(file); if (blocks != null) { InjectionHandler .processEvent(InjectionEvent.BLOCKPLACEMENTPOLICYRAID_CACHED_BLOCKS); return blocks; } // otherwise populate cache INodeFile inode = (INodeFile) f; // Note that the list is generated. It is not the internal data of inode. List<LocatedBlock> result = inode == null ? new ArrayList<LocatedBlock>() : namesystem.getBlockLocationsInternal(inode, 0, Long.MAX_VALUE, Integer.MAX_VALUE).getLocatedBlocks(); if (result == null) { result = Collections.emptyList(); } else { result = Collections.unmodifiableList(result); } cachedLocatedBlocks.put(file, result); return result; } public List<LocatedBlock> getLocatedBlocks(INodeFile inode, long offset, long length) throws IOException { // Note that the list is generated. It is not the internal data of inode. List<LocatedBlock> result = inode == null ? new ArrayList<LocatedBlock>() : namesystem.getBlockLocationsInternal(inode, offset, length, Integer.MAX_VALUE).getLocatedBlocks(); if (result == null) { return Collections.emptyList(); } return Collections.unmodifiableList(result); } private List<LocatedBlock> getCompanionBlocksForParityBlock( String src, String parity, int parityLength, int stripeLength, Block block, boolean isDirRaid, FSInodeInfo srcinode, FSInodeInfo pinode) throws IOException { int blockIndex = getBlockIndex(parity, block, pinode, false); int stripeIndex = blockIndex / parityLength; List<LocatedBlock> result = new ArrayList<LocatedBlock>(); addCompanionParityBlocks(parity, (INodeFile)pinode, stripeIndex, parityLength, result); if (src == null) { return result; } // get the source blocks. List<LocatedBlock> sourceBlocks; int sourceStart = stripeIndex * stripeLength; int sourceEnd = sourceStart + stripeLength; if (!isDirRaid) { sourceBlocks = getLocatedBlocks(src, srcinode); } else { sourceBlocks = new ArrayList<LocatedBlock>(); INode inode = (INode) srcinode; INodeDirectory srcNode; if (inode.isDirectory()) { srcNode = (INodeDirectory) inode; } else { throw new IOException( "The source should be a directory in Dir-Raiding: " + src); } boolean found = false; String srcPath = src + Path.SEPARATOR; // look for the stripe namesystem.readLock(); namesystem.dir.readLock(); try { for (INode child : srcNode.getChildren()) { if (child.isDirectory()) { throw new IOException("The source is not a leaf directory: " + src + ", contains a subdirectory: " + child.getLocalName()); } INodeFile childInode = (INodeFile)child; long fileSize = namesystem.dir.getFileSize(childInode); // check if we will do dir-raid on this file if (fileSize < minFileSize) { continue; } int numBlocks = childInode.getBlocks().length; if (numBlocks < sourceStart && !found) { sourceStart -= numBlocks; sourceEnd -= numBlocks; continue; } else { String childName = srcPath + child.getLocalName(); List<LocatedBlock> childBlocks = getLocatedBlocks(childName, child); found = true; sourceBlocks.addAll(childBlocks); if (sourceEnd <= sourceBlocks.size()) { break; } } } } finally { namesystem.dir.readUnlock(); namesystem.readUnlock(); } } sourceEnd = Math.min(sourceEnd, sourceBlocks.size()); if (sourceStart < sourceBlocks.size()) { result.addAll(sourceBlocks.subList(sourceStart, sourceEnd)); } return result; } private List<LocatedBlock> getCompanionBlocksForSourceBlock( String src, String parity, int parityLength, int stripeLength, Block block, boolean isDirRaid, FSInodeInfo inode, FSInodeInfo parityInode) throws IOException { List<LocatedBlock> result = new ArrayList<LocatedBlock>(); List<LocatedBlock> sourceBlocks = null; int blockIndex = getBlockIndex(src, block, inode, true); int stripeIndex = 0; int sourceStart = 0; int sourceEnd = 0; if (!isDirRaid) { sourceBlocks = getLocatedBlocks(src, inode); stripeIndex = blockIndex / stripeLength; sourceStart = stripeIndex * stripeLength; sourceEnd = Math.min(sourceStart + stripeLength, sourceBlocks.size()); } else { // cache the candidate blocks. BlockInfo[] tmpStripe = new BlockInfo[stripeLength]; for (int i = 0; i < stripeLength; i++) { tmpStripe[i] = new BlockInfo(0, 0); } int curIdx = 0; boolean found = false; sourceBlocks = new ArrayList<LocatedBlock>(); byte[][] components = INodeDirectory.getPathComponents(src); INodeDirectory srcNode = namesystem.dir.getINode(components).getParent(); String parentPath = getParentPath(src); if (!parentPath.endsWith(Path.SEPARATOR)) { parentPath += Path.SEPARATOR; } namesystem.readLock(); namesystem.dir.readLock(); try { List<INode> children = srcNode.getChildren(); // look for the stripe for (int fid = 0; fid < children.size(); fid++) { INode child = children.get(fid); if (child.isDirectory()) { throw new IOException("The raided-directory is not a leaf directory: " + parentPath + ", contains a subdirectory: " + child.getLocalName()); } INodeFile childInode = (INodeFile)child; long fileSize = namesystem.dir.getFileSize(childInode); // check if we will do dir-raid on this file if (fileSize < minFileSize) { continue; } String childName = parentPath + child.getLocalName(); if (found) { if (sourceEnd <= sourceBlocks.size()) { break; } List<LocatedBlock> childBlocks = getLocatedBlocks(childName, childInode); sourceBlocks.addAll(childBlocks); } else { int childBlockSize = childInode.getBlocks().length; /** * If we find the target file, we will addAll the * cached blocks and the child blocks. * And update the metrics like stripeIndex, sourceStart and sourceEnd. * */ if (childName.equals(src)) { found = true; List<LocatedBlock> prevChildBlocks = null; for (int i=0; i<curIdx; i++) { if (i == 0 || tmpStripe[i].fileIdx != tmpStripe[i - 1].fileIdx) { INode prevChildInode = children.get(tmpStripe[i].fileIdx); String prevChildName = parentPath + prevChildInode.getLocalName(); prevChildBlocks = getLocatedBlocks(prevChildName, prevChildInode); } sourceBlocks.add(prevChildBlocks.get(tmpStripe[i].blockId)); } List<LocatedBlock> childBlocks = getLocatedBlocks(childName, childInode); sourceBlocks.addAll(childBlocks); blockIndex += curIdx; stripeIndex += blockIndex / stripeLength; sourceStart = (blockIndex / stripeLength) * stripeLength; sourceEnd = sourceStart + stripeLength; } else { /** * If not find the target file, we will keep the current stripe * in the temp stripe cache. */ /** * the childBlockSize is small, and we can fill them into * current temp stripe cache. */ if (curIdx + childBlockSize < stripeLength) { for (int i=0; i<childBlockSize; i++, curIdx++) { tmpStripe[curIdx].fileIdx = fid; tmpStripe[curIdx].blockId = i; } } else { /** * The childBlockSize is not small, We need to calculate * the place in the stripe cache, and copy the current stripe * into the temp stripe cache. */ stripeIndex += (curIdx + childBlockSize) / stripeLength; int childStart = ((curIdx + childBlockSize) / stripeLength) * stripeLength - curIdx; curIdx = 0; for (; childStart<childBlockSize; childStart++,curIdx++) { tmpStripe[curIdx].fileIdx = fid; tmpStripe[curIdx].blockId = childStart; } curIdx %= stripeLength; } } } } } finally { namesystem.dir.readUnlock(); namesystem.readUnlock(); } sourceEnd = Math.min(sourceEnd, sourceBlocks.size()); } if (sourceStart < sourceBlocks.size()) { for (int i = sourceStart; i < sourceEnd; i++) { result.add(sourceBlocks.get(i)); } } if (parity == null) { return result; } // add the parity blocks. addCompanionParityBlocks(parity, (INodeFile)parityInode, stripeIndex, parityLength, result); return result; } private int getBlockIndex(String file, Block block, FSInodeInfo inode, boolean cacheResult) throws IOException { if (cacheResult) { List<LocatedBlock> blocks = getLocatedBlocks(file, inode); // null indicates that this block is currently added. Return size() // as the index in this case if (block == null) { return blocks.size(); } for (int i = 0; i < blocks.size(); i++) { if (blocks.get(i).getBlock().equals(block)) { return i; } } throw new IOException("Cannot locate " + block + " in file " + file); } else { return namesystem.dir.getBlockIndex((INodeFile)inode, block, file); } } /** * Cache results for FSInodeInfo.getFullPathName() */ static class CachedFullPathNames { private Cache<INodeWithHashCode, String> cacheInternal; CachedFullPathNames(final Configuration conf) { this.cacheInternal = new Cache<INodeWithHashCode, String>(conf); } private static class INodeWithHashCode { FSInodeInfo inode; INodeWithHashCode(FSInodeInfo inode) { this.inode = inode; } @Override public boolean equals(Object obj) { if (!(obj instanceof INodeWithHashCode)) return false; return inode == ((INodeWithHashCode)obj).inode; } @Override public int hashCode() { return System.identityHashCode(inode); } } public String get(FSInodeInfo inode) throws IOException { return cacheInternal.get(new INodeWithHashCode(inode)); } public void put(FSInodeInfo inode, String path) { cacheInternal.put(new INodeWithHashCode(inode), path); } } /** * Cache results for FSNamesystem.getBlockLocations() */ static class CachedLocatedBlocks extends Cache<String, List<LocatedBlock>> { CachedLocatedBlocks(Configuration conf) { super(conf); } } /** * Generic caching class */ private static class Cache<K, V> { private Map<K, ValueWithTime> cache; final private long cacheTimeout; final private int maxEntries; // The timeout is long but the consequence of stale value is not serious Cache(Configuration conf) { this.cacheTimeout = conf.getLong("raid.blockplacement.cache.timeout", 5000L); // 5 seconds this.maxEntries = conf.getInt("raid.blockplacement.cache.size", 1000); // 1000 entries Map<K, ValueWithTime> map = new LinkedHashMap<K, ValueWithTime>( 2 * maxEntries, 0.75f, true) { private static final long serialVersionUID = 1L; @Override protected boolean removeEldestEntry( Map.Entry<K, ValueWithTime> eldest) { return size() > maxEntries; } }; this.cache = Collections.synchronizedMap(map); } public V get(K key) throws IOException { // The method is not synchronized so we may get some stale value here but // it's OK. ValueWithTime result = cache.get(key); long now = System.currentTimeMillis(); if (result != null && now - result.cachedTime < cacheTimeout) { return result.value; } return null; } public void put(K key, V value) { ValueWithTime v = new ValueWithTime(); v.value = value; v.cachedTime = System.currentTimeMillis(); cache.put(key, v); } private class ValueWithTime { V value = null; long cachedTime = 0L; } } /** * Get path for the corresponding source file for a valid parity * file. Returns null if it does not exists * @param parity the toUri path of the parity file * @return the toUri path of the source file */ NameWithINode getSourceFile(String parity, String prefix) throws IOException { if (isHarFile(parity)) { return null; } // remove the prefix String src = parity.substring(prefix.length()); byte[][] components = INodeDirectory.getPathComponents(src); INode inode = namesystem.dir.getINode(components); return new NameWithINode(src, inode); } class NameWithINode { String name; INode inode; public NameWithINode(String name, INode inode) { this.name = name; this.inode = inode; } } /** * Get path for the parity file. Returns null if it does not exists * @param codec the codec of the parity file. * @return the toUri path of the parity file */ private NameWithINode getParityFile(Codec codec, String src) throws IOException { String parity; if (codec.isDirRaid) { String parent = getParentPath(src); parity = codec.parityDirectory + parent; } else { parity = codec.parityDirectory + src; } byte[][] components = INodeDirectory.getPathComponents(parity); INode parityInode = namesystem.dir.getINode(components); if (parityInode == null) return null; return new NameWithINode(parity, parityInode); } static String getParentPath(String src) { int precision = 1; if (src.length() > 1 && src.endsWith(Path.SEPARATOR)) { precision = 2; } src = src.substring(0, src.lastIndexOf(Path.SEPARATOR, src.length() - precision)); if (src.isEmpty()) src = Path.SEPARATOR; return src; } private boolean isHarFile(String path) { return path.lastIndexOf(RaidNode.HAR_SUFFIX) != -1; } class FileInfo { FileInfo(FileType type, Codec codec) { this.type = type; this.codec = codec; } FileInfo(FileType type, Codec codec, String parityName, INode parityInode) throws IOException { if (type != FileType.SOURCE) { throw new IOException("FileType must be source"); } this.type = type; this.codec = codec; this.parityInode = parityInode; this.parityName = parityName; } final FileType type; final Codec codec; INode parityInode = null; String parityName = null; } enum FileType { NOT_RAID, HAR_TEMP_PARITY, TEMP_PARITY, PARITY, SOURCE, } /** * Return raid information about a file, for example * if this file is the source file, parity file, or not raid * * @param path file name * @return raid information * @throws IOException */ protected FileInfo getFileInfo(FSInodeInfo srcINode, String path) throws IOException { for (Codec c : Codec.getCodecs()) { if (path.startsWith(c.tmpHarDirectoryPS)) { return new FileInfo(FileType.HAR_TEMP_PARITY, c); } if (path.startsWith(c.tmpParityDirectoryPS)) { return new FileInfo(FileType.TEMP_PARITY, c); } if (path.startsWith(c.parityDirectoryPS)) { return new FileInfo(FileType.PARITY, c); } NameWithINode ni = getParityFile(c, path); if (ni != null) { if (c.isDirRaid && srcINode != null && srcINode instanceof INodeFile) { INodeFile inf = (INodeFile)srcINode; if (inf.getFileSize() < this.minFileSize) { // It's too small to be raided return new FileInfo(FileType.NOT_RAID, null); } } return new FileInfo(FileType.SOURCE, c, ni.name, ni.inode); } } return new FileInfo(FileType.NOT_RAID, null); } }