/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.namenode.INodeRaidStorage.RaidBlockInfo;
import org.apache.hadoop.hdfs.util.GSet;
import org.apache.hadoop.hdfs.util.LightWeightGSet;

/**
 * This class maintains the map from a block to its metadata.
 * A block's metadata currently includes the INode it belongs to and
 * the datanodes that store the block.
 */
public class BlocksMap {

  /**
   * Internal class for block metadata.
   */
  static public class BlockInfo extends Block
      implements LightWeightGSet.LinkedElement {
    public static final int NO_BLOCK_CHECKSUM = 0;

    private INodeFile inode;

    /** For implementing {@link LightWeightGSet.LinkedElement} interface */
    private LightWeightGSet.LinkedElement nextLinkedElement;

    /**
     * This array contains triplets of references.
     * For each i-th data-node the block belongs to,
     * triplets[3*i] is the reference to the DatanodeDescriptor,
     * and triplets[3*i+1] and triplets[3*i+2] are references
     * to the previous and the next blocks, respectively, in the
     * list of blocks belonging to this data-node.
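     *
     * For illustration only (a sketch of the indexing scheme above, not
     * additional state): a block stored on datanodes D0 and D1 with
     * replication 3 lays out as
     *
     *   triplets[0] = D0,  triplets[1] = prev block on D0,  triplets[2] = next block on D0
     *   triplets[3] = D1,  triplets[4] = prev block on D1,  triplets[5] = next block on D1
     *   triplets[6..8] = null  (unused replica slot)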
     */
    private Object[] triplets;

    private int checksum;

    BlockInfo(Block blk, int replication) {
      super(blk);
      triplets = new Object[3*replication];
      inode = null;
      checksum = NO_BLOCK_CHECKSUM;
    }

    public BlockInfo() {
      super();
    }

    public void setChecksum(int checksum) {
      this.checksum = checksum;
    }

    public int getChecksum() {
      return checksum;
    }

    public void setReplication(int replication) {
      triplets = new Object[3*replication];
    }

    INodeFile getINode() {
      return inode;
    }

    public void setINode(INodeFile inode) {
      this.inode = inode;
    }

    DatanodeDescriptor getDatanode(int index) {
      return (DatanodeDescriptor) triplets[index*3];
    }

    BlockInfo getPrevious(int index) {
      return (BlockInfo) triplets[index*3+1];
    }

    BlockInfo getNext(int index) {
      return (BlockInfo) triplets[index*3+2];
    }

    void setDatanode(int index, DatanodeDescriptor node) {
      triplets[index*3] = node;
    }

    void setPrevious(int index, BlockInfo to) {
      triplets[index*3+1] = to;
    }

    void setNext(int index, BlockInfo to) {
      triplets[index*3+2] = to;
    }

    /** Replace the previous-block reference at index and return the old value. */
    BlockInfo getSetPrevious(int index, BlockInfo to) {
      BlockInfo info = (BlockInfo) triplets[index*3+1];
      triplets[index*3+1] = to;
      return info;
    }

    /** Replace the next-block reference at index and return the old value. */
    BlockInfo getSetNext(int index, BlockInfo to) {
      BlockInfo info = (BlockInfo) triplets[index*3+2];
      triplets[index*3+2] = to;
      return info;
    }

    private int getCapacity() {
      assert this.triplets != null : "BlockInfo is not initialized";
      assert triplets.length % 3 == 0 : "Malformed BlockInfo";
      return triplets.length / 3;
    }

    /**
     * Be cautious when modifying this method: it does not write the
     * checksum, which is written separately in FSEditLog and FSImage.
     */
    @Override
    public void write(DataOutput out) throws IOException {
      super.write(out);
    }

    /**
     * Be cautious when modifying this method: it does not read the
     * checksum, which is read separately in FSEditLog and FSImage.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
      super.readFields(in);
    }

    /**
     * Ensure that there is enough space to include num more triplets.
     *
     * @return first free triplet index.
     */
    private int ensureCapacity(int num) {
      assert this.triplets != null : "BlockInfo is not initialized";
      int last = numNodes();
      if (triplets.length >= (last+num)*3)
        return last;
      /* Not enough space left. Create a new array. Should normally
       * happen only when replication is manually increased by the user. */
      Object[] old = triplets;
      triplets = new Object[(last+num)*3];
      for (int i = 0; i < last*3; i++) {
        triplets[i] = old[i];
      }
      return last;
    }

    /**
     * Count the number of data-nodes the block belongs to.
     */
    int numNodes() {
      assert this.triplets != null : "BlockInfo is not initialized";
      assert triplets.length % 3 == 0 : "Malformed BlockInfo";
      for (int idx = getCapacity()-1; idx >= 0; idx--) {
        if (getDatanode(idx) != null)
          return idx+1;
      }
      return 0;
    }

    /**
     * Add a data-node this block belongs to.
     */
    int addNode(DatanodeDescriptor node) {
      int lastNode = findDatanode(node);
      if (lastNode >= 0) // the node is already there
        return -1;
      // find the last null node
      lastNode = ensureCapacity(1);
      setDatanode(lastNode, node);
      setNext(lastNode, null);
      setPrevious(lastNode, null);
      return lastNode;
    }

    /**
     * Remove a data-node from the block.
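     *
     * The removed triplet is overwritten with the last non-null triplet
     * and the last slot is cleared, so the occupied prefix of the array
     * stays contiguous. An illustrative example: removing D1 from
     * datanode slots [D0, D1, D2] leaves [D0, D2, null].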
     */
    boolean removeNode(DatanodeDescriptor node) {
      int dnIndex = findDatanode(node);
      if (dnIndex < 0) // the node is not found
        return false;
      assert getPrevious(dnIndex) == null && getNext(dnIndex) == null :
        "Block is still in the list and must be removed first.";
      // find the last not null node
      int lastNode = numNodes()-1;
      // replace current node triplet by the lastNode one
      setDatanode(dnIndex, getDatanode(lastNode));
      setNext(dnIndex, getNext(lastNode));
      setPrevious(dnIndex, getPrevious(lastNode));
      // set the last triplet to null
      setDatanode(lastNode, null);
      setNext(lastNode, null);
      setPrevious(lastNode, null);
      return true;
    }

    /**
     * Find the specified DatanodeDescriptor.
     *
     * @param dn the data-node to look up
     * @return index or -1 if not found.
     */
    int findDatanode(DatanodeDescriptor dn) {
      int len = getCapacity();
      for (int idx = 0; idx < len; idx++) {
        DatanodeDescriptor cur = getDatanode(idx);
        if (cur == dn)
          return idx;
        if (cur == null)
          break;
      }
      return -1;
    }

    /**
     * Insert this block into the head of the list of blocks
     * related to the specified DatanodeDescriptor.
     * If the head is null then form a new list.
     *
     * @return current block as the new head of the list.
     */
    BlockInfo listInsert(BlockInfo head, DatanodeDescriptor dn, int dnIndex) {
      if (dnIndex < 0) {
        dnIndex = this.findDatanode(dn);
      }
      assert dnIndex >= 0 : "Data node is not found: current";
      assert getPrevious(dnIndex) == null && getNext(dnIndex) == null :
        "Block is already in the list and cannot be inserted.";
      this.setPrevious(dnIndex, null);
      this.setNext(dnIndex, head);
      if (head != null)
        head.setPrevious(head.findDatanode(dn), this);
      return this;
    }

    /**
     * Remove this block from the list of blocks
     * related to the specified DatanodeDescriptor.
     * If this block is the head of the list then return the next block as
     * the new head.
     *
     * @return the new head of the list or null if the list becomes
     *         empty after deletion.
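     *
     * A hypothetical usage sketch for the pair of list methods
     * (variable names are illustrative only):
     * <pre>
     *   head = blk.listInsert(head, dn, -1); // push blk onto dn's block list
     *   head = blk.listRemove(head, dn);     // unlink blk from the same list
     * </pre>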
     */
    BlockInfo listRemove(BlockInfo head, DatanodeDescriptor dn) {
      if (head == null)
        return null;
      int dnIndex = this.findDatanode(dn);
      if (dnIndex < 0) // this block is not on the data-node list
        return head;

      BlockInfo next = this.getNext(dnIndex);
      BlockInfo prev = this.getPrevious(dnIndex);
      this.setNext(dnIndex, null);
      this.setPrevious(dnIndex, null);
      if (prev != null)
        prev.setNext(prev.findDatanode(dn), next);
      if (next != null)
        next.setPrevious(next.findDatanode(dn), prev);
      if (this == head) // removing the head
        head = next;
      return head;
    }

    /** Count the blocks on dn's list, starting from this block. */
    int listCount(DatanodeDescriptor dn) {
      int count = 0;
      for (BlockInfo cur = this; cur != null;
           cur = cur.getNext(cur.findDatanode(dn)))
        count++;
      return count;
    }

    /** Verify that the links of dn's list are consistent, going forward. */
    boolean listIsConsistent(DatanodeDescriptor dn) {
      int count = 0;
      BlockInfo next, nextPrev;
      BlockInfo cur = this;
      while (cur != null) {
        next = cur.getNext(cur.findDatanode(dn));
        if (next != null) {
          nextPrev = next.getPrevious(next.findDatanode(dn));
          if (cur != nextPrev) {
            System.out.println("Inconsistent list: cur->next->prev != cur");
            return false;
          }
        }
        cur = next;
        count++;
      }
      return true;
    }

    @Override
    public LightWeightGSet.LinkedElement getNext() {
      return nextLinkedElement;
    }

    @Override
    public void setNext(LightWeightGSet.LinkedElement next) {
      this.nextLinkedElement = next;
    }
  }

  private static class NodeIterator implements Iterator<DatanodeDescriptor> {
    private BlockInfo blockInfo;
    private int nextIdx = 0;

    NodeIterator(BlockInfo blkInfo) {
      this.blockInfo = blkInfo;
    }

    @Override
    public boolean hasNext() {
      return blockInfo != null && nextIdx < blockInfo.getCapacity()
          && blockInfo.getDatanode(nextIdx) != null;
    }

    @Override
    public DatanodeDescriptor next() {
      return blockInfo.getDatanode(nextIdx++);
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException("remove() is not supported");
    }
  }

  /** Constant {@link LightWeightGSet} capacity. */
  private final int capacity;

  private GSet<Block, BlockInfo> blocks;
  private final FSNamesystem ns;

  BlocksMap(int initialCapacity, float loadFactor, FSNamesystem ns) {
    // Use 2% of total memory; initialCapacity and loadFactor are ignored.
    this.capacity = LightWeightGSet.computeCapacity(2.0, "BlocksMap");
    this.blocks = new LightWeightGSet<Block, BlockInfo>(capacity);
    this.ns = ns;
  }

  public void close() {
    blocks = null;
  }

  /**
   * All removals from the blocks map go through this method,
   * so that the safe block count can be decremented when needed.
   *
   * @param b block to be removed
   * @return the {@link BlockInfo} for the removed block
   */
  private BlockInfo removeBlockFromMap(Block b) {
    if (b == null) {
      return null;
    }
    ns.decrementSafeBlockCountForBlockRemoval(b);
    return blocks.remove(b);
  }

  /**
   * Add a BlockInfo if the mapping does not exist.
   */
  private BlockInfo checkBlockInfo(Block b, int replication) {
    // regular update always checks if the block is already in the map!
    return checkBlockInfo(b, replication, true);
  }

  private BlockInfo checkBlockInfo(Block b, int replication,
      boolean checkExistence) {
    // when loading regular files, we do not need to check if the blocks are
    // already in the map - just allocate and insert;
    // for hardlink files, and outside of loading, we need to always check
    BlockInfo info = checkExistence ? blocks.get(b) : null;
    if (info == null) {
      if (b instanceof RaidBlockInfo) {
        info = new RaidBlockInfo(b, replication, ((RaidBlockInfo) b).getIndex());
      } else {
        info = new BlockInfo(b, replication);
      }
      blocks.put(info);
    }
    return info;
  }
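  /*
   * A minimal sketch of the get-or-create contract above (hypothetical
   * values; the RaidBlockInfo branch is omitted):
   *
   *   BlockInfo info = checkBlockInfo(blk, 3);
   *   // returns the existing mapping for blk, or inserts and returns a
   *   // new BlockInfo with triplet slots for 3 replicas
   */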
  INodeFile getINode(Block b) {
    BlockInfo info = blocks.get(b);
    return (info != null) ? info.inode : null;
  }

  BlockInfo getBlockInfo(Block b) {
    return blocks.get(b);
  }

  /**
   * Add block b belonging to the specified file inode to the map.
   */
  BlockInfo addINode(Block b, INodeFile iNode, short replication) {
    BlockInfo info = checkBlockInfo(b, replication);
    info.inode = iNode;
    return info;
  }

  /**
   * Add block b belonging to the specified file inode to the map.
   * Does not check for block existence for non-hardlinked files.
   */
  BlockInfo addINodeForLoading(Block b, INodeFile iNode) {
    // allocate a new block when loading the image;
    // for hardlinked files, we need to check if the blocks are already there
    BlockInfo info =
        checkBlockInfo(b, iNode.getReplication(), iNode.isHardlinkFile());
    info.inode = iNode;
    return info;
  }

  /**
   * Add block b belonging to the specified file inode to the map;
   * this overwrites the map with the new block information.
   */
  public BlockInfo updateINode(BlockInfo oldBlock, Block newBlock,
      INodeFile iNode, short replication, boolean forceUpdate)
      throws IOException {
    // If the old block is not the same as the new block, the GS was
    // probably bumped up, hence update the block with the new GS/size.
    // If forceUpdate is true, we always remove the old block and
    // update with the new block; this is used by raid.
    List<DatanodeDescriptor> locations = null;
    if (oldBlock != null && (!oldBlock.equals(newBlock) || forceUpdate)) {
      if (oldBlock.getBlockId() != newBlock.getBlockId()) {
        throw new IOException("block ids don't match : " + oldBlock + ", "
            + newBlock);
      }
      if (forceUpdate) {
        // save locations of the old block
        locations = new ArrayList<DatanodeDescriptor>();
        for (int i = 0; i < oldBlock.numNodes(); i++) {
          locations.add(oldBlock.getDatanode(i));
        }
      } else {
        if (!iNode.isUnderConstruction()) {
          throw new IOException(
              "Try to update generation of a finalized block old block: "
              + oldBlock + ", new block: " + newBlock);
        }
      }
      removeBlock(oldBlock);
    }
    BlockInfo info = checkBlockInfo(newBlock, replication);
    info.set(newBlock.getBlockId(), newBlock.getNumBytes(),
        newBlock.getGenerationStamp());
    info.inode = iNode;
    if (locations != null) {
      // add back the locations saved from the old block
      for (DatanodeDescriptor d : locations) {
        d.addBlock(info);
      }
    }
    return info;
  }

  /**
   * Remove the INode reference from block b.
   * If it does not belong to any file and data-nodes,
   * then remove the block from the block map.
   */
  void removeINode(Block b) {
    BlockInfo info = blocks.get(b);
    if (info != null) {
      info.inode = null;
      if (info.getDatanode(0) == null) { // no datanodes left
        removeBlockFromMap(b); // remove block from the map
      }
    }
  }

  /**
   * Remove the block from the block map;
   * remove it from all data-node lists it belongs to;
   * and remove all data-node locations associated with the block.
   */
  void removeBlock(Block block) {
    BlockInfo blockInfo = removeBlockFromMap(block);
    if (blockInfo == null)
      return;
    blockInfo.inode = null;
    for (int idx = blockInfo.numNodes()-1; idx >= 0; idx--) {
      DatanodeDescriptor dn = blockInfo.getDatanode(idx);
      dn.removeBlock(blockInfo); // remove from the list and wipe the location
    }
  }

  /** Returns the block object if it exists in the map. */
  BlockInfo getStoredBlock(Block b) {
    return blocks.get(b);
  }

  /** Return the block object without matching against the generation stamp. */
  BlockInfo getStoredBlockWithoutMatchingGS(Block b) {
    return blocks.get(new Block(b.getBlockId()));
  }

  /** The returned Iterator does not support remove(). */
  Iterator<DatanodeDescriptor> nodeIterator(Block b) {
    return new NodeIterator(blocks.get(b));
  }
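  /*
   * A hypothetical iteration sketch for the accessor above (variable
   * names are illustrative only):
   *
   *   Iterator<DatanodeDescriptor> it = nodeIterator(blk);
   *   while (it.hasNext()) {
   *     DatanodeDescriptor dn = it.next(); // one entry per stored replica
   *   }
   */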
  /**
   * Counts the number of nodes containing the block.
   * More efficient than using the iterator.
   */
  int numNodes(Block b) {
    BlockInfo info = blocks.get(b);
    return info == null ? 0 : info.numNodes();
  }

  /**
   * Returns true if the node did not already exist and was added;
   * false if the node already existed.
   */
  boolean addNode(Block b, DatanodeDescriptor node, int replication) {
    // insert into the map if not there yet
    BlockInfo info = checkBlockInfo(b, replication);
    // add block to the data-node list and the node to the block info
    return node.addBlock(info);
  }

  /**
   * Remove the data-node reference from the block.
   * Remove the block from the block map
   * only if it does not belong to any file and data-nodes.
   */
  boolean removeNode(Block b, DatanodeDescriptor node) {
    BlockInfo info = blocks.get(b);
    if (info == null)
      return false;

    // remove block from the data-node list and the node from the block info
    boolean removed = node.removeBlock(info);

    if (info.getDatanode(0) == null // no datanodes left
        && info.inode == null) { // does not belong to a file
      removeBlockFromMap(b); // remove block from the map
    }
    return removed;
  }

  int size() {
    return (blocks == null) ? 0 : blocks.size();
  }

  Iterable<BlockInfo> getBlocks() {
    return blocks;
  }

  /**
   * Get a list of shard iterators. Each iterator traverses only a part
   * of the blocks map.
   *
   * @param numShards desired number of shards
   * @return list of iterators (size might be smaller than
   *         numShards if the blocks map has fewer buckets)
   */
  List<Iterator<BlockInfo>> getBlocksIterarors(int numShards) {
    if (numShards <= 0) {
      throw new IllegalArgumentException(
          "Number of shards must be greater than 0");
    }
    List<Iterator<BlockInfo>> iterators =
        new ArrayList<Iterator<BlockInfo>>();
    for (int i = 0; i < numShards; i++) {
      Iterator<BlockInfo> iterator = blocks.shardIterator(i, numShards);
      if (iterator != null) {
        iterators.add(iterator);
      }
    }
    return iterators;
  }

  /**
   * Check if the block exists in the map.
   */
  boolean contains(Block block) {
    return blocks.contains(block);
  }

  /**
   * Check if the replica at the given datanode exists in the map.
   */
  boolean contains(Block block, DatanodeDescriptor datanode) {
    BlockInfo info = blocks.get(block);
    if (info == null)
      return false;
    return info.findDatanode(datanode) != -1;
  }

  /** Get the capacity of the GSet that stores blocks. */
  public int getCapacity() {
    return capacity;
  }
}