/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.blockmanagement;

import io.hops.exception.StorageException;
import io.hops.exception.TransactionContextException;
import io.hops.metadata.HdfsStorageFactory;
import io.hops.metadata.hdfs.dal.BlockInfoDataAccess;
import io.hops.metadata.hdfs.dal.InvalidateBlockDataAccess;
import io.hops.metadata.hdfs.dal.ReplicaDataAccess;
import io.hops.metadata.hdfs.entity.Replica;
import io.hops.transaction.handler.HDFSOperationType;
import io.hops.transaction.handler.LightWeightRequestHandler;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.util.LightWeightHashSet;
import org.apache.hadoop.util.Time;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

/**
 * This class extends the DatanodeInfo class with ephemeral information (e.g.
 * health, capacity, what blocks are associated with the Datanode) that is
 * private to the Namenode, i.e. this class is not exposed to clients.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class DatanodeDescriptor extends DatanodeInfo {

  // Stores status of decommissioning.
  // If node is not decommissioning, do not use this object for anything.
  public DecommissioningStatus decommissioningStatus =
      new DecommissioningStatus();

  /**
   * Block and targets pair
   */
  @InterfaceAudience.Private
  @InterfaceStability.Evolving
  public static class BlockTargetPair {
    public final Block block;
    public final DatanodeDescriptor[] targets;

    BlockTargetPair(Block block, DatanodeDescriptor[] targets) {
      this.block = block;
      this.targets = targets;
    }
  }

  /**
   * A BlockTargetPair queue.
   */
  private static class BlockQueue<E> {
    private final Queue<E> blockq = new LinkedList<E>();

    /**
     * Size of the queue
     */
    synchronized int size() {
      return blockq.size();
    }

    /**
     * Enqueue
     */
    synchronized boolean offer(E e) {
      return blockq.offer(e);
    }

    /**
     * Dequeue
     */
    synchronized List<E> poll(int numBlocks) {
      if (numBlocks <= 0 || blockq.isEmpty()) {
        return null;
      }
      List<E> results = new ArrayList<E>();
      for (; !blockq.isEmpty() && numBlocks > 0; numBlocks--) {
        results.add(blockq.poll());
      }
      return results;
    }

    /**
     * Returns <tt>true</tt> if the queue contains the specified element.
     */
    // Synchronized for consistency with the other accessors; the backing
    // LinkedList is not thread-safe on its own.
    synchronized boolean contains(E e) {
      return blockq.contains(e);
    }

    synchronized void clear() {
      blockq.clear();
    }
  }
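  // Illustrative sketch (not part of the original class): BlockQueue is a
  // small synchronized producer/consumer buffer. The block manager enqueues
  // work items and the heartbeat path drains them in bounded batches:
  //
  //   BlockQueue<BlockTargetPair> q = new BlockQueue<BlockTargetPair>();
  //   q.offer(new BlockTargetPair(block, targets));  // producer side
  //   List<BlockTargetPair> batch = q.poll(10);      // consumer: up to 10 items
  //   if (batch != null) { /* hand the batch to the datanode as a command */ }
  //
  // Note that poll() returns null, rather than an empty list, when there is
  // no pending work.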
  private int sid = -1;

  public boolean isAlive = false;
  public boolean needKeyUpdate = false;

  /**
   * Set to false on any NN failover, and reset to true
   * whenever a heartbeat is received.
   */
  private boolean heartbeatedSinceFailover = false;

  /**
   * At startup or at any failover, the DNs in the cluster may
   * have pending block deletions from a previous incarnation
   * of the NameNode. Thus, we consider their block contents
   * stale until we have received a block report. When a DN
   * is considered stale, any replicas on it are transitively
   * considered stale. If any block has at least one stale replica,
   * then no invalidations will be processed for this block.
   * See HDFS-1972.
   */
  private boolean blockContentsStale = true;

  // A system administrator can tune the balancer bandwidth parameter
  // (dfs.balance.bandwidthPerSec) dynamically by calling
  // "dfsadmin -setBalancerBandwidth <newbandwidth>", at which point the
  // following 'bandwidth' variable gets updated with the new value for each
  // node. Once the heartbeat command is issued to update the value on the
  // specified datanode, this value will be set back to 0.
  private long bandwidth;

  /**
   * A queue of blocks to be replicated by this datanode
   */
  private BlockQueue<BlockTargetPair> replicateBlocks =
      new BlockQueue<BlockTargetPair>();

  /**
   * A queue of blocks to be recovered by this datanode
   */
  private BlockQueue<BlockInfoUnderConstruction> recoverBlocks =
      new BlockQueue<BlockInfoUnderConstruction>();

  /**
   * A set of blocks to be invalidated by this datanode
   */
  private LightWeightHashSet<Block> invalidateBlocks =
      new LightWeightHashSet<Block>();
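  // Illustrative sketch (assumed calling sequence, for clarity): the two
  // staleness flags above interact across a failover roughly as follows:
  //
  //   markStaleAfterFailover();  // failover: heartbeatedSinceFailover = false,
  //                              //           blockContentsStale = true
  //   updateHeartbeat(...);      // first heartbeat: heartbeatedSinceFailover = true
  //   receivedBlockReport();     // heartbeat already seen, so
  //                              //           blockContentsStale = false
  //
  // A block report that arrives before any heartbeat leaves the node stale.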
  /*
   * Variables for maintaining number of blocks scheduled to be written to
   * this datanode. This count is approximate and might be slightly bigger
   * in case of errors (e.g. datanode does not report if an error occurs
   * while writing the block).
   */
  private int currApproxBlocksScheduled = 0;
  private int prevApproxBlocksScheduled = 0;
  private long lastBlocksScheduledRollTime = 0;
  private static final int BLOCKS_SCHEDULED_ROLL_INTERVAL = 600 * 1000; //10min
  private int volumeFailures = 0;

  /**
   * Set to false after processing first block report
   */
  private boolean firstBlockReport = true;

  /**
   * When set to true, the node is not in the include list and is not allowed
   * to communicate with the namenode
   */
  private boolean disallowed = false;

  /**
   * DatanodeDescriptor constructor
   *
   * @param nodeID
   *     id of the data node
   */
  public DatanodeDescriptor(DatanodeID nodeID) {
    this(nodeID, 0L, 0L, 0L, 0L, 0, 0);
  }

  /**
   * DatanodeDescriptor constructor
   *
   * @param nodeID
   *     id of the data node
   * @param networkLocation
   *     location of the data node in network
   */
  public DatanodeDescriptor(DatanodeID nodeID, String networkLocation) {
    this(nodeID, networkLocation, 0L, 0L, 0L, 0L, 0, 0);
  }

  /**
   * DatanodeDescriptor constructor
   *
   * @param nodeID
   *     id of the data node
   * @param capacity
   *     capacity of the data node
   * @param dfsUsed
   *     space used by the data node
   * @param remaining
   *     remaining capacity of the data node
   * @param bpused
   *     space used by the block pool corresponding to this namenode
   * @param xceiverCount
   *     # of data transfers at the data node
   * @param failedVolumes
   *     number of failed volumes at the data node
   */
  public DatanodeDescriptor(DatanodeID nodeID, long capacity, long dfsUsed,
      long remaining, long bpused, int xceiverCount, int failedVolumes) {
    super(nodeID);
    updateHeartbeat(capacity, dfsUsed, remaining, bpused, xceiverCount,
        failedVolumes);
  }

  /**
   * DatanodeDescriptor constructor
   *
   * @param nodeID
   *     id of the data node
   * @param networkLocation
   *     location of the data node in network
   * @param capacity
   *     capacity of the data node, including space used by non-dfs
   * @param dfsUsed
   *     the used space by dfs datanode
   * @param remaining
   *     remaining capacity of the data node
   * @param bpused
   *     space used by the block pool corresponding to this namenode
   * @param xceiverCount
   *     # of data transfers at the data node
   * @param failedVolumes
   *     number of failed volumes at the data node
   */
  public DatanodeDescriptor(DatanodeID nodeID, String networkLocation,
      long capacity, long dfsUsed, long remaining, long bpused,
      int xceiverCount, int failedVolumes) {
    super(nodeID, networkLocation);
    updateHeartbeat(capacity, dfsUsed, remaining, bpused, xceiverCount,
        failedVolumes);
  }

  /**
   * Add datanode to the block.
   * Add block to the head of the list of blocks belonging to the data-node.
   */
  public boolean addBlock(BlockInfo b)
      throws StorageException, TransactionContextException {
    if (b.hasReplicaIn(this)) {
      return false;
    }
    b.addReplica(this, b);
    return true;
  }
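  // Illustrative sketch (hypothetical caller, for clarity): block-report
  // processing in the block manager would use addBlock/removeBlock roughly as:
  //
  //   if (node.addBlock(storedBlock)) {
  //     // a new replica of storedBlock is now recorded on this node
  //   }
  //   ...
  //   if (node.removeBlock(storedBlock)) {
  //     // the replica mapping existed and has now been deleted
  //   }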
  /**
   * Remove block from the list of blocks belonging to the data-node.
   * Remove datanode from the block.
   */
  public boolean removeBlock(BlockInfo b)
      throws StorageException, TransactionContextException {
    return b.removeReplica(this) != null;
  }

  public void setSId(int sid) {
    this.sid = sid;
  }

  public int getSId() {
    return this.sid;
  }

  public void resetBlocks() {
    setCapacity(0);
    setRemaining(0);
    setBlockPoolUsed(0);
    setDfsUsed(0);
    setXceiverCount(0);
    this.invalidateBlocks.clear();
    this.volumeFailures = 0;
  }

  public void clearBlockQueues() {
    synchronized (invalidateBlocks) {
      this.invalidateBlocks.clear();
      this.recoverBlocks.clear();
      this.replicateBlocks.clear();
    }
  }

  public int numBlocks() throws IOException {
    return (Integer) new LightWeightRequestHandler(
        HDFSOperationType.COUNT_REPLICAS_ON_NODE) {
      @Override
      public Object performTask() throws StorageException, IOException {
        ReplicaDataAccess da = (ReplicaDataAccess) HdfsStorageFactory
            .getDataAccess(ReplicaDataAccess.class);
        return da.countAllReplicasForStorageId(getSId());
      }
    }.handle();
  }

  /**
   * Updates stats from datanode heartbeat.
   */
  public void updateHeartbeat(long capacity, long dfsUsed, long remaining,
      long blockPoolUsed, int xceiverCount, int volFailures) {
    setCapacity(capacity);
    setRemaining(remaining);
    setBlockPoolUsed(blockPoolUsed);
    setDfsUsed(dfsUsed);
    setXceiverCount(xceiverCount);
    setLastUpdate(Time.now());
    this.volumeFailures = volFailures;
    this.heartbeatedSinceFailover = true;
    rollBlocksScheduled(getLastUpdate());
  }

  public Iterator<BlockInfo> getBlockIterator() throws IOException {
    return getAllMachineBlockInfos().iterator();
  }

  private List<BlockInfo> getAllMachineBlockInfos() throws IOException {
    LightWeightRequestHandler findBlocksHandler =
        new LightWeightRequestHandler(
            HDFSOperationType.GET_ALL_MACHINE_BLOCKS) {
          @Override
          public Object performTask() throws StorageException, IOException {
            BlockInfoDataAccess da = (BlockInfoDataAccess) HdfsStorageFactory
                .getDataAccess(BlockInfoDataAccess.class);
            HdfsStorageFactory.getConnector().beginTransaction();
            List<BlockInfo> list = da.findBlockInfosByStorageId(getSId());
            HdfsStorageFactory.getConnector().commit();
            return list;
          }
        };
    return (List<BlockInfo>) findBlocksHandler.handle();
  }

  public Map<Long, Integer> getAllMachineReplicas() throws IOException {
    LightWeightRequestHandler findBlocksHandler =
        new LightWeightRequestHandler(
            HDFSOperationType.GET_ALL_MACHINE_BLOCKS_IDS) {
          @Override
          public Object performTask() throws StorageException, IOException {
            ReplicaDataAccess da = (ReplicaDataAccess) HdfsStorageFactory
                .getDataAccess(ReplicaDataAccess.class);
            return da.findBlockAndInodeIdsByStorageId(getSId());
          }
        };
    return (Map<Long, Integer>) findBlocksHandler.handle();
  }

  public Map<Long, Long> getAllMachineInvalidatedReplicasWithGenStamp()
      throws IOException {
    LightWeightRequestHandler findBlocksHandler =
        new LightWeightRequestHandler(
            HDFSOperationType.GET_ALL_MACHINE_BLOCKS_IDS) {
          @Override
          public Object performTask() throws StorageException, IOException {
            InvalidateBlockDataAccess da =
                (InvalidateBlockDataAccess) HdfsStorageFactory
                    .getDataAccess(InvalidateBlockDataAccess.class);
            return da.findInvalidatedBlockAndGenStampByStorageId(getSId());
          }
        };
    return (Map<Long, Long>) findBlocksHandler.handle();
  }

  /**
   * Store block replication work.
   */
  void addBlockToBeReplicated(Block block, DatanodeDescriptor[] targets) {
    assert (block != null && targets != null && targets.length > 0);
    replicateBlocks.offer(new BlockTargetPair(block, targets));
  }
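  // Illustrative sketch (hypothetical flow, for clarity): replication work
  // stored above is later drained on the heartbeat path and turned into a
  // transfer command for the datanode, roughly:
  //
  //   node.addBlockToBeReplicated(block, targets);  // schedule the work
  //   List<BlockTargetPair> work = node.getReplicationCommand(maxTransfers);
  //   // null means no pending work; otherwise each pair names a block and
  //   // the datanodes that should receive a new replica of it.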
  /**
   * Store block recovery work.
   */
  void addBlockToBeRecovered(BlockInfoUnderConstruction block) {
    if (recoverBlocks.contains(block)) {
      // this prevents adding the same block twice to the recovery queue
      BlockManager.LOG.info(block + " is already in the recovery queue");
      return;
    }
    recoverBlocks.offer(block);
  }

  /**
   * Store block invalidation work.
   */
  void addBlocksToBeInvalidated(List<Block> blocklist) {
    assert (blocklist != null && blocklist.size() > 0);
    synchronized (invalidateBlocks) {
      for (Block blk : blocklist) {
        invalidateBlocks.add(blk);
      }
    }
  }

  /**
   * The number of work items that are pending to be replicated
   */
  int getNumberOfBlocksToBeReplicated() {
    return replicateBlocks.size();
  }

  /**
   * The number of block invalidation items that are pending to
   * be sent to the datanode
   */
  int getNumberOfBlocksToBeInvalidated() {
    synchronized (invalidateBlocks) {
      return invalidateBlocks.size();
    }
  }

  public List<BlockTargetPair> getReplicationCommand(int maxTransfers) {
    return replicateBlocks.poll(maxTransfers);
  }

  public BlockInfoUnderConstruction[] getLeaseRecoveryCommand(
      int maxTransfers) {
    List<BlockInfoUnderConstruction> blocks = recoverBlocks.poll(maxTransfers);
    if (blocks == null) {
      return null;
    }
    return blocks.toArray(new BlockInfoUnderConstruction[blocks.size()]);
  }

  /**
   * Remove and return up to the specified number of blocks to be invalidated.
   */
  public Block[] getInvalidateBlocks(int maxblocks) {
    synchronized (invalidateBlocks) {
      Block[] deleteList = invalidateBlocks.pollToArray(
          new Block[Math.min(invalidateBlocks.size(), maxblocks)]);
      return deleteList.length == 0 ? null : deleteList;
    }
  }

  /**
   * @return Approximate number of blocks currently scheduled to be written
   * to this datanode.
   */
  public int getBlocksScheduled() {
    return currApproxBlocksScheduled + prevApproxBlocksScheduled;
  }

  /**
   * Increments counter for number of blocks scheduled.
   */
  public void incBlocksScheduled() {
    currApproxBlocksScheduled++;
  }

  /**
   * Decrements counter for number of blocks scheduled.
   */
  void decBlocksScheduled() {
    if (prevApproxBlocksScheduled > 0) {
      prevApproxBlocksScheduled--;
    } else if (currApproxBlocksScheduled > 0) {
      currApproxBlocksScheduled--;
    }
    // it's OK if both counters are zero.
  }
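  // Illustrative sketch (for clarity): the scheduled-block count is a
  // two-bucket rolling window. incBlocksScheduled() bumps the current bucket
  // when a write is allocated; decBlocksScheduled() drains when a replica is
  // reported received. Every BLOCKS_SCHEDULED_ROLL_INTERVAL (10 min) the
  // current bucket rolls into the previous one, so an increment that is
  // never matched by a decrement (e.g. a failed write the datanode never
  // reports) inflates getBlocksScheduled() for at most two roll intervals
  // (roughly 10-20 minutes) before expiring:
  //
  //   incBlocksScheduled();           // curr = 1, prev = 0 -> scheduled = 1
  //   // ~10 min pass, heartbeat rolls:  curr = 0, prev = 1 -> scheduled = 1
  //   // ~10 more min, rolls again:      curr = 0, prev = 0 -> scheduled = 0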
  /**
   * Adjusts curr and prev number of blocks scheduled every few minutes.
   */
  private void rollBlocksScheduled(long now) {
    if ((now - lastBlocksScheduledRollTime) > BLOCKS_SCHEDULED_ROLL_INTERVAL) {
      prevApproxBlocksScheduled = currApproxBlocksScheduled;
      currApproxBlocksScheduled = 0;
      lastBlocksScheduledRollTime = now;
    }
  }

  @Override
  public int hashCode() {
    // Super implementation is sufficient
    return super.hashCode();
  }

  @Override
  public boolean equals(Object obj) {
    // Sufficient to use super equality as datanodes are uniquely identified
    // by DatanodeID
    return (this == obj) || super.equals(obj);
  }

  /**
   * Decommissioning status
   */
  public class DecommissioningStatus {
    private int underReplicatedBlocks;
    private int decommissionOnlyReplicas;
    private int underReplicatedInOpenFiles;
    private long startTime;

    synchronized void set(int underRep, int onlyRep, int underConstruction) {
      if (!isDecommissionInProgress()) {
        return;
      }
      underReplicatedBlocks = underRep;
      decommissionOnlyReplicas = onlyRep;
      underReplicatedInOpenFiles = underConstruction;
    }

    /**
     * @return the number of under-replicated blocks
     */
    public synchronized int getUnderReplicatedBlocks() {
      if (!isDecommissionInProgress()) {
        return 0;
      }
      return underReplicatedBlocks;
    }

    /**
     * @return the number of decommission-only replicas
     */
    public synchronized int getDecommissionOnlyReplicas() {
      if (!isDecommissionInProgress()) {
        return 0;
      }
      return decommissionOnlyReplicas;
    }

    /**
     * @return the number of under-replicated blocks in open files
     */
    public synchronized int getUnderReplicatedInOpenFiles() {
      if (!isDecommissionInProgress()) {
        return 0;
      }
      return underReplicatedInOpenFiles;
    }

    /**
     * Set start time
     */
    public synchronized void setStartTime(long time) {
      startTime = time;
    }

    /**
     * @return start time
     */
    public synchronized long getStartTime() {
      if (!isDecommissionInProgress()) {
        return 0;
      }
      return startTime;
    }
  } // End of class DecommissioningStatus

  /**
   * Set the flag to indicate if this datanode is disallowed from communicating
   * with the namenode.
   */
  public void setDisallowed(boolean flag) {
    disallowed = flag;
  }

  /**
   * Is the datanode disallowed from communicating with the namenode?
   */
  public boolean isDisallowed() {
    return disallowed;
  }

  /**
   * @return number of failed volumes in the datanode.
   */
  public int getVolumeFailures() {
    return volumeFailures;
  }
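  // Illustrative sketch (hypothetical caller, for clarity): a decommission
  // monitor would drive DecommissioningStatus roughly as:
  //
  //   node.decommissioningStatus.setStartTime(Time.now()); // decommission begins
  //   // on each scan, refresh the progress counters:
  //   node.decommissioningStatus.set(underRep, onlyRep, openFiles);
  //
  // All getters return 0 unless isDecommissionInProgress() is true, so stale
  // numbers are never reported for a node that is not decommissioning.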
  /**
   * @param nodeReg
   *     DatanodeID to update registration for.
   */
  @Override
  public void updateRegInfo(DatanodeID nodeReg) {
    super.updateRegInfo(nodeReg);
    firstBlockReport = true; // must re-process IBR after re-registration
  }

  /**
   * @return balancer bandwidth in bytes per second for this datanode
   */
  public long getBalancerBandwidth() {
    return this.bandwidth;
  }

  /**
   * @param bandwidth
   *     balancer bandwidth in bytes per second for this datanode
   */
  public void setBalancerBandwidth(long bandwidth) {
    this.bandwidth = bandwidth;
  }

  public boolean areBlockContentsStale() {
    return blockContentsStale;
  }

  public void markStaleAfterFailover() {
    heartbeatedSinceFailover = false;
    blockContentsStale = true;
  }

  public void receivedBlockReport() {
    if (heartbeatedSinceFailover) {
      blockContentsStale = false;
    }
    firstBlockReport = false;
  }

  boolean isFirstBlockReport() {
    return firstBlockReport;
  }

  @Override
  public String dumpDatanode() {
    StringBuilder sb = new StringBuilder(super.dumpDatanode());
    int repl = replicateBlocks.size();
    if (repl > 0) {
      sb.append(" ").append(repl).append(" blocks to be replicated;");
    }
    int inval = invalidateBlocks.size();
    if (inval > 0) {
      sb.append(" ").append(inval).append(" blocks to be invalidated;");
    }
    int recover = recoverBlocks.size();
    if (recover > 0) {
      sb.append(" ").append(recover).append(" blocks to be recovered;");
    }
    return sb.toString();
  }
}