/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.blockmanagement; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; import org.apache.hadoop.hdfs.server.namenode.NameNode; /** * Represents a block that is currently being constructed.<br> * This is usually the last block of a file opened for write or append. */ public class BlockInfoUnderConstruction extends BlockInfo { /** Block state. See {@link BlockUCState} */ private BlockUCState blockUCState; /** * Block replicas as assigned when the block was allocated. * This defines the pipeline order. */ private List<ReplicaUnderConstruction> replicas; /** * Index of the primary data node doing the recovery. Useful for log * messages. */ private int primaryNodeIndex = -1; /** * The new generation stamp, which this block will have * after the recovery succeeds. Also used as a recovery id to identify * the right recovery if any of the abandoned recoveries re-appear. */ private long blockRecoveryId = 0; /** * ReplicaUnderConstruction contains information about replicas while * they are under construction. * The GS, the length and the state of the replica is as reported by * the data-node. * It is not guaranteed, but expected, that data-nodes actually have * corresponding replicas. */ static class ReplicaUnderConstruction extends Block { private final DatanodeStorageInfo expectedLocation; private ReplicaState state; private boolean chosenAsPrimary; ReplicaUnderConstruction(Block block, DatanodeStorageInfo target, ReplicaState state) { super(block); this.expectedLocation = target; this.state = state; this.chosenAsPrimary = false; } /** * Expected block replica location as assigned when the block was allocated. * This defines the pipeline order. * It is not guaranteed, but expected, that the data-node actually has * the replica. */ private DatanodeStorageInfo getExpectedStorageLocation() { return expectedLocation; } /** * Get replica state as reported by the data-node. */ ReplicaState getState() { return state; } /** * Whether the replica was chosen for recovery. */ boolean getChosenAsPrimary() { return chosenAsPrimary; } /** * Set replica state. */ void setState(ReplicaState s) { state = s; } /** * Set whether this replica was chosen for recovery. */ void setChosenAsPrimary(boolean chosenAsPrimary) { this.chosenAsPrimary = chosenAsPrimary; } /** * Is data-node the replica belongs to alive. */ boolean isAlive() { return expectedLocation.getDatanodeDescriptor().isAlive; } @Override // Block public int hashCode() { return super.hashCode(); } @Override // Block public boolean equals(Object obj) { // Sufficient to rely on super's implementation return (this == obj) || super.equals(obj); } @Override public String toString() { final StringBuilder b = new StringBuilder(50); appendStringTo(b); return b.toString(); } @Override public void appendStringTo(StringBuilder sb) { sb.append("ReplicaUnderConstruction[") .append(expectedLocation) .append("|") .append(state) .append("]"); } } /** * Create block and set its state to * {@link BlockUCState#UNDER_CONSTRUCTION}. */ public BlockInfoUnderConstruction(Block blk, int replication) { this(blk, replication, BlockUCState.UNDER_CONSTRUCTION, null); } /** * Create a block that is currently being constructed. */ public BlockInfoUnderConstruction(Block blk, int replication, BlockUCState state, DatanodeStorageInfo[] targets) { super(blk, replication); assert getBlockUCState() != BlockUCState.COMPLETE : "BlockInfoUnderConstruction cannot be in COMPLETE state"; this.blockUCState = state; setExpectedLocations(targets); } /** * Convert an under construction block to a complete block. * * @return BlockInfo - a complete block. * @throws IOException if the state of the block * (the generation stamp and the length) has not been committed by * the client or it does not have at least a minimal number of replicas * reported from data-nodes. */ BlockInfo convertToCompleteBlock() throws IOException { assert getBlockUCState() != BlockUCState.COMPLETE : "Trying to convert a COMPLETE block"; return new BlockInfo(this); } /** Set expected locations */ public void setExpectedLocations(DatanodeStorageInfo[] targets) { int numLocations = targets == null ? 0 : targets.length; this.replicas = new ArrayList<ReplicaUnderConstruction>(numLocations); for(int i = 0; i < numLocations; i++) replicas.add( new ReplicaUnderConstruction(this, targets[i], ReplicaState.RBW)); } /** * Create array of expected replica locations * (as has been assigned by chooseTargets()). */ public DatanodeStorageInfo[] getExpectedStorageLocations() { int numLocations = replicas == null ? 0 : replicas.size(); DatanodeStorageInfo[] storages = new DatanodeStorageInfo[numLocations]; for(int i = 0; i < numLocations; i++) storages[i] = replicas.get(i).getExpectedStorageLocation(); return storages; } /** Get the number of expected locations */ public int getNumExpectedLocations() { return replicas == null ? 0 : replicas.size(); } /** * Return the state of the block under construction. * @see BlockUCState */ @Override // BlockInfo public BlockUCState getBlockUCState() { return blockUCState; } void setBlockUCState(BlockUCState s) { blockUCState = s; } /** Get block recovery ID */ public long getBlockRecoveryId() { return blockRecoveryId; } /** * Process the recorded replicas. When about to commit or finish the * pipeline recovery sort out bad replicas. * @param genStamp The final generation stamp for the block. */ public void setGenerationStampAndVerifyReplicas(long genStamp) { // Set the generation stamp for the block. setGenerationStamp(genStamp); if (replicas == null) return; // Remove the replicas with wrong gen stamp. // The replica list is unchanged. for (ReplicaUnderConstruction r : replicas) { if (genStamp != r.getGenerationStamp()) { r.getExpectedStorageLocation().removeBlock(this); NameNode.blockStateChangeLog.info("BLOCK* Removing stale replica " + "from location: " + r.getExpectedStorageLocation()); } } } /** * Commit block's length and generation stamp as reported by the client. * Set block state to {@link BlockUCState#COMMITTED}. * @param block - contains client reported block length and generation * @throws IOException if block ids are inconsistent. */ void commitBlock(Block block) throws IOException { if(getBlockId() != block.getBlockId()) throw new IOException("Trying to commit inconsistent block: id = " + block.getBlockId() + ", expected id = " + getBlockId()); blockUCState = BlockUCState.COMMITTED; this.set(getBlockId(), block.getNumBytes(), block.getGenerationStamp()); // Sort out invalid replicas. setGenerationStampAndVerifyReplicas(block.getGenerationStamp()); } /** * Initialize lease recovery for this block. * Find the first alive data-node starting from the previous primary and * make it primary. */ public void initializeBlockRecovery(long recoveryId) { setBlockUCState(BlockUCState.UNDER_RECOVERY); blockRecoveryId = recoveryId; if (replicas.size() == 0) { NameNode.blockStateChangeLog.warn("BLOCK*" + " BlockInfoUnderConstruction.initLeaseRecovery:" + " No blocks found, lease removed."); } boolean allLiveReplicasTriedAsPrimary = true; for (int i = 0; i < replicas.size(); i++) { // Check if all replicas have been tried or not. if (replicas.get(i).isAlive()) { allLiveReplicasTriedAsPrimary = (allLiveReplicasTriedAsPrimary && replicas.get(i).getChosenAsPrimary()); } } if (allLiveReplicasTriedAsPrimary) { // Just set all the replicas to be chosen whether they are alive or not. for (int i = 0; i < replicas.size(); i++) { replicas.get(i).setChosenAsPrimary(false); } } long mostRecentLastUpdate = 0; ReplicaUnderConstruction primary = null; primaryNodeIndex = -1; for(int i = 0; i < replicas.size(); i++) { // Skip alive replicas which have been chosen for recovery. if (!(replicas.get(i).isAlive() && !replicas.get(i).getChosenAsPrimary())) { continue; } final ReplicaUnderConstruction ruc = replicas.get(i); final long lastUpdate = ruc.getExpectedStorageLocation().getDatanodeDescriptor().getLastUpdate(); if (lastUpdate > mostRecentLastUpdate) { primaryNodeIndex = i; primary = ruc; mostRecentLastUpdate = lastUpdate; } } if (primary != null) { primary.getExpectedStorageLocation().getDatanodeDescriptor().addBlockToBeRecovered(this); primary.setChosenAsPrimary(true); NameNode.blockStateChangeLog.info("BLOCK* " + this + " recovery started, primary=" + primary); } } void addReplicaIfNotPresent(DatanodeStorageInfo storage, Block block, ReplicaState rState) { Iterator<ReplicaUnderConstruction> it = replicas.iterator(); while (it.hasNext()) { ReplicaUnderConstruction r = it.next(); DatanodeStorageInfo expectedLocation = r.getExpectedStorageLocation(); if(expectedLocation == storage) { // Record the gen stamp from the report r.setGenerationStamp(block.getGenerationStamp()); return; } else if (expectedLocation != null && expectedLocation.getDatanodeDescriptor() == storage.getDatanodeDescriptor()) { // The Datanode reported that the block is on a different storage // than the one chosen by BlockPlacementPolicy. This can occur as // we allow Datanodes to choose the target storage. Update our // state by removing the stale entry and adding a new one. it.remove(); break; } } replicas.add(new ReplicaUnderConstruction(block, storage, rState)); } @Override // BlockInfo // BlockInfoUnderConstruction participates in maps the same way as BlockInfo public int hashCode() { return super.hashCode(); } @Override // BlockInfo public boolean equals(Object obj) { // Sufficient to rely on super's implementation return (this == obj) || super.equals(obj); } @Override public String toString() { final StringBuilder b = new StringBuilder(100); appendStringTo(b); return b.toString(); } @Override public void appendStringTo(StringBuilder sb) { super.appendStringTo(sb); appendUCParts(sb); } private void appendUCParts(StringBuilder sb) { sb.append("{blockUCState=").append(blockUCState) .append(", primaryNodeIndex=").append(primaryNodeIndex) .append(", replicas=["); Iterator<ReplicaUnderConstruction> iter = replicas.iterator(); if (iter.hasNext()) { iter.next().appendStringTo(sb); while (iter.hasNext()) { sb.append(", "); iter.next().appendStringTo(sb); } } sb.append("]}"); } }