FaultTolerantBlockPlacementPolicy.java example

Explorer
hadoop-20-master
- src
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import org.apache.commons.logging.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.net.DNSToSwitchMapping;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.net.NodeBase;
import org.apache.hadoop.util.HostsFileReader;
import org.apache.hadoop.raid.Codec;
import org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyDefault;

import java.io.IOException;
import java.util.*;
import org.apache.commons.lang.ArrayUtils;

/**
 * This block placement policy tries (best effort) to do the following:
 * 
 * If the file is under the staging directory (a specially named directory)
 * then all blocks of the file are kept on the same host. Additionally, all
 * the raid blocks (if any) for the same file are also kept on the same host.
 *
 * If the file is not under the staging directory then blocks are put in such
 * a way that all blocks within the same stripe end up on random hosts in
 * different racks. For example, the 10 data blocks and 4 parity blocks in a
 * stripe should end up in different racks.
 */
public class FaultTolerantBlockPlacementPolicy extends BlockPlacementPolicyRaid {
  // Largest stripe length among the accepted codecs. Reset to 0 in
  // initialize() and raised in initParityConfigs().
  private int stripeLen;
  // Path prefix of the staging directory (config key "dfs.f4.staging").
  private String stagingDir;
  // Path prefix of the "local" directory (config key "dfs.f4.local").
  private String localDir;
  // Namesystem used to look up block locations; set in initialize() and
  // null when the policy is built through the three-argument constructor.
  private FSNamesystem namesystem = null;
  // Value of "dfs.replication.considerLoad"; passed to isGoodTarget().
  private boolean considerLoad;
  // Codecs whose ids are listed in "dfs.f4.accepted.codecs".
  private List<Codec> acceptedCodecs = new ArrayList<Codec>();

  // Racks and hosts that isGoodTarget() always rejects. They are static and
  // only replaced through setBadHostsAndRacks(), which is meant for tests.
  private static Set<String> badRacks = new HashSet<String>();
  private static Set<String> badHosts = new HashSet<String>();
  /**
   * Creates the policy and initializes it right away.
   *
   * No hosts reader, DNS-to-switch mapping or namesystem is supplied on this
   * path; all three are passed to initialize() as null.
   *
   * @param conf        the configuration to read the F4 settings from
   * @param stats       cluster statistics handed to the parent policy
   * @param clusterMap  the network topology of the cluster
   */
  FaultTolerantBlockPlacementPolicy(Configuration conf,
                                    FSClusterStats stats,
                                    NetworkTopology clusterMap) {
    this.initialize(conf, stats, clusterMap, null, null, null);
  }

  /**
   * Creates an uninitialized policy; initialize() must be called before the
   * policy is used.
   */
  FaultTolerantBlockPlacementPolicy() {
    super();
  }

  /**
   * Replaces the sets of racks and hosts that isGoodTarget() rejects.
   * A function to be used by unit tests only.
   *
   * The given sets are stored by reference (not copied). A null argument is
   * treated as "nothing is bad" so that a later isGoodTarget() call does not
   * fail with a NullPointerException.
   *
   * @param racks  network locations to reject, or null for none
   * @param hosts  datanode names to reject, or null for none
   */
  public static void setBadHostsAndRacks(Set<String> racks,
                                         Set<String> hosts) {
    badRacks = (racks == null) ? new HashSet<String>() : racks;
    badHosts = (hosts == null) ? new HashSet<String>() : hosts;
  }

  /**
   * {@inheritDoc}
   *
   * After delegating to the parent, this reads the F4 specific settings:
   * whether load is considered, the accepted parity codecs, and the staging
   * and local directory prefixes.
   */
  public void initialize(Configuration conf,
                         FSClusterStats stats,
                         NetworkTopology clusterMap,
                         HostsFileReader hostsReader,
                         DNSToSwitchMapping dnsToSwitchMapping,
                         FSNamesystem ns) {
    super.initialize(
      conf, stats, clusterMap, hostsReader, dnsToSwitchMapping, ns);
    this.namesystem = ns;

    final boolean loadAware =
      conf.getBoolean("dfs.replication.considerLoad", true);
    this.considerLoad = loadAware;
    FSNamesystem.LOG.info("F4: Block placement will consider load: "
      + this.considerLoad);

    // The stripe length starts at zero and is raised by initParityConfigs()
    // to the largest stripe length of the accepted codecs.
    this.stripeLen = 0;
    initParityConfigs();

    this.stagingDir = conf.get("dfs.f4.staging", "/staging");
    this.localDir = conf.get("dfs.f4.local", "/local");
  }

  /**
   * This function initializes configuration for the supported parities.
   *
   * The accepted codec ids are read from "dfs.f4.accepted.codecs" (a comma
   * separated list, "rs,xor" by default). Surrounding whitespace around an
   * id is ignored and empty entries are skipped, so "rs, xor" works as
   * expected. Each accepted codec keeps its own configuration, which is used
   * when placing its parity files. There is one exception: the stripe length
   * of this policy is the maximum of the stripe lengths of the accepted
   * codecs.
   *
   * The method can be called repeatedly; previously accepted codecs and the
   * previously computed stripe length are discarded first.
   */
  private void initParityConfigs() {
    Set<String> acceptedCodecIds = new HashSet<String>();
    for (String s : conf.get("dfs.f4.accepted.codecs", "rs,xor").split(",")) {
      String id = s.trim();
      if (id.length() > 0) {
        acceptedCodecIds.add(id);
      }
    }

    // Start from a clean slate so that a second initialize() call does not
    // register every codec twice.
    this.acceptedCodecs.clear();
    this.stripeLen = 0;

    for (Codec c : Codec.getCodecs()) {
      if (acceptedCodecIds.contains(c.id)) {
        FSNamesystem.LOG.info("F4: Parity info."
          + " Id: " + c.id
          + " Parity Length: " + c.parityLength
          + " Parity Stripe Length: " + c.stripeLength
          + " Parity directory: " + c.parityDirectory
          + " Parity temp directory: " + c.tmpParityDirectory);
        acceptedCodecs.add(c);
        if (c.stripeLength > this.stripeLen) {
          // Use the max stripe length
          this.stripeLen = c.stripeLength;
        }
      }
    }
    FSNamesystem.LOG.info("F4: Initialized stripe len to: " + this.stripeLen);
  }

  /**
   * Finds the accepted codec whose parity directory appears in the path.
   *
   * If no accepted codec matches, the first accepted codec is returned and
   * an error is logged.
   *
   * @param fileName  the path to inspect
   * @return the matching codec, or the first accepted codec as a fallback
   * @throws IllegalStateException if no codec has been accepted at all, so
   *         that there is nothing to fall back to
   */
  private Codec getCodec(String fileName) {
    for (Codec c : this.acceptedCodecs) {
      // This should be "/raidrs/" or "/raid/". If any of these two is
      // present in the file path, we will assume that is the parity type.
      String uniqueSubstringId = c.parityDirectory + "/";
      if (fileName.contains(uniqueSubstringId)) {
        return c;
      }
    }
    if (this.acceptedCodecs.isEmpty()) {
      // Fail with a message that names the cause instead of the bare
      // IndexOutOfBoundsException that get(0) would raise.
      throw new IllegalStateException(
        "F4: No accepted codecs are configured, cannot pick a codec for the"
        + " file: " + fileName);
    }
    Codec c = this.acceptedCodecs.get(0);
    FSNamesystem.LOG.error("F4: Could not find any valid codec for the file: "
     + fileName + ", hence returning the first one: " + c.id);
    return c;
  }

  /**
   * Builds the staging directory for parity files: the parity directory of
   * the codec chosen for the given file followed by the staging directory.
   */
  private String getParityStagingDir(String parityFileName) {
    final String parityRoot = getCodec(parityFileName).parityDirectory;
    return parityRoot + this.stagingDir;
  }

  /**
   * Tells whether the path starts with the staging directory or with the
   * parity staging directory derived for it.
   */
  private boolean isStaging(String fileName) {
    if (fileName.startsWith(this.stagingDir)) {
      return true;
    }
    // Only consult the parity staging directory when the plain one did not
    // match.
    return fileName.startsWith(this.getParityStagingDir(fileName));
  }
  /** Tells whether the path starts with the "local" directory prefix. */
  private boolean isLocal(String fileName) {
    final String localPrefix = this.localDir;
    return fileName.startsWith(localPrefix);
  }

  /**
   * Chooses targets for a block of the given file without any caller
   * supplied exclusions. Delegates to chooseTargetF4 with a null list of
   * excluded nodes.
   */
  @Override
  public DatanodeDescriptor[] chooseTarget(
      String srcPath,
      int numOfReplicas,
      DatanodeDescriptor writer,
      List<DatanodeDescriptor> chosenNodes,
      long blocksize) {
    final List<Node> noExclusions = null;
    return chooseTargetF4(srcPath,
                          numOfReplicas,
                          writer,
                          chosenNodes,
                          noExclusions,
                          blocksize);
  }

  /**
   * This function finds a node where to place a block of a file under the
   * "local" directory. The basic idea is to have as few locations as
   * possible (preferably one, and preferably on the writer node).
   *
   * The attempts are made in this order:
   * 1) Choose a node that already holds one of the blocks in the blocks
   *    argument and that passes the isGoodTarget check. If there are several
   *    such nodes, the first good one (in map iteration order) wins.
   * 2) If the writer node is null, choose based on the parent class's
   *    policy.
   * 3) If the writer node passes the isGoodTarget check, choose the writer.
   * 4) Otherwise choose a random node within the same rack as the writer,
   *    skipping the excluded nodes.
   *
   * @param fileName       The name of the file for which the block is to be
   *                       placed.
   * @param writer         The writer node, may be null.
   * @param blocks         The block locations that are to be used as reference
   *                       for placing the current block.
   * @param excludedNodes  Nodes that must not be chosen by the random pick,
   *                       may be null (treated as no exclusions).
   * @param chosenNodes    @see chooseTarget
   * @param blocksize      @see chooseTarget
   * @return an array holding the chosen node(s)
   * @throws NotEnoughReplicasException propagated from the random pick in
   *         the writer's rack
   */
  private DatanodeDescriptor[] chooseLocalTarget(
      String fileName,
      DatanodeDescriptor writer,
      LocatedBlocks blocks,
      List<Node> excludedNodes,
      List<DatanodeDescriptor> chosenNodes,
      long blocksize) throws IOException, NotEnoughReplicasException {
    // First try the same node as the one where other blocks reside.
    // Keyed by rack + "/" + name so that each host is tried only once.
    HashMap<String, DatanodeInfo> hostMap =
      new HashMap<String, DatanodeInfo>();
    for (LocatedBlock b : blocks.getLocatedBlocks()) {
      for (DatanodeInfo i : b.getLocations()) {
        hostMap.put(i.getNetworkLocation() + "/" + i.getName(), i);
      }
    }

    for (DatanodeInfo i : hostMap.values()) {
      // NOTE(review): this builds a fresh descriptor from the reported info
      // rather than looking up the registered one; confirm that is intended.
      DatanodeDescriptor result = new DatanodeDescriptor(i,
                                                         i.getNetworkLocation(),
                                                         i.getHostName(),
                                                         i.getCapacity(),
                                                         i.getDfsUsed(),
                                                         i.getRemaining(),
                                                         i.getNamespaceUsed(),
                                                         i.getXceiverCount());
      // Integer.MAX_VALUE: I dont care about per rack load.
      if (this.isGoodTarget(result,
                            blocksize,
                            Integer.MAX_VALUE,
                            this.considerLoad,
                            new ArrayList<DatanodeDescriptor>())) {
        return new DatanodeDescriptor[] {result};
      }
    }

    if (writer == null) {
      // Nothing to anchor on; let the parent decide.
      return super.chooseTarget(
        fileName, 1, writer, chosenNodes, excludedNodes, blocksize);
    }
    if (this.isGoodTarget(writer,
                          blocksize,
                          Integer.MAX_VALUE,
                          this.considerLoad,
                          new ArrayList<DatanodeDescriptor>())) {
      return new DatanodeDescriptor[] {writer};
    }

    // Try something in the same rack as the writer.
    HashMap<Node, Node> exclNodes = new HashMap<Node, Node>();
    // The five-argument chooseTarget passes null for the excluded nodes, so
    // the list must be guarded before it is iterated.
    if (excludedNodes != null) {
      for (Node n : excludedNodes) {
        exclNodes.put(n, n);
      }
    }
    List<DatanodeDescriptor> results = new ArrayList<DatanodeDescriptor>();
    chooseRandom(
      1, writer.getNetworkLocation(), exclNodes, blocksize, 1, results);
    return results.toArray(new DatanodeDescriptor[results.size()]);
  }

  /**
   * Rejects any node whose rack or name is in the configured "bad" sets
   * (see setBadHostsAndRacks) and otherwise defers to the parent's check.
   */
  @Override
  protected boolean isGoodTarget(DatanodeDescriptor node,
                                 long blockSize,
                                 int maxPerRack,
                                 boolean considerLoad,
                                 List<DatanodeDescriptor> results) {
    final boolean onBadRack = badRacks.contains(node.getNetworkLocation());
    if (onBadRack) {
      return false;
    }
    if (badHosts.contains(node.getName())) {
      return false;
    }
    return super.isGoodTarget(
      node, blockSize, maxPerRack, considerLoad, results);
  }

  /**
   * Chooses targets for a block of the given file while honoring the caller
   * supplied excluded nodes. Delegates to chooseTargetF4.
   */
  @Override
  public DatanodeDescriptor[] chooseTarget(
      String srcInode,
      int numOfReplicas,
      DatanodeDescriptor writer,
      List<DatanodeDescriptor> chosenNodes,
      List<Node> excludesNodes,
      long blocksize) {
    final DatanodeDescriptor[] targets = chooseTargetF4(srcInode,
                                                        numOfReplicas,
                                                        writer,
                                                        chosenNodes,
                                                        excludesNodes,
                                                        blocksize);
    return targets;
  }

  /**
   * Resolves the name of the source file that a parity file belongs to.
   *
   * For a PARITY file the following layouts are supported:
   *   parity = /raidrs/staging/X, source = /X
   *   parity = /raidrs/X,         source = /X
   * For a TEMP_PARITY file the codec's temporary parity directory is used.
   *
   * @param fileName  the parity (or temporary parity) file
   * @param info      the file info whose type selects the lookup
   * @return the source file name, or null if it could not be found or the
   *         file type is neither PARITY nor TEMP_PARITY
   */
  private String getSourceFileFromParity(String fileName,
                                         FileInfo info)
      throws IOException {
    NameWithINode source = null;
    switch (info.type) {
      case PARITY: {
        // Try the staging layout first, then the plain parity directory.
        if (isStaging(fileName)) {
          source = getSourceFile(fileName, getParityStagingDir(fileName));
        }
        if (source == null) {
          Codec codec = getCodec(fileName);
          source = getSourceFile(fileName, codec.parityDirectory);
        }
        break;
      }
      case TEMP_PARITY: {
        Codec codec = getCodec(fileName);
        source = getSourceFile(fileName, codec.tmpParityDirectory);
        break;
      }
      default:
        FSNamesystem.LOG.error("file type bad");
        return null;
    }
    if (source == null) {
      return null;
    }
    return source.name;
  }


  /**
   * This is the main driver function that dictates block placement.
   *
   * This function figures out the kind of file (local or not, raid or not)
   * and invokes the appropriate functions:
   * - More than one replica requested: the parent's policy is used.
   * - File under the "local" directory: chooseLocalTarget.
   * - Otherwise the racks and hosts already used by the stripe the new block
   *   belongs to are collected, and chooseTargetOnNewFailureDomain picks a
   *   node away from them.
   *
   * Whenever the information needed for the F4 decision cannot be obtained
   * (no namesystem, lookup failure, unusable stripe or parity length) the
   * parent's policy is used instead.
   *
   * @param fileName       the file the block is allocated for
   * @param numOfReplicas  number of replicas requested
   * @param writer         the writer node, may be null
   * @param chosenNodes    nodes already chosen
   * @param exclNodes      nodes that must not be chosen, may be null
   * @param blocksize      size of the block to place
   * @return the chosen target nodes
   */
  private DatanodeDescriptor[] chooseTargetF4(
      String fileName,
      int numOfReplicas,
      DatanodeDescriptor writer,
      List<DatanodeDescriptor> chosenNodes,
      List<Node> exclNodes,
      long blocksize) {
    FSNamesystem.LOG.info("F4: F4 policy invoked for file: " + fileName +
      ", with replica count: " + numOfReplicas);
    // If replica>1 then just default back to RAID
    if (numOfReplicas > 1) {
      return super.chooseTarget(
        numOfReplicas, writer, chosenNodes, exclNodes, blocksize);
    }
    if (this.namesystem == null) {
      // The three-argument constructor initializes the policy without a
      // namesystem; block locations cannot be looked up in that case.
      FSNamesystem.LOG.error(
        "F4: No namesystem available, cannot look up file: " + fileName);
      return super.chooseTarget(
        numOfReplicas, writer, chosenNodes, exclNodes, blocksize);
    }
    FileInfo info;
    LocatedBlocks blocks;
    try {
      blocks = this.namesystem.getBlockLocations(fileName, 0, Long.MAX_VALUE);
      info = getFileInfo(null, fileName);
    } catch (IOException e) {
      FSNamesystem.LOG.error(
        "F4: Error happened when calling getFileInfo/getBlockLocations", e);
      return super.chooseTarget(
        fileName, numOfReplicas, writer, chosenNodes, exclNodes, blocksize);
    }
    if (blocks == null || info == null) {
      // NOTE(review): getBlockLocations presumably returns null when the
      // file is not found; verify against FSNamesystem.
      FSNamesystem.LOG.error(
        "F4: No block locations or file info available for file: "
        + fileName);
      return super.chooseTarget(
        fileName, numOfReplicas, writer, chosenNodes, exclNodes, blocksize);
    }
    // The block to be placed comes after all blocks reported so far.
    int blockIndex = blocks.getLocatedBlocks().size();
    FSNamesystem.LOG.info(
      "F4: The file: " + fileName + " has a type: " + info.type);
    HashMap<String, HashSet<Node>> rackToHosts =
      new HashMap<String, HashSet<Node>>();
    try {

      // First handle the "localdir" case
      if (isLocal(fileName)) {
        return chooseLocalTarget(fileName,
                                 writer,
                                 blocks,
                                 exclNodes,
                                 chosenNodes,
                                 blocksize);
      }

      // For a data file, the locations of its own blocks as the reference
      int stripeIndex = -1;
      String srcFileName = null;
      String parityFileName = null;
      int parityLength = 0;
      int stripeLength = 0;
      switch (info.type) {
        case NOT_RAID:
        case SOURCE:
          srcFileName = fileName;
          parityFileName = null;
          stripeLength = this.stripeLen;
          if (stripeLength <= 0) {
            // No accepted codec set a stripe length; dividing by it would
            // throw an ArithmeticException.
            FSNamesystem.LOG.error("F4: Invalid stripe length: "
              + stripeLength + " for file: " + fileName);
            return super.chooseTarget(
              numOfReplicas, writer, chosenNodes, exclNodes, blocksize);
          }
          stripeIndex = blockIndex / stripeLength;
          break;
        case TEMP_PARITY:
        case PARITY:
          srcFileName = getSourceFileFromParity(fileName, info);
          parityFileName = fileName;
          if (srcFileName == null) {
            FSNamesystem.LOG.error(
              "F4: Could not resolve the source file for parity file: "
              + fileName);
          } else if (this.namesystem.getHdfsFileInfo(srcFileName) == null) {
            // Log the name before dropping it; placement then relies on the
            // parity blocks alone.
            FSNamesystem.LOG.error("F4: " + srcFileName + " does not exist");
            srcFileName = null;
          }
          Codec c = getCodec(fileName);
          parityLength = c.parityLength;
          stripeLength = c.stripeLength;
          if (parityLength <= 0) {
            FSNamesystem.LOG.error("F4: Invalid parity length: "
              + parityLength + " for file: " + fileName);
            return super.chooseTarget(
              numOfReplicas, writer, chosenNodes, exclNodes, blocksize);
          }
          stripeIndex = blockIndex / parityLength;
          break;
        default:
          return super.chooseTarget(
            numOfReplicas, writer, chosenNodes, exclNodes, blocksize);
      }

      rackToHosts = getRackToHostsMapForStripe(srcFileName,
                                               parityFileName,
                                               stripeLength,
                                               parityLength,
                                               stripeIndex);
    } catch (IOException e) {
      FSNamesystem.LOG.error("F4: Error happened when calling "
        + "getParityFile/getSourceFileFromParity", e);
      return super.chooseTarget(
          numOfReplicas, writer, chosenNodes, exclNodes, blocksize);
    } catch (NotEnoughReplicasException e) {
      FSNamesystem.LOG.error("F4: Error happend when calling "
        + "getCompanionSourceNodes/getSourceFile", e);
      return super.chooseTarget(
          numOfReplicas, writer, chosenNodes, exclNodes, blocksize);
    }
    return chooseTargetOnNewFailureDomain(fileName,
                                          writer,
                                          chosenNodes,
                                          exclNodes,
                                          rackToHosts,
                                          blocksize);
  }

  /**
   * Given a stripe index, returns all racks in which the blocks of the
   * stripe reside and the hosts within those racks that host those blocks.
   *
   * The source file (if given) contributes stripeLen blocks per stripe and
   * the parity file (if given) contributes parityLen blocks per stripe; the
   * two per-rack maps are merged. A warning is logged for every rack that
   * ends up with more than one host.
   */
  private HashMap<String, HashSet<Node>> getRackToHostsMapForStripe(
      String srcFileName,
      String parityFileName,
      int stripeLen,
      int parityLen,
      int stripeIndex) throws IOException {
    HashMap<String, HashSet<Node>> merged;
    if (srcFileName == null) {
      merged = new HashMap<String, HashSet<Node>>();
    } else {
      merged = getRackToHostsMapForStripe(srcFileName, stripeIndex, stripeLen);
    }

    if (parityFileName != null) {
      HashMap<String, HashSet<Node>> parityRacks =
        getRackToHostsMapForStripe(parityFileName, stripeIndex, parityLen);
      for (Map.Entry<String, HashSet<Node>> parityEntry :
           parityRacks.entrySet()) {
        String rack = parityEntry.getKey();
        HashSet<Node> hosts = merged.get(rack);
        if (hosts == null) {
          hosts = new HashSet<Node>();
          merged.put(rack, hosts);
        }
        hosts.addAll(parityEntry.getValue());
      }
    }

    for (Map.Entry<String, HashSet<Node>> rackEntry : merged.entrySet()) {
      boolean overused = rackEntry.getValue().size() > 1;
      if (overused) {
        FSNamesystem.LOG.warn("F4: Rack " + rackEntry.getKey() +
          " being overused for stripe: " + stripeIndex);
      }
    }
    return merged;
  }

  /**
   * Collects, for one stripe of one file, the racks that hold its blocks and
   * the hosts within each of those racks.
   *
   * The stripe covers the blocks [stripeIndex * stripeLen,
   * stripeIndex * stripeLen + stripeLen), clipped to the number of blocks
   * the file currently has.
   *
   * @param src          the file whose block locations are inspected
   * @param stripeIndex  index of the stripe within the file
   * @param stripeLen    number of blocks of this file per stripe
   * @return a map from rack to the hosts in that rack holding a block of the
   *         stripe; empty if the stripe has no blocks yet or no block
   *         locations are available
   */
  private HashMap<String, HashSet<Node>> getRackToHostsMapForStripe(
      String src,
      int stripeIndex,
      int stripeLen) throws IOException {
    int sourceStart = stripeIndex * stripeLen;
    int sourceEnd = sourceStart + stripeLen;
    HashMap<String, HashSet<Node>> rackNodes =
      new HashMap<String, HashSet<Node>>();
    LocatedBlocks blocks = this.namesystem.getBlockLocations(src,
                                                             0,
                                                             Long.MAX_VALUE);
    if (blocks == null) {
      // NOTE(review): presumably returned when the file is not found; treat
      // it as "no peer blocks" instead of failing with a
      // NullPointerException.
      FSNamesystem.LOG.warn(
        "F4: No block locations available for file: " + src);
      return rackNodes;
    }
    List<LocatedBlock> sourceBlocks = blocks.getLocatedBlocks();
    sourceEnd = Math.min(sourceEnd, sourceBlocks.size());
    if (sourceStart < sourceBlocks.size()) {
      for (LocatedBlock b : sourceBlocks.subList(sourceStart, sourceEnd)) {
        for (Node n : b.getLocations()) {
          String rack = n.getNetworkLocation();
          FSNamesystem.LOG.info("F4: Block info for file: " + src
            + ", offset: " + b.getStartOffset() + ", rack: " + rack);
          HashSet<Node> nodes = rackNodes.get(rack);
          if (nodes == null) {
            nodes = new HashSet<Node>();
            rackNodes.put(rack, nodes);
          }
          nodes.add(n);
        }
      }
    }
    return rackNodes;
  }

  /**
   * This function uses the rackToHosts map (that contains the rack and the
   * corresponding nodes in those racks that contain the relevant blocks).
   *
   * The definition of "relevant blocks" is flexible. It can be used in a
   * variety of contexts. In the F4 placement policy, the relevant blocks
   * are all the peer blocks of the block to be placed. The peer blocks would
   * be all blocks in the raid stripe (data and parity included).
   *
   * It gets the racks that contain the least number of hosts for the stripe
   * (racks of the cluster that are missing from the map count as zero and
   * are added to the passed-in map). It gets the nodes within those racks
   * and tries all such hosts as potential locations for the block. The
   * attempts are:
   * 1) A host passing the isGoodTarget check with the configured
   *    considerLoad setting.
   * 2) If 1) fails and "considerLoad" is true, then the same check is
   *    done with considerLoad = false.
   * 3) If 2) fails, then a node is chosen randomly while excluding any hosts
   *    that contain a block in the same stripe as the block to be placed and
   *    any caller-excluded node.
   * 4) If 3) fails, the parent's policy is used, still honoring the
   *    caller-excluded nodes.
   *
   * @param fileName     the file the block is allocated for
   * @param writer       the writer node, may be null
   * @param chosenNodes  nodes already chosen
   * @param exclNodes    nodes that must not be chosen, may be null
   * @param rackToHosts  rack to peer-block hosts map; modified by this call
   * @param blockSize    size of the block to place
   * @return the chosen target nodes
   */
  private DatanodeDescriptor[] chooseTargetOnNewFailureDomain(
      String fileName,
      DatanodeDescriptor writer,
      List<DatanodeDescriptor> chosenNodes,
      List<Node> exclNodes,
      HashMap<String, HashSet<Node>> rackToHosts,
      long blockSize) {

    HashMap<Node, Node> excludedNodes = new HashMap<Node, Node>();
    // Racks that hold no block of the stripe take part with zero occupancy.
    for (String rack : this.clusterMap.getAllRacks()) {
      if (rackToHosts.get(rack) == null) {
        rackToHosts.put(rack, new HashSet<Node>());
      }
    }
    // Get the min occupancy in the racks.
    int minCount = Integer.MAX_VALUE;
    for (Map.Entry<String, HashSet<Node>> entry : rackToHosts.entrySet()) {
      if (entry.getValue().size() < minCount) {
        minCount = entry.getValue().size();
      }
      // DO NOT choose a host that has already been chosen for this stripe.
      for (Node n : entry.getValue()) {
        excludedNodes.put(n, n);
      }
    }

    if (exclNodes != null) {
      for (Node node : exclNodes) {
        excludedNodes.put(node, node);
      }
    }

    HashMap<String, HashSet<Node>> candidateNodesByRacks =
      new HashMap<String, HashSet<Node>>();
    for (Map.Entry<String, HashSet<Node>> entry : rackToHosts.entrySet()) {
      if (entry.getValue().size() != minCount) {
        continue;
      }
      List<Node> nodesInRack =
        this.clusterMap.getDatanodesInRack(entry.getKey());
      if (nodesInRack == null) {
        // A rack taken from block locations may not be known to the cluster
        // map; it has no candidates to offer.
        // NOTE(review): assumes getDatanodesInRack returns null for an
        // unknown rack; confirm against NetworkTopology.
        continue;
      }
      for (Node n : nodesInRack) {
        if (excludedNodes.get(n) == null) {
          HashSet<Node> candidateNodes =
            candidateNodesByRacks.get(entry.getKey());
          if (candidateNodes == null) {
            candidateNodes = new HashSet<Node>();
            candidateNodesByRacks.put(entry.getKey(), candidateNodes);
          }
          candidateNodes.add(n);
        }
      }
    }

    List<DatanodeDescriptor> results = new ArrayList<DatanodeDescriptor>();
    if (getGoodNode(candidateNodesByRacks,
                    this.considerLoad,
                    blockSize,
                    results)) {
      return results.toArray(new DatanodeDescriptor[results.size()]);
    }
    if (this.considerLoad) {
      FSNamesystem.LOG.info("F4: Retrying without considering load for file: "
        + fileName);
      if (getGoodNode(candidateNodesByRacks, false, blockSize, results)) {
        return results.toArray(new DatanodeDescriptor[results.size()]);
      }
    }
    FSNamesystem.LOG.error("F4: No datanode in a non-overlapping rack for file:"
      + fileName);
    // Final effort to get something. But it will always try to get something
    // that is not a host that contains a peer block (block in the same stripe)
    // We assume that this step should succeed. In this step all nodes in the
    // cluster are available for placement except for the few hosts that
    // already hold a block of the stripe. So it is highly unlikely that this
    // step would fail.
    try {
      super.chooseRandom(
        1, NodeBase.ROOT, excludedNodes, blockSize, 1, results);
      return results.toArray(new DatanodeDescriptor[results.size()]);
    } catch (Exception e) {
      FSNamesystem.LOG.error("F4: Could not find a data node using "
        + "the normal F4 policy. Switching to default of parent", e);
      // Keep honoring the caller's exclusions, as every other fallback to
      // the parent policy does.
      return super.chooseTarget(fileName, 1, writer,
                                chosenNodes, exclNodes, blockSize);
    }
  }

  /**
   * Orders racks by the space left on their candidate nodes, racks with more
   * space first. The space of a node is its remaining bytes minus the bytes
   * of the blocks already scheduled to it (scheduled blocks times the block
   * size).
   */
  private class RackComparator
      implements Comparator<Map.Entry<String, HashSet<Node>>> {
    private long blockSize;

    public RackComparator(long blockSize) {
      this.blockSize = blockSize;
    }

    public int compare(Map.Entry<String, HashSet<Node>> o1,
                       Map.Entry<String, HashSet<Node>> o2) {
      long difference =
        unscheduledSpace(o1.getValue()) - unscheduledSpace(o2.getValue());
      if (difference > 0) {
        // o1 has more space, so it sorts first.
        return -1;
      }
      if (difference < 0) {
        return 1;
      }
      return 0;
    }

    /** Sums remaining minus scheduled bytes over the nodes of one rack. */
    private long unscheduledSpace(HashSet<Node> nodes) {
      long total = 0;
      for (Node node : nodes) {
        DatanodeDescriptor datanode = (DatanodeDescriptor)node;
        total += datanode.getRemaining()
          - (datanode.getBlocksScheduled() * this.blockSize);
      }
      return total;
    }
  }

  /**
   * Tries the candidate racks one by one until a good node is found.
   *
   * The racks are sorted with RackComparator (more free space first) and the
   * first quarter of the sorted list is then shuffled before the racks are
   * tried in order.
   *
   * @return true if a node was found and added to results
   */
  private boolean getGoodNode(
      HashMap<String, HashSet<Node>> candidateNodesByRacks,
      boolean considerLoad,
      long blockSize,
      List<DatanodeDescriptor> results) {
    List<Map.Entry<String, HashSet<Node>>> racksInOrder =
      new ArrayList<Map.Entry<String, HashSet<Node>>>(
        candidateNodesByRacks.entrySet());
    Collections.sort(racksInOrder, new RackComparator(blockSize));

    int shuffledPrefix = racksInOrder.size() / 4;
    Collections.shuffle(racksInOrder.subList(0, shuffledPrefix));

    Iterator<Map.Entry<String, HashSet<Node>>> it = racksInOrder.iterator();
    while (it.hasNext()) {
      HashSet<Node> rackCandidates = it.next().getValue();
      if (getGoodNode(rackCandidates, considerLoad, blockSize, results)) {
        return true;
      }
    }
    return false;
  }
 
  /**
   * Helper function to find a good node among the candidates of one rack.
   *
   * The candidates are sorted by remaining space minus the bytes of the
   * blocks already scheduled to them (most space first), the first half of
   * the sorted list is shuffled, and the first node that passes isGoodTarget
   * is added to results.
   *
   * @return true if a node was found and added to results
   */
  private boolean getGoodNode(Set<Node> candidateNodes,
                              boolean considerLoad,
                              long blockSize,
                              List<DatanodeDescriptor> results) {
    final long blocksize = blockSize;
    List<DatanodeDescriptor> bySpace = new ArrayList<DatanodeDescriptor>();
    for (Node candidate : candidateNodes) {
      bySpace.add((DatanodeDescriptor)candidate);
    }

    Comparator<DatanodeDescriptor> mostSpaceFirst =
      new Comparator<DatanodeDescriptor>() {
        public int compare(DatanodeDescriptor n1, DatanodeDescriptor n2) {
          long space2 =
            n2.getRemaining() - (n2.getBlocksScheduled() * blocksize);
          long space1 =
            n1.getRemaining() - (n1.getBlocksScheduled() * blocksize);
          long difference = space2 - space1;
          if (difference > 0) {
            return 1;
          }
          if (difference < 0) {
            return -1;
          }
          return 0;
        }
      };
    Collections.sort(bySpace, mostSpaceFirst);

    // Also, add some randomness. We are doing so because it seems
    // that if there are many copies scheduled at the same time, namenode
    // does not have the uptodate information. So, we need to add some
    // randomness so that there is not a lot of copies targeted to
    // the same node, which will overload the hosts and may lead to
    // timeouts.
    int shuffledPrefix = bySpace.size() / 2;
    Collections.shuffle(bySpace.subList(0, shuffledPrefix));

    for (DatanodeDescriptor candidate : bySpace) {
      boolean good = this.isGoodTarget(candidate,
                                       blocksize,
                                       1, // max targets per rack
                                       considerLoad,
                                       results);
      if (!good) {
        continue;
      }
      results.add(candidate);
      return true;
    }
    return false;
  }
}