/**
 * Copyright 2008 - CommonCrawl Foundation
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.commoncrawl.util;

import java.io.IOException;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.commoncrawl.crawl.common.internal.CrawlEnvironment;

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeMultimap;

/**
 * Builds a node affinity mask that maps each partition (part file) in a
 * directory to the datanode hosting the majority of its blocks, so that
 * reducers can be scheduled close to their data.
 *
 * @author rana
 */
public class NodeAffinityMaskBuilder {

  private static final Log LOG = LogFactory.getLog(NodeAffinityMaskBuilder.class);

  private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();

  static {
    NUMBER_FORMAT.setMinimumIntegerDigits(5);
    NUMBER_FORMAT.setGroupingUsed(false);
  }

  public static void setNodeAffinityMask(Configuration jobConfig, String mask) {
    jobConfig.set("mapred.node.affinity.mask", mask);
  }

  public static String getNodeAffinityMask(Configuration jobConfig) {
    return jobConfig.get("mapred.node.affinity.mask");
  }

  public static String buildNodeAffinityMask(FileSystem fileSystem, Path partFileDirectory,
      Map<Integer, String> optionalRootMapHint) throws IOException {
    return buildNodeAffinityMask(fileSystem, partFileDirectory, optionalRootMapHint, null);
  }

  public static String buildNodeAffinityMask(FileSystem fileSystem, Path partFileDirectory,
      Map<Integer, String> optionalRootMapHint, Set<String> excludedNodeList) throws IOException {
    return buildNodeAffinityMask(fileSystem, partFileDirectory, optionalRootMapHint, excludedNodeList, -1, false);
  }

  public static String buildNodeAffinityMask(FileSystem fileSystem, Path partFileDirectory,
      Map<Integer, String> optionalRootMapHint, Set<String> excludedNodeList,
      int maxReducersPerNode, boolean skipBalance) throws IOException {

    TreeMap<Integer, String> partitionToNodeMap = new TreeMap<Integer, String>();

    FileStatus paths[] = fileSystem.globStatus(new Path(partFileDirectory, "part-*"));

    // globStatus can return null when nothing matches
    if (paths == null || paths.length == 0) {
      throw new IOException("Invalid source Path:" + partFileDirectory);
    }

    Multimap<String, Integer> inverseMap = TreeMultimap.create();
    Map<Integer, List<String>> partitionToDesiredCandidateList = new TreeMap<Integer, List<String>>();
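    // The mask assembled below is a tab-delimited list of "partition,host"
    // pairs, for example (hypothetical host names):
    //
    //   "0,node01\t1,node02\t2,node01"
    //
    // parseAffinityMask() below is the exact inverse of this encoding.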
    // iterate paths
    for (FileStatus path : paths) {

      String currentFile = path.getPath().getName();

      int partitionNumber;
      try {
        if (currentFile.startsWith("part-r")) {
          partitionNumber = NUMBER_FORMAT.parse(currentFile.substring("part-r-".length())).intValue();
        } else {
          partitionNumber = NUMBER_FORMAT.parse(currentFile.substring("part-".length())).intValue();
        }
      } catch (ParseException e) {
        throw new IOException("Invalid Part Name Encountered:" + currentFile);
      }

      // get block locations
      BlockLocation locations[] = fileSystem.getFileBlockLocations(path, 0, path.getLen());

      // if the passed-in root map hint is not null, validate that all blocks
      // for the current file reside on the desired node
      if (optionalRootMapHint != null) {
        // the host all blocks should reside on
        String desiredHost = optionalRootMapHint.get(partitionNumber);

        ArrayList<String> misplacedBlocks = new ArrayList<String>();
        // ok, walk all blocks
        for (BlockLocation location : locations) {
          boolean found = false;
          for (String host : location.getHosts()) {
            if (host.compareTo(desiredHost) == 0) {
              found = true;
              break;
            }
          }
          if (!found) {
            misplacedBlocks.add("Block At:" + location.getOffset() + " for File:" + path.getPath()
                + " did not contain desired location:" + desiredHost);
          }
        }
        // only fail the check past a certain threshold (more than half of the
        // blocks misplaced)
        if (misplacedBlocks.size() != 0
            && ((float) misplacedBlocks.size() / (float) locations.length) > .50f) {
          LOG.error("Misplaced Blocks Exceed Threshold");
          for (String misplacedBlock : misplacedBlocks) {
            LOG.error(misplacedBlock);
          }
          // TODO: SKIP THIS STEP FOR NOW ???
          // throw new IOException("Misplaced Blocks Exceed Threshold!");
        }
        partitionToNodeMap.put(partitionNumber, desiredHost);
      } else {
        if (excludedNodeList != null) {
          // LOG.info("Excluded Node List is:" + Lists.newArrayList(excludedNodeList).toString());
        }
        // ok, count per host how many of this partition's blocks it holds,
        // skipping excluded nodes
        TreeMap<String, Integer> nodeToBlockCount = new TreeMap<String, Integer>();

        for (BlockLocation location : locations) {
          for (String host : location.getHosts()) {
            if (excludedNodeList == null || !excludedNodeList.contains(host)) {
              Integer nodeHitCount = nodeToBlockCount.get(host);
              if (nodeHitCount == null) {
                nodeToBlockCount.put(host, 1);
              } else {
                nodeToBlockCount.put(host, nodeHitCount.intValue() + 1);
              }
            }
          }
        }

        if (nodeToBlockCount.size() == 0) {
          throw new IOException("No valid nodes found for partition:" + partitionNumber
              + " at path:" + path.getPath());
        }

        Map.Entry<String, Integer> entries[] = nodeToBlockCount.entrySet().toArray(new Map.Entry[0]);

        // sort hosts by descending block count
        Arrays.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
          @Override
          public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
            return o1.getValue().intValue() < o2.getValue().intValue() ? 1
                : o1.getValue().intValue() == o2.getValue().intValue() ? 0 : -1;
          }
        });

        // build a list of nodes by priority ...
        List<String> nodesByPriority = Lists.transform(Lists.newArrayList(entries),
            new Function<Map.Entry<String, Integer>, String>() {
              @Override
              public String apply(Entry<String, Integer> entry) {
                return entry.getKey();
              }
            });

        // stash it away ...
        partitionToDesiredCandidateList.put(partitionNumber, nodesByPriority);

        // LOG.info("Mapping Partition:" + partitionNumber + " To Node:" + entries[0].getKey()
        //     + " BlockCount:" + entries[0].getValue().intValue());
        partitionToNodeMap.put(partitionNumber, entries[0].getKey());
        // store the inverse mapping ...
        inverseMap.put(entries[0].getKey(), partitionNumber);
      }
    }
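    // At this point, when no root map hint was supplied, each partition has
    // been greedily assigned to the host holding the most of its blocks.
    // inverseMap holds the reverse host -> partitions view consumed by the
    // balancing passes below, and partitionToDesiredCandidateList remembers
    // each partition's hosts in descending block-count order as fallback
    // candidates.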
    if (skipBalance) {
      // walk partition map to make sure everything is assigned ...
      /*
      for (String node : inverseMap.keySet()) {
        if (inverseMap.get(node).size() > maxReducersPerNode) {
          throw new IOException("Node:" + node + " has too many partitions! ("
              + inverseMap.get(node).size() + ")");
        }
      }
      */
    }

    // now, if the optional root map hint is null, rebalance the assignment
    if (optionalRootMapHint == null && !skipBalance) {
      // figure out if there is an imbalance
      int avgRegionsPerNode = (int) Math.floor((float) paths.length / (float) inverseMap.keySet().size());
      int maxRegionsPerNode = (int) Math.ceil((float) paths.length / (float) inverseMap.keySet().size());

      LOG.info("Attempting to ideally balance nodes. Avg partitions per node:" + avgRegionsPerNode);

      // two passes ..
      for (int pass = 0; pass < 2; ++pass) {
        LOG.info("Pass:" + pass);
        // iterate nodes ...
        for (String node : ImmutableSet.copyOf(inverseMap.keySet())) {
          // get partitions in map
          Collection<Integer> partitions = ImmutableList.copyOf(inverseMap.get(node));
          // if partition count exceeds the desired maximum ...
          if (partitions.size() > maxRegionsPerNode) {
            // first pass, reassign based on preference
            if (pass == 0) {
              LOG.info("Node:" + node + " partition count:" + partitions.size()
                  + " exceeds avg:" + avgRegionsPerNode);
              // walk partitions, trying to find a node to offload each partition to
              for (int partition : partitions) {
                for (String candidate : partitionToDesiredCandidateList.get(partition)) {
                  if (!candidate.equals(node)) {
                    // see if this candidate has room ..
                    if (inverseMap.get(candidate).size() < avgRegionsPerNode) {
                      LOG.info("REASSIGNING partition:" + partition + " from Node:" + node
                          + " to Node:" + candidate);
                      // found a match, reassign it ...
                      inverseMap.remove(node, partition);
                      inverseMap.put(candidate, partition);
                      break;
                    }
                  }
                }
                // break out once we reach the desired number of partitions for this node
                if (inverseMap.get(node).size() == avgRegionsPerNode)
                  break;
              }
            }
            // second pass ... reassign based on the least loaded node ...
            else {
              int desiredRelocations = partitions.size() - maxRegionsPerNode;
              LOG.info("Desired Relocation for node:" + node + ":" + desiredRelocations
                  + " partitions:" + partitions.size());
              for (int i = 0; i < desiredRelocations; ++i) {
                // find the least loaded node
                String leastLoadedNode = null;
                int leastLoadedNodePartitionCount = 0;

                for (String candidateNode : inverseMap.keySet()) {
                  if (leastLoadedNode == null
                      || inverseMap.get(candidateNode).size() < leastLoadedNodePartitionCount) {
                    leastLoadedNode = candidateNode;
                    leastLoadedNodePartitionCount = inverseMap.get(candidateNode).size();
                  }
                }

                // pick the partition that ranks the least loaded node highest
                // in its candidate list
                int bestPartition = -1;
                int bestPartitionOffset = -1;

                for (int candidatePartition : inverseMap.get(node)) {
                  int offset = 0;
                  for (String nodeCandidate : partitionToDesiredCandidateList.get(candidatePartition)) {
                    if (nodeCandidate.equals(leastLoadedNode)) {
                      if (bestPartition == -1 || bestPartitionOffset > offset) {
                        bestPartition = candidatePartition;
                        bestPartitionOffset = offset;
                      }
                      break;
                    }
                    offset++;
                  }
                }

                // fall back to an arbitrary partition if none prefers this node
                if (bestPartition == -1) {
                  bestPartition = Iterables.get(inverseMap.get(node), 0);
                }

                LOG.info("REASSIGNING partition:" + bestPartition + " from Node:" + node
                    + " to Node:" + leastLoadedNode);

                // found a match, reassign it ...
                inverseMap.remove(node, bestPartition);
                inverseMap.put(leastLoadedNode, bestPartition);
              }
            }
          }
        }
      }
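      // Both passes are done: pass 0 offloaded partitions only onto preferred
      // candidate hosts with spare capacity, while pass 1 forced any remaining
      // overflow onto the least loaded node, preferring partitions that rank
      // that node highest. Rebuild the forward partition -> node map from the
      // adjusted inverse map.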
      LOG.info("Rebuilding partition to node map based on ideal balance");

      for (String node : inverseMap.keySet()) {
        LOG.info("Node:" + node + " has:" + inverseMap.get(node).size() + " partitions:"
            + inverseMap.get(node).toString());
      }

      partitionToNodeMap.clear();
      for (Map.Entry<String, Integer> entry : inverseMap.entries()) {
        partitionToNodeMap.put(entry.getValue(), entry.getKey());
      }
    }

    // emit the mask as tab-delimited "partition,host" pairs
    StringBuilder builder = new StringBuilder();
    int itemCount = 0;
    for (Map.Entry<Integer, String> entry : partitionToNodeMap.entrySet()) {
      if (itemCount++ != 0)
        builder.append("\t");
      builder.append(entry.getKey().intValue() + "," + entry.getValue());
    }
    return builder.toString();
  }

  public static Map<Integer, String> parseAffinityMask(String mask) {
    HashMap<Integer, String> mapOut = new HashMap<Integer, String>();

    String parts[] = mask.split("\t");
    for (String part : parts) {
      String partitionAndHostName[] = part.split(",");
      if (partitionAndHostName.length == 2) {
        mapOut.put(Integer.parseInt(partitionAndHostName[0]), partitionAndHostName[1]);
      }
    }
    return mapOut;
  }

  public static void main(String[] args) {
    LOG.info("Initializing Hadoop Config");

    Configuration conf = new Configuration();

    conf.addResource("nutch-default.xml");
    conf.addResource("nutch-site.xml");
    conf.addResource("mapred-site.xml");
    conf.addResource("hdfs-site.xml");
    conf.addResource("commoncrawl-default.xml");
    conf.addResource("commoncrawl-site.xml");

    CrawlEnvironment.setHadoopConfig(conf);
    CrawlEnvironment.setDefaultHadoopFSURI("hdfs://ccn01:9000/");

    try {
      FileSystem fs = CrawlEnvironment.getDefaultFileSystem();

      String affinityMask = buildNodeAffinityMask(fs, new Path(args[0]), null,
          Sets.newHashSet("ccd001.commoncrawl.org"));

      Map<Integer, String> affinityMap = parseAffinityMask(affinityMask);

      for (Map.Entry<Integer, String> entry : affinityMap.entrySet()) {
        LOG.info("Partition:" + entry.getKey().intValue() + " Host:" + entry.getValue());
      }
    } catch (IOException e) {
      LOG.error(CCStringUtils.stringifyException(e));
    }
  }
}