/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package org.apache.hadoop.zebra.io;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;

import org.apache.hadoop.fs.BlockLocation;

/**
 * Class used to convey how the on-disk data that falls within a specific
 * split is distributed across hosts. The MapReduce layer uses this
 * information to calculate intelligent splits.
*
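 * A minimal usage sketch; {@code bd1} and {@code bd2} are hypothetical
 * distributions assumed to have been obtained from a table reader (see the
 * references below):
 *
 * <pre>{@code
 * // bd1 and bd2: previously obtained BlockDistribution objects (hypothetical).
 * BlockDistribution merged = BlockDistribution.sum(bd1, bd2);
 * long totalBytes = merged.getLength();    // total bytes covered by the blocks
 * String[] topHosts = merged.getHosts(3);  // up to 3 hosts owning the most bytes
 * }</pre>
 *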
* @see BasicTable.Reader#getBlockDistribution(BasicTable.Reader.RangeSplit)
* @see KeyDistribution#getBlockDistribution(BytesWritable)
*/
public class BlockDistribution {
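  // Total number of bytes covered, counting each block only once
  // (independent of replication).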
private long uniqueBytes;
  private Map<String, Long> dataDistri; // map from host names to bytes.

public BlockDistribution() {
dataDistri = new HashMap<String, Long>();
}
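
  /**
   * Accumulate the given number of bytes and merge the per-host byte counts
   * into this distribution.
   */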
void add(long bytes, Map<String, Long> distri) {
this.uniqueBytes += bytes;
reduceDataDistri(dataDistri, distri);
}
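
  /**
   * Merge the per-host byte counts of rv into lv, summing the counts for
   * hosts that appear in both maps.
   */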
static void reduceDataDistri(Map<String, Long> lv, Map<String, Long> rv) {
for (Iterator<Map.Entry<String, Long>> it = rv.entrySet().iterator(); it
.hasNext();) {
Map.Entry<String, Long> e = it.next();
String key = e.getKey();
Long sum = lv.get(key);
Long delta = e.getValue();
lv.put(key, (sum == null) ? delta : sum + delta);
}
}
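
  /**
   * Add a whole block: every host holding a replica is credited with the
   * full block length.
   */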
void add(BlockLocation blkLocation) throws IOException {
long blkLen = blkLocation.getLength();
Map<String, Long> tmp = new HashMap<String, Long>();
for (String host : blkLocation.getHosts()) {
tmp.put(host, blkLen);
}
add(blkLen, tmp);
}

  /**
   * Add a partial block: every host holding a replica is credited with only
   * the given number of bytes.
   *
   * @param blkLocation
   *          Location information of the block.
   * @param length
   *          Number of bytes of the block to be credited.
   * @throws IOException
   */
void add(BlockLocation blkLocation, long length) throws IOException {
Map<String, Long> tmp = new HashMap<String, Long>();
for (String host : blkLocation.getHosts()) {
tmp.put(host, length);
}
add(length, tmp);
  }

/**
* Add another block distribution to this one.
*
* @param other
* The other block distribution.
*/
public void add(BlockDistribution other) {
add(other.uniqueBytes, other.dataDistri);
}

  /**
   * Sum up two block distributions. Either argument may be null, in which
   * case the other argument is returned as the result.
   *
   * @param a
   *          first block distribution (may be null)
   * @param b
   *          second block distribution (may be null)
   * @return the aggregated block distribution. The result may be one of the
   *         input objects, modified in place, so the inputs should not be
   *         used again after this call.
   */
public static BlockDistribution sum(BlockDistribution a, BlockDistribution b) {
if (a == null) return b;
if (b == null) return a;
a.add(b);
return a;
  }

/**
* Get the total number of bytes of all the blocks.
*
* @return total number of bytes for the blocks.
*/
public long getLength() {
return uniqueBytes;
  }

/**
* Get up to n hosts that own the most bytes.
*
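   * A hypothetical illustration: if the distribution maps hostA=300,
   * hostB=500 and hostC=100, then {@code getHosts(2)} returns
   * {"hostB", "hostA"}.
   *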
* @param n
* targeted number of hosts.
   * @return An array of up to n host names, ordered by decreasing number of
   *         bytes owned.
*/
@SuppressWarnings("unchecked")
public String[] getHosts(int n) {
Set<Map.Entry<String, Long>> entrySet = dataDistri.entrySet();
Map.Entry<String, Long>[] hostSize =
entrySet.toArray(new Map.Entry[entrySet.size()]);
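    // Sort by byte count in descending order so the largest owners come first.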
Arrays.sort(hostSize, new Comparator<Map.Entry<String, Long>>() {
@Override
public int compare(Entry<String, Long> o1, Entry<String, Long> o2) {
long diff = o1.getValue() - o2.getValue();
if (diff < 0) return 1;
if (diff > 0) return -1;
return 0;
}
});
int nHost = Math.min(hostSize.length, n);
String[] ret = new String[nHost];
for (int i = 0; i < nHost; ++i) {
ret[i] = hostSize[i].getKey();
}
return ret;
}
}