/** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.util; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; import java.util.List; import java.util.Map.Entry; import java.util.TreeMap; import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.TreeMultimap; /** * This is a generic region split calculator. It requires Ranges that provide * start, end, and a comparator. It works in two phases -- the first adds ranges * and rejects backwards ranges. Then one calls calcRegions to generate the * multimap that has a start split key as a key and possibly multiple Ranges as * members. * * To traverse, one normally would get the split set, and iterate through the * calcRegions. Normal regions would have only one entry, holes would have zero, * and any overlaps would have multiple entries. * * The interface is a bit cumbersome currently but is exposed this way so that * clients can choose how to iterate through the region splits. * * @param <R> */ @InterfaceAudience.Private public class RegionSplitCalculator<R extends KeyRange> { final static Log LOG = LogFactory.getLog(RegionSplitCalculator.class); private final Comparator<R> rangeCmp; /** * This contains a sorted set of all the possible split points * * Invariant: once populated this has 0 entries if empty or at most n+1 values * where n == number of added ranges. */ private final TreeSet<byte[]> splits = new TreeSet<byte[]>(BYTES_COMPARATOR); /** * This is a map from start key to regions with the same start key. * * Invariant: This always have n values in total */ private final Multimap<byte[], R> starts = ArrayListMultimap.create(); /** * SPECIAL CASE */ private final static byte[] ENDKEY = null; public RegionSplitCalculator(Comparator<R> cmp) { rangeCmp = cmp; } public final static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator() { @Override public int compare(byte[] l, byte[] r) { if (l == null && r == null) return 0; if (l == null) return 1; if (r == null) return -1; return super.compare(l, r); } }; /** * SPECIAL CASE wrapper for empty end key * * @return ENDKEY if end key is empty, else normal endkey. */ private static <R extends KeyRange> byte[] specialEndKey(R range) { byte[] end = range.getEndKey(); if (end.length == 0) { return ENDKEY; } return end; } /** * Adds an edge to the split calculator * * @return true if is included, false if backwards/invalid */ public boolean add(R range) { byte[] start = range.getStartKey(); byte[] end = specialEndKey(range); if (end != ENDKEY && Bytes.compareTo(start, end) > 0) { // don't allow backwards edges LOG.debug("attempted to add backwards edge: " + Bytes.toString(start) + " " + Bytes.toString(end)); return false; } splits.add(start); splits.add(end); starts.put(start, range); return true; } /** * Generates a coverage multimap from split key to Regions that start with the * split key. * * @return coverage multimap */ public Multimap<byte[], R> calcCoverage() { // This needs to be sorted to force the use of the comparator on the values, // otherwise byte array comparison isn't used Multimap<byte[], R> regions = TreeMultimap.create(BYTES_COMPARATOR, rangeCmp); // march through all splits from the start points for (Entry<byte[], Collection<R>> start : starts.asMap().entrySet()) { byte[] key = start.getKey(); for (R r : start.getValue()) { regions.put(key, r); for (byte[] coveredSplit : splits.subSet(r.getStartKey(), specialEndKey(r))) { regions.put(coveredSplit, r); } } } return regions; } public TreeSet<byte[]> getSplits() { return splits; } public Multimap<byte[], R> getStarts() { return starts; } /** * Find specified number of top ranges in a big overlap group. * It could return less if there are not that many top ranges. * Once these top ranges are excluded, the big overlap group will * be broken into ranges with no overlapping, or smaller overlapped * groups, and most likely some holes. * * @param bigOverlap a list of ranges that overlap with each other * @param count the max number of ranges to find * @return a list of ranges that overlap with most others */ public static <R extends KeyRange> List<R> findBigRanges(Collection<R> bigOverlap, int count) { List<R> bigRanges = new ArrayList<R>(); // The key is the count of overlaps, // The value is a list of ranges that have that many overlaps TreeMap<Integer, List<R>> overlapRangeMap = new TreeMap<Integer, List<R>>(); for (R r: bigOverlap) { // Calculates the # of overlaps for each region // and populates rangeOverlapMap byte[] startKey = r.getStartKey(); byte[] endKey = specialEndKey(r); int overlappedRegions = 0; for (R rr: bigOverlap) { byte[] start = rr.getStartKey(); byte[] end = specialEndKey(rr); if (BYTES_COMPARATOR.compare(startKey, end) < 0 && BYTES_COMPARATOR.compare(endKey, start) > 0) { overlappedRegions++; } } // One region always overlaps with itself, // so overlappedRegions should be more than 1 // for actual overlaps. if (overlappedRegions > 1) { Integer key = Integer.valueOf(overlappedRegions); List<R> ranges = overlapRangeMap.get(key); if (ranges == null) { ranges = new ArrayList<R>(); overlapRangeMap.put(key, ranges); } ranges.add(r); } } int toBeAdded = count; for (Integer key: overlapRangeMap.descendingKeySet()) { List<R> chunk = overlapRangeMap.get(key); int chunkSize = chunk.size(); if (chunkSize <= toBeAdded) { bigRanges.addAll(chunk); toBeAdded -= chunkSize; if (toBeAdded > 0) continue; } else { // Try to use the middle chunk in case the overlapping is // chained, for example: [a, c), [b, e), [d, g), [f h)... // In such a case, sideline the middle chunk will break // the group efficiently. int start = (chunkSize - toBeAdded)/2; int end = start + toBeAdded; for (int i = start; i < end; i++) { bigRanges.add(chunk.get(i)); } } break; } return bigRanges; } }