/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package org.apache.hadoop.zebra.io; import java.util.Comparator; import java.util.Iterator; import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; import java.io.IOException; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.zebra.tfile.RawComparable; import org.apache.hadoop.zebra.tfile.ByteArray; /** * Class used to convey the information of how on-disk data are distributed * among key-partitioned buckets. This class is used by the MapReduce layer to * calculate intelligent splits. */ public class KeyDistribution { private long uniqueBytes; private long minStepSize = -1; private SortedMap<RawComparable, BlockDistribution> data; KeyDistribution(Comparator<? super RawComparable> comparator) { data = new TreeMap<RawComparable, BlockDistribution>(comparator); } void add(RawComparable key) { data.put(key, null); } void add(RawComparable key, BlockDistribution bucket) { uniqueBytes += bucket.getLength(); data.put(key, BlockDistribution.sum(data.get(key), bucket)); } void setMinStepSize(long minStepSize) { this.minStepSize = minStepSize; } /** * Get the total unique bytes contained in the key-partitioned buckets. * * @return The total number of bytes contained in the key-partitioned buckets. */ public long length() { return uniqueBytes; } /** * Get the size of the key sampling. * * @return Number of key samples. */ public int size() { return data.size(); } /** * Get the minimum split step size from all tables in union */ public long getMinStepSize() { return minStepSize; } /** Get the list of sampling keys * * @return A list of sampling keys */ public RawComparable[] getKeys() { RawComparable[] ret = new RawComparable[data.size()]; return data.keySet().toArray(ret); } public BlockDistribution getBlockDistribution(RawComparable key) { return data.get(key); } /** * Merge the key samples * * Algorithm: select the smallest key from all clean source ranges and ranges subsequent to * respective dirty ranges. A dirty range is a range that has been partially needed * by one or more of the previous final ranges. * * @param sourceKeys * key samples to be merged * @return the merged key samples */ public static KeyDistribution merge(KeyDistribution[] sourceKeys) throws IOException { if (sourceKeys == null || sourceKeys.length == 0) return null; int srcSize = sourceKeys.length; if (srcSize == 1) return sourceKeys[0]; Comparator<? super RawComparable> comp = sourceKeys[0].data.comparator(); // TODO check the identical comparators used in the source keys /* for (int i = 1; i < srcSize; i++) if (!comp.equals(sourceKeys[i].data.comparator())) throw new IOException("Incompatible sort keys found:" + comp.toString() + " vs. "+ sourceKeys[i].data.comparator().toString()); */ KeyDistribution result = new KeyDistribution(comp); result.minStepSize = sourceKeys[0].minStepSize; for (int i = 1; i < srcSize; i++) if (result.minStepSize > sourceKeys[i].minStepSize) result.minStepSize = sourceKeys[i].minStepSize; RawComparable[][] its = new RawComparable[srcSize][]; for (int i = 0; i < srcSize; i++) its[i] = sourceKeys[i].getKeys(); RawComparable min, current; int minIndex = -1; int[] index = new int[srcSize]; boolean[] dirty = new boolean[srcSize]; while (true) { min = null; BlockDistribution bd = new BlockDistribution(); for (int i = 0; i < srcSize; i++) { if (index[i] >= its[i].length) continue; current = its[i][index[i]]; bd.add(sourceKeys[i].getBlockDistribution(current)); if (min == null || comp.compare(min, current) > 0) { min = current; minIndex = i; } } if (min == null) break; result.add(min, bd); for (int i = 0; i < srcSize; i++) { if (index[i] >= its[i].length) continue; current = its[i][index[i]]; if (i != minIndex) { if (comp.compare(min, current) != 0) { if (!dirty[i]) { dirty[i] = true; index[i]++; } else if (comp.compare(min, its[i][index[i] - 1]) > 0 ) index[i]++; } else { if (dirty[i]) dirty[i] = false; index[i]++; } } else { if (dirty[i]) dirty[i] = false; index[i]++; } } } return result; } public int resize(BlockDistribution lastBd) { Iterator<Map.Entry<RawComparable, BlockDistribution>> it = data.entrySet().iterator(); KeyDistribution adjusted = new KeyDistribution(data.comparator()); long realSize = 0, mySize = 0; RawComparable key = null; BlockDistribution bd = null, bd0 = null; while (it.hasNext()) { Map.Entry<RawComparable, BlockDistribution> mapEntry = it.next(); bd0 = mapEntry.getValue(); mySize = bd0.getLength(); if (realSize >= minStepSize/2 || (realSize + mySize >= minStepSize*ColumnGroup.SPLIT_SLOP && realSize >= minStepSize * (ColumnGroup.SPLIT_SLOP-1))) { adjusted.add(key, bd); bd = null; realSize = 0; } key = mapEntry.getKey(); realSize += mySize; bd = BlockDistribution.sum(bd, bd0); } if (bd != null) { realSize += lastBd.getLength(); if (realSize >= minStepSize/2 || adjusted.size() == 0) { // the last plus would contain more than liked, don't merge them. adjusted.add(key, bd); } else BlockDistribution.sum(lastBd, bd); } swap(adjusted); return data.size(); } private void swap(KeyDistribution other) { long tmp = minStepSize; minStepSize = other.minStepSize; other.minStepSize = tmp; SortedMap<RawComparable, BlockDistribution> tmp2 = data; data = other.data; other.data = tmp2; } }