/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.hbase.wd;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import java.io.IOException;
import java.util.Arrays;
/**
 * Defines the way row keys are distributed.
 *
 * <p>Subclasses supply the key mapping through the three abstract methods; this class builds
 * per-bucket scan intervals and {@link Scan}s on top of them, and can record itself in a
 * {@link Configuration} via {@link #addInfo(Configuration)}.
 */
public abstract class AbstractRowKeyDistributor implements Parametrizable {

  /**
   * Returns the distributed form of the given key.
   * NOTE(review): presumably the key under which a record is actually stored; the exact
   * contract is defined by subclasses, which are not part of this file.
   *
   * @param originalKey original row key
   * @return distributed row key
   */
  public abstract byte[] getDistributedKey(byte[] originalKey);

  /**
   * Returns the original key for the given distributed key.
   * NOTE(review): presumably the inverse of {@link #getDistributedKey(byte[])}; confirm
   * against the implementations.
   *
   * @param adjustedKey distributed row key
   * @return original row key
   */
  public abstract byte[] getOriginalKey(byte[] adjustedKey);

  /**
   * Returns all distributed keys for the given original key.
   * {@link #getDistributedIntervals(byte[], byte[])} pairs the results for the start key and the
   * stop key by index, so implementations are expected to return arrays of the same length and in
   * a consistent order for every input.
   *
   * @param originalKey original row key
   * @return array of distributed keys
   */
  public abstract byte[][] getAllDistributedKeys(byte[] originalKey);

  /**
   * Gets all distributed intervals based on the original start and stop keys.
   * Used when scanning all buckets based on start/stop row keys. Should return keys so that all buckets in which
   * records between originalStartKey and originalStopKey were distributed are "covered".
   *
   * <p>When {@code originalStopKey} equals {@link HConstants#EMPTY_END_ROW}, the distributed start keys are
   * sorted and each interval ends at the next (sorted) distributed form of the empty key; the last interval
   * ends at {@link HConstants#EMPTY_END_ROW}. Otherwise the i-th distributed start key is paired with the
   * i-th distributed stop key.
   *
   * @param originalStartKey start key
   * @param originalStopKey stop key
   * @return array[Pair(startKey, stopKey)]
   */
  @SuppressWarnings("unchecked")
  public Pair<byte[], byte[]>[] getDistributedIntervals(byte[] originalStartKey, byte[] originalStopKey) {
    byte[][] startKeys = getAllDistributedKeys(originalStartKey);
    byte[][] stopKeys;
    if (Arrays.equals(originalStopKey, HConstants.EMPTY_END_ROW)) {
      // Open-ended scan: order the start keys so that they line up by index with the sorted stop keys below.
      Arrays.sort(startKeys, Bytes.BYTES_RAWCOMPARATOR);
      // stop keys are the start key of the next interval
      stopKeys = getAllDistributedKeys(HConstants.EMPTY_BYTE_ARRAY);
      Arrays.sort(stopKeys, Bytes.BYTES_RAWCOMPARATOR);
      // Shift left by one: interval i ends where the (i + 1)-th sorted key begins.
      for (int i = 0; i < stopKeys.length - 1; i++) {
        stopKeys[i] = stopKeys[i + 1];
      }
      // The last interval is open-ended.
      stopKeys[stopKeys.length - 1] = HConstants.EMPTY_END_ROW;
    } else {
      stopKeys = getAllDistributedKeys(originalStopKey);
      assert stopKeys.length == startKeys.length;
    }

    // Generic array creation is not possible, hence the raw array and the unchecked suppression.
    Pair<byte[], byte[]>[] intervals = new Pair[startKeys.length];
    for (int i = 0; i < startKeys.length; i++) {
      intervals[i] = new Pair<>(startKeys[i], stopKeys[i]);
    }
    return intervals;
  }

  /**
   * Creates one scan per distributed interval. Each scan is a copy of {@code original} whose start and stop
   * rows are replaced with the bounds returned by {@link #getDistributedIntervals(byte[], byte[])} for the
   * original's start and stop rows.
   *
   * @param original scan to distribute
   * @return array of scans, one per interval
   * @throws IOException if copying the original scan fails
   */
  public final Scan[] getDistributedScans(Scan original) throws IOException {
    Pair<byte[], byte[]>[] intervals = getDistributedIntervals(original.getStartRow(), original.getStopRow());

    Scan[] scans = new Scan[intervals.length];
    for (int i = 0; i < intervals.length; i++) {
      scans[i] = new Scan(original);
      scans[i].setStartRow(intervals[i].getFirst());
      scans[i].setStopRow(intervals[i].getSecond());
    }
    return scans;
  }

  /**
   * Records this distributor in the given configuration: its class name under
   * {@code WdTableInputFormat.ROW_KEY_DISTRIBUTOR_CLASS} and, if {@code getParamsToStore()} returns a
   * non-null value, its parameters under {@code WdTableInputFormat.ROW_KEY_DISTRIBUTOR_PARAMS}.
   *
   * @param conf configuration to write to
   */
  public void addInfo(Configuration conf) {
    // Store the binary name rather than the canonical name: the canonical name is null for anonymous/local
    // classes and uses '.' instead of '$' for nested classes, so it cannot be resolved by Class.forName.
    // For top-level classes both names are identical.
    // NOTE(review): assumes the reader of this property loads the class reflectively - confirm against
    // WdTableInputFormat.
    conf.set(WdTableInputFormat.ROW_KEY_DISTRIBUTOR_CLASS, this.getClass().getName());

    String paramsToStore = getParamsToStore();
    if (paramsToStore != null) {
      conf.set(WdTableInputFormat.ROW_KEY_DISTRIBUTOR_PARAMS, paramsToStore);
    }
  }
}