package mil.nga.giat.geowave.datastore.hbase.mapreduce;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeSet;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.index.ByteArrayRange;
import mil.nga.giat.geowave.core.index.NumericIndexStrategy;
import mil.nga.giat.geowave.core.index.sfc.data.MultiDimensionalNumericData;
import mil.nga.giat.geowave.core.store.DataStoreOperations;
import mil.nga.giat.geowave.core.store.adapter.AdapterStore;
import mil.nga.giat.geowave.core.store.adapter.DataAdapter;
import mil.nga.giat.geowave.core.store.adapter.statistics.DataStatisticsStore;
import mil.nga.giat.geowave.core.store.adapter.statistics.RowRangeHistogramStatistics;
import mil.nga.giat.geowave.core.store.index.PrimaryIndex;
import mil.nga.giat.geowave.core.store.query.DistributableQuery;
import mil.nga.giat.geowave.core.store.util.DataStoreUtils;
import mil.nga.giat.geowave.datastore.hbase.operations.BasicHBaseOperations;
import mil.nga.giat.geowave.mapreduce.splits.GeoWaveInputSplit;
import mil.nga.giat.geowave.mapreduce.splits.GeoWaveRowRange;
import mil.nga.giat.geowave.mapreduce.splits.IntermediateSplitInfo;
import mil.nga.giat.geowave.mapreduce.splits.RangeLocationPair;
import mil.nga.giat.geowave.mapreduce.splits.SplitsProvider;
/**
 * {@link SplitsProvider} implementation for HBase-backed indices.
 *
 * <p>
 * Row ranges (taken from the query constraints, or the index's full range when
 * there is no query) are binned by the HBase region that hosts them. Every
 * region that ends up with at least one range intersecting the full range
 * contributes one {@link IntermediateSplitInfo}.
 */
public class HBaseSplitsProvider extends
        SplitsProvider
{
    private static final Logger LOGGER = LoggerFactory.getLogger(HBaseSplitsProvider.class);

    /**
     * Wraps a byte-array range in the HBase row-range type.
     *
     * @param range
     *            the range to wrap
     * @return a new {@link HBaseRowRange} holding {@code range}
     */
    public static GeoWaveRowRange wrapRange(
            final ByteArrayRange range ) {
        return new HBaseRowRange(
                range);
    }

    /**
     * Extracts the byte-array range from an {@link HBaseRowRange}.
     *
     * @param range
     *            the row range to unwrap
     * @return the wrapped range, or {@code null} (after logging an error) when
     *         {@code range} is not an {@link HBaseRowRange}
     */
    public static ByteArrayRange unwrapRange(
            final GeoWaveRowRange range ) {
        if (range instanceof HBaseRowRange) {
            return ((HBaseRowRange) range).getRange();
        }
        LOGGER.error("HBaseSplitsProvider requires use of HBaseRowRange type.");
        return null;
    }

    /**
     * Builds an {@link HBaseRowRange} spanning {@code startKey} to
     * {@code endKey}.
     *
     * <p>
     * NOTE(review): {@code isStartKeyInclusive} and {@code isEndKeyInclusive}
     * are not used by this implementation; only the two keys are carried into
     * the resulting range.
     */
    @Override
    protected GeoWaveRowRange constructRange(
            final byte[] startKey,
            final boolean isStartKeyInclusive,
            final byte[] endKey,
            final boolean isEndKeyInclusive ) {
        return new HBaseRowRange(
                new ByteArrayRange(
                        new ByteArrayId(
                                startKey),
                        new ByteArrayId(
                                endKey)));
    }

    /**
     * @return a default-constructed {@link HBaseRowRange}
     */
    @Override
    protected GeoWaveRowRange defaultConstructRange() {
        return new HBaseRowRange();
    }

    /**
     * Builds the HBase flavour of a range/location pair.
     *
     * @param range
     *            the row range of the pair
     * @param location
     *            the location (host name) associated with the range
     * @param cardinality
     *            the cardinality to record for the range
     * @return a new {@link HBaseRangeLocationPair}
     */
    @Override
    protected RangeLocationPair constructRangeLocationPair(
            final GeoWaveRowRange range,
            final String location,
            final double cardinality ) {
        return new HBaseRangeLocationPair(
                range,
                location,
                cardinality);
    }

    /**
     * Builds the HBase flavour of an input split.
     *
     * @param splitInfo
     *            range/location pairs keyed by index
     * @param locations
     *            the locations of the split
     * @return a new {@link GeoWaveHBaseInputSplit}
     */
    @Override
    public GeoWaveInputSplit constructInputSplit(
            final Map<PrimaryIndex, List<RangeLocationPair>> splitInfo,
            final String[] locations ) {
        return new GeoWaveHBaseInputSplit(
                splitInfo,
                locations);
    }

    /**
     * @return a default-constructed {@link HBaseRangeLocationPair}
     */
    public static RangeLocationPair defaultConstructRangeLocationPair() {
        return new HBaseRangeLocationPair();
    }

    /**
     * Adds one intermediate split per HBase region that hosts at least one row
     * range of interest for {@code index}.
     *
     * <p>
     * {@code splits} is returned unchanged when {@code operations} is not a
     * {@link BasicHBaseOperations}, when {@code query} does not support
     * {@code index}, or when no region locator can be obtained for the index
     * table.
     *
     * @param splits
     *            the set that new splits are added to; also the return value
     * @param operations
     *            must be a {@link BasicHBaseOperations}
     * @param index
     *            the index being split; its id is used as the table name
     * @param maxSplits
     *            passed to the constraint decomposition when positive,
     *            otherwise {@code -1} is passed
     * @param query
     *            optional query; when {@code null} the full range is used
     * @return {@code splits}
     * @throws IOException
     *             if region lookup fails or the region locator cannot be
     *             closed
     */
    @Override
    protected TreeSet<IntermediateSplitInfo> populateIntermediateSplits(
            final TreeSet<IntermediateSplitInfo> splits,
            final DataStoreOperations operations,
            final PrimaryIndex index,
            final List<DataAdapter<Object>> adapters,
            final Map<PrimaryIndex, RowRangeHistogramStatistics<?>> statsCache,
            final AdapterStore adapterStore,
            final DataStatisticsStore statsStore,
            final Integer maxSplits,
            final DistributableQuery query,
            final String[] authorizations )
            throws IOException {
        if (!(operations instanceof BasicHBaseOperations)) {
            LOGGER.error("HBaseSplitsProvider requires BasicHBaseOperations object.");
            return splits;
        }
        final BasicHBaseOperations hbaseOperations = (BasicHBaseOperations) operations;

        if ((query != null) && !query.isSupported(index)) {
            return splits;
        }

        // NOTE(review): unwrapRange can return null; fullrange is used below
        // without a null check (as before). Confirm what getRangeMax yields
        // when no range statistics are available.
        final ByteArrayRange fullrange = unwrapRange(getRangeMax(
                index,
                adapterStore,
                statsStore,
                authorizations));

        final String tableName = index.getId().getString();
        final NumericIndexStrategy indexStrategy = index.getIndexStrategy();

        List<ByteArrayRange> ranges = collectRangesToBin(
                query,
                indexStrategy,
                maxSplits,
                fullrange);

        final Map<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> binnedRanges = new HashMap<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>>();
        final RegionLocator regionLocator = hbaseOperations.getRegionLocator(tableName);
        if (regionLocator == null) {
            LOGGER.error(
                    "Unable to retrieve RegionLocator for {}",
                    tableName);
            return splits;
        }

        try {
            // Each pass bins what fits into a single region and leaves the
            // remainders in the list, so repeat until nothing is left.
            while (!ranges.isEmpty()) {
                ranges = binRanges(
                        ranges,
                        binnedRanges,
                        regionLocator);
            }
        }
        finally {
            // RegionLocator is Closeable and is not used past this point.
            // NOTE(review): assumes getRegionLocator hands ownership of the
            // locator to the caller - confirm against BasicHBaseOperations.
            regionLocator.close();
        }

        for (final Entry<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> locationEntry : binnedRanges
                .entrySet()) {
            final String hostname = locationEntry.getKey().getHostname();

            for (final Entry<HRegionInfo, List<ByteArrayRange>> regionEntry : locationEntry.getValue().entrySet()) {
                final List<RangeLocationPair> rangeList = new ArrayList<RangeLocationPair>();

                for (final ByteArrayRange range : regionEntry.getValue()) {
                    final GeoWaveRowRange rowRange = wrapRange(range);
                    final double cardinality = getCardinality(
                            getHistStats(
                                    index,
                                    adapters,
                                    adapterStore,
                                    statsStore,
                                    statsCache,
                                    authorizations),
                            rowRange);

                    if (!range.intersects(fullrange)) {
                        LOGGER.info("Query split outside of range");
                        continue;
                    }

                    // Cardinalities below 1 are raised to 1.
                    final RangeLocationPair pair = constructRangeLocationPair(
                            rowRange,
                            hostname,
                            cardinality < 1 ? 1.0 : cardinality);
                    rangeList.add(pair);

                    // Log the pair just added. Logging "the last element of
                    // rangeList" regardless of the branch taken fails on an
                    // empty list when the first range is skipped.
                    LOGGER.trace(
                            "Clipped range: {}",
                            pair.getRange());
                }

                if (!rangeList.isEmpty()) {
                    final Map<PrimaryIndex, List<RangeLocationPair>> splitInfo = new HashMap<PrimaryIndex, List<RangeLocationPair>>();
                    splitInfo.put(
                            index,
                            rangeList);
                    splits.add(new IntermediateSplitInfo(
                            splitInfo,
                            this));
                }
            }
        }

        return splits;
    }

    /**
     * Builds the mutable list of row ranges that still have to be assigned to
     * regions: the query's index constraints converted to byte-array ranges,
     * or just {@code fullrange} when there is no query.
     */
    private static List<ByteArrayRange> collectRangesToBin(
            final DistributableQuery query,
            final NumericIndexStrategy indexStrategy,
            final Integer maxSplits,
            final ByteArrayRange fullrange ) {
        final List<ByteArrayRange> ranges = new ArrayList<ByteArrayRange>();

        if (query == null) {
            ranges.add(fullrange);
            LOGGER.trace(
                    "Protected range: {}",
                    fullrange);
            return ranges;
        }

        final List<MultiDimensionalNumericData> indexConstraints = query.getIndexConstraints(indexStrategy);
        // A missing or non-positive maxSplits is passed on as -1.
        final int maxRanges = ((maxSplits != null) && (maxSplits > 0)) ? maxSplits : -1;
        ranges.addAll(DataStoreUtils.constraintsToByteArrayRanges(
                indexConstraints,
                indexStrategy,
                maxRanges));
        return ranges;
    }

    /**
     * Performs one binning pass over {@code inputRanges}.
     *
     * <p>
     * For every range the region hosting its start key is looked up. A range
     * that the region fully contains is moved into {@code binnedRanges}.
     * Otherwise the part overlapping the region is binned and a remainder
     * range, running from the region's end key to the original end, replaces
     * it in {@code inputRanges} to be handled by a later pass.
     *
     * @param inputRanges
     *            ranges still to be binned; modified in place
     * @param binnedRanges
     *            accumulator keyed by region location, then by region info
     * @param regionLocator
     *            used to find the region for each range's start key
     * @return {@code inputRanges}, now holding only the remainders
     * @throws IOException
     *             if region lookup fails or yields no location
     */
    private static List<ByteArrayRange> binRanges(
            final List<ByteArrayRange> inputRanges,
            final Map<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> binnedRanges,
            final RegionLocator regionLocator )
            throws IOException {
        final ListIterator<ByteArrayRange> i = inputRanges.listIterator();
        while (i.hasNext()) {
            final ByteArrayRange range = i.next();
            final byte[] rangeStart = range.getStart().getBytes();

            final HRegionLocation location = regionLocator.getRegionLocation(rangeStart);
            if (location == null) {
                throw new IOException(
                        "Unable to locate an HBase region for the start of range " + range);
            }

            Map<HRegionInfo, List<ByteArrayRange>> regionInfoMap = binnedRanges.get(location);
            if (regionInfoMap == null) {
                regionInfoMap = new HashMap<HRegionInfo, List<ByteArrayRange>>();
                binnedRanges.put(
                        location,
                        regionInfoMap);
            }

            final HRegionInfo regionInfo = location.getRegionInfo();
            List<ByteArrayRange> rangeList = regionInfoMap.get(regionInfo);
            if (rangeList == null) {
                rangeList = new ArrayList<ByteArrayRange>();
                regionInfoMap.put(
                        regionInfo,
                        rangeList);
            }

            if (regionInfo.containsRange(
                    rangeStart,
                    range.getEnd().getBytes())) {
                rangeList.add(range);
                i.remove();
            }
            else {
                // Keep only the part of the range that lies in this region ...
                final ByteArrayRange overlappingRange = range.intersection(new ByteArrayRange(
                        new ByteArrayId(
                                regionInfo.getStartKey()),
                        new ByteArrayId(
                                regionInfo.getEndKey())));
                rangeList.add(overlappingRange);
                i.remove();

                // ... and queue the rest. ListIterator.add inserts before the
                // cursor, so the remainder is not revisited in this pass.
                final ByteArrayRange uncoveredRange = new ByteArrayRange(
                        new ByteArrayId(
                                regionInfo.getEndKey()),
                        range.getEnd());
                i.add(uncoveredRange);
            }
        }
        return inputRanges;
    }
}