package mil.nga.giat.geowave.datastore.hbase.query;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.MultiRowRangeFilter;
import org.apache.hadoop.hbase.filter.MultiRowRangeFilter.RowRange;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Iterators;

import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.index.ByteArrayRange;
import mil.nga.giat.geowave.core.index.IndexUtils;
import mil.nga.giat.geowave.core.index.MultiDimensionalCoordinateRangesArray;
import mil.nga.giat.geowave.core.index.StringUtils;
import mil.nga.giat.geowave.core.store.CloseableIterator;
import mil.nga.giat.geowave.core.store.CloseableIteratorWrapper;
import mil.nga.giat.geowave.core.store.adapter.AdapterStore;
import mil.nga.giat.geowave.core.store.adapter.DataAdapter;
import mil.nga.giat.geowave.core.store.adapter.RowMergingDataAdapter;
import mil.nga.giat.geowave.core.store.callback.ScanCallback;
import mil.nga.giat.geowave.core.store.filter.DistributableQueryFilter;
import mil.nga.giat.geowave.core.store.filter.QueryFilter;
import mil.nga.giat.geowave.core.store.index.PrimaryIndex;
import mil.nga.giat.geowave.core.store.query.FilteredIndexQuery;
import mil.nga.giat.geowave.datastore.hbase.operations.BasicHBaseOperations;
import mil.nga.giat.geowave.datastore.hbase.util.HBaseEntryIteratorWrapper;
import mil.nga.giat.geowave.datastore.hbase.util.HBaseUtils;
import mil.nga.giat.geowave.datastore.hbase.util.HBaseUtils.MultiScannerClosableWrapper;
import mil.nga.giat.geowave.datastore.hbase.util.MergingEntryIterator;
/**
 * Base class for HBase queries that scan an index table and apply filtering.
 * Builds the {@link Scan}(s) for the query ranges, attaches optional
 * server-side filters (subsampling, distributable filters, coordinate-range
 * filters), and wraps the raw {@link Result}s in entry-decoding iterators.
 */
public abstract class HBaseFilteredIndexQuery extends
		HBaseQuery implements
		FilteredIndexQuery
{
	private final static Logger LOGGER = LoggerFactory.getLogger(HBaseFilteredIndexQuery.class);

	// Fired for each entry decoded from a scanned row
	protected final ScanCallback<?> scanCallback;

	// Filters evaluated on the client after rows are returned from HBase
	protected List<QueryFilter> clientFilters;

	// Set when a FixedCardinalitySkippingFilter was attached to the current
	// scan; the flag is handed through to the entry iterator wrappers
	private boolean hasSkippingFilter = false;

	/**
	 * @param adapterIds adapters (column families) to restrict the scan to;
	 *            null or empty means no adapter restriction
	 * @param index the primary index whose table will be scanned
	 * @param scanCallback callback fired per decoded entry
	 * @param fieldIds optional subset of field IDs to decode, paired with the
	 *            adapter they belong to
	 * @param authorizations visibility authorizations for the scan
	 */
	public HBaseFilteredIndexQuery(
			final List<ByteArrayId> adapterIds,
			final PrimaryIndex index,
			final ScanCallback<?> scanCallback,
			final Pair<List<String>, DataAdapter<?>> fieldIds,
			final String... authorizations ) {
		super(
				adapterIds,
				index,
				fieldIds,
				authorizations);
		this.scanCallback = scanCallback;
	}

	@Override
	public void setClientFilters(
			final List<QueryFilter> clientFilters ) {
		this.clientFilters = clientFilters;
	}

	/**
	 * Drops any requested adapter whose column family does not exist in the
	 * index table. NOTE: mutates {@code adapterIds} in place.
	 *
	 * @return false when adapters were explicitly requested but none of them
	 *         exist in the table; true otherwise (including the
	 *         no-restriction case where {@code adapterIds} is null or empty)
	 */
	private boolean validateAdapters(
			final BasicHBaseOperations operations )
			throws IOException {
		if ((adapterIds == null) || adapterIds.isEmpty()) {
			// no explicit adapter restriction; nothing to validate
			return true;
		}
		final Iterator<ByteArrayId> it = adapterIds.iterator();
		while (it.hasNext()) {
			final ByteArrayId adapterId = it.next();
			if (!operations.columnFamilyExists(
					index.getId().getString(),
					adapterId.getString())) {
				it.remove();
			}
		}
		return !adapterIds.isEmpty();
	}

	/**
	 * Runs the query with persistence decoding enabled.
	 */
	public CloseableIterator<Object> query(
			final BasicHBaseOperations operations,
			final AdapterStore adapterStore,
			final double[] maxResolutionSubsamplingPerDimension,
			final Integer limit ) {
		return internalQuery(
				operations,
				adapterStore,
				maxResolutionSubsamplingPerDimension,
				limit,
				true);
	}

	/**
	 * Runs the query: validates the adapters and table, builds either one
	 * scan per range (Bigtable) or a single multi-range scan (HBase), and
	 * wraps the combined results in an entry-decoding, limit-aware iterator.
	 *
	 * @param decodePersistenceEncoding whether the entry iterator should
	 *            fully decode persisted entries
	 * @return a closeable iterator over the query results; closing it closes
	 *         all underlying scanners
	 */
	protected CloseableIterator<Object> internalQuery(
			final BasicHBaseOperations operations,
			final AdapterStore adapterStore,
			final double[] maxResolutionSubsamplingPerDimension,
			final Integer limit,
			final boolean decodePersistenceEncoding ) {
		try {
			if (!validateAdapters(operations)) {
				LOGGER.warn("Query contains no valid adapters.");
				return new CloseableIterator.Empty();
			}
			if (!operations.tableExists(StringUtils.stringFromBinary(index.getId().getBytes()))) {
				LOGGER.warn("Table does not exist " + StringUtils.stringFromBinary(index.getId().getBytes()));
				return new CloseableIterator.Empty();
			}
		}
		catch (final IOException ex) {
			LOGGER.warn(
					"Unable to check if " + StringUtils.stringFromBinary(index.getId().getBytes()) + " table exists",
					ex);
			return new CloseableIterator.Empty();
		}
		final String tableName = StringUtils.stringFromBinary(index.getId().getBytes());

		final List<Iterator<Result>> resultsIterators = new ArrayList<Iterator<Result>>();
		final List<ResultScanner> results = new ArrayList<ResultScanner>();

		if (isBigtable()) {
			// Bigtable does not support MultiRowRangeFilter, so issue a
			// separate scan per query range
			final List<Scan> scanners = getScannerList(limit);

			for (final Scan scanner : scanners) {
				try {
					final ResultScanner rs = operations.getScannedResults(
							scanner,
							tableName,
							authorizations);
					if (rs != null) {
						results.add(rs);
						final Iterator<Result> it = rs.iterator();
						if (it.hasNext()) {
							resultsIterators.add(it);
						}
					}
				}
				catch (final IOException e) {
					LOGGER.warn(
							"Could not get the results from scanner",
							e);
				}
			}
		}
		else {
			// Single scan covering all ranges via a MultiRowRangeFilter
			final FilterList filterList = new FilterList();
			final Scan multiScanner = getMultiScanner(
					filterList,
					limit,
					maxResolutionSubsamplingPerDimension);

			if (isEnableCustomFilters()) {
				// Add the skipping filter if subsampling was requested
				hasSkippingFilter = false;
				if (maxResolutionSubsamplingPerDimension != null) {
					if (maxResolutionSubsamplingPerDimension.length != index
							.getIndexStrategy()
							.getOrderedDimensionDefinitions().length) {
						LOGGER.warn("Unable to subsample for table '" + tableName + "'. Subsample dimensions = "
								+ maxResolutionSubsamplingPerDimension.length + " when indexed dimensions = "
								+ index.getIndexStrategy().getOrderedDimensionDefinitions().length);
					}
					else {
						final int cardinalityToSubsample = IndexUtils.getBitPositionFromSubsamplingArray(
								index.getIndexStrategy(),
								maxResolutionSubsamplingPerDimension);

						final FixedCardinalitySkippingFilter skippingFilter = new FixedCardinalitySkippingFilter(
								cardinalityToSubsample);
						filterList.addFilter(skippingFilter);
						hasSkippingFilter = true;
					}
				}

				// Add distributable filters if requested; this has to be last
				// in the filter list for the dedupe filter to work correctly
				final List<DistributableQueryFilter> distFilters = getDistributableFilters();
				if ((distFilters != null) && !distFilters.isEmpty()) {
					final HBaseDistributableFilter hbdFilter = new HBaseDistributableFilter();
					hbdFilter.init(
							distFilters,
							index.getIndexModel());
					filterList.addFilter(hbdFilter);
				}
				else {
					// No distributable filters; fall back to a numeric
					// coordinate-range filter when coordinate ranges exist
					final List<MultiDimensionalCoordinateRangesArray> coords = getCoordinateRanges();
					if ((coords != null) && !coords.isEmpty()) {
						final HBaseNumericIndexStrategyFilter numericIndexFilter = new HBaseNumericIndexStrategyFilter(
								index.getIndexStrategy(),
								coords.toArray(new MultiDimensionalCoordinateRangesArray[] {}));
						filterList.addFilter(numericIndexFilter);
					}
				}
			}

			if (!filterList.getFilters().isEmpty()) {
				multiScanner.setFilter(filterList);
			}

			try {
				final ResultScanner rs = operations.getScannedResults(
						multiScanner,
						tableName,
						authorizations);
				if (rs != null) {
					results.add(rs);
					final Iterator<Result> it = rs.iterator();
					if (it.hasNext()) {
						resultsIterators.add(it);
					}
				}
			}
			catch (final IOException e) {
				LOGGER.warn(
						"Could not get the results from scanner",
						e);
			}
		}

		if (!results.isEmpty()) {
			Iterator it = initIterator(
					adapterStore,
					Iterators.concat(resultsIterators.iterator()),
					maxResolutionSubsamplingPerDimension,
					decodePersistenceEncoding);
			if ((limit != null) && (limit > 0)) {
				it = Iterators.limit(
						it,
						limit);
			}
			// Closing the wrapper closes every underlying ResultScanner
			return new CloseableIteratorWrapper(
					new MultiScannerClosableWrapper(
							results),
					it);
		}

		// An empty result set is a legitimate outcome, not an error
		LOGGER.warn("Results were empty");
		return new CloseableIterator.Empty();
	}

	private boolean isEnableCustomFilters() {
		return (options != null && options.isEnableCustomFilters());
	}

	private boolean isBigtable() {
		return (options != null && options.isBigTable());
	}

	/**
	 * Bigtable does not support MultiRowRangeFilters, so this builds one
	 * {@link Scan} per query range (a single unbounded scan when no ranges
	 * were provided).
	 */
	protected List<Scan> getScannerList(
			final Integer limit ) {
		List<ByteArrayRange> ranges = getRanges();
		if ((ranges == null) || ranges.isEmpty()) {
			// null start/end produces an unbounded, full-table scan
			ranges = Collections.singletonList(new ByteArrayRange(
					null,
					null));
		}
		final List<Scan> scanners = new ArrayList<Scan>();
		for (final ByteArrayRange range : ranges) {
			final Scan scanner = createStandardScanner(limit);
			if (range.getStart() != null) {
				scanner.setStartRow(range.getStart().getBytes());
				if (!range.isSingleValue()) {
					scanner.setStopRow(HBaseUtils.getNextPrefix(range.getEnd().getBytes()));
				}
				else {
					// single-value range: stop immediately past the start row
					scanner.setStopRow(HBaseUtils.getNextPrefix(range.getStart().getBytes()));
				}
			}
			scanners.add(scanner);
		}
		return scanners;
	}

	/**
	 * Default (non-Bigtable) case: a single {@link Scan} covering all query
	 * ranges via one {@link MultiRowRangeFilter}. The scan's start/stop rows
	 * are tightened to the first range's start and the last range's stop.
	 *
	 * @param filterList the filter list the range filter is added to (also
	 *            receives any custom filters added by the caller)
	 */
	protected Scan getMultiScanner(
			final FilterList filterList,
			final Integer limit,
			final double[] maxResolutionSubsamplingPerDimension ) {
		// Single scan w/ multiple ranges
		final Scan multiScanner = createStandardScanner(limit);

		final List<ByteArrayRange> ranges = getRanges();
		final MultiRowRangeFilter filter = getMultiRowRangeFilter(ranges);
		if (filter != null) {
			filterList.addFilter(filter);

			// MultiRowRangeFilter sorts its ranges, so the first/last entries
			// bound the whole scan
			final List<RowRange> rowRanges = filter.getRowRanges();
			multiScanner.setStartRow(rowRanges.get(
					0).getStartRow());

			final RowRange stopRowRange = rowRanges.get(rowRanges.size() - 1);
			byte[] stopRowExclusive;
			if (stopRowRange.isStopRowInclusive()) {
				// the scan's stop row is always exclusive; appending a
				// trailing 0x00 byte turns an inclusive stop row into the
				// equivalent exclusive one
				stopRowExclusive = Arrays.copyOf(
						stopRowRange.getStopRow(),
						stopRowRange.getStopRow().length + 1);
			}
			else {
				stopRowExclusive = stopRowRange.getStopRow();
			}
			multiScanner.setStopRow(stopRowExclusive);
		}
		return multiScanner;
	}

	/**
	 * Creates a {@link Scan} with the store's performance options applied and
	 * restricted to the requested adapters' column families.
	 */
	protected Scan createStandardScanner(
			final Integer limit ) {
		final Scan scanner = new Scan();

		// Performance tuning per store options
		scanner.setCaching(getScanCacheSize());
		scanner.setCacheBlocks(isEnableBlockCache());

		// Only return the most recent cell version
		scanner.setMaxVersions(1);

		if ((adapterIds != null) && !adapterIds.isEmpty()) {
			for (final ByteArrayId adapterId : adapterIds) {
				scanner.addFamily(adapterId.getBytes());
			}
		}

		// NOTE(review): Scan.getBatch() defaults to -1, so a positive limit
		// is never less than it and this branch never executes. Left as-is
		// because enabling it would set a cells-per-Result batch, which can
		// split a row across multiple Results -- confirm intent before fixing.
		if ((limit != null) && (limit > 0) && (limit < scanner.getBatch())) {
			scanner.setBatch(limit);
		}

		return scanner;
	}

	private int getScanCacheSize() {
		if (options != null) {
			if (options.getScanCacheSize() != HConstants.DEFAULT_HBASE_CLIENT_SCANNER_CACHING) {
				return options.getScanCacheSize();
			}
		}
		// Fall back to a large scanner cache when the store uses the default.
		// TODO: this default should come from configuration
		return 10000;
	}

	private boolean isEnableBlockCache() {
		if (options != null) {
			return options.isEnableBlockCache();
		}
		return true;
	}

	/**
	 * Converts the query's byte-array ranges into a single
	 * {@link MultiRowRangeFilter} of inclusive-start/exclusive-stop row
	 * ranges (an unbounded range when no query ranges exist).
	 *
	 * @return the filter, or null if it could not be created
	 */
	protected MultiRowRangeFilter getMultiRowRangeFilter(
			final List<ByteArrayRange> ranges ) {
		// create the multi-row filter
		final List<RowRange> rowRanges = new ArrayList<RowRange>();
		if ((ranges == null) || ranges.isEmpty()) {
			rowRanges.add(new RowRange(
					HConstants.EMPTY_BYTE_ARRAY,
					true,
					HConstants.EMPTY_BYTE_ARRAY,
					false));
		}
		else {
			// ranges with a null start are skipped, matching getScannerList
			for (final ByteArrayRange range : ranges) {
				if (range.getStart() != null) {
					final byte[] startRow = range.getStart().getBytes();
					byte[] stopRow;
					if (!range.isSingleValue()) {
						stopRow = HBaseUtils.getNextPrefix(range.getEnd().getBytes());
					}
					else {
						stopRow = HBaseUtils.getNextPrefix(range.getStart().getBytes());
					}

					final RowRange rowRange = new RowRange(
							startRow,
							true,
							stopRow,
							false);

					rowRanges.add(rowRange);
				}
			}
		}

		// Create the multi-range filter
		try {
			return new MultiRowRangeFilter(
					rowRanges);
		}
		catch (final IOException e) {
			LOGGER.error(
					"Error creating range filter.",
					e);
		}
		return null;
	}

	// Override this (see HBaseConstraintsQuery)
	protected List<DistributableQueryFilter> getDistributableFilters() {
		return null;
	}

	// Override this (see HBaseConstraintsQuery)
	protected List<MultiDimensionalCoordinateRangesArray> getCoordinateRanges() {
		return null;
	}

	/**
	 * Wraps the raw result iterator in an entry-decoding iterator, choosing a
	 * merging iterator when any requested adapter is a
	 * {@link RowMergingDataAdapter} with a transform.
	 */
	protected Iterator initIterator(
			final AdapterStore adapterStore,
			final Iterator<Result> resultsIterator,
			final double[] maxResolutionSubsamplingPerDimension,
			final boolean decodePersistenceEncoding ) {
		// TODO Since currently we are not supporting server side
		// iterator/coprocessors, we also cannot run
		// server side filters and hence they have to run on clients itself. So
		// need to add server side filters also in list of client filters.
		final List<QueryFilter> filters = getAllFiltersList();
		final QueryFilter queryFilter = filters.isEmpty() ? null : filters.size() == 1 ? filters.get(0)
				: new mil.nga.giat.geowave.core.store.filter.FilterList<QueryFilter>(
						filters);

		final Map<ByteArrayId, RowMergingDataAdapter> mergingAdapters = new HashMap<ByteArrayId, RowMergingDataAdapter>();
		// adapterIds may legitimately be null (no adapter restriction); in
		// that case no merging adapters can be detected
		if (adapterIds != null) {
			for (final ByteArrayId adapterId : adapterIds) {
				final DataAdapter adapter = adapterStore.getAdapter(adapterId);
				if ((adapter instanceof RowMergingDataAdapter)
						&& (((RowMergingDataAdapter) adapter).getTransform() != null)) {
					mergingAdapters.put(
							adapterId,
							(RowMergingDataAdapter) adapter);
				}
			}
		}

		if (mergingAdapters.isEmpty()) {
			return new HBaseEntryIteratorWrapper(
					adapterStore,
					index,
					resultsIterator,
					queryFilter,
					scanCallback,
					fieldIds,
					maxResolutionSubsamplingPerDimension,
					decodePersistenceEncoding,
					hasSkippingFilter);
		}
		else {
			return new MergingEntryIterator(
					adapterStore,
					index,
					resultsIterator,
					queryFilter,
					scanCallback,
					mergingAdapters,
					fieldIds,
					maxResolutionSubsamplingPerDimension,
					hasSkippingFilter);
		}
	}

	/**
	 * Returns the full list of client-side filters. Subclasses may override
	 * to also include distributed filters.
	 */
	protected List<QueryFilter> getAllFiltersList() {
		final List<QueryFilter> filters = new ArrayList<QueryFilter>();
		// clientFilters is null until setClientFilters is called
		if (clientFilters != null) {
			filters.addAll(clientFilters);
		}
		return filters;
	}
}