/** * */ package mil.nga.giat.geowave.datastore.hbase; import java.io.Closeable; import java.io.Flushable; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.FilterList; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Iterators; import mil.nga.giat.geowave.core.index.ByteArrayId; import mil.nga.giat.geowave.core.index.StringUtils; import mil.nga.giat.geowave.core.store.CloseableIterator; import mil.nga.giat.geowave.core.store.CloseableIteratorWrapper; import mil.nga.giat.geowave.core.store.DataStoreOperations; import mil.nga.giat.geowave.core.store.DataStoreOptions; import mil.nga.giat.geowave.core.store.IndexWriter; import mil.nga.giat.geowave.core.store.adapter.AdapterIndexMappingStore; import mil.nga.giat.geowave.core.store.adapter.AdapterStore; import mil.nga.giat.geowave.core.store.adapter.DataAdapter; import mil.nga.giat.geowave.core.store.adapter.WritableDataAdapter; import mil.nga.giat.geowave.core.store.adapter.statistics.DataStatisticsStore; import mil.nga.giat.geowave.core.store.adapter.statistics.DuplicateEntryCount; import mil.nga.giat.geowave.core.store.base.BaseDataStore; import mil.nga.giat.geowave.core.store.base.DataStoreEntryInfo; import mil.nga.giat.geowave.core.store.callback.IngestCallback; import mil.nga.giat.geowave.core.store.callback.ScanCallback; import mil.nga.giat.geowave.core.store.filter.DedupeFilter; import mil.nga.giat.geowave.core.store.index.IndexMetaDataSet; import mil.nga.giat.geowave.core.store.index.IndexStore; import mil.nga.giat.geowave.core.store.index.PrimaryIndex; import mil.nga.giat.geowave.core.store.query.DistributableQuery; import mil.nga.giat.geowave.core.store.query.Query; import mil.nga.giat.geowave.core.store.query.QueryOptions; import mil.nga.giat.geowave.datastore.hbase.index.secondary.HBaseSecondaryIndexDataStore; import mil.nga.giat.geowave.datastore.hbase.io.HBaseWriter; import mil.nga.giat.geowave.datastore.hbase.mapreduce.GeoWaveHBaseRecordReader; import mil.nga.giat.geowave.datastore.hbase.mapreduce.HBaseSplitsProvider; import mil.nga.giat.geowave.datastore.hbase.metadata.HBaseAdapterIndexMappingStore; import mil.nga.giat.geowave.datastore.hbase.metadata.HBaseAdapterStore; import mil.nga.giat.geowave.datastore.hbase.metadata.HBaseDataStatisticsStore; import mil.nga.giat.geowave.datastore.hbase.metadata.HBaseIndexStore; import mil.nga.giat.geowave.datastore.hbase.operations.BasicHBaseOperations; import mil.nga.giat.geowave.datastore.hbase.operations.config.HBaseOptions; import mil.nga.giat.geowave.datastore.hbase.query.HBaseConstraintsQuery; import mil.nga.giat.geowave.datastore.hbase.query.HBaseRowIdsQuery; import mil.nga.giat.geowave.datastore.hbase.query.HBaseRowPrefixQuery; import mil.nga.giat.geowave.datastore.hbase.query.SingleEntryFilter; import mil.nga.giat.geowave.datastore.hbase.util.HBaseEntryIteratorWrapper; import mil.nga.giat.geowave.datastore.hbase.util.HBaseUtils; import mil.nga.giat.geowave.datastore.hbase.util.HBaseUtils.MultiScannerClosableWrapper; import mil.nga.giat.geowave.mapreduce.MapReduceDataStore; import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputKey; public class HBaseDataStore extends BaseDataStore implements MapReduceDataStore { public final static String TYPE = "hbase"; private final static Logger LOGGER = LoggerFactory.getLogger(HBaseDataStore.class); private final BasicHBaseOperations operations; private final HBaseOptions options; private final HBaseSplitsProvider splitsProvider = new HBaseSplitsProvider(); public HBaseDataStore( final BasicHBaseOperations operations ) { this( new HBaseIndexStore( operations), new HBaseAdapterStore( operations), new HBaseDataStatisticsStore( operations), new HBaseAdapterIndexMappingStore( operations), new HBaseSecondaryIndexDataStore( operations), operations); } public HBaseDataStore( final BasicHBaseOperations operations, final HBaseOptions options ) { this( new HBaseIndexStore( operations), new HBaseAdapterStore( operations), new HBaseDataStatisticsStore( operations), new HBaseAdapterIndexMappingStore( operations), new HBaseSecondaryIndexDataStore( operations), operations, options); } public HBaseDataStore( final IndexStore indexStore, final AdapterStore adapterStore, final DataStatisticsStore statisticsStore, final AdapterIndexMappingStore indexMappingStore, final HBaseSecondaryIndexDataStore secondaryIndexDataStore, final BasicHBaseOperations operations ) { this( indexStore, adapterStore, statisticsStore, indexMappingStore, secondaryIndexDataStore, operations, new HBaseOptions()); } public HBaseDataStore( final IndexStore indexStore, final AdapterStore adapterStore, final DataStatisticsStore statisticsStore, final AdapterIndexMappingStore indexMappingStore, final HBaseSecondaryIndexDataStore secondaryIndexDataStore, final BasicHBaseOperations operations, final HBaseOptions options ) { super( indexStore, adapterStore, statisticsStore, indexMappingStore, secondaryIndexDataStore, operations, options); this.operations = operations; this.options = options; secondaryIndexDataStore.setDataStore(this); } @Override protected void initOnIndexWriterCreate( final DataAdapter adapter, final PrimaryIndex index ) {} @Override protected IndexWriter createIndexWriter( final DataAdapter adapter, final PrimaryIndex index, final DataStoreOperations baseOperations, final DataStoreOptions baseOptions, final IngestCallback callback, final Closeable closable ) { return new HBaseIndexWriter( adapter, index, operations, options, callback, closable); } @Override protected <T> void addAltIndexCallback( final List<IngestCallback<T>> callbacks, final String indexName, final DataAdapter<T> adapter, final ByteArrayId primaryIndexId ) { try { callbacks.add(new AltIndexCallback<T>( indexName, (WritableDataAdapter<T>) adapter, options)); } catch (final Exception e) { LOGGER.error( "Unable to create table table for alt index to [" + indexName + "]", e); } } @Override protected CloseableIterator<Object> getEntryRows( final PrimaryIndex index, final AdapterStore tempAdapterStore, final List<ByteArrayId> dataIds, final DataAdapter<?> adapter, final ScanCallback<Object> scanCallback, final DedupeFilter dedupeFilter, final String[] authorizations, boolean delete ) { final String tableName = StringUtils.stringFromBinary(index.getId().getBytes()); final List<Iterator<Result>> resultList = new ArrayList<Iterator<Result>>(); final List<ResultScanner> resultScanners = new ArrayList<ResultScanner>(); Iterator<Result> iterator = null; try { final Scan scanner = new Scan(); scanner.setMaxVersions(1); scanner.addFamily(adapter.getAdapterId().getBytes()); if (options.isEnableCustomFilters()) { final FilterList filterList = new FilterList(); for (final ByteArrayId dataId : dataIds) { filterList.addFilter(new SingleEntryFilter( dataId.getBytes(), adapter.getAdapterId().getBytes())); } if (!filterList.getFilters().isEmpty()) { scanner.setFilter(filterList); } } final ResultScanner results = operations.getScannedResults( scanner, tableName, authorizations); Iterator<Result> resultIt; if (!options.isEnableCustomFilters()) { ArrayList<Result> filteredResults = new ArrayList<Result>(); for (Result result = results.next(); result != null; result = results.next()) { byte[] rowId = result.getRow(); if (rowHasData( rowId, dataIds)) { filteredResults.add(result); } } resultIt = filteredResults.iterator(); } else { resultIt = results.iterator(); } resultScanners.add(results); if (resultIt.hasNext()) { resultList.add(resultIt); } iterator = Iterators.concat(resultList.iterator()); } catch (final IOException e) { LOGGER.warn( "Unable to query table '" + tableName + "'. Table does not exist.", e); } return new CloseableIteratorWrapper<Object>( new MultiScannerClosableWrapper( resultScanners), new HBaseEntryIteratorWrapper( tempAdapterStore, index, iterator, dedupeFilter, scanCallback, null, null, true, false)); } protected boolean rowHasData( final byte[] rowId, final List<ByteArrayId> dataIds ) throws IOException { final byte[] metadata = Arrays.copyOfRange( rowId, rowId.length - 12, rowId.length); final ByteBuffer metadataBuf = ByteBuffer.wrap(metadata); final int adapterIdLength = metadataBuf.getInt(); final int dataIdLength = metadataBuf.getInt(); final ByteBuffer buf = ByteBuffer.wrap( rowId, 0, rowId.length - 12); final byte[] indexId = new byte[rowId.length - 12 - adapterIdLength - dataIdLength]; final byte[] rawAdapterId = new byte[adapterIdLength]; final byte[] rawDataId = new byte[dataIdLength]; buf.get(indexId); buf.get(rawAdapterId); buf.get(rawDataId); for (ByteArrayId dataId : dataIds) { if (Arrays.equals( rawDataId, dataId.getBytes())) { return true; } } return false; } @Override protected List<ByteArrayId> getAltIndexRowIds( final String tableName, final List<ByteArrayId> dataIds, final ByteArrayId adapterId, final String... authorizations ) { final List<ByteArrayId> result = new ArrayList<ByteArrayId>(); try { if (options.isUseAltIndex() && operations.tableExists(tableName)) { for (final ByteArrayId dataId : dataIds) { final Scan scanner = new Scan(); scanner.setStartRow(dataId.getBytes()); scanner.setStopRow(dataId.getBytes()); scanner.addFamily(adapterId.getBytes()); final ResultScanner results = operations.getScannedResults( scanner, tableName, authorizations); final Iterator<Result> iterator = results.iterator(); while (iterator.hasNext()) { result.add(new ByteArrayId( CellUtil.cloneQualifier(iterator.next().listCells().get( 0)))); } } } } catch (final IOException e) { LOGGER.warn( "Unable to query table '" + tableName + "'. Table does not exist.", e); } return result; } @Override protected CloseableIterator<Object> queryConstraints( final List<ByteArrayId> adapterIdsToQuery, final PrimaryIndex index, final Query sanitizedQuery, final DedupeFilter filter, final QueryOptions sanitizedQueryOptions, final AdapterStore tempAdapterStore, boolean delete ) { final HBaseConstraintsQuery hbaseQuery = new HBaseConstraintsQuery( adapterIdsToQuery, index, sanitizedQuery, filter, sanitizedQueryOptions.getScanCallback(), sanitizedQueryOptions.getAggregation(), IndexMetaDataSet.getIndexMetadata( index, adapterIdsToQuery, statisticsStore, sanitizedQueryOptions.getAuthorizations()), DuplicateEntryCount.getDuplicateCounts( index, adapterIdsToQuery, statisticsStore, sanitizedQueryOptions.getAuthorizations()), sanitizedQueryOptions.getFieldIdsAdapterPair(), sanitizedQueryOptions.getAuthorizations()); hbaseQuery.setOptions(options); return hbaseQuery.query( operations, tempAdapterStore, sanitizedQueryOptions.getMaxResolutionSubsamplingPerDimension(), sanitizedQueryOptions.getLimit()); } @Override protected CloseableIterator<Object> queryRowPrefix( final PrimaryIndex index, final ByteArrayId rowPrefix, final QueryOptions sanitizedQueryOptions, final AdapterStore tempAdapterStore, final List<ByteArrayId> adapterIdsToQuery, boolean delete ) { final HBaseRowPrefixQuery<Object> prefixQuery = new HBaseRowPrefixQuery<Object>( index, rowPrefix, (ScanCallback<Object>) sanitizedQueryOptions.getScanCallback(), sanitizedQueryOptions.getLimit(), sanitizedQueryOptions.getAuthorizations()); prefixQuery.setOptions(options); return prefixQuery.query( operations, sanitizedQueryOptions.getMaxResolutionSubsamplingPerDimension(), tempAdapterStore); } @Override protected CloseableIterator<Object> queryRowIds( final DataAdapter<Object> adapter, final PrimaryIndex index, final List<ByteArrayId> rowIds, final DedupeFilter filter, final QueryOptions sanitizedQueryOptions, final AdapterStore tempAdapterStore, boolean delete ) { final HBaseRowIdsQuery<Object> q = new HBaseRowIdsQuery<Object>( adapter, index, rowIds, (ScanCallback<Object>) sanitizedQueryOptions.getScanCallback(), filter, sanitizedQueryOptions.getAuthorizations()); q.setOptions(options); return q.query( operations, tempAdapterStore, sanitizedQueryOptions.getMaxResolutionSubsamplingPerDimension(), -1); } @Override protected void addToBatch( final Closeable idxDeleter, final List<ByteArrayId> rowIds ) throws Exception { final List<Delete> deletes = new ArrayList<Delete>(); for (final ByteArrayId id : rowIds) { deletes.add(new Delete( id.getBytes())); } if (idxDeleter instanceof HBaseWriter) { ((HBaseWriter) idxDeleter).delete(deletes); } } @Override protected Closeable createIndexDeleter( final String indexTableName, final String[] authorizations ) throws Exception { return operations.createWriter( indexTableName, new String[] {}, false); } @Override protected boolean deleteAll( final String tableName, final String columnFamily, final String... additionalAuthorizations ) { HBaseWriter deleter = null; try { deleter = operations.createWriter( tableName, new String[] {}, false); final Scan scanner = new Scan(); try (ResultScanner results = operations.getScannedResults( scanner, tableName, additionalAuthorizations)) { for (final Result r : results) { final Delete delete = new Delete( r.getRow()); delete.addFamily(StringUtils.stringToBinary(columnFamily)); deleter.delete(delete); } } return true; } catch (final IOException e) { LOGGER.warn( "Unable to delete row from table [" + tableName + "].", e); return false; } finally { if (deleter != null) { deleter.close(); } } } @Override public List<InputSplit> getSplits( final DistributableQuery query, final QueryOptions queryOptions, final AdapterStore adapterStore, final DataStatisticsStore statsStore, final IndexStore indexStore, final Integer minSplits, final Integer maxSplits ) throws IOException, InterruptedException { return splitsProvider.getSplits( operations, query, queryOptions, adapterStore, statsStore, indexStore, indexMappingStore, minSplits, maxSplits); } @Override public RecordReader<GeoWaveInputKey, ?> createRecordReader( final DistributableQuery query, final QueryOptions queryOptions, final AdapterStore adapterStore, final DataStatisticsStore statsStore, final IndexStore indexStore, final boolean isOutputWritable, final InputSplit inputSplit ) throws IOException, InterruptedException { return new GeoWaveHBaseRecordReader( query, queryOptions, isOutputWritable, adapterStore, operations); } private class AltIndexCallback<T> implements IngestCallback<T>, Closeable, Flushable { private final WritableDataAdapter<T> adapter; private HBaseWriter altIdxWriter; private final String altIdxTableName; public AltIndexCallback( final String indexName, final WritableDataAdapter<T> adapter, final HBaseOptions hbaseOptions ) throws IOException { this.adapter = adapter; altIdxTableName = indexName + ALT_INDEX_TABLE; if (operations.tableExists(indexName)) { if (!operations.tableExists(altIdxTableName)) { throw new TableNotFoundException( altIdxTableName); } } else { // index table does not exist yet if (operations.tableExists(altIdxTableName)) { operations.deleteTable(altIdxTableName); LOGGER.warn("Deleting current alternate index table [" + altIdxTableName + "] as main table does not yet exist."); } } altIdxWriter = operations.createWriter( altIdxTableName, new String[] { adapter.getAdapterId().getString() }, hbaseOptions.isCreateTable()); } @Override public void close() throws IOException { altIdxWriter.close(); altIdxWriter = null; } @Override public void entryIngested( final DataStoreEntryInfo entryInfo, final T entry ) { HBaseUtils.writeAltIndex( adapter, entryInfo, entry, altIdxWriter); } @Override public void flush() { // HBase writer does not require/support flush } } }