package mil.nga.giat.geowave.datastore.accumulo.query; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.TreeSet; import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.iterators.Filter; import org.apache.accumulo.core.iterators.IteratorEnvironment; import org.apache.accumulo.core.iterators.SortedKeyValueIterator; import org.apache.hadoop.io.Text; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import mil.nga.giat.geowave.core.index.ByteArrayUtils; import mil.nga.giat.geowave.core.index.Mergeable; import mil.nga.giat.geowave.core.index.NumericIndexStrategy; import mil.nga.giat.geowave.core.index.Persistable; import mil.nga.giat.geowave.core.index.PersistenceUtils; import mil.nga.giat.geowave.core.store.adapter.AbstractAdapterPersistenceEncoding; import mil.nga.giat.geowave.core.store.adapter.DataAdapter; import mil.nga.giat.geowave.core.store.adapter.IndexedAdapterPersistenceEncoding; import mil.nga.giat.geowave.core.store.data.CommonIndexedPersistenceEncoding; import mil.nga.giat.geowave.core.store.data.PersistentDataset; import mil.nga.giat.geowave.core.store.data.PersistentValue; import mil.nga.giat.geowave.core.store.flatten.FlattenedUnreadData; import mil.nga.giat.geowave.core.store.index.CommonIndexModel; import mil.nga.giat.geowave.core.store.index.CommonIndexValue; import mil.nga.giat.geowave.core.store.index.PrimaryIndex; import mil.nga.giat.geowave.core.store.query.ConstraintsQuery; import mil.nga.giat.geowave.core.store.query.aggregate.Aggregation; import mil.nga.giat.geowave.core.store.util.DataStoreUtils; import mil.nga.giat.geowave.datastore.accumulo.util.AccumuloUtils; public class AggregationIterator extends Filter { private static final Logger LOGGER = LoggerFactory.getLogger(AggregationIterator.class); public static final String AGGREGATION_QUERY_ITERATOR_NAME = "GEOWAVE_AGGREGATION_ITERATOR"; public static final String AGGREGATION_OPTION_NAME = "AGGREGATION"; public static final String PARAMETER_OPTION_NAME = "PARAMETER"; public static final String ADAPTER_OPTION_NAME = "ADAPTER"; public static final String INDEX_STRATEGY_OPTION_NAME = "INDEX_STRATEGY"; public static final String CONSTRAINTS_OPTION_NAME = "CONSTRAINTS"; public static final String MAX_DECOMPOSITION_OPTION_NAME = "MAX_DECOMP"; public static final int AGGREGATION_QUERY_ITERATOR_PRIORITY = 25; protected QueryFilterIterator queryFilterIterator; private Aggregation aggregationFunction; private DataAdapter adapter; private boolean aggregationReturned = false; private Text startRowOfAggregation = null; private Text currentRow = new Text(); private SortedKeyValueIterator<Key, Value> parent = new SortedKeyValueIterator<Key, Value>() { @Override public void init( final SortedKeyValueIterator<Key, Value> source, final Map<String, String> options, final IteratorEnvironment env ) throws IOException { AggregationIterator.super.init( source, options, env); } @Override public boolean hasTop() { return AggregationIterator.super.hasTop(); } @Override public void next() throws IOException { AggregationIterator.super.next(); } @Override public void seek( final Range range, final Collection<ByteSequence> columnFamilies, final boolean inclusive ) throws IOException { AggregationIterator.super.seek( range, columnFamilies, inclusive); } @Override public Key getTopKey() { return AggregationIterator.super.getTopKey(); } @Override public Value getTopValue() { return AggregationIterator.super.getTopValue(); } @Override public SortedKeyValueIterator<Key, Value> deepCopy( final IteratorEnvironment env ) { return AggregationIterator.super.deepCopy(env); } }; private TreeSet<Range> ranges; @Override public boolean accept( final Key key, final Value value ) { if (queryFilterIterator != null) { final PersistentDataset<CommonIndexValue> commonData = new PersistentDataset<CommonIndexValue>(); key.getRow(currentRow); final FlattenedUnreadData unreadData = queryFilterIterator.aggregateFieldData( key, value, commonData); final CommonIndexedPersistenceEncoding encoding = QueryFilterIterator.getEncoding( currentRow, commonData, unreadData); boolean queryFilterResult = true; if (queryFilterIterator.isSet()) { queryFilterResult = queryFilterIterator.applyRowFilter(encoding); } if (queryFilterResult) { aggregateRow( currentRow, queryFilterIterator.model, encoding); } } // we don't want to return anything but the aggregation result return false; } public void setParent( final SortedKeyValueIterator<Key, Value> parent ) { this.parent = parent; } protected void aggregateRow( final Text currentRow, final CommonIndexModel model, final CommonIndexedPersistenceEncoding persistenceEncoding ) { if (adapter == null) { aggregationFunction.aggregate(persistenceEncoding); if (startRowOfAggregation == null) { startRowOfAggregation = currentRow; } } else if (persistenceEncoding.getAdapterId().getString().equals( adapter.getAdapterId().getString())) { final PersistentDataset<Object> adapterExtendedValues = new PersistentDataset<Object>(); if (persistenceEncoding instanceof AbstractAdapterPersistenceEncoding) { ((AbstractAdapterPersistenceEncoding) persistenceEncoding).convertUnknownValues( adapter, model); final PersistentDataset<Object> existingExtValues = ((AbstractAdapterPersistenceEncoding) persistenceEncoding) .getAdapterExtendedData(); if (existingExtValues != null) { for (final PersistentValue<Object> val : existingExtValues.getValues()) { adapterExtendedValues.addValue(val); } } } final IndexedAdapterPersistenceEncoding encoding = new IndexedAdapterPersistenceEncoding( persistenceEncoding.getAdapterId(), persistenceEncoding.getDataId(), persistenceEncoding.getIndexInsertionId(), persistenceEncoding.getDuplicateCount(), persistenceEncoding.getCommonData(), new PersistentDataset<byte[]>(), adapterExtendedValues); // the data adapter can't use the numeric index strategy and only // the common index model to decode which is the case for feature // data, we pass along a null strategy to eliminate the necessity to // send a serialization of the strategy in the options of this // iterator final Object row = adapter.decode( encoding, new PrimaryIndex( null, model)); if (row != null) { // for now ignore field info aggregationFunction.aggregate(row); if (startRowOfAggregation == null) { startRowOfAggregation = currentRow; } } } } public void setOptions( final Map<String, String> options ) { try { final String className = options.get(AGGREGATION_OPTION_NAME); aggregationFunction = PersistenceUtils.classFactory( className, Aggregation.class); final String parameterStr = options.get(PARAMETER_OPTION_NAME); if ((parameterStr != null) && !parameterStr.isEmpty()) { final byte[] parameterBytes = ByteArrayUtils.byteArrayFromString(parameterStr); final Persistable aggregationParams = PersistenceUtils.fromBinary( parameterBytes, Persistable.class); aggregationFunction.setParameters(aggregationParams); } if (options.containsKey(ADAPTER_OPTION_NAME)) { final String adapterStr = options.get(ADAPTER_OPTION_NAME); final byte[] adapterBytes = ByteArrayUtils.byteArrayFromString(adapterStr); adapter = PersistenceUtils.fromBinary( adapterBytes, DataAdapter.class); } // now go from index strategy, constraints, and max decomp to a set // of accumulo ranges final String indexStrategyStr = options.get(INDEX_STRATEGY_OPTION_NAME); final byte[] indexStrategyBytes = ByteArrayUtils.byteArrayFromString(indexStrategyStr); final NumericIndexStrategy strategy = PersistenceUtils.fromBinary( indexStrategyBytes, NumericIndexStrategy.class); final String contraintsStr = options.get(CONSTRAINTS_OPTION_NAME); final byte[] constraintsBytes = ByteArrayUtils.byteArrayFromString(contraintsStr); final List constraints = PersistenceUtils.fromBinary(constraintsBytes); final String maxDecomp = options.get(MAX_DECOMPOSITION_OPTION_NAME); Integer maxDecompInt = ConstraintsQuery.MAX_RANGE_DECOMPOSITION; if (maxDecomp != null) { try { maxDecompInt = Integer.parseInt(maxDecomp); } catch (final Exception e) { LOGGER.warn( "Unable to parse '" + MAX_DECOMPOSITION_OPTION_NAME + "' as integer", e); } } ranges = AccumuloUtils.byteArrayRangesToAccumuloRanges(DataStoreUtils.constraintsToByteArrayRanges( constraints, strategy, maxDecompInt)); } catch (final Exception e) { throw new IllegalArgumentException( e); } } @Override public Key getTopKey() { if (hasTopOriginal()) { return getTopOriginalKey(); } else if (hasTopStat()) { return getTopStatKey(); } return null; } @Override public Value getTopValue() { if (hasTopOriginal()) { return getTopOriginalValue(); } else if (hasTopStat()) { return getTopStatValue(); } return null; } @Override public boolean hasTop() { // firstly iterate through all of the original data values final boolean hasTopOriginal = hasTopOriginal(); if (hasTopOriginal) { return true; } return hasTopStat(); } @Override public void next() throws IOException { if (parent.hasTop()) { parent.next(); } else { // there's only one instance of stat that we want to return // return it and finish aggregationReturned = true; } } @Override public void init( final SortedKeyValueIterator<Key, Value> source, final Map<String, String> options, final IteratorEnvironment env ) throws IOException { setOptions(options); queryFilterIterator = new QueryFilterIterator(); queryFilterIterator.setOptions(options); parent.init( source, options, env); } protected Key getTopOriginalKey() { return parent.getTopKey(); } protected Value getTopOriginalValue() { return parent.getTopValue(); } protected boolean hasTopOriginal() { return parent.hasTop(); } protected Key getTopStatKey() { if (hasTopStat()) { return new Key( startRowOfAggregation); } return null; } protected Value getTopStatValue() { if (hasTopStat()) { final Mergeable result = aggregationFunction.getResult(); if (result == null) { return null; } return new Value( PersistenceUtils.toBinary(result)); } return null; } protected boolean hasTopStat() { return !aggregationReturned && (startRowOfAggregation != null); } @Override public SortedKeyValueIterator<Key, Value> deepCopy( final IteratorEnvironment env ) { final SortedKeyValueIterator<Key, Value> iterator = parent.deepCopy(env); deepCopyIterator(iterator); return iterator; } public void deepCopyIterator( final SortedKeyValueIterator<Key, Value> iterator ) { if (iterator instanceof AggregationIterator) { ((AggregationIterator) iterator).startRowOfAggregation = startRowOfAggregation; ((AggregationIterator) iterator).adapter = adapter; ((AggregationIterator) iterator).queryFilterIterator = queryFilterIterator; ((AggregationIterator) iterator).parent = parent; ((AggregationIterator) iterator).aggregationFunction = aggregationFunction; ((AggregationIterator) iterator).aggregationReturned = aggregationReturned; } } @Override protected void findTop() { QueryFilterIterator.findTopEnhanced( getSource(), this); } protected static void findEnd( final Iterator<Range> rangeIt, final Collection<Range> internalRanges, final Range seekRange ) { // find the first range in the set whose end key is after this // range's end key, clip its end to this range end if its start // is not also greater than this end, and stop // after that while (rangeIt.hasNext()) { final Range internalRange = rangeIt.next(); if ((internalRange.getEndKey() == null) || (internalRange.getEndKey().compareTo( seekRange.getEndKey()) > 0)) { if ((internalRange.getStartKey() != null) && (internalRange.getStartKey().compareTo( seekRange.getEndKey()) > 0)) { return; } else { internalRanges.add(new Range( internalRange.getStartKey(), seekRange.getEndKey())); return; } } else { internalRanges.add(internalRange); } } } protected static void findStart( final Iterator<Range> rangeIt, final Collection<Range> internalRanges, final Range seekRange ) { // find the first range whose end key is after this range's start key // and clip its start to this range start key, and start on that while (rangeIt.hasNext()) { final Range internalRange = rangeIt.next(); if ((internalRange.getEndKey() == null) || (internalRange.getEndKey().compareTo( seekRange.getStartKey()) > 0)) { if ((internalRange.getStartKey() != null) && (internalRange.getStartKey().compareTo( seekRange.getStartKey()) > 0)) { internalRanges.add(internalRange); return; } else { internalRanges.add(new Range( seekRange.getStartKey(), internalRange.getEndKey())); return; } } } } @Override public void seek( final Range seekRange, final Collection<ByteSequence> columnFamilies, final boolean inclusive ) throws IOException { aggregationReturned = false; aggregationFunction.clearResult(); startRowOfAggregation = null; Collection<Range> internalRanges = new ArrayList<Range>(); if (seekRange.isInfiniteStartKey()) { if (seekRange.isInfiniteStopKey()) { internalRanges = ranges; } else { findEnd( ranges.iterator(), internalRanges, seekRange); } } else if (seekRange.isInfiniteStopKey()) { final Iterator<Range> rangeIt = ranges.iterator(); findStart( rangeIt, internalRanges, seekRange); while (rangeIt.hasNext()) { internalRanges.add(rangeIt.next()); } } else { final Iterator<Range> rangeIt = ranges.iterator(); findStart( rangeIt, internalRanges, seekRange); findEnd( rangeIt, internalRanges, seekRange); } final Iterator<Range> rangeIt = internalRanges.iterator(); while (rangeIt.hasNext()) { parent.seek( rangeIt.next(), columnFamilies, inclusive); } } }