package com.feedly.cassandra.dao;
import java.util.AbstractCollection;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import me.prettyprint.cassandra.model.IndexedSlicesQuery;
import me.prettyprint.hector.api.beans.AbstractComposite.ComponentEquality;
import me.prettyprint.hector.api.beans.OrderedRows;
import me.prettyprint.hector.api.beans.Row;
import me.prettyprint.hector.api.factory.HFactory;
import com.feedly.cassandra.EConsistencyLevel;
import com.feedly.cassandra.IKeyspaceFactory;
import com.feedly.cassandra.entity.EntityMetadata;
import com.feedly.cassandra.entity.IndexMetadata;
import com.feedly.cassandra.entity.SimplePropertyMetadata;
/*
* used to fetch data using native cassandra indexes. Lazy loading is supported.
*/
class HashIndexFindHelper<K, V> extends LoadHelper<K, V>
{
private OperationStatistics _indexStats;
HashIndexFindHelper(EntityMetadata<V> meta, IKeyspaceFactory factory, int statsSize)
{
super(meta, factory, statsSize);
_indexStats = new OperationStatistics(statsSize);
}
private V uniqueValue(Collection<V> values)
{
if(values == null || values.isEmpty())
return null;
if(values.size() > 1)
throw new IllegalStateException("non-unique value");
return values.iterator().next();
}
public OperationStatistics indexStats()
{
return _indexStats;
}
public V find(V template, FindOptions options, IndexMetadata index)
{
return uniqueValue(mfind(template, options, index));
}
public Collection<V> mfind(V template, FindOptions options, IndexMetadata index)
{
Collection<V> values = null;
switch(options.getColumnFilterStrategy())
{
case UNFILTERED:
values = bulkFindByIndexPartial(template, null, null, null, null, null, options.getMaxRows(), index, options.getConsistencyLevel());
break;
case RANGE:
byte[] startCol = propertyName(options.getStartColumn(), ComponentEquality.EQUAL);
byte[] endCol = propertyName(options.getEndColumn(), ComponentEquality.GREATER_THAN_EQUAL);
values = bulkFindByIndexPartial(template, startCol, endCol, null, null, null, options.getMaxRows(), index, options.getConsistencyLevel());
break;
case INCLUDES:
values = mfind(template, options.getIncludes(), options.getExcludes(), options.getMaxRows(), index, options.getConsistencyLevel());
break;
}
_stats.incrNumOps(1);
_indexStats.incrNumOps(1);
return values;
}
private Collection<V> mfind(V template, Set<? extends Object> includes, Set<String> excludes, int maxRows, IndexMetadata index, EConsistencyLevel level)
{
if(includes != null && excludes != null)
throw new IllegalArgumentException("either includes or excludes should be specified, not both");
if(includes != null && excludes != null)
throw new IllegalArgumentException("either includes or excludes should be specified, not both");
List<byte[]> colNames = new ArrayList<byte[]>();
List<byte[]> counterColNames = new ArrayList<byte[]>();
List<CollectionRange> ranges = derivePartialColumns(colNames, counterColNames, includes, excludes);
return bulkFindByIndexPartial(template, null, null, colNames, counterColNames, ranges, maxRows, index, level);
}
private Collection<V> bulkFindByIndexPartial(V template,
byte[] startBytes,
byte[] endBytes,
List<byte[]> colNames,
List<byte[]> counterColNames,
List<CollectionRange> ranges,
int maxRows,
IndexMetadata index,
EConsistencyLevel level)
{
SimplePropertyMetadata pm = index.getIndexedProperties().get(0); //must be exactly 1
Object propVal = invokeGetter(pm, template);
if(propVal == null)
throw new IllegalArgumentException("null values not supported for hash indexes");
IndexedSlicesQuery<byte[], byte[], byte[]> query = HFactory.createIndexedSlicesQuery(_keyspaceFactory.createKeyspace(level), SER_BYTES, SER_BYTES, SER_BYTES);
query.setColumnFamily(_entityMeta.getFamilyName());
query.setRowCount(CassandraDaoBase.ROW_RANGE_SIZE);
query.addEqualsExpression(pm.getPhysicalNameBytes(), serialize(propVal, false, pm.getSerializer()));
if(colNames != null)
query.setColumnNames(colNames);
else
query.setRange(startBytes, endBytes, false, CassandraDaoBase.COL_RANGE_SIZE);
return new LazyLoadedCollection(query,
startBytes,
endBytes,
ranges,
new EqualityValueFilter<V>(_entityMeta, template, index),
counterColNames,
maxRows,
index,
level);
}
@SuppressWarnings("unchecked")
private byte[] fetchBatch(IndexedSlicesQuery<byte[],byte[],byte[]> query,
byte[] startRowKey,
byte[] startColBytes,
byte[] endColBytes,
K lastKey,
int maxRows,
List<V> values,
List<byte[]> counterColNames,
List<CollectionRange> ranges,
EConsistencyLevel level)
{
long indexStartTime = System.nanoTime();
SimplePropertyMetadata keyMeta = _entityMeta.getKeyMetadata();
int fetchRowCount = Math.min(maxRows, CassandraDaoBase.ROW_RANGE_SIZE);
query.setRowCount(fetchRowCount);
if(startRowKey != null)
query.setStartKey(startRowKey);
OrderedRows<byte[],byte[],byte[]> rows = query.execute().get();
long indexEndTime = System.nanoTime();
long startTime = indexEndTime;
List<K> keys = new ArrayList<K>();
K first = null, last = null;
/*
* the last key of the previous range and the first key of the current range may overlap
*/
for(Row<byte[], byte[], byte[]> row : rows)
{
K key = (K) keyMeta.getSerializer().fromBytes(row.getKey());
if(lastKey != null)
{
if(key.equals(lastKey))
{
lastKey = null;
continue;
}
lastKey = null;
}
V value = fromColumnSlice(key, null, keyMeta, row.getKey(), null, row.getColumnSlice(), endColBytes, level);
keys.add(key);
values.add(value);
if(first == null)
first = key;
last = key;
}
_logger.debug("range {} - {}", first, last);
if(ranges != null && !values.isEmpty())
{
_logger.debug("adding full collections to {} values: ({})", values.size(), ranges);
addCollectionRanges(keys, values, ranges, level);
}
int cnt = rows.getCount();
byte[] lastKeyBytes = cnt == 0 ? null : rows.getList().get(cnt - 1).getKey();
//if counters exist and must be fetched
if(_entityMeta.hasCounterColumns() && (counterColNames == null || !counterColNames.isEmpty()))
bulkLoadFromMultiCounterGet(keys, values, counterColNames, startColBytes, endColBytes, true, level);
int nonNull = 0;
for(int i = values.size() - 1; i >= 0; i--)
{
if(values.get(i) == null)
values.remove(i);
else
nonNull++;
}
int size = values.size();
_stats.addRecentTiming(System.nanoTime() - startTime);
_indexStats.addRecentTiming(indexEndTime - indexStartTime);
_stats.incrNumRows(size);
_indexStats.incrNumRows(size);
_indexStats.incrNumCassandraOps(1);
_logger.debug("{} rows, {} values, ({} non null) fetched", new Object[] {cnt, size, nonNull});
return lastKeyBytes;
}
private class LazyLoadedIterator implements Iterator<V>
{
private final LazyLoadedCollection _parent;
private final IndexedSlicesQuery<byte[],byte[],byte[]> _query;
private final IValueFilter<V> _filter;
private final IndexMetadata _index;
private byte[] _nextStartKey;
private final byte[] _startCol, _endCol;
private int _remRows; //remaining rows left to fetch, based on max set by user and if the last batch fetched was maximal
private List<V> _current;
private Iterator<V> _currentIter;
private V _next;
private final List<byte[]> _counterColNames;
private List<CollectionRange> _ranges;
private int _iteratedCount = 0;
private final EConsistencyLevel _level;
public LazyLoadedIterator(LazyLoadedCollection parent,
List<V> first,
IndexedSlicesQuery<byte[],byte[],byte[]> query,
byte[] nextStartKey,
byte[] startCol,
byte[] endCol,
List<byte[]> counterColNames,
List<CollectionRange> ranges,
IValueFilter<V> filter,
int maxRows,
IndexMetadata index,
EConsistencyLevel level)
{
_parent = parent;
_filter = filter;
_index = index;
_current = new ArrayList<V>(first);
_currentIter = _current.iterator();
_next = _currentIter.next();
_query = query;
_nextStartKey = nextStartKey;
_ranges = ranges;
_counterColNames = counterColNames;
_level = level;
if(first.size() < CassandraDaoBase.ROW_RANGE_SIZE)
_remRows = 0;
else
_remRows = maxRows - first.size();
_startCol = startCol;
_endCol = endCol;
}
@Override
public boolean hasNext()
{
return _next != null;
}
@SuppressWarnings("unchecked")
@Override
public V next()
{
if(_next == null)
throw new NoSuchElementException();
_iteratedCount++;
V rv = _next;
V next = null;
while(next == null)
{
while(_currentIter.hasNext())
{
next = _currentIter.next();
if(_filter.isFiltered(indexedValue(next, _index)) == EFilterResult.PASS)
break;
else
_logger.debug("filtered {}", next);
}
if(next == null)
{
if(_remRows > 0) //fetch next batch
{
SimplePropertyMetadata keyMeta = _entityMeta.getKeyMetadata();
V last = _current.get(_current.size() - 1);
K lastKey = (K) invokeGetter(keyMeta, last);
_current.clear();
_nextStartKey = fetchBatch(_query, _nextStartKey, _startCol, _endCol, lastKey, _remRows, _current, _counterColNames, _ranges, _level);
int cnt = _current.size();
if(cnt == 0) //get yielded no rows
{
next = null;
_remRows = 0;
_logger.debug("empty fetch, no more values");
break;
}
else
{
if(cnt < CassandraDaoBase.ROW_RANGE_SIZE - 1) //allow for query range boundary duplications
_remRows = 0;
else
_remRows -= cnt;
_currentIter = _current.iterator();
}
}
else //no more rows
{
_logger.debug("remaining rows zero");
break;
}
}
}
if(next == null) //have iterated through all results, cache size
_parent.setSize(_iteratedCount);
_next = next;
return rv;
}
@Override
public void remove()
{
throw new UnsupportedOperationException();
}
}
private IndexedValue<V> indexedValue(V value, IndexMetadata index)
{
SimplePropertyMetadata pm = index.getIndexedProperties().get(0);
return new IndexedValue<V>(Collections.singletonList(invokeGetter(pm, value)), value);
}
private class LazyLoadedCollection extends AbstractCollection<V>
{
private byte[] _nextRowKeyBytes;
private byte[] _startColBytes;
private byte[] _endColBytes;
private int _maxRows;
private IndexedSlicesQuery<byte[], byte[], byte[]> _query;
private final IValueFilter<V> _filter;
private final IndexMetadata _index;
private Integer _size;
private List<V> _all = null; //if it is known all rows have been fetched, this field is set
private List<V> _first = new ArrayList<V>();
private final List<CollectionRange> _ranges;
private final EConsistencyLevel _level;
private final List<byte[]> _counterColNames;
@SuppressWarnings("unchecked")
public LazyLoadedCollection(IndexedSlicesQuery<byte[], byte[], byte[]> query,
byte[] startColBytes,
byte[] endColBytes,
List<CollectionRange> ranges,
IValueFilter<V> filter,
List<byte[]> counterColNames,
int maxRows,
IndexMetadata index,
EConsistencyLevel level)
{
_query = query;
_startColBytes = startColBytes;
_endColBytes = endColBytes;
_maxRows = maxRows;
_filter = filter;
_counterColNames = counterColNames;
_index = index;
_ranges = ranges;
_level = level;
K lastKey = null;
while(true) //loop until unfiltered rows are found
{
_nextRowKeyBytes = fetchBatch(query, _nextRowKeyBytes,
startColBytes, endColBytes,
lastKey, maxRows, _first,
_counterColNames, _ranges,
_level);
if(_first.isEmpty())
break;
Iterator<V> iter = _first.iterator();
if(!_first.isEmpty())
lastKey = (K) invokeGetter(_entityMeta.getKeyMetadata(), _first.get(_first.size() - 1));
while(iter.hasNext())
{
V next = iter.next();
if(filter.isFiltered(indexedValue(next, index)) != EFilterResult.PASS)
iter.remove();
}
if(!_first.isEmpty())
break;
}
if(_first.size() == maxRows || _first.isEmpty()) //loaded all rows or none exist
{
_all = _first;
}
}
//override, don't want to invoke size, just to check if empty
@Override
public boolean isEmpty()
{
return _first.isEmpty();
}
void setSize(int size)
{
_size = size;
}
//may aggressively fetch and retain all values, use with caution
@Override
public int size()
{
if(_size != null)
return _size;
if(_all == null)
{
Iterator<V> iter = iterator();
_all = new ArrayList<V>();
while(iter.hasNext())
_all.add(iter.next());
}
return _all.size();
}
@Override
public java.util.Iterator<V> iterator()
{
if(_all != null)
return _all.iterator();
return new LazyLoadedIterator(this, _first, _query, _nextRowKeyBytes, _startColBytes, _endColBytes, _counterColNames, _ranges, _filter, _maxRows, _index, _level);
}
}
}