MultiRowShardColumnIterator.java example

Explorer
usergrid-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.usergrid.persistence.core.astyanax;


import java.util.*;

import com.google.common.base.Optional;
import org.apache.usergrid.persistence.core.shard.SmartShard;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.netflix.astyanax.Keyspace;
import com.netflix.astyanax.connectionpool.exceptions.ConnectionException;
import com.netflix.astyanax.model.Column;
import com.netflix.astyanax.model.ColumnFamily;
import com.netflix.astyanax.model.ColumnList;
import com.netflix.astyanax.model.ConsistencyLevel;
import com.netflix.astyanax.model.Rows;
import com.netflix.astyanax.query.RowSliceQuery;
import com.netflix.astyanax.util.RangeBuilder;


/**
 *
 *
 */
public class MultiRowShardColumnIterator<R, C, T> implements Iterator<T> {

    private static final Logger logger = LoggerFactory.getLogger( MultiRowShardColumnIterator.class );

    private final int pageSize;

    private final ColumnFamily<R, C> cf;


    private final ColumnParser<C, T> columnParser;

    private final ColumnSearch<T> columnSearch;

    private final Comparator<T> comparator;


    private final Keyspace keyspace;

    private final ConsistencyLevel consistencyLevel;

    private T startColumn;

    private boolean moreToReturn;

    private Iterator<T> currentColumnIterator;

    private Iterator<SmartShard> currentShardIterator;

    private List<SmartShard> rowKeysWithShardEnd;

    private SmartShard currentShard;

    private List<T> resultsTracking; // use for de-duping results that are possible during shard transition

    private int skipSize = 0; // used for determining if we've skipped a whole page during shard transition

    private boolean ascending = false;

    private Optional<Long> lastTimestamp;


    public MultiRowShardColumnIterator( final Keyspace keyspace, final ColumnFamily<R, C> cf,
                                        final ConsistencyLevel consistencyLevel, final ColumnParser<C, T> columnParser,
                                        final ColumnSearch<T> columnSearch, final Comparator<T> comparator,
                                        final int pageSize, final List<SmartShard> rowKeysWithShardEnd,
                                        final boolean ascending, final Optional<Long> lastTimestamp) {
        this.cf = cf;
        this.pageSize = pageSize;
        this.columnParser = columnParser;
        this.columnSearch = columnSearch;
        this.comparator = comparator;
        this.keyspace = keyspace;
        this.consistencyLevel = consistencyLevel;
        this.moreToReturn = true;
        this.rowKeysWithShardEnd = rowKeysWithShardEnd;
        this.resultsTracking = new ArrayList<>();
        this.ascending = ascending;
        this.lastTimestamp = lastTimestamp;


    }


    @Override
    public boolean hasNext() {

        // if column iterator is null, initialize with first call to advance()
        // advance if we know there more columns exist in the current shard but we've exhausted this page fetch from c*
        if ( currentColumnIterator == null || ( !currentColumnIterator.hasNext() && moreToReturn ) ) {
            advance();
        }

        // when there are no more columns, nothing reported to return, but more shards available, go to the next shard
        if( currentColumnIterator != null && !currentColumnIterator.hasNext() &&
            !moreToReturn && currentShardIterator.hasNext()){

            if(logger.isTraceEnabled()){
                logger.trace("Advancing shard iterator");
                logger.trace("Shard before advance: {}", currentShard);
            }


            // advance to the next shard
            currentShard = currentShardIterator.next();

            // handle marked deleted shards
            while ( currentShard.isDeleted() && currentShardIterator.hasNext()){

                if(logger.isTraceEnabled()) {
                    logger.trace("Shard is marked deleted, advancing to next - {}", currentShard);
                }

                currentShard = currentShardIterator.next();
            }

            // if the last shard is deleted, return false, there is no next to seek
            if ( currentShard.isDeleted() && !currentShardIterator.hasNext()){

                if(logger.isTraceEnabled()) {
                    logger.trace("Shard is marked deleted, and there are no more shards - {}", currentShard);
                }

                return false;
            }

            if(logger.isTraceEnabled()){
                logger.trace("Shard after advance: {}", currentShard);

            }

            advance();

        }
        return currentColumnIterator.hasNext();
    }


    @Override
    public T next() {
        if ( !hasNext() ) {
            throw new NoSuchElementException( "No new element exists" );
        }

        final T next = currentColumnIterator.next();


        return next;
    }


    @Override
    public void remove() {
        throw new UnsupportedOperationException( "Remove is unsupported this is a read only iterator" );
    }


    public void advance() {

        if (logger.isTraceEnabled()) logger.trace( "Advancing multi row column iterator" );

        /**
         * If the edge is present, we need to being seeking from this
         */

        final boolean skipFirstColumn = startColumn != null;

        final int selectSize = skipFirstColumn ? pageSize + 1 : pageSize;

        final RangeBuilder rangeBuilder = new RangeBuilder();

        SmartShard startShard = null;



        if(currentShardIterator == null){

            // create a copy that we use to search for our 'starting shard'
            final List<SmartShard> shards = new ArrayList<>(rowKeysWithShardEnd);


            // flip the order of our shards if ascending
            if(ascending){
                Collections.reverse(rowKeysWithShardEnd);
            }


            if(lastTimestamp.isPresent()) {

                //always seek from 0 to find out where our cursor last should fall
                Collections.reverse(shards);

                for ( SmartShard shard : shards){

                    if ( lastTimestamp.get().compareTo(shard.getShardIndex()) > 0) {
                        startShard = shard;
                    }

                }

            }

            currentShardIterator = rowKeysWithShardEnd.iterator();

        }

        if(currentShard == null){

            if(logger.isTraceEnabled()){
                logger.trace("currentShard: {}", currentShard);
            }

            currentShard = currentShardIterator.next();

            if (startShard != null){
                while(!currentShard.equals(startShard)){
                    currentShard = currentShardIterator.next();
                }
            }

            // skip over shards that are marked deleted
            while ( currentShard.isDeleted() && currentShardIterator.hasNext() ){

                if(logger.isTraceEnabled()){
                    logger.trace("Shard is marked deleted - {}", currentShard);
                }

                currentShard = currentShardIterator.next();
            }


            if(logger.isTraceEnabled()){
                logger.trace("all shards when starting: {}", rowKeysWithShardEnd);
                logger.trace("initializing iterator with shard: {}", currentShard);
            }


        }



        // initial request, build the range with no start and no end
        if ( startColumn == null && currentShard.getShardEnd() == null ){

            columnSearch.buildRange( rangeBuilder );

            if(logger.isTraceEnabled()){
                logger.trace("initial search (no start or shard end)");
            }

        }
        // if there's only a startColumn set the range start startColumn always
        else if ( startColumn != null && currentShard.getShardEnd() == null ){

            columnSearch.buildRange( rangeBuilder, startColumn, null );

            if(logger.isTraceEnabled()){
                logger.trace("search (no shard end) with start: {}", startColumn);
            }

        }
        // if there's only a shardEnd, set the start/end according based on the search order
        else if ( startColumn == null && currentShard.getShardEnd() != null ){

            T shardEnd = (T) currentShard.getShardEnd();

            // if we have a shardEnd and it's not an ascending search, use the shardEnd as a start
            if(!ascending) {

                columnSearch.buildRange(rangeBuilder, shardEnd, null);

                if(logger.isTraceEnabled()){
                    logger.trace("search descending with start: {}", shardEnd);
                }

            }
            // if we have a shardEnd and it is an ascending search, use the shardEnd as the end
            else{

                columnSearch.buildRange( rangeBuilder, null, shardEnd );

                if(logger.isTraceEnabled()){
                    logger.trace("search ascending with end: {}", shardEnd);
                }

            }

        }
        // if there's both a startColumn and a shardEnd, decide which should be used as start/end based on search order
        else if ( startColumn != null && currentShard.getShardEnd() != null) {

            T shardEnd = (T) currentShard.getShardEnd();


            // if the search is not ascending, set the start to be the older edge
            if(!ascending){

                T searchStart = comparator.compare(shardEnd, startColumn) > 0 ? shardEnd : startColumn;
                columnSearch.buildRange( rangeBuilder, searchStart, null);

                if(logger.isTraceEnabled()){
                    logger.trace("search descending with start: {} in shard", searchStart, currentShard);
                }

            }
            // if the search is ascending, then always use the startColumn for the start and shardEnd for the range end
            else{

                columnSearch.buildRange( rangeBuilder, startColumn , shardEnd);

                if(logger.isTraceEnabled()){
                    logger.trace("search with start: {}, end: {}", startColumn, shardEnd);
                }



            }

        }

        rangeBuilder.setLimit( selectSize );

        if (logger.isTraceEnabled()) logger.trace( "Executing cassandra query with shard {}", currentShard );

        /**
         * Get our list of slices
         */
        final RowSliceQuery<R, C> query =
            keyspace.prepareQuery( cf ).setConsistencyLevel( consistencyLevel ).getKeySlice( (R) currentShard.getRowKey() )
                .withColumnRange( rangeBuilder.build() );

        final Rows<R, C> result;
        try {
            result = query.execute().getResult();
        }
        catch ( ConnectionException e ) {
            throw new RuntimeException( "Unable to connect to casandra", e );
        }




        final List<T> mergedResults;

        skipSize = 0;

        mergedResults = processResults( result, selectSize );

        if(logger.isTraceEnabled()){
            logger.trace("skipped amount: {}", skipSize);
        }



        final int size = mergedResults.size();



        if(logger.isTraceEnabled()){
            logger.trace("current shard: {}, retrieved size: {}", currentShard, size);
            logger.trace("selectSize={}, size={}, ", selectSize, size);


        }

        moreToReturn = size == selectSize;

        if(selectSize == 1001 && mergedResults.size() == 1000){
            moreToReturn = true;
        }


        // if a whole page is skipped OR the result size equals the the difference of what's skipped,
        // it is likely during a shard transition and we should assume there is more to read
        if( skipSize == selectSize || skipSize == selectSize - 1 || size == selectSize - skipSize || size == (selectSize -1) - skipSize ){
            moreToReturn = true;
        }

        //we have a first column to to check
        if( size > 0) {

            final T firstResult = mergedResults.get( 0 );

            //The search has either told us to skip the first element, or it matches our last, therefore we disregard it
            if(columnSearch.skipFirst( firstResult ) || (skipFirstColumn && comparator.compare( startColumn, firstResult ) == 0)){
                if(logger.isTraceEnabled()){
                    logger.trace("removing an entry");

                }
                mergedResults.remove( 0 );
            }

        }


        // set the start column for the enxt query
        if(moreToReturn && mergedResults.size() > 0){
            startColumn = mergedResults.get( mergedResults.size()  - 1 );

        }


        currentColumnIterator = mergedResults.iterator();


        //force an advance of this iterator when there are still shards to read but result set on current shard is 0
        if(size == 0 && currentShardIterator.hasNext()){
            hasNext();
        }

        if(logger.isTraceEnabled()){
            logger.trace("currentColumnIterator.hasNext()={}, " +
                    "moreToReturn={}, currentShardIterator.hasNext()={}",
                currentColumnIterator.hasNext(), moreToReturn, currentShardIterator.hasNext());
        }


    }


    /**
     * Process the result set and filter any duplicates that may have already been seen in previous shards.  During
     * a shard transition, there could be the same columns in multiple shards (rows).  This will also allow for
     * filtering the startColumn (the seek starting point) when paging a row in Cassandra.
     *
     * @param result
     * @return
     */
    private List<T> processResults(final Rows<R, C> result, final int maxSize ) {

        final List<T> mergedResults = new ArrayList<>(maxSize);

        for ( final R key : result.getKeys() ) {
            final ColumnList<C> columns = result.getRow( key ).getColumns();


            for (final Column<C> column :columns  ) {

                final T returnedValue = columnParser.parseColumn( column );

                // use an O(log n) search, same as a tree, but with fast access to indexes for later operations
                int searchIndex = Collections.binarySearch( resultsTracking, returnedValue, comparator );


                //we've already seen the column, filter it out as we might be in a shard transition or our start column
                if(searchIndex > -1){
                    if(logger.isTraceEnabled()){
                        logger.trace("skipping column as it was already retrieved before");
                    }
                    skipSize++;
                    continue;
                }


                resultsTracking.add(returnedValue);
                mergedResults.add(returnedValue );


            }

            if (logger.isTraceEnabled()) logger.trace( "Candidate result set size is {}", mergedResults.size() );

        }
        return mergedResults;
    }

}