/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.usergrid.persistence.core.astyanax;


import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.netflix.astyanax.Keyspace;
import com.netflix.astyanax.connectionpool.exceptions.ConnectionException;
import com.netflix.astyanax.model.Column;
import com.netflix.astyanax.model.ColumnFamily;
import com.netflix.astyanax.model.ColumnList;
import com.netflix.astyanax.model.ConsistencyLevel;
import com.netflix.astyanax.model.Rows;
import com.netflix.astyanax.query.RowSliceQuery;
import com.netflix.astyanax.util.RangeBuilder;


/**
 * Read-only iterator that pages over the columns of several Cassandra rows at once and
 * presents them as a single, de-duplicated, comparator-ordered stream of parsed values.
 *
 * <p>On each {@link #advance()} it issues one {@link RowSliceQuery} covering all
 * {@code rowKeys}, parses each returned {@link Column} with the supplied
 * {@link ColumnParser}, and — when more than one row returned columns — merges them in
 * sorted order via {@link Collections#binarySearch}, discarding duplicates (columns the
 * comparator marks equal).
 *
 * <p>Paging works by remembering the last value emitted ({@code startColumn}) and asking
 * {@link ColumnSearch#buildRange} to resume from it on the next query.  Because the start
 * column itself is returned again by that range, the page size is bumped by one
 * ({@code selectSize = pageSize + 1}) and the repeated first element is dropped.
 *
 * <p>NOTE(review): this class is not thread-safe — it holds mutable paging state
 * ({@code startColumn}, {@code moreToReturn}, {@code currentColumnIterator}) with no
 * synchronization; confine each instance to one thread.
 *
 * @param <R> row key type of the column family
 * @param <C> raw column name type as stored in Cassandra
 * @param <T> parsed value type emitted by this iterator
 */
public class MultiRowColumnIterator<R, C, T> implements Iterator<T> {

    private static final Logger logger = LoggerFactory.getLogger( MultiRowColumnIterator.class );

    // Number of results the caller wants per page; the actual query limit may be
    // pageSize + 1 to compensate for re-reading the resume column (see advance()).
    private final int pageSize;

    private final ColumnFamily<R, C> cf;

    // Converts a raw Cassandra column into the caller-visible value type T.
    private final ColumnParser<C, T> columnParser;

    // Builds the column range for each page, either from the beginning or resuming
    // from a given start value; also decides whether the first result must be skipped.
    private final ColumnSearch<T> columnSearch;

    // Ordering used both for merging multiple rows and for detecting duplicates
    // (compare == 0 means "same logical column" across rows).
    private final Comparator<T> comparator;

    // All rows queried in a single slice per page.
    private final Collection<R> rowKeys;

    private final Keyspace keyspace;

    private final ConsistencyLevel consistencyLevel;

    // Last value handed out by the previous page; null until the first page completes.
    // Used as the resume point for the next range query.
    private T startColumn;

    // True while the last page came back full (size == selectSize), i.e. Cassandra may
    // still hold more columns; false once a short page signals exhaustion.
    private boolean moreToReturn;

    // Iterator over the current merged page; replaced wholesale by advance().
    private Iterator<T> currentColumnIterator;

    /**
     * Remove after finding bug
     */

    //    private int advanceCount;
    //
    //    private final HashMap<T, SeekPosition> seenResults;

    /**
     * Complete Remove
     */


    /**
     * Create the iterator.
     *
     * <p>No query is issued here; the first Cassandra read happens lazily on the first
     * {@link #hasNext()} call.
     *
     * @param keyspace         keyspace to query
     * @param cf               column family holding the rows
     * @param consistencyLevel read consistency applied to every page query
     * @param columnParser     converts raw columns to values of type {@code T}
     * @param columnSearch     builds per-page column ranges and skip-first decisions
     * @param comparator       ordering/equality used to merge and de-duplicate rows
     * @param rowKeys          the rows whose columns are merged together
     * @param pageSize         desired number of results per Cassandra round trip
     */
    public MultiRowColumnIterator( final Keyspace keyspace, final ColumnFamily<R, C> cf,
                                   final ConsistencyLevel consistencyLevel, final ColumnParser<C, T> columnParser,
                                   final ColumnSearch<T> columnSearch, final Comparator<T> comparator,
                                   final Collection<R> rowKeys, final int pageSize ) {
        this.cf = cf;
        this.pageSize = pageSize;
        this.columnParser = columnParser;
        this.columnSearch = columnSearch;
        this.comparator = comparator;
        this.rowKeys = rowKeys;
        this.keyspace = keyspace;
        this.consistencyLevel = consistencyLevel;
        // Assume there is data until the first short page proves otherwise.
        this.moreToReturn = true;

        //        seenResults = new HashMap<>( pageSize * 10 );
    }


    /**
     * Returns true if another parsed value is available, fetching the next page from
     * Cassandra when the current one is exhausted and more data may remain.
     */
    @Override
    public boolean hasNext() {

        // Fetch lazily: on first use (iterator still null) or when the current page is
        // drained but the last query came back full, so Cassandra may have more.
        if ( currentColumnIterator == null || ( !currentColumnIterator.hasNext() && moreToReturn ) ) {
            advance();
        }

        return currentColumnIterator.hasNext();
    }


    /**
     * Returns the next value in merged comparator order.
     *
     * @throws NoSuchElementException when the underlying rows are exhausted
     */
    @Override
    public T next() {
        if ( !hasNext() ) {
            throw new NoSuchElementException( "No new element exists" );
        }

        final T next = currentColumnIterator.next();

        return next;
    }


    /**
     * Unsupported — this iterator is read-only.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException( "Remove is unsupported this is a read only iterator" );
    }


    /**
     * Fetch and merge the next page of columns from all rows, replacing
     * {@code currentColumnIterator} and updating the paging state
     * ({@code startColumn}, {@code moreToReturn}).
     */
    public void advance() {

        if (logger.isTraceEnabled()) logger.trace( "Advancing multi row column iterator" );

        /**
         * If the edge is present, we need to being seeking from this
         */

        final boolean skipFirstColumn = startColumn != null;

        // When resuming, the range re-includes startColumn, so over-fetch by one to
        // still end up with pageSize fresh results after dropping the duplicate.
        final int selectSize = skipFirstColumn ? pageSize + 1 : pageSize;

        final RangeBuilder rangeBuilder = new RangeBuilder();


        //set the range into the search
        if ( startColumn == null ) {
            columnSearch.buildRange( rangeBuilder );
        }
        else {
            columnSearch.buildRange( rangeBuilder, startColumn, null );
        }

        rangeBuilder.setLimit( selectSize );

        if (logger.isTraceEnabled()) logger.trace( "Executing cassandra query" );

        /**
         * Get our list of slices
         */
        // One slice query covers every row key; each row independently returns up to
        // selectSize columns within the range.
        final RowSliceQuery<R, C> query =
                keyspace.prepareQuery( cf ).setConsistencyLevel( consistencyLevel ).getKeySlice( rowKeys )
                        .withColumnRange( rangeBuilder.build() );

        final Rows<R, C> result;
        try {
            result = query.execute().getResult();
        }
        catch ( ConnectionException e ) {
            throw new RuntimeException( "Unable to connect to casandra", e );
        }


        //now aggregate them together

        //this is an optimization. It's faster to see if we only have values for one row,
        // then return the iterator of those columns than
        //do a merge if only one row has data.


        final List<T> mergedResults;

        if ( containsSingleRowOnly( result ) ) {
            mergedResults = singleRowResult( result );
        }
        else {
            mergedResults = mergeResults( result, selectSize );
        }


        //we've parsed everything truncate to the first pageSize, it's all we can ensure is correct without another
        //trip back to cassandra

        //discard our first element (maybe)


        final int size = mergedResults.size();

        // A full page means Cassandra may hold more columns; a short page means we are
        // done after draining this one.
        moreToReturn = size == selectSize;

        //we have a first column to to check
        if( size > 0) {

            final T firstResult = mergedResults.get( 0 );

            //The search has either told us to skip the first element, or it matches our last, therefore we disregard it
            if(columnSearch.skipFirst( firstResult ) || (skipFirstColumn && comparator.compare( startColumn, firstResult ) == 0)){
                mergedResults.remove( 0 );
            }
        }


        // Remember the last value of this page as the resume point for the next query.
        if(moreToReturn && mergedResults.size() > 0){
            startColumn = mergedResults.get( mergedResults.size() - 1 );
        }


        currentColumnIterator = mergedResults.iterator();

        if (logger.isTraceEnabled()) logger.trace( "Finished parsing {} rows for results", rowKeys.size() );
    }


    /**
     * Return true if we have < 2 rows with columns, false otherwise
     */
    private boolean containsSingleRowOnly( final Rows<R, C> result ) {

        int count = 0;

        for ( R key : result.getKeys() ) {
            if ( result.getRow( key ).getColumns().size() > 0 ) {
                count++;

                //we have more than 1 row with values, return them
                if ( count > 1 ) {
                    return false;
                }
            }
        }

        return true;
    }


    /**
     * A single row is present, only parse the single row
     *
     * <p>Fast path: no merge or duplicate detection is needed, so the row's columns are
     * parsed in the order Cassandra returned them.
     *
     * @param result the row slice result; at most one row is expected to have columns
     * @return the parsed columns of the first non-empty row, or an empty list if none
     */
    private List<T> singleRowResult( final Rows<R, C> result ) {

        if (logger.isTraceEnabled()) logger.trace( "Only a single row has columns.  Parsing directly" );

        for ( R key : result.getKeys() ) {
            final ColumnList<C> columnList = result.getRow( key ).getColumns();

            final int size = columnList.size();

            if ( size > 0 ) {

                final List<T> results = new ArrayList<>(size);

                for(Column<C> column: columnList){
                    results.add(columnParser.parseColumn( column ));
                }

                return results;
            }
        }

        //we didn't have any results, just return nothing
        return Collections.<T>emptyList();
    }


    /**
     * Multiple rows are present, merge them into a single result set
     *
     * <p>Values from all rows are inserted into one list kept in {@code comparator}
     * order via binary search.  A value already present (comparator returns 0) is
     * skipped, and the list is trimmed from the tail so it never exceeds
     * {@code maxSize} — only the first {@code maxSize} values can be guaranteed
     * correct without another Cassandra round trip.
     *
     * @param result  the row slice result containing columns from 2+ rows
     * @param maxSize maximum number of merged values to retain (the page's selectSize)
     * @return the merged, sorted, de-duplicated values, at most {@code maxSize} long
     */
    private List<T> mergeResults( final Rows<R, C> result, final int maxSize ) {

        if (logger.isTraceEnabled()) logger.trace( "Multiple rows have columns.  Merging" );


        final List<T> mergedResults = new ArrayList<>(maxSize);


        for ( final R key : result.getKeys() ) {
            final ColumnList<C> columns = result.getRow( key ).getColumns();


            for (final Column<C> column :columns ) {

                final T returnedValue = columnParser.parseColumn( column );

                //Use an O(log n) search, same as a tree, but with fast access to indexes for later operations
                int searchIndex = Collections.binarySearch( mergedResults, returnedValue, comparator );

                /**
                 * DO NOT remove this section of code. If you're seeing inconsistent results during shard transition,
                 * you'll
                 * need to enable this
                 */
                //
                //                if ( previous != null && comparator.compare( previous, returnedValue ) == 0 ) {
                //                    throw new RuntimeException( String.format(
                //                            "Cassandra returned 2 unique columns,
                // but your comparator marked them as equal.  This " +
                //                                    "indicates a bug in your comparator.  Previous value was %s and
                // current value is " +
                //                                    "%s",
                //                            previous, returnedValue ) );
                //                }
                //
                //                previous = returnedValue;

                //we've already seen it, no-op
                if(searchIndex > -1){
                    continue;
                }

                // binarySearch returns -(insertionPoint) - 1 on a miss, so this
                // recovers the insertion point that keeps the list sorted.
                final int insertIndex = (searchIndex+1)*-1;

                //it's at the end of the list, don't bother inserting just to remove it
                if(insertIndex >= maxSize){
                    continue;
                }

                if (logger.isTraceEnabled()) logger.trace( "Adding value {} to merged set at index {}", returnedValue, insertIndex );

                mergedResults.add( insertIndex, returnedValue );


                //prune the mergedResults
                while ( mergedResults.size() > maxSize ) {

                    if (logger.isTraceEnabled()) logger.trace( "Trimming results to size {}", maxSize );

                    //just remove from our tail until the size falls to the correct value
                    mergedResults.remove(mergedResults.size()-1);
                }
            }

            if (logger.isTraceEnabled()) logger.trace( "Candidate result set size is {}", mergedResults.size() );
        }
        return mergedResults;
    }
}