/* * * * Licensed to the Apache Software Foundation (ASF) under one * * or more contributor license agreements. See the NOTICE file * * distributed with this work for additional information * * regarding copyright ownership. The ASF licenses this file * * to you under the Apache License, Version 2.0 (the * * "License"); you may not use this file except in compliance * * with the License. You may obtain a copy of the License at * * * * http://www.apache.org/licenses/LICENSE-2.0 * * * * Unless required by applicable law or agreed to in writing, * * software distributed under the License is distributed on an * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * * KIND, either express or implied. See the License for the * * specific language governing permissions and limitations * * under the License. * */ package org.apache.usergrid.persistence.core.astyanax; import java.util.Collection; import java.util.Comparator; import java.util.Iterator; import java.util.NoSuchElementException; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.CountDownLatch; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.usergrid.persistence.core.rx.OrderedMerge; import rx.Observable; import rx.Subscriber; import rx.schedulers.Schedulers; /** * Simple iterator that wraps a collection of ColumnNameIterators. We do this because we can't page with a * multiRangeScan correctly for multiple round trips. As a result, we do this since only 1 iterator with minimum values * could potentially feed the entire result set. * * Compares the parsed values and puts them in order. If more than one row key emits the same value the first value is * selected, and ignored from subsequent iterators. */ public class MultiKeyColumnNameIterator<C, T> implements Iterable<T>, Iterator<T> { private static final Logger logger = LoggerFactory.getLogger( MultiKeyColumnNameIterator.class ); private Iterator<T> iterator; public MultiKeyColumnNameIterator( final Collection<ColumnNameIterator<C, T>> columnNameIterators, final Comparator<T> comparator, final int bufferSize ) { //optimization for single use case if ( columnNameIterators.size() == 1 ) { iterator = columnNameIterators.iterator().next(); return; } /** * We have more than 1 iterator, subscribe to all of them on their own thread so they can * produce in parallel. This way our inner iterator will be filled and processed the fastest */ Observable<T>[] observables = new Observable[columnNameIterators.size()]; int i = 0; for ( ColumnNameIterator<C, T> columnNameIterator : columnNameIterators ) { observables[i] = Observable.from( columnNameIterator ).subscribeOn( Schedulers.io() ); i++; } //merge them into 1 observable, and remove duplicates from the stream Observable<T> merged = OrderedMerge.orderedMerge( comparator, bufferSize, observables ).distinctUntilChanged(); InnerIterator innerIterator = new InnerIterator( bufferSize ); merged.subscribe( innerIterator ); iterator = innerIterator; } @Override public Iterator<T> iterator() { return this; } @Override public boolean hasNext() { return iterator.hasNext(); } @Override public T next() { return iterator.next(); } @Override public void remove() { throw new UnsupportedOperationException( "You cannot remove elements from a merged iterator, it is read only" ); } /** * Internal iterator that will put next elements into a blocking queue until it reaches capacity. At this point it * will block then emitting thread until more elements are taken. Assumed the Observable is run on a I/O thread, * NOT the current thread. */ private final class InnerIterator<T> extends Subscriber<T> implements Iterator<T> { private final CountDownLatch startLatch = new CountDownLatch( 1 ); /** * Use an ArrayBlockingQueue for faster access since our upper bounds is static */ private final ArrayBlockingQueue<T> queue; private Throwable error; private boolean done = false; private T next; private InnerIterator( int maxSize ) { queue = new ArrayBlockingQueue<>( maxSize ); } @Override public boolean hasNext() { //we're done if ( next != null ) { return true; } try { startLatch.await(); } catch ( InterruptedException e ) { throw new RuntimeException( "Unable to wait for start of submission" ); } //this is almost a busy wait, and is intentional, if we have nothing to poll, we want to get it as soon //as it's available. We generally only hit this once do { next = queue.poll(); } while ( next == null && !done ); return next != null; } @Override public T next() { if ( error != null ) { throw new RuntimeException( "An error occurred when populating the iterator", error ); } if ( !hasNext() ) { throw new NoSuchElementException( "No more elements are present" ); } T toReturn = next; next = null; return toReturn; } @Override public void remove() { throw new UnsupportedOperationException( "Remove is unsupported" ); } @Override public void onCompleted() { done = true; startLatch.countDown(); } @Override public void onError( final Throwable e ) { error = e; done = true; startLatch.countDown(); } @Override public void onNext( final T t ) { //may block if we get full, that's expected behavior try { if (logger.isTraceEnabled()) logger.trace( "Received element {}" , t ); queue.put( t ); } catch ( InterruptedException e ) { throw new RuntimeException( "Unable to insert to queue" ); } startLatch.countDown(); } } }