MergeSortIterator.java example

Explorer
blazegraph-master
- database-master
package com.bigdata.rdf.graph.impl.bd;

import java.util.Iterator;
import java.util.NoSuchElementException;

import org.openrdf.model.Value;

import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVUtility;

/**
 * An N-way merge sort of N source iterators visiting {@link Value}s (which are
 * actually {@link IV}s).
 * <p>
 * Each call to {@link #next()} returns the smallest value currently buffered
 * from the sources. When several sources are buffering values that compare as
 * equal, the value from the source with the lowest index is returned and the
 * equal values buffered from the other sources are discarded, so the same
 * value offered by several sources at the same time is visited once. Only the
 * buffered values are compared: repeated values produced by a single source
 * are not filtered.
 * <p>
 * <code>null</code> is used internally to mark an empty buffer slot, so any
 * <code>null</code> produced by a source iterator is skipped rather than
 * visited.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public class MergeSortIterator implements Iterator<Value> {

    /**
     * The #of source iterators.
     */
    private final int n;

    /**
     * The source iterators in the order given to the ctor.
     */
    private final Iterator<Value>[] sourceIterator;

    /**
     * The buffered value from each source and <code>null</code> if we need to
     * get another value from that source. The value for a source iterator that
     * has been exhausted will remain <code>null</code>. When all entries in
     * this array are <code>null</code> after an attempt to refill them there
     * are no more values to be visited and we are done.
     */
    private final Value[] sourceTuple;

    /**
     * Index into {@link #sourceIterator} and {@link #sourceTuple} of the
     * iterator whose value will be returned next -or- <code>-1</code> if we
     * need to choose the next value to be visited.
     */
    private int current = -1;

    /**
     * 
     * @param sourceIterators
     *            Each source iterator MUST be in ascending {@link Value} order.
     *            The array is retained by reference (it is not copied).
     * 
     * @throws IllegalArgumentException
     *             if the array or any of its elements is <code>null</code>.
     */
    public MergeSortIterator(final Iterator<Value>[] sourceIterators) {

        /*
         * Note: The null checks are real argument checks rather than asserts
         * so that a bad argument is reported here, even when assertions are
         * disabled, instead of as a NullPointerException during iteration.
         */
        if (sourceIterators == null) {

            throw new IllegalArgumentException("sourceIterators is null");

        }

        /*
         * Note: This remains an assert on purpose. With assertions disabled an
         * empty array simply yields an empty iteration and that behavior is
         * preserved.
         */
        assert sourceIterators.length > 0;

        this.n = sourceIterators.length;

        for (int i = 0; i < n; i++) {

            if (sourceIterators[i] == null) {

                throw new IllegalArgumentException("sourceIterators[" + i
                        + "] is null");

            }

        }

        this.sourceIterator = sourceIterators;

        sourceTuple = new Value[n];

    }

    /**
     * Return <code>true</code> iff there is another value to visit. As a side
     * effect this reads ahead on the source iterators and selects the value
     * that {@link #next()} will return; calling it again before
     * {@link #next()} does not read further.
     */
    @Override
    public boolean hasNext() {

        if (current != -1) {

            // Already selected by a previous call and not yet consumed.
            return true;

        }

        /*
         * First, make sure that we have a value for each source iterator
         * (unless that iterator is exhausted).
         * 
         * Note: A null returned by a source can not be buffered since null
         * marks an empty slot, so we keep reading until we get a non-null
         * value or the source is exhausted. This guarantees that every source
         * which still has values takes part in the selection below.
         */
        for (int i = 0; i < n; i++) {

            while (sourceTuple[i] == null && sourceIterator[i].hasNext()) {

                sourceTuple[i] = sourceIterator[i].next();

            }

        }

        /*
         * Now consider the buffered value for each source iterator in turn and
         * choose the _first_ iterator having a value which orders LTE all the
         * others. This is the next value to be visited by the aggregate
         * iterator.
         */
        Value key = null; // smallest key so far.

        for (int i = 0; i < n; i++) {

            final Value tmp = sourceTuple[i];

            if (tmp == null) {

                // This source is exhausted.
                continue;

            }

            /*
             * Note: This test MUST be strictly LT since LTE would break the
             * precedence in which we are processing the source iterators and
             * give us the key from the last source by preference when we need
             * the key from the first source by preference. clearCurrent()
             * also relies on ties being resolved to the lowest index.
             */
            if (current == -1 || compare(tmp, key) < 0) {

                current = i;

                key = tmp;

            }

        }

        // Nothing is buffered iff every source is exhausted.
        return current != -1;

    }

    /**
     * Return the next value in the merged order.
     * 
     * @throws NoSuchElementException
     *             if all source iterators are exhausted.
     */
    @Override
    public Value next() {

        if (!hasNext()) {

            throw new NoSuchElementException();

        }

        return consumeLookaheadTuple();

    }

    /**
     * Consume the {@link #current} source value. The caller MUST have
     * established that {@link #current} is not <code>-1</code>.
     * 
     * @return The {@link #current} tuple.
     */
    private Value consumeLookaheadTuple() {

        final Value t = sourceTuple[current];

        // clear the current tuple and any tuples from other sources having
        // the same key as the current tuple.
        clearCurrent();

        return t;

    }

    /**
     * <p>
     * Clear the current tuple together with the tuples buffered from other
     * sources having the same key as the current tuple (eliminates
     * duplicates), then reset {@link #current} so that the next call to
     * {@link #hasNext()} will refill the cleared slots and choose again.
     * </p>
     */
    protected void clearCurrent() {

        assert current != -1;

        final Value key = sourceTuple[current];

        /*
         * Note: Only sources after [current] are examined. hasNext() selects
         * the lowest index among equal keys, so a buffered value at a lower
         * index can not be equal to the current key.
         */
        for (int i = current + 1; i < n; i++) {

            final Value tmp = sourceTuple[i];

            if (tmp == null) {

                // this iterator is exhausted.
                continue;

            }

            if (compare(key, tmp) == 0) {

                // discard tuple.
                sourceTuple[i] = null;

            }

        }

        // clear the tuple that we are returning so that we will read
        // another from that source.
        sourceTuple[current] = null;

        // clear so that we will look again.
        current = -1;

    }

    /**
     * Not supported.
     * 
     * @throws UnsupportedOperationException
     *             always.
     */
    @Override
    public void remove() {

        throw new UnsupportedOperationException();

    }

    /**
     * Compare two {@link Value}s (which are actually {@link IV}s). Both
     * arguments are cast to {@link IV}, so a {@link Value} which is not an
     * {@link IV} results in a {@link ClassCastException}.
     * 
     * @return The result of {@link IVUtility#compare(IV, IV)} for the two
     *         values.
     */
    @SuppressWarnings("rawtypes")
    private int compare(final Value a, final Value b) {

        return IVUtility.compare((IV) a, (IV) b);

    }

}