/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.core.iterators;
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
/**
* An iterator that supports iterating over key and value pairs. Anything implementing this interface should return keys in sorted order.
* <p>
* Lifecycle, as laid out by the individual method contracts below: an iterator is configured with {@link #init}, positioned with {@link #seek}, and then
* read with {@link #hasTop()}, {@link #getTopKey()}, {@link #getTopValue()} and {@link #next()}. The read methods document an
* {@code IllegalStateException} when they are called before {@link #seek}. {@link #seek} may be called repeatedly with different parameters.
*
* @param <K>
* type of the keys returned by this iterator; must be a {@code WritableComparable}.
* @param <V>
* type of the values returned by this iterator; must be a {@code Writable}.
*/
public interface SortedKeyValueIterator<K extends WritableComparable<?>,V extends Writable> {
/**
* Initializes the iterator. Data should not be read from the source in this method.
*
* @param source
* {@code SortedKeyValueIterator} source to read data from.
* @param options
* {@code Map} of string option names to option values.
* @param env
* {@code IteratorEnvironment} environment in which the iterator is being run.
* @throws IOException
* unused.
* @throws IllegalArgumentException
* if there are problems with the options.
* @throws UnsupportedOperationException
* if not supported.
*/
void init(SortedKeyValueIterator<K,V> source, Map<String,String> options, IteratorEnvironment env) throws IOException;
/**
* Returns true if the iterator has more elements.
*
* @return {@code true} if the iterator has more elements.
* @throws IllegalStateException
* if called before seek.
*/
boolean hasTop();
/**
* Advances to the next K,V pair.
* <p>
* Note that in minor compaction scope and in non-full major compaction scopes the iterator may see deletion entries. These entries should be preserved by
* all iterators except ones that are strictly scan-time iterators that will never be configured for the minc or majc scopes. Deletion entries are only
* removed during full major compactions.
*
* @throws IOException
* if an I/O error occurs.
* @throws IllegalStateException
* if called before seek.
* @throws NoSuchElementException
* if the next element doesn't exist.
*/
void next() throws IOException;
/**
* Seeks to the first key in the Range, restricting the resulting K,V pairs to those with the specified columns. An iterator does not have to stop at the end
* of the range. The whole range is provided so that iterators can make optimizations.
* <p>
* Seek may be called multiple times with different parameters after {@link #init} is called.
* <p>
* Iterators that examine groups of adjacent key/value pairs (e.g. rows) to determine their top key and value should be sure that they properly handle a seek
* to a key in the middle of such a group (e.g. the middle of a row). Even if the client always seeks to a range containing an entire group (a,c), the tablet
* server could send back a batch of entries corresponding to (a,b], then reseek the iterator to range (b,c) when the scan is continued.
* <p>
* {@code columnFamilies} is used, at the lowest level, to determine which data blocks inside of an RFile need to be opened for this iterator. This set of
* data blocks is also the set of locality groups defined for the given table. If no columnFamilies are provided, the data blocks for all locality groups
* inside of the correct RFile will be opened and a seek performed on each of them in an attempt to find the correct start key, regardless of the startKey
* in the {@code range}.
* <p>
* In an Accumulo instance in which multiple locality groups exist for a table, it is important to ensure that {@code columnFamilies} is properly set to the
* minimum required column families to ensure that data from separate locality groups is not inadvertently read.
*
* @param range
* {@code Range} of keys to iterate over.
* @param columnFamilies
* {@code Collection} of column families to include or exclude.
* @param inclusive
* {@code boolean} that indicates whether to include ({@code true}) or exclude ({@code false}) the given column families.
* @throws IOException
* if an I/O error occurs.
* @throws IllegalArgumentException
* if there are problems with the parameters.
*/
void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException;
/**
* Returns top key. Can be called 0 or more times without affecting behavior of next() or hasTop().
* <p>
* Note that in minor compaction scope and in non-full major compaction scopes the iterator may see deletion entries. These entries should be preserved by
* all iterators except ones that are strictly scan-time iterators that will never be configured for the minc or majc scopes. Deletion entries are only
* removed during full major compactions.
* <p>
* For performance reasons, iterators reserve the right to reuse objects returned by {@code getTopKey} when {@link #next()} is called, changing the data that
* the object references. Iterators that need to save an object returned by {@code getTopKey} ought to copy the object's data into a new object in order to
* avoid aliasing bugs.
*
* @return the top key, of type {@code K}.
* @throws IllegalStateException
* if called before seek.
* @throws NoSuchElementException
* if the top element doesn't exist.
*/
K getTopKey();
/**
* Returns top value. Can be called 0 or more times without affecting behavior of next() or hasTop().
* <p>
* For performance reasons, iterators reserve the right to reuse objects returned by {@code getTopValue} when {@link #next()} is called, changing the
* underlying data that the object references. Iterators that need to save an object returned by {@code getTopValue} ought to copy the object's data into a
* new object in order to avoid aliasing bugs.
*
* @return the top value, of type {@code V}.
* @throws IllegalStateException
* if called before seek.
* @throws NoSuchElementException
* if the top element doesn't exist.
*/
V getTopValue();
/**
* Creates a deep copy of this iterator as though seek had not yet been called.
* <p>
* init should be called on an iterator before deepCopy is called. init should not need to be called on the copy that is returned by deepCopy; that is, when
* necessary init should be called in the deepCopy method on the iterator it returns. The behavior is unspecified if init is called after deepCopy either on
* the original or the copy. A proper implementation would call deepCopy on the source.
*
* @param env
* {@code IteratorEnvironment} environment in which the iterator is being run.
* @return {@code SortedKeyValueIterator} a copy of this iterator (with the same source and settings).
* @throws UnsupportedOperationException
* if not supported.
*/
SortedKeyValueIterator<K,V> deepCopy(IteratorEnvironment env);
}