/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.core.client;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
import org.apache.accumulo.core.client.IteratorSetting.Column;
import org.apache.accumulo.core.client.sample.SamplerConfiguration;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.io.Text;
/**
* This interface hosts configuration methods that are shared between different types of scanners. A scanner is {@link Iterable} over {@link Key}/{@link Value}
* entries and is {@link AutoCloseable}.
*
*/
public interface ScannerBase extends Iterable<Entry<Key,Value>>, AutoCloseable {
/**
* Add a server-side scan iterator.
*
* @param cfg
* fully specified scan-time iterator, including all options for the iterator. Any changes to the iterator setting after this call are not propagated
* to the stored iterator.
* @throws IllegalArgumentException
* if the setting conflicts with existing iterators
*/
void addScanIterator(IteratorSetting cfg);
/**
* Remove an iterator from the list of iterators.
*
* @param iteratorName
* nickname used for the iterator
*/
void removeScanIterator(String iteratorName);
/**
* Update the options for an iterator. Note that this does <b>not</b> change the iterator options during a scan; it just replaces the given option on a
* configured iterator before a scan is started.
*
* @param iteratorName
* the name of the iterator to change
* @param key
* the name of the option
* @param value
* the new value for the named option
*/
void updateScanIteratorOption(String iteratorName, String key, String value);
/**
* Adds a column family to the list of columns that will be fetched by this scanner. By default, when no columns have been added, the scanner fetches all
* columns. To fetch multiple column families, call this function multiple times.
*
* <p>
* This can help limit which locality groups are read on the server side.
*
* <p>
* When used in conjunction with custom iterators, the set of column families fetched is passed to the top iterator's seek method. Custom iterators may change
* this set of column families when calling seek on their source.
*
* @param col
* the column family to be fetched
*/
void fetchColumnFamily(Text col);
/**
* Adds a column to the list of columns that will be fetched by this scanner. The column is identified by family and qualifier. By default, when no columns
* have been added, the scanner fetches all columns.
*
* <p>
* <b>WARNING</b>. Using this method with custom iterators may have unexpected results. Iterators have control over which column families are fetched. However,
* iterators have no control over which column qualifiers are fetched. When this method is called it activates a system iterator that only allows the
* requested family/qualifier pairs through. This low level filtering prevents custom iterators from requesting additional column families when calling seek.
*
* <p>
* For an example, assume fetchColumn(A, Q1) and fetchColumn(B, Q1) are called on a scanner and a custom iterator is configured. The families (A,B) will be
* passed to the seek method of the custom iterator. If the custom iterator seeks its source iterator using the families (A,B,C), it will never see any data
* from C because the system iterator filtering A:Q1 and B:Q1 will prevent the C family from getting through. ACCUMULO-3905 also has an example of the type of
* problem this method can cause.
*
* <p>
* tl;dr If using a custom iterator with a seek method that adds column families, then you may want to avoid using this method.
*
* @param colFam
* the column family of the column to be fetched
* @param colQual
* the column qualifier of the column to be fetched
*/
void fetchColumn(Text colFam, Text colQual);
/**
* Adds a column to the list of columns that will be fetched by this scanner.
*
* @param column
* the {@link Column} to fetch
* @since 1.7.0
*/
void fetchColumn(Column column);
/**
* Clears the columns to be fetched (useful for resetting the scanner for reuse). Once cleared, the scanner will fetch all columns.
*/
void clearColumns();
/**
* Clears scan iterators prior to returning a scanner to the pool.
*/
void clearScanIterators();
/**
* Returns an iterator over an accumulo table. This iterator uses the options that are currently set for its lifetime, so setting options will have no effect
* on existing iterators.
*
* <p>
* Keys returned by the iterator are not guaranteed to be in sorted order.
*
* @return an iterator over Key,Value pairs which meet the restrictions set on the scanner
*/
@Override
Iterator<Entry<Key,Value>> iterator();
/**
* This setting determines how long a scanner will automatically retry when a failure occurs. By default, a scanner will retry forever.
*
* <p>
* Setting the timeout to zero (with any time unit) or {@link Long#MAX_VALUE} (with {@link TimeUnit#MILLISECONDS}) means no timeout.
*
* @param timeOut
* the length of the timeout
* @param timeUnit
* the units of the timeout
* @since 1.5.0
*/
void setTimeout(long timeOut, TimeUnit timeUnit);
/**
* Returns the setting for how long a scanner will automatically retry when a failure occurs.
*
* @param timeUnit
* the units of the timeout
* @return the timeout configured for this scanner
* @since 1.5.0
*/
long getTimeout(TimeUnit timeUnit);
/**
* Closes any underlying connections on the scanner. This may invalidate any iterators derived from the Scanner, causing them to throw exceptions.
*
* @since 1.5.0
*/
@Override
void close();
/**
* Returns the authorizations that have been set on the scanner.
*
* @since 1.7.0
* @return The authorizations set on the scanner instance
*/
Authorizations getAuthorizations();
/**
* Setting this will cause the scanner to read sample data, as long as that sample data was generated with the given configuration. By default this is not set
* and all data is read.
*
* <p>
* One way to use this method is as follows, where the sampler configuration is obtained from the table configuration. Sample data can be generated in many
* different ways, so it's important to verify the sample data configuration meets expectations.
*
* <pre>
* <code>
* // could cache this if creating many scanners to avoid RPCs.
* SamplerConfiguration samplerConfig = connector.tableOperations().getSamplerConfiguration(table);
* // verify table's sample data is generated in an expected way before using
* userCode.verifySamplerConfig(samplerConfig);
* scanner.setSamplerConfiguration(samplerConfig);
* </code>
* </pre>
*
* <p>
* Of course this is not the only way to obtain a {@link SamplerConfiguration}, it could be a constant, configuration, etc.
*
* <p>
* If sample data is not present or sample data was generated with a different configuration, then the scanner iterator will throw a
* {@link SampleNotPresentException}. Also if a table's sampler configuration is changed while a scanner is iterating over a table, a
* {@link SampleNotPresentException} may be thrown.
*
* @param samplerConfig
* the configuration that the sample data to be read must have been generated with
* @since 1.8.0
*/
void setSamplerConfiguration(SamplerConfiguration samplerConfig);
/**
* Returns the sampler configuration currently set on this scanner.
*
* @return currently set sampler configuration. Returns null if no sampler configuration is set.
* @since 1.8.0
*/
SamplerConfiguration getSamplerConfiguration();
/**
* Clears sampler configuration making a scanner read all data. After calling this, {@link #getSamplerConfiguration()} should return null.
*
* @since 1.8.0
*/
void clearSamplerConfiguration();
/**
* This setting determines how long a scanner will wait to fill the returned batch. By default, a scanner waits until the batch is full.
*
* <p>
* Setting the timeout to zero (with any time unit) or {@link Long#MAX_VALUE} (with {@link TimeUnit#MILLISECONDS}) means no timeout.
*
* @param timeOut
* the length of the timeout
* @param timeUnit
* the units of the timeout
* @since 1.8.0
*/
void setBatchTimeout(long timeOut, TimeUnit timeUnit);
/**
* Returns the timeout to fill a batch in the given TimeUnit.
*
* @param timeUnit
* the units in which the batch timeout is returned
* @return the batch timeout configured for this scanner
* @since 1.8.0
*/
long getBatchTimeout(TimeUnit timeUnit);
/**
* Sets the name of the classloader context on this scanner. See the administration chapter of the user manual for details on how to configure and use
* classloader contexts.
*
* @param classLoaderContext
* name of the classloader context
* @throws NullPointerException
* if classLoaderContext is null
* @since 1.8.0
*/
void setClassLoaderContext(String classLoaderContext);
/**
* Clears the current classloader context set on this scanner.
*
* @since 1.8.0
*/
void clearClassLoaderContext();
/**
* Returns the name of the current classloader context set on this scanner.
*
* @return name of the current context
* @since 1.8.0
*/
String getClassLoaderContext();
}