package eu.fbk.knowledgestore.datastore.hbase;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import javax.annotation.Nullable;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import org.openrdf.model.URI;
import eu.fbk.knowledgestore.data.Record;
import eu.fbk.knowledgestore.datastore.hbase.utils.AbstractHBaseUtils;
/**
 * Performs batch lookup of records by ID from a given HBase table, optionally returning only a
 * subset of record properties.
 * <p>
 * The IDs to look up are copied when the iterator is created and are then consumed in batches
 * of at most {@code BATCH_SIZE} elements. Each batch is resolved with a single call to
 * {@code AbstractHBaseUtils.get(tableName, ids)}; the records it returns are buffered and handed
 * out one at a time. When a set of properties is supplied, projection is done on the client
 * side by calling {@code Record.retain(...)} on every fetched record.
 * </p>
 */
public class HBaseIterator extends AbstractIterator<Record> {

    /** The maximum number of IDs sent to HBase in a single lookup. */
    private static final int BATCH_SIZE = 100;

    /** Object referencing transactional layer. */
    private final AbstractHBaseUtils hbaseUtils;

    /** Name of the HBase table to be used. */
    private final String tableName;

    /** Properties to be looked up, or null if records must be returned unprojected. */
    private final URI[] properties;

    /** An iterator over the IDs of the records to look up. */
    private final Iterator<URI> idIterator;

    /** An iterator over records buffered in the last batch looked up from HBase. */
    private Iterator<Record> recordIterator;

    /**
     * Creates a new {@code HBaseIterator} for the parameters supplied.
     *
     * @param hbaseUtils
     *            the {@code AbstractHBaseUtils} object for accessing HBase, not null
     * @param tableName
     *            the name of the HBase table to access, not null
     * @param ids
     *            the IDs of records to fetch from HBase, not null; the set is copied, so later
     *            changes to it do not affect this iterator
     * @param properties
     *            the properties of records to return, null if all properties should be returned
     * @throws NullPointerException
     *             if {@code hbaseUtils}, {@code tableName} or {@code ids} is null
     */
    public HBaseIterator(final AbstractHBaseUtils hbaseUtils, final String tableName,
            final Set<? extends URI> ids, @Nullable final Set<? extends URI> properties) {

        // Validate all mandatory arguments before touching any state
        Preconditions.checkNotNull(hbaseUtils);
        Preconditions.checkNotNull(tableName);
        Preconditions.checkNotNull(ids);

        this.hbaseUtils = hbaseUtils;
        this.tableName = tableName;
        this.properties = properties == null ? null : Iterables.toArray(properties, URI.class);

        // The explicit type witness widens the element type to URI without an unchecked cast
        this.idIterator = ImmutableList.<URI>copyOf(ids).iterator();
        this.recordIterator = Collections.emptyIterator();
    }

    /**
     * Returns the next buffered record, fetching further batches from HBase as needed.
     *
     * @return the next record, or the end-of-data marker once all the IDs have been consumed
     * @throws RuntimeException
     *             wrapping the {@code IOException} raised by the HBase lookup, if any
     */
    @Override
    protected Record computeNext() {
        // A batch may yield no records at all, hence the loop: keep fetching until a record
        // becomes available or there are no more IDs to look up
        while (!this.recordIterator.hasNext()) {
            final List<URI> ids = nextIdBatch();
            if (ids.isEmpty()) {
                return endOfData();
            }
            this.recordIterator = fetch(ids).iterator();
        }
        return this.recordIterator.next();
    }

    /**
     * Extracts the next batch of at most {@code BATCH_SIZE} IDs from the ID iterator.
     *
     * @return the batch of IDs, empty if all the IDs have already been consumed
     */
    private List<URI> nextIdBatch() {
        final List<URI> ids = Lists.newArrayListWithCapacity(BATCH_SIZE);
        while (ids.size() < BATCH_SIZE && this.idIterator.hasNext()) {
            ids.add(this.idIterator.next());
        }
        return ids;
    }

    /**
     * Looks up the records for the IDs supplied and applies the client-side projection, if one
     * was requested.
     *
     * @param ids
     *            the IDs of the records to fetch, not null
     * @return the records returned by HBase, projected on the requested properties if any
     * @throws RuntimeException
     *             wrapping the {@code IOException} raised by the HBase lookup, if any
     */
    private List<Record> fetch(final List<URI> ids) {
        final List<Record> records;
        try {
            records = this.hbaseUtils.get(this.tableName, ids);
        } catch (final IOException ex) {
            // Same outcome as the deprecated Throwables.propagate for a checked exception
            throw new RuntimeException(ex);
        }

        if (this.properties == null) {
            return records;
        }

        // Collect projected records in a new list rather than writing back into the list
        // returned by the HBase layer, which this class does not own and may be unmodifiable
        final List<Record> projected = Lists.newArrayListWithCapacity(records.size());
        for (final Record record : records) {
            projected.add(record.retain(this.properties));
        }
        return projected;
    }
}