/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.accumulo.io;
import com.facebook.presto.accumulo.Types;
import com.facebook.presto.accumulo.model.AccumuloColumnConstraint;
import com.facebook.presto.accumulo.model.AccumuloColumnHandle;
import com.facebook.presto.accumulo.serializers.AccumuloRowSerializer;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.RecordCursor;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.spi.type.VarbinaryType;
import com.facebook.presto.spi.type.VarcharType;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.FirstEntryInRowIterator;
import org.apache.accumulo.core.iterators.user.WholeRowIterator;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.Text;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import static com.facebook.presto.accumulo.AccumuloErrorCode.IO_ERROR;
import static com.facebook.presto.accumulo.io.AccumuloPageSink.ROW_ID_COLUMN;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.spi.type.BooleanType.BOOLEAN;
import static com.facebook.presto.spi.type.DateType.DATE;
import static com.facebook.presto.spi.type.DoubleType.DOUBLE;
import static com.facebook.presto.spi.type.IntegerType.INTEGER;
import static com.facebook.presto.spi.type.RealType.REAL;
import static com.facebook.presto.spi.type.SmallintType.SMALLINT;
import static com.facebook.presto.spi.type.TimeType.TIME;
import static com.facebook.presto.spi.type.TimestampType.TIMESTAMP;
import static com.facebook.presto.spi.type.TinyintType.TINYINT;
import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
/**
* Implementation of Presto RecordCursor, responsible for iterating over a Presto split,
* reading rows of data and then implementing various methods to retrieve columns within each row.
*
* @see AccumuloRecordSet
* @see AccumuloRecordSetProvider
*/
public class AccumuloRecordCursor
        implements RecordCursor
{
    // The WholeRowIterator is installed with the largest possible priority value
    private static final int WHOLE_ROW_ITERATOR_PRIORITY = Integer.MAX_VALUE;

    private final List<AccumuloColumnHandle> columnHandles;
    // Maps a field ordinal to the Presto column name used to query the serializer
    private final String[] fieldToColumnName;
    private final BatchScanner scanner;
    private final Iterator<Entry<Key, Value>> iterator;
    private final AccumuloRowSerializer serializer;

    private long bytesRead;

    // System.nanoTime() has an arbitrary origin and may return zero or negative values,
    // so explicit flags (not sentinel timestamps) record whether the clock started/stopped
    private boolean readStarted;
    private boolean readFinished;
    private long nanoStart;
    private long nanoEnd;

    /**
     * Creates a cursor over the given scanner, configuring both the scanner and the serializer
     * for the requested columns and immediately opening the scanner's iterator.
     *
     * @param serializer Serializer used to decode the entries of each row; it is configured by this constructor
     * @param scanner Scanner to read from; scan iterators and fetched columns are added to it
     * @param rowIdName Presto name of the row ID column
     * @param columnHandles Columns to be read, in field-ordinal order
     * @param constraints Column constraints; only validated to be non-null by this class
     * @throws NullPointerException If any argument is null
     */
    public AccumuloRecordCursor(
            AccumuloRowSerializer serializer,
            BatchScanner scanner,
            String rowIdName,
            List<AccumuloColumnHandle> columnHandles,
            List<AccumuloColumnConstraint> constraints)
    {
        // Validate every argument before mutating the serializer or the scanner
        requireNonNull(rowIdName, "rowIdName is null");
        requireNonNull(constraints, "constraints is null");
        this.columnHandles = requireNonNull(columnHandles, "columnHandles is null");
        this.scanner = requireNonNull(scanner, "scanner is null");
        this.serializer = requireNonNull(serializer, "serializer is null");

        this.serializer.setRowIdName(rowIdName);

        if (retrieveOnlyRowIds(rowIdName)) {
            // Only the first entry of each row is needed when nothing but the row ID is read
            this.scanner.addScanIterator(new IteratorSetting(1, "firstentryiter", FirstEntryInRowIterator.class));

            fieldToColumnName = new String[1];
            fieldToColumnName[0] = rowIdName;

            // Set a flag on the serializer saying we are only going to be retrieving the row ID
            this.serializer.setRowOnly(true);
        }
        else {
            // Else, we will be scanning some more columns here
            this.serializer.setRowOnly(false);

            // Fetch the reserved row ID column
            this.scanner.fetchColumn(ROW_ID_COLUMN, ROW_ID_COLUMN);

            // Reusable Text instances for the fetchColumn calls below
            Text family = new Text();
            Text qualifier = new Text();

            // Create an array which maps the column ordinal to the name of the column
            fieldToColumnName = new String[columnHandles.size()];
            for (int i = 0; i < columnHandles.size(); ++i) {
                AccumuloColumnHandle columnHandle = columnHandles.get(i);
                fieldToColumnName[i] = columnHandle.getName();

                // Make sure to skip the row ID!
                if (!columnHandle.getName().equals(rowIdName)) {
                    // Set the mapping of presto column name to the family/qualifier
                    this.serializer.setMapping(columnHandle.getName(), columnHandle.getFamily().get(), columnHandle.getQualifier().get());

                    // Set our scanner to fetch this family/qualifier column
                    // This will help us prune which data we receive from Accumulo
                    family.set(columnHandle.getFamily().get());
                    qualifier.set(columnHandle.getQualifier().get());
                    this.scanner.fetchColumn(family, qualifier);
                }
            }
        }

        // Each scanner entry is decoded with WholeRowIterator.decodeRow in advanceNextPosition
        IteratorSetting setting = new IteratorSetting(WHOLE_ROW_ITERATOR_PRIORITY, WholeRowIterator.class);
        this.scanner.addScanIterator(setting);

        iterator = this.scanner.iterator();
    }

    /**
     * @return Always zero, as the total number of bytes is unknown
     */
    @Override
    public long getTotalBytes()
    {
        return 0L; // unknown value
    }

    /**
     * @return Sum of the key and value sizes of every entry deserialized so far
     */
    @Override
    public long getCompletedBytes()
    {
        return bytesRead;
    }

    /**
     * Gets the time elapsed since the first call to {@link #advanceNextPosition()}.
     *
     * @return Zero if reading has not started; otherwise the nanoseconds elapsed between the first
     * advance and either {@link #close()} or, if the cursor is still open, the current time
     */
    @Override
    public long getReadTimeNanos()
    {
        if (!readStarted) {
            return 0L;
        }
        long end = readFinished ? nanoEnd : System.nanoTime();
        return end - nanoStart;
    }

    /**
     * Gets the Presto type of the given field.
     *
     * @param field Ordinal of the field
     * @return Type of the column handle at that ordinal
     * @throws IllegalArgumentException If the ordinal is out of range
     */
    @Override
    public Type getType(int field)
    {
        checkArgument(field >= 0 && field < columnHandles.size(), "Invalid field index");
        return columnHandles.get(field).getType();
    }

    /**
     * Moves to the next row, resetting the serializer and deserializing every entry of that row.
     *
     * @return True if a row was loaded, false if the scanner is exhausted
     * @throws PrestoException If the encoded row cannot be decoded or deserialized
     */
    @Override
    public boolean advanceNextPosition()
    {
        // Start the read clock on the first advance only
        if (!readStarted) {
            nanoStart = System.nanoTime();
            readStarted = true;
        }

        try {
            if (!iterator.hasNext()) {
                return false;
            }

            serializer.reset();
            Entry<Key, Value> row = iterator.next();
            // The scanner entry encodes a whole row; decode it into its individual key/value entries
            for (Entry<Key, Value> entry : WholeRowIterator.decodeRow(row.getKey(), row.getValue()).entrySet()) {
                bytesRead += entry.getKey().getSize() + entry.getValue().getSize();
                serializer.deserialize(entry);
            }
            return true;
        }
        catch (IOException e) {
            throw new PrestoException(IO_ERROR, "Caught IO error from serializer on read", e);
        }
    }

    /**
     * Checks whether the given field is null in the current row.
     *
     * @param field Ordinal of the field
     * @return True if the serializer reports the column as null
     * @throws IllegalArgumentException If the ordinal is out of range
     */
    @Override
    public boolean isNull(int field)
    {
        // Same bounds check as getType, so a negative ordinal fails with IllegalArgumentException too
        checkArgument(field >= 0 && field < columnHandles.size(), "Invalid field index");
        return serializer.isNull(fieldToColumnName[field]);
    }

    /**
     * @param field Ordinal of a BOOLEAN field
     * @return Value of the field in the current row
     * @throws IllegalArgumentException If the field is not of type BOOLEAN
     */
    @Override
    public boolean getBoolean(int field)
    {
        checkFieldType(field, BOOLEAN);
        return serializer.getBoolean(fieldToColumnName[field]);
    }

    /**
     * @param field Ordinal of a DOUBLE field
     * @return Value of the field in the current row
     * @throws IllegalArgumentException If the field is not of type DOUBLE
     */
    @Override
    public double getDouble(int field)
    {
        checkFieldType(field, DOUBLE);
        return serializer.getDouble(fieldToColumnName[field]);
    }

    /**
     * Gets the long representation of a field whose type is one of BIGINT, DATE, INTEGER, REAL,
     * SMALLINT, TIME, TIMESTAMP or TINYINT.
     * <p>
     * DATE is returned as the serializer's date converted from milliseconds to days, REAL as the
     * int bits of the float, and TIME/TIMESTAMP as the result of {@code getTime()}.
     *
     * @param field Ordinal of the field
     * @return Long representation of the field in the current row
     * @throws IllegalArgumentException If the field is not one of the types listed above
     */
    @Override
    public long getLong(int field)
    {
        checkFieldType(field, BIGINT, DATE, INTEGER, REAL, SMALLINT, TIME, TIMESTAMP, TINYINT);
        Type type = getType(field);
        String name = fieldToColumnName[field];

        if (type.equals(BIGINT)) {
            return serializer.getLong(name);
        }
        else if (type.equals(DATE)) {
            return MILLISECONDS.toDays(serializer.getDate(name).getTime());
        }
        else if (type.equals(INTEGER)) {
            return serializer.getInt(name);
        }
        else if (type.equals(REAL)) {
            return Float.floatToIntBits(serializer.getFloat(name));
        }
        else if (type.equals(SMALLINT)) {
            return serializer.getShort(name);
        }
        else if (type.equals(TIME)) {
            return serializer.getTime(name).getTime();
        }
        else if (type.equals(TIMESTAMP)) {
            return serializer.getTimestamp(name).getTime();
        }
        else if (type.equals(TINYINT)) {
            return serializer.getByte(name);
        }
        else {
            // Defensive: checkFieldType above already rejects every other type
            throw new PrestoException(NOT_SUPPORTED, "Unsupported type " + type);
        }
    }

    /**
     * Gets the value of an array or map field.
     *
     * @param field Ordinal of the field
     * @return The array or map produced by the serializer for the current row
     * @throws IllegalArgumentException If the field is neither an array nor a map type
     */
    @Override
    public Object getObject(int field)
    {
        Type type = getType(field);
        checkArgument(Types.isArrayType(type) || Types.isMapType(type), "Expected field %s to be a type of array or map but is %s", field, type);

        if (Types.isArrayType(type)) {
            return serializer.getArray(fieldToColumnName[field], type);
        }

        return serializer.getMap(fieldToColumnName[field], type);
    }

    /**
     * Gets the value of a VARBINARY or VARCHAR field as a slice.
     *
     * @param field Ordinal of the field
     * @return Slice wrapping the bytes (VARBINARY) or the UTF-8 encoding of the string (VARCHAR)
     * @throws PrestoException If the field is of any other type
     */
    @Override
    public Slice getSlice(int field)
    {
        Type type = getType(field);
        if (type instanceof VarbinaryType) {
            return Slices.wrappedBuffer(serializer.getVarbinary(fieldToColumnName[field]));
        }
        else if (type instanceof VarcharType) {
            return Slices.utf8Slice(serializer.getVarchar(fieldToColumnName[field]));
        }
        else {
            throw new PrestoException(NOT_SUPPORTED, "Unsupported type " + type);
        }
    }

    /**
     * Closes the underlying scanner and stops the read clock.
     */
    @Override
    public void close()
    {
        try {
            scanner.close();
        }
        finally {
            // Stop the clock even if closing the scanner fails
            nanoEnd = System.nanoTime();
            readFinished = true;
        }
    }

    /**
     * Gets a Boolean value indicating whether or not the scanner should only return row IDs.
     * <p>
     * This can occur in cases such as SELECT COUNT(*) or the table only has one column.
     * Presto doesn't need the entire contents of the row to count them,
     * so we can configure Accumulo to only give us the first key/value pair in the row
     *
     * @param rowIdName Row ID column name
     * @return True if scanner should retrieve only row IDs, false otherwise
     */
    private boolean retrieveOnlyRowIds(String rowIdName)
    {
        return columnHandles.isEmpty() || (columnHandles.size() == 1 && columnHandles.get(0).getName().equals(rowIdName));
    }

    /**
     * Checks that the given field is one of the provided types.
     *
     * @param field Ordinal of the field
     * @param expected An array of expected types
     * @throws IllegalArgumentException If the given field does not match one of the types
     */
    private void checkFieldType(int field, Type... expected)
    {
        Type actual = getType(field);
        for (Type type : expected) {
            if (actual.equals(type)) {
                return;
            }
        }

        throw new IllegalArgumentException(format("Expected field %s to be a type of %s but is %s", field, StringUtils.join(expected, ","), actual));
    }
}