/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.accumulo.io;

import com.facebook.presto.accumulo.Types;
import com.facebook.presto.accumulo.model.AccumuloColumnConstraint;
import com.facebook.presto.accumulo.model.AccumuloColumnHandle;
import com.facebook.presto.accumulo.serializers.AccumuloRowSerializer;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.RecordCursor;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.spi.type.VarbinaryType;
import com.facebook.presto.spi.type.VarcharType;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.FirstEntryInRowIterator;
import org.apache.accumulo.core.iterators.user.WholeRowIterator;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.Text;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;

import static com.facebook.presto.accumulo.AccumuloErrorCode.IO_ERROR;
import static com.facebook.presto.accumulo.io.AccumuloPageSink.ROW_ID_COLUMN;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.spi.type.BooleanType.BOOLEAN;
import static com.facebook.presto.spi.type.DateType.DATE;
import static com.facebook.presto.spi.type.DoubleType.DOUBLE;
import static com.facebook.presto.spi.type.IntegerType.INTEGER;
import static com.facebook.presto.spi.type.RealType.REAL;
import static com.facebook.presto.spi.type.SmallintType.SMALLINT;
import static com.facebook.presto.spi.type.TimeType.TIME;
import static com.facebook.presto.spi.type.TimestampType.TIMESTAMP;
import static com.facebook.presto.spi.type.TinyintType.TINYINT;
import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.TimeUnit.MILLISECONDS;

/**
 * Implementation of Presto RecordCursor, responsible for iterating over a Presto split,
 * reading rows of data and then implementing various methods to retrieve columns within each row.
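 * <p>
 * A rough usage sketch (not taken from Presto's engine internals; {@code cursor} is a
 * hypothetical instance of this class and field 0 is assumed to be a BIGINT column):
 * <pre>{@code
 * while (cursor.advanceNextPosition()) {
 *     if (!cursor.isNull(0)) {
 *         long value = cursor.getLong(0);
 *     }
 * }
 * cursor.close();
 * }</pre>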
 *
 * @see AccumuloRecordSet
 * @see AccumuloRecordSetProvider
 */
public class AccumuloRecordCursor
        implements RecordCursor
{
    private static final int WHOLE_ROW_ITERATOR_PRIORITY = Integer.MAX_VALUE;

    private final List<AccumuloColumnHandle> columnHandles;
    private final String[] fieldToColumnName;
    private final BatchScanner scanner;
    private final Iterator<Entry<Key, Value>> iterator;
    private final AccumuloRowSerializer serializer;

    private long bytesRead;
    private long nanoStart;
    private long nanoEnd;

    public AccumuloRecordCursor(
            AccumuloRowSerializer serializer,
            BatchScanner scanner,
            String rowIdName,
            List<AccumuloColumnHandle> columnHandles,
            List<AccumuloColumnConstraint> constraints)
    {
        this.columnHandles = requireNonNull(columnHandles, "columnHandles is null");
        this.scanner = requireNonNull(scanner, "scanner is null");
        this.serializer = requireNonNull(serializer, "serializer is null");
        this.serializer.setRowIdName(requireNonNull(rowIdName, "rowIdName is null"));

        requireNonNull(columnHandles, "columnHandles is null");
        requireNonNull(constraints, "constraints is null");

        if (retrieveOnlyRowIds(rowIdName)) {
            this.scanner.addScanIterator(new IteratorSetting(1, "firstentryiter", FirstEntryInRowIterator.class));

            fieldToColumnName = new String[1];
            fieldToColumnName[0] = rowIdName;

            // Set a flag on the serializer saying we are only going to be retrieving the row ID
            this.serializer.setRowOnly(true);
        }
        else {
            // Else, we will be scanning some more columns here
            this.serializer.setRowOnly(false);

            // Fetch the reserved row ID column
            this.scanner.fetchColumn(ROW_ID_COLUMN, ROW_ID_COLUMN);

            Text family = new Text();
            Text qualifier = new Text();

            // Create an array which maps the column ordinal to the name of the column
            fieldToColumnName = new String[columnHandles.size()];
            for (int i = 0; i < columnHandles.size(); ++i) {
                AccumuloColumnHandle columnHandle = columnHandles.get(i);
                fieldToColumnName[i] = columnHandle.getName();

                // Make sure to skip the row ID!
                if (!columnHandle.getName().equals(rowIdName)) {
                    // Set the mapping of presto column name to the family/qualifier
                    this.serializer.setMapping(columnHandle.getName(), columnHandle.getFamily().get(), columnHandle.getQualifier().get());

                    // Set our scanner to fetch this family/qualifier column
                    // This will help us prune which data we receive from Accumulo
                    family.set(columnHandle.getFamily().get());
                    qualifier.set(columnHandle.getQualifier().get());
                    this.scanner.fetchColumn(family, qualifier);
                }
            }
        }

        IteratorSetting setting = new IteratorSetting(WHOLE_ROW_ITERATOR_PRIORITY, WholeRowIterator.class);
        scanner.addScanIterator(setting);
        iterator = this.scanner.iterator();
    }

    @Override
    public long getTotalBytes()
    {
        return 0L; // unknown value
    }

    @Override
    public long getCompletedBytes()
    {
        return bytesRead;
    }

    @Override
    public long getReadTimeNanos()
    {
        return nanoStart > 0L ? (nanoEnd == 0 ? System.nanoTime() : nanoEnd) - nanoStart : 0L;
    }

    @Override
    public Type getType(int field)
    {
        checkArgument(field >= 0 && field < columnHandles.size(), "Invalid field index");
        return columnHandles.get(field).getType();
    }

    @Override
    public boolean advanceNextPosition()
    {
        if (nanoStart == 0) {
            nanoStart = System.nanoTime();
        }

        try {
            if (iterator.hasNext()) {
                serializer.reset();
                Entry<Key, Value> row = iterator.next();
                for (Entry<Key, Value> entry : WholeRowIterator.decodeRow(row.getKey(), row.getValue()).entrySet()) {
                    bytesRead += entry.getKey().getSize() + entry.getValue().getSize();
                    serializer.deserialize(entry);
                }
                return true;
            }
            else {
                return false;
            }
        }
        catch (IOException e) {
            throw new PrestoException(IO_ERROR, "Caught IO error from serializer on read", e);
        }
    }

    @Override
    public boolean isNull(int field)
    {
        checkArgument(field < columnHandles.size(), "Invalid field index");
        return serializer.isNull(fieldToColumnName[field]);
    }

    @Override
    public boolean getBoolean(int field)
    {
        checkFieldType(field, BOOLEAN);
        return serializer.getBoolean(fieldToColumnName[field]);
    }

    @Override
    public double getDouble(int field)
    {
        checkFieldType(field, DOUBLE);
        return serializer.getDouble(fieldToColumnName[field]);
    }
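
    /**
     * Gets the long value of the field. As the implementation below shows, non-BIGINT types are
     * converted to Presto's long-based representations: DATE becomes days since the epoch, REAL
     * becomes the raw bits of the float (a caller would recover it with
     * {@code Float.intBitsToFloat((int) value)}), TIME and TIMESTAMP become milliseconds, and
     * INTEGER, SMALLINT, and TINYINT are simply widened to long.
     *
     * @param field Ordinal of the field
     * @return Long value of the field
     */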
    @Override
    public long getLong(int field)
    {
        checkFieldType(field, BIGINT, DATE, INTEGER, REAL, SMALLINT, TIME, TIMESTAMP, TINYINT);
        Type type = getType(field);
        if (type.equals(BIGINT)) {
            return serializer.getLong(fieldToColumnName[field]);
        }
        else if (type.equals(DATE)) {
            return MILLISECONDS.toDays(serializer.getDate(fieldToColumnName[field]).getTime());
        }
        else if (type.equals(INTEGER)) {
            return serializer.getInt(fieldToColumnName[field]);
        }
        else if (type.equals(REAL)) {
            return Float.floatToIntBits(serializer.getFloat(fieldToColumnName[field]));
        }
        else if (type.equals(SMALLINT)) {
            return serializer.getShort(fieldToColumnName[field]);
        }
        else if (type.equals(TIME)) {
            return serializer.getTime(fieldToColumnName[field]).getTime();
        }
        else if (type.equals(TIMESTAMP)) {
            return serializer.getTimestamp(fieldToColumnName[field]).getTime();
        }
        else if (type.equals(TINYINT)) {
            return serializer.getByte(fieldToColumnName[field]);
        }
        else {
            throw new PrestoException(NOT_SUPPORTED, "Unsupported type " + getType(field));
        }
    }

    @Override
    public Object getObject(int field)
    {
        Type type = getType(field);
        checkArgument(Types.isArrayType(type) || Types.isMapType(type), "Expected field %s to be a type of array or map but is %s", field, type);

        if (Types.isArrayType(type)) {
            return serializer.getArray(fieldToColumnName[field], type);
        }

        return serializer.getMap(fieldToColumnName[field], type);
    }

    @Override
    public Slice getSlice(int field)
    {
        Type type = getType(field);
        if (type instanceof VarbinaryType) {
            return Slices.wrappedBuffer(serializer.getVarbinary(fieldToColumnName[field]));
        }
        else if (type instanceof VarcharType) {
            return Slices.utf8Slice(serializer.getVarchar(fieldToColumnName[field]));
        }
        else {
            throw new PrestoException(NOT_SUPPORTED, "Unsupported type " + type);
        }
    }

    @Override
    public void close()
    {
        scanner.close();
        nanoEnd = System.nanoTime();
    }

    /**
     * Gets a Boolean value indicating whether or not the scanner should only return row IDs.
     * <p>
     * This can occur in cases such as SELECT COUNT(*), or when the table has only one column.
     * Presto doesn't need the entire contents of the row to count them,
     * so we can configure Accumulo to only give us the first key/value pair in the row.
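     * <p>
     * For example, a plain {@code SELECT COUNT(*)} over the table, with no other columns
     * referenced, would take this path: only the first key/value pair of each row is scanned,
     * which is enough for the engine to count rows.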
     *
     * @param rowIdName Row ID column name
     * @return True if the scanner should retrieve only row IDs, false otherwise
     */
    private boolean retrieveOnlyRowIds(String rowIdName)
    {
        return columnHandles.isEmpty() || (columnHandles.size() == 1 && columnHandles.get(0).getName().equals(rowIdName));
    }

    /**
     * Checks that the given field is one of the provided types.
     *
     * @param field Ordinal of the field
     * @param expected An array of expected types
     * @throws IllegalArgumentException If the given field does not match one of the types
     */
    private void checkFieldType(int field, Type... expected)
    {
        Type actual = getType(field);
        for (Type type : expected) {
            if (actual.equals(type)) {
                return;
            }
        }

        throw new IllegalArgumentException(format("Expected field %s to be a type of %s but is %s", field, StringUtils.join(expected, ","), actual));
    }
}